├── .devcontainer └── devcontainer.json ├── .github └── workflows │ └── python-package.yml ├── .gitignore ├── .vscode ├── launch.json └── settings.json ├── Dockerfile ├── LICENSE ├── README.md ├── deepsecrets ├── __init__.py ├── __main__.py ├── cli.py ├── config.py ├── core │ ├── engines │ │ ├── __init__.py │ │ ├── hashed_secret.py │ │ ├── iengine.py │ │ ├── regex.py │ │ └── semantic.py │ ├── helpers │ │ ├── content_analyzer.py │ │ └── entropy.py │ ├── model │ │ ├── __init__.py │ │ ├── file.py │ │ ├── finding.py │ │ ├── rules │ │ │ ├── __init__.py │ │ │ ├── exlcuded_path.py │ │ │ ├── false_finding.py │ │ │ ├── hashed_secret.py │ │ │ ├── hashing.py │ │ │ ├── regex.py │ │ │ ├── rule.py │ │ │ └── semantic.py │ │ ├── semantic.py │ │ └── token.py │ ├── modes │ │ └── iscan_mode.py │ ├── rulesets │ │ ├── excluded_paths.py │ │ ├── false_findings.py │ │ ├── hashed_secrets.py │ │ ├── ibuilder.py │ │ └── regex.py │ ├── tokenizers │ │ ├── __init__.py │ │ ├── full_content.py │ │ ├── helpers │ │ │ ├── __init__.py │ │ │ ├── semantic │ │ │ │ ├── __init__.py │ │ │ │ ├── language.py │ │ │ │ └── var_detection │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── detector.py │ │ │ │ │ └── rules.py │ │ │ ├── spot_improvements.py │ │ │ └── type_stream.py │ │ ├── itokenizer.py │ │ ├── lexer.py │ │ ├── per_line.py │ │ └── per_word.py │ └── utils │ │ ├── __init__.py │ │ ├── cpu.py │ │ ├── exceptions.py │ │ ├── file_analyzer.py │ │ ├── fs.py │ │ ├── guess_filetype.py │ │ ├── hashing.py │ │ └── lexer_finder.py ├── rules │ ├── excluded_paths.json │ └── regexes.json └── scan_modes │ └── cli.py ├── poetry.lock ├── pyproject.toml ├── setup.cfg └── tests ├── __init__.py ├── cli └── test_cli.py ├── config └── test_config.py ├── core ├── engines │ ├── hashed_secret │ │ └── test_hs.py │ ├── regex │ │ └── test_regex.py │ └── semantic │ │ └── test_semantic.py ├── helpers │ ├── test_content_analyzer.py │ └── test_entropy.py ├── model │ ├── test_file.py │ ├── test_finding.py │ └── test_token.py ├── tokenizers │ ├── lexer │ │ └── variable_detection │ │ │ ├── test_conf.py │ │ │ ├── test_cs.py │ │ │ ├── test_go.py │ │ │ ├── test_html.py │ │ │ ├── test_java.py │ │ │ ├── test_js.py │ │ │ ├── test_php.py │ │ │ ├── test_py.py │ │ │ ├── test_sh.py │ │ │ └── test_swift.py │ ├── test_full_content.py │ └── test_per_line.py └── utils │ ├── test_file_analyzer.py │ ├── test_fs.py │ └── test_lexer_finder.py ├── fixtures ├── 1.conf ├── 1.cs ├── 1.erb ├── 1.go ├── 1.html ├── 1.ini ├── 1.java ├── 1.js ├── 1.json ├── 1.jsx ├── 1.php ├── 1.pp ├── 1.py ├── 1.sh ├── 1.swift ├── 1.toml ├── 1.xml ├── 1.yaml ├── 1.yml ├── 2.go ├── 2.js ├── 2.json ├── 2.jsx ├── 2.py ├── 2.sh ├── 2.toml ├── 2.xml ├── 3.go ├── 3.js ├── 3.jsx ├── 3.py ├── 4.go ├── 4.js ├── 4.py ├── 5.go ├── 6.go ├── 7.go ├── excluded_paths.json ├── extless │ ├── json │ └── radius ├── false_findings.json ├── hashed_secrets.json ├── regex_checks.txt ├── regexes.json └── service.postman_collection.json └── scan_modes └── test_cli_scan_mode.py /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "DeepSecrets Devcontainer", 3 | "build": { 4 | "context": "..", 5 | "dockerfile": "../Dockerfile" 6 | }, 7 | 8 | "workspaceMount": "source=${localWorkspaceFolder},target=/app,type=bind,consistency=delegated", 9 | "workspaceFolder": "/app", 10 | "customizations": { 11 | "vscode": { 12 | "extensions": [ 13 | "ms-python.python", 14 | "ms-python.vscode-pylance" 15 | ] 16 | } 17 | }, 18 | "postCreateCommand": "poetry install --no-root --with 
test,dev" 19 | } 20 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | name: Build and run tests 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | steps: 13 | 14 | - name: Checkout (GitHub) 15 | uses: actions/checkout@v3 16 | 17 | - name: Build and run dev container task 18 | uses: devcontainers/ci@v0.3 19 | with: 20 | runCmd: | 21 | pytest --junitxml=pytest.xml --cov-report=term-missing:skip-covered --cov=deepsecrets 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | *.egg-info/ 21 | .installed.cfg 22 | *.egg 23 | 24 | # Installer logs 25 | pip-log.txt 26 | pip-delete-this-directory.txt 27 | 28 | # Unit test / coverage reports 29 | .pytest_cache/ 30 | htmlcov/ 31 | .tox/ 32 | .coverage 33 | .coverage.* 34 | .cache 35 | nosetests.xml 36 | coverage.xml 37 | *,cover 38 | .hypothesis/ 39 | 40 | # dotenv 41 | .env 42 | 43 | # virtualenv 44 | venv/ 45 | 46 | # intellij ide 47 | .idea/ 48 | 49 | # auto generated docs 50 | doc.html 51 | 52 | # misc 53 | /workdir 54 | *.zip 55 | playground/ 56 | .mypy* 57 | *.db 58 | pytest* -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | 5 | { 6 | "name": "Python: Current File", 7 | "type": "python", 8 | "request": "launch", 9 | "program": "${file}", 10 | "console": "integratedTerminal", 11 | "justMyCode": true, 12 | "env": { 13 | "PYTHONPATH": "/app/deepsecrets/" 14 | } 15 | }, 16 | { 17 | "name": "Python: Module", 18 | "type": "python", 19 | "request": "launch", 20 | "module": "deepsecrets", 21 | "args": [ 22 | "--outfile", 23 | "test.json", 24 | "--target-dir", 25 | "/app/tests/fixtures/", 26 | "--outfile", 27 | "./fdsafad.json", 28 | "--verbose", 29 | "--reflect-findings-in-return-code" 30 | ], 31 | "justMyCode": true 32 | }, 33 | { 34 | "name": "Python: File", 35 | "type": "python", 36 | "request": "launch", 37 | "program": "${file}", 38 | "justMyCode": true 39 | } 40 | ] 41 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.testing.unittestEnabled": false, 3 | "python.testing.pytestEnabled": true, 4 | "python.testing.pytestArgs": [ 5 | "tests" 6 | ], 7 | "python.formatting.provider": "black", 8 | "editor.codeActionsOnSave": { 9 | "source.organizeImports": true, 10 | "source.unusedImports": true 11 | }, 12 | "python.linting.enabled": true, 13 | "python.linting.mypyEnabled": true, 14 | "python.linting.flake8Enabled": true 15 | } -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.4-slim-bullseye 2 | 3 | ENV 
PROJECT_ROOT /app 4 | WORKDIR $PROJECT_ROOT 5 | 6 | RUN apt update && apt install -y gcc g++ 7 | RUN pip install poetry 8 | 9 | COPY pyproject.toml $PROJECT_ROOT/ 10 | COPY *.lock $PROJECT_ROOT/ 11 | 12 | RUN poetry config virtualenvs.create false 13 | RUN poetry update && poetry install --no-root 14 | 15 | COPY . $PROJECT_ROOT 16 | 17 | ENV PATH="$PROJECT_ROOT/bin:$PATH" 18 | ENV PYTHONPATH="$PROJECT_ROOT:$PYTHONPATH" -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Avito 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepSecrets - a better tool for secret scanning 2 | 3 | > [!WARNING] 4 | > Active development was switched to the creator's fork at https://github.com/ntoskernel/deepsecrets. This repository will not receive any updates. 
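For context, the scanner is driven by the `DeepSecretsCliTool` class in `deepsecrets/cli.py` below. A minimal sketch of a programmatic invocation, assuming the package is installed and using only the two required arguments (`--target-dir` and `--outfile`, with the target path shown as a placeholder), could look like this:

```python
from deepsecrets.cli import DeepSecretsCliTool

# The first list element plays the role of argv[0]; parse_arguments() reads self.args[1:].
# --target-dir and --outfile are both required by the argument parser;
# regex, semantic and excluded-path checks default to the built-in rulesets.
DeepSecretsCliTool([
    'deepsecrets',
    '--target-dir', '/path/to/code',   # placeholder path
    '--outfile', 'report.json',
]).start()
```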
-------------------------------------------------------------------------------- /deepsecrets/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import platform 4 | 5 | MODULE_NAME = 'deepsecrets' 6 | 7 | 8 | def build_logger() -> logging.Logger: 9 | logging.basicConfig(format=' %(message)s', level=logging.INFO) 10 | logger = logging.getLogger(MODULE_NAME) 11 | return logger 12 | 13 | 14 | logger = build_logger() 15 | 16 | 17 | def set_logging_level(level: int) -> None: 18 | logger.setLevel(level) 19 | for handler in logger.handlers: 20 | if isinstance(handler, type(logging.StreamHandler())): 21 | handler.setLevel(level) 22 | handler.setFormatter(logging.Formatter('DS-%(levelname)s: %(message)s')) 23 | 24 | logger.debug('Debug logging enabled') 25 | 26 | 27 | PROFILER_ON = False 28 | BASE_DIR = os.getcwd() 29 | 30 | PLATFORM = platform.system() -------------------------------------------------------------------------------- /deepsecrets/__main__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from deepsecrets import logger 3 | 4 | message = \ 5 | '\n'\ 6 | '=================== REPOSITORY MOVED ===================\n' \ 7 | 'Active development was switched to the creator\'s fork at\n' \ 8 | ' https://github.com/ntoskernel/deepsecrets\n\n' \ 9 | ' This repository will no longer receive updates \n'\ 10 | '=========================================================\n' 11 | 12 | 13 | logger.error(message) 14 | 15 | sys.exit() 16 | -------------------------------------------------------------------------------- /deepsecrets/cli.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import sys 5 | from argparse import RawTextHelpFormatter 6 | from typing import List 7 | 8 | from deepsecrets import MODULE_NAME, logger, set_logging_level 9 | from deepsecrets.config import Config, Output 10 | from deepsecrets.core.engines.regex import RegexEngine 11 | from deepsecrets.core.engines.semantic import SemanticEngine 12 | from deepsecrets.core.model.finding import Finding, FindingResponse 13 | from deepsecrets.core.rulesets.false_findings import FalseFindingsBuilder 14 | from deepsecrets.core.rulesets.hashed_secrets import HashedSecretsRulesetBuilder 15 | from deepsecrets.core.rulesets.regex import RegexRulesetBuilder 16 | from deepsecrets.core.utils.fs import get_abspath, get_path_inside_package 17 | from deepsecrets.scan_modes.cli import CliScanMode 18 | 19 | DISABLED = 'disabled' 20 | FINDINGS_DETECTED_RETURN_CODE = 66 21 | 22 | 23 | class DeepSecretsCliTool: 24 | argparser: argparse.ArgumentParser 25 | 26 | def __init__(self, args: List[str]): 27 | self.args = args 28 | self._build_argparser() 29 | 30 | def say_hello(self) -> None: 31 | bar = '-' 32 | logger.info('') 33 | logger.info(f'{" "*8}{bar*25} DeepSecrets {bar*25}') 34 | logger.info(f'{" "*10}A better tool for secret scanning') 35 | logger.info(f'{" "*10}version 1.1') 36 | logger.info(f'') 37 | logger.info(f'{" "*8}{bar*63}') 38 | 39 | 40 | def _build_argparser(self) -> None: 41 | parser = argparse.ArgumentParser( 42 | prog=MODULE_NAME, 43 | description='DeepSecrets - a better tool for secrets search', 44 | formatter_class=RawTextHelpFormatter, 45 | ) 46 | 47 | parser.add_argument( 48 | '--target-dir', 49 | required=True, 50 | type=str, 51 | help="Path to the directory with code you'd like to analyze", 52 | ) 53 | 54 | 
parser.add_argument( 55 | '--regex-rules', 56 | nargs='*', 57 | type=str, 58 | help='Paths to your Regex Rulesets.\n' 59 | "- Set 'disable' to turn off regex checks\n" 60 | '- Ignore this argument to use the built-in ruleset.\n' 61 | "- Using your own rulesets disables the default one. Add 'built-in' to the args list to enable it\n" 62 | 'eq. --regex-rules built-in /root/my_regex_rules.json\n', 63 | default=['built-in'], 64 | ) 65 | 66 | parser.add_argument( 67 | '--hashed-values', 68 | nargs='*', 69 | type=str, 70 | help='Path to your Hashed Values set.\n' "Don't set any value to disable this checks\n", 71 | ) 72 | 73 | parser.add_argument( 74 | '--semantic-analysis', 75 | nargs='*', 76 | type=str, 77 | help='Controls semantic checks (enabled by default)\n' 78 | "- Set 'disable' to turn off semantic checks (not recommended)\n" 79 | 'eq. --semantic-analysis disable', 80 | default=['built-in'], 81 | ) 82 | 83 | parser.add_argument( 84 | '--excluded-paths', 85 | nargs='*', 86 | type=str, 87 | help='Paths to your Excluded Paths file.\n' 88 | "- Set 'disable' to scan everything (may affect performance)\n" 89 | '- Ignore this argument to use the built-in ruleset.\n' 90 | "- Using your own rulesets disables the default one. Add 'built-in' to the args list to enable it\n" 91 | 'eq. --excluded-paths built-in /root/my_excluded_paths.json\n', 92 | default=['built-in'], 93 | ) 94 | 95 | parser.add_argument( 96 | '--false-findings', 97 | nargs='*', 98 | type=str, 99 | help='Paths to your False Findings file.\n' 100 | 'Use to filter findings you sure are false positives\n' 101 | 'File syntax is the same as in regex rules\n' 102 | 'eq. --false-findings /root/my_false_findings.json\n', 103 | ) 104 | 105 | parser.add_argument( 106 | '-v', 107 | '--verbose', 108 | action='store_true', 109 | help='Verbose mode', 110 | ) 111 | 112 | parser.add_argument( 113 | '--reflect-findings-in-return-code', 114 | action='store_true', 115 | help='Return code of 66 if any findings are detected during scan', 116 | ) 117 | 118 | parser.add_argument( 119 | '--process-count', 120 | type=int, 121 | default=0, 122 | help='Number of processes in a pool for file analysis (one process per file)\n' 123 | 'Default: number of processor cores of your machine or cpu limit of your container from cgroup.\n' 124 | 'If all checks are failed the fallback value is 4' 125 | ) 126 | 127 | parser.add_argument('--outfile', required=True, type=str) 128 | parser.add_argument('--outformat', default='json', type=str, choices=['json']) 129 | self.argparser = parser 130 | 131 | def parse_arguments(self) -> None: 132 | user_args = self.argparser.parse_args(args=self.args[1:]) 133 | self.say_hello() 134 | 135 | if user_args.verbose: 136 | set_logging_level(logging.DEBUG) 137 | 138 | self.config = Config() 139 | self.config.set_workdir(user_args.target_dir) 140 | self.config.set_process_count(user_args.process_count) 141 | self.config.output = Output(type=user_args.outformat, path=user_args.outfile) 142 | 143 | if user_args.reflect_findings_in_return_code: 144 | self.config.return_code_if_findings = True 145 | 146 | EXCLUDE_PATHS_BUILTIN = get_path_inside_package('rules/excluded_paths.json') 147 | if user_args.excluded_paths is not None: 148 | rules = [rule.replace('built-in', EXCLUDE_PATHS_BUILTIN) for rule in user_args.excluded_paths] 149 | self.config.set_global_exclusion_paths(rules) 150 | 151 | self.config.engines = [] 152 | 153 | REGEX_BUILTIN_RULESET = get_path_inside_package('rules/regexes.json') 154 | if user_args.regex_rules is not None: 155 | 
rules = [rule.replace('built-in', REGEX_BUILTIN_RULESET) for rule in user_args.regex_rules] 156 | self.config.engines.append(RegexEngine) 157 | self.config.add_ruleset(RegexRulesetBuilder, rules) 158 | 159 | conf_semantic_analysis = user_args.semantic_analysis 160 | if conf_semantic_analysis is not None and conf_semantic_analysis != DISABLED: 161 | self.config.engines.append(SemanticEngine) 162 | 163 | conf_hashed_ruleset = user_args.hashed_values 164 | if conf_hashed_ruleset is not None and conf_hashed_ruleset != DISABLED: 165 | self.config.engines.append(RegexEngine) 166 | self.config.add_ruleset(HashedSecretsRulesetBuilder, conf_hashed_ruleset) 167 | 168 | conf_false_findings_ruleset = user_args.false_findings 169 | if conf_false_findings_ruleset is not None: 170 | self.config.add_ruleset(FalseFindingsBuilder, conf_false_findings_ruleset) 171 | 172 | def start(self) -> None: # pragma: nocover 173 | try: 174 | self.parse_arguments() 175 | except Exception as e: 176 | logger.exception(e) 177 | sys.exit(1) 178 | 179 | logger.info(f'Starting scan against {self.config.workdir_path} using {self.config.process_count} processes...') 180 | if self.config.return_code_if_findings is True: 181 | logger.info(f'[!] The tool will return code of {FINDINGS_DETECTED_RETURN_CODE} if any findings are detected\n') 182 | 183 | logger.info(80 * '=') 184 | findings: List[Finding] = CliScanMode(config=self.config).run() 185 | logger.info(80 * '=') 186 | logger.info('Scanning finished') 187 | logger.info(f'{len(findings)} potential secrets found') 188 | report_path = get_abspath(self.config.output.path) 189 | 190 | logger.info(f'Writing report to {report_path}') 191 | with open(report_path, 'w+') as f: 192 | json.dump(FindingResponse.from_list(findings), f) 193 | 194 | logger.info('Done') 195 | 196 | if len(findings) > 0 and self.config.return_code_if_findings: 197 | sys.exit(FINDINGS_DETECTED_RETURN_CODE) -------------------------------------------------------------------------------- /deepsecrets/config.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Type 2 | 3 | from pydantic import BaseModel 4 | from deepsecrets.core.utils.cpu import CpuHelper 5 | 6 | from deepsecrets.core.utils.exceptions import FileNotFoundException 7 | from deepsecrets.core.utils.fs import get_abspath, path_exists 8 | 9 | FALLBACK_PROCESS_COUNT = 4 10 | 11 | class Output(BaseModel): 12 | type: str 13 | path: str 14 | 15 | 16 | class Config: 17 | workdir_path: str 18 | engines: List[Type] = [] 19 | rulesets: Dict[Type, List[str]] = {} 20 | global_exclusion_paths: List[str] = [] 21 | output: Output 22 | process_count: int 23 | return_code_if_findings: bool 24 | 25 | def __init__(self) -> None: 26 | self.engines = [] 27 | self.rulesets = {} 28 | self.global_exclusion_paths = [] 29 | self.return_code_if_findings = False 30 | # equals to CPU count 31 | self.process_count = FALLBACK_PROCESS_COUNT 32 | 33 | def _set_path(self, path: str, field: str) -> None: 34 | if not path_exists(path): 35 | raise FileNotFoundException(f'{field} path does not exist ({path})') 36 | setattr(self, field, get_abspath(path)) 37 | 38 | def set_workdir(self, path: str) -> None: 39 | self._set_path(path, 'workdir_path') 40 | 41 | def set_process_count(self, count: int): 42 | if count > 0: 43 | self.process_count = count 44 | return 45 | 46 | count = CpuHelper().get_limit() 47 | if count > 0: 48 | self.process_count = count 49 | return 50 | 51 | self.process_count = FALLBACK_PROCESS_COUNT 52 | 53 | 
54 | def set_global_exclusion_paths(self, paths: List[str]) -> None: 55 | for path in paths: 56 | if not path_exists(path): 57 | raise FileNotFoundException(f'global_exclusion_path does not exist ({path})') 58 | self.global_exclusion_paths.append(path) 59 | 60 | self.global_exclusion_paths = list(set(self.global_exclusion_paths)) 61 | 62 | def add_ruleset(self, type: Type, paths: List[str] = []) -> None: 63 | self._validate_paths(paths) 64 | self.rulesets[type] = [get_abspath(path) for path in paths] 65 | 66 | def _validate_paths(self, paths: List[str]) -> None: 67 | if paths is None: 68 | return 69 | 70 | for path in paths: 71 | if path_exists(path): 72 | continue 73 | raise FileNotFoundException(f'File {path} does not exist') 74 | 75 | return 76 | -------------------------------------------------------------------------------- /deepsecrets/core/engines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avito-tech/deepsecrets/4afd597d3997a2bdbac8059e405659715faa51d4/deepsecrets/core/engines/__init__.py -------------------------------------------------------------------------------- /deepsecrets/core/engines/hashed_secret.py: -------------------------------------------------------------------------------- 1 | from typing import List, Sequence 2 | 3 | from deepsecrets.core.engines.iengine import IEngine 4 | from deepsecrets.core.model.finding import Finding 5 | from deepsecrets.core.model.rules.hashed_secret import HashedSecretRule 6 | from deepsecrets.core.model.token import Token 7 | 8 | 9 | class HashedSecretEngine(IEngine): 10 | name = 'hashed' 11 | description = 'Scans by regex patterns provided by HashedSecretRules' 12 | ruleset: Sequence[HashedSecretRule] 13 | 14 | def search(self, token: Token) -> List[Finding]: 15 | results = [] 16 | for rule in self.ruleset: 17 | if not self.is_rule_applicable(token=token, rule=rule): 18 | continue 19 | 20 | token.calculate_hashed_value(rule.algorithm) 21 | results.extend(self._check_rule(token, rule)) 22 | 23 | return results 24 | 25 | def is_rule_applicable(self, token: Token, rule: HashedSecretRule) -> bool: 26 | if rule.token_length != token.length: 27 | return False 28 | return super().is_rule_applicable(token=token, rule=rule) 29 | 30 | def _check_rule(self, token: Token, rule: HashedSecretRule) -> List[Finding]: 31 | findings: List[Finding] = [] 32 | 33 | if token.hashed_value != rule.hashed_val: 34 | return findings 35 | 36 | findings.append( 37 | Finding( 38 | rules=[rule], 39 | detection=token.content, 40 | start_pos=0, 41 | end_pos=token.length, 42 | file=None, # filled higher 43 | final_rule=None, # filled higher, 44 | full_line=None, # filled higher 45 | linum=None, # filled higher 46 | ) 47 | ) 48 | 49 | return findings 50 | -------------------------------------------------------------------------------- /deepsecrets/core/engines/iengine.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from typing import List 3 | 4 | from deepsecrets.core.model.finding import Finding 5 | from deepsecrets.core.model.rules.rule import Rule 6 | from deepsecrets.core.model.token import Token 7 | 8 | 9 | class IEngine: 10 | name: str 11 | ruleset: List[Rule] 12 | 13 | def __init__(self, ruleset: List = []) -> None: 14 | self.ruleset = ruleset 15 | 16 | @abstractmethod 17 | def search(self, token: Token) -> List[Finding]: 18 | pass 19 | 20 | def is_rule_applicable(self, token: Token, rule: Rule) -> bool: 21 
| file_path = token.file.path 22 | if len(rule.applicable_file_patterns) == 0: 23 | return True 24 | 25 | for file_pattern in rule.applicable_file_patterns: 26 | matches = file_pattern.search(file_path) 27 | if matches is not None: 28 | return True 29 | 30 | return False 31 | 32 | def is_token_false_positive(self, token: Token) -> bool: 33 | for false_token in self.false_tokens: 34 | if len(false_token.match(token.content)) > 0: 35 | return True 36 | return False 37 | 38 | def __hash__(self) -> int: # pragma: nocover 39 | return hash(type(self)) 40 | 41 | def __repr__(self) -> str: # pragma: no cover 42 | return self.__class__.__name__ 43 | -------------------------------------------------------------------------------- /deepsecrets/core/engines/regex.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | 3 | from deepsecrets.core.engines.iengine import IEngine 4 | from deepsecrets.core.model.finding import Finding 5 | from deepsecrets.core.model.rules.regex import RegexRule 6 | from deepsecrets.core.model.token import Token 7 | 8 | 9 | class RegexEngine(IEngine): 10 | name = 'regex' 11 | description = 'Scans by regex patterns provided by RegexRules' 12 | 13 | def search(self, token: Token) -> List[Finding]: 14 | results = [] 15 | 16 | for rule in self.ruleset: 17 | if not self.is_rule_applicable(token=token, rule=rule): 18 | continue 19 | 20 | results.extend(self._check_rule(token, rule)) # type: ignore 21 | return results 22 | 23 | def _check_rule(self, token: Token, rule: RegexRule) -> List[Finding]: 24 | findings: List[Finding] = [] 25 | 26 | # rule.match returns an array of (start, end) spans 27 | detects: List[Tuple[int, int]] = rule.match(token) 28 | 29 | for start, end in detects: 30 | findings.append( 31 | Finding( 32 | rules=[rule], 33 | detection=token.content[start:end], 34 | start_pos=start, 35 | end_pos=end, 36 | ) 37 | ) 38 | 39 | return findings -------------------------------------------------------------------------------- /deepsecrets/core/engines/semantic.py: -------------------------------------------------------------------------------- 1 | import regex as re 2 | from typing import List 3 | 4 | from deepsecrets import logger 5 | from deepsecrets.core.engines.iengine import IEngine 6 | from deepsecrets.core.helpers.content_analyzer import ContentAnalyzer 7 | from deepsecrets.core.helpers.entropy import EntropyHelper 8 | from deepsecrets.core.model.finding import Finding 9 | from deepsecrets.core.model.rules.rule import Rule 10 | from deepsecrets.core.model.token import Token 11 | 12 | filenames_ignorelist = [ 13 | 'package-lock.json', 14 | 'package.json', 15 | ] 16 | 17 | false_starting_sequences = [ 18 | '${', 19 | 'true', 20 | '%env', 21 | ] 22 | 23 | useless_values = [ 24 | 'null', 25 | 'bearer', 26 | 'restore_password', 27 | ] 28 | 29 | var_name_showstoppers = [ 30 | 'public', 31 | 'path', 32 | 'location', 33 | 'field', 34 | 'data' 35 | ] 36 | 37 | 38 | class SemanticEngine(IEngine): 39 | name = 'semantic' 40 | entropy_threshold = 4.15 41 | dangerous_variable_regex = re.compile( 42 | r'(secret|passw|\bpass\b|\btoken\b|\baccess\b|\bpwd\b|rivateke|cesstoke|authkey|\bsecret\b|\bkey\b).{0,15}', 43 | re.IGNORECASE, 44 | ) 45 | useless_value_regex = re.compile(r'^[^A-Za-z0-9]*$|^%.*%$|^\[.*\]$|^{.*}$', re.IGNORECASE) 46 | subengine: IEngine 47 | 48 | def __init__(self, subengine: IEngine, **kwargs) -> None: 49 | super().__init__(**kwargs) 50 | self.subengine = subengine 51 | 52 | # token is a STRING 
with potential 'semantic' extension 53 | def search(self, token: Token) -> List[Finding]: 54 | findings: List[Finding] = [] 55 | 56 | if token.length == token.file.length: 57 | return findings 58 | 59 | for fname in filenames_ignorelist: 60 | if fname in token.file.path: 61 | return findings 62 | 63 | if token.semantic is not None and token.semantic.creds_probability == 9: 64 | findings.append( 65 | Finding( 66 | detection=token.content, 67 | start_pos=0, 68 | end_pos=len(token.content), 69 | rules=[Rule(id='S107', name='Dangerous condition', confidence=9)], 70 | ) 71 | ) 72 | 73 | try: 74 | dangerous_variable = self._if_dangerous_variable(token) 75 | 76 | if self.subengine is not None: # pragma: nocover 77 | content_findings = ContentAnalyzer(self.subengine).analyze(token) 78 | if content_findings is not None: 79 | findings.extend(content_findings) 80 | 81 | if not dangerous_variable: 82 | return findings 83 | 84 | if len(token.content) == 1: 85 | return findings 86 | 87 | if len(token.content.split(' ')) > 1: 88 | return findings 89 | 90 | if token.content in useless_values: 91 | return findings 92 | 93 | if len(re.findall(self.useless_value_regex, token.content)) > 0: 94 | return findings 95 | 96 | entropy = EntropyHelper.get_for_string(token.content) 97 | if self._is_high_entropy(entropy): 98 | findings.append( 99 | Finding( 100 | detection=token.content, 101 | start_pos=0, 102 | end_pos=len(token.content), 103 | rules=[Rule(id='S105', name='Entropy+Var naming', confidence=-1)], 104 | ) 105 | ) 106 | else: 107 | for fss in false_starting_sequences: 108 | if token.content.startswith(fss): 109 | return findings 110 | 111 | findings.append( 112 | Finding( 113 | detection=token.content, 114 | start_pos=0, 115 | end_pos=len(token.content), 116 | rules=[Rule(id='S106', name='Var naming', confidence=-1)], 117 | ) 118 | ) 119 | 120 | except Exception as e: 121 | logger.error('Problem during Entropy check on token') 122 | 123 | return findings 124 | 125 | def _is_high_entropy(self, entropy: float) -> bool: 126 | return True if entropy > self.entropy_threshold else False 127 | 128 | def _if_dangerous_variable(self, token: Token) -> bool: 129 | if token.semantic is None: 130 | return False 131 | 132 | if token.semantic.creds_probability == 9: 133 | return True 134 | 135 | cleaned_up_varname, name_parts = self.normalize_punctuation(token.semantic.name) 136 | badvar = self.dangerous_variable_regex.findall(cleaned_up_varname) 137 | if len(badvar) == 0: 138 | return False 139 | 140 | if any(part in var_name_showstoppers for part in name_parts): 141 | return False 142 | 143 | return True 144 | 145 | def normalize_punctuation(self, string: str): 146 | normalized = string.replace(' ', '_').replace('-', ' ').replace('_', ' ') 147 | parts = self.__camel_case_divide(normalized).split(' ') 148 | return normalized.lower(), parts 149 | 150 | def __camel_case_divide(self, string: str): 151 | final = '' 152 | for i, _ in enumerate(string): 153 | final += string[i].lower() 154 | if i == len(string) - 1: 155 | continue 156 | 157 | if string[i].islower() and string[i+1].isupper(): 158 | final += ' ' 159 | return final -------------------------------------------------------------------------------- /deepsecrets/core/helpers/content_analyzer.py: -------------------------------------------------------------------------------- 1 | import base64 2 | from typing import Callable, List, Optional 3 | 4 | from deepsecrets.core.engines.iengine import IEngine 5 | from deepsecrets.core.model.finding import Finding 6 | from 
deepsecrets.core.model.token import Token 7 | 8 | 9 | class ContentAnalyzer: 10 | engine: IEngine 11 | flags: dict[str, bool] 12 | token: Token 13 | uncover_tactics: List[Callable] 14 | 15 | def __init__(self, engine: IEngine) -> None: 16 | self.engine = engine 17 | self.uncover_tactics = [self._check_by_base64] 18 | 19 | def analyze(self, token: Token) -> List[Finding]: 20 | self.token = token 21 | self.uncover() 22 | return self.engine.search(self.token) if self.engine is not None else [] 23 | 24 | def uncover(self) -> None: 25 | for tactic in self.uncover_tactics: 26 | uncovered_str = tactic() 27 | if uncovered_str is None: 28 | continue 29 | 30 | if len(uncovered_str) < 5: 31 | continue 32 | 33 | self.token.uncovered_content.append(uncovered_str) 34 | 35 | def _check_by_base64(self) -> Optional[str]: 36 | try: 37 | return base64.b64decode(self.token.content).decode('UTF-8') 38 | except Exception: 39 | return None 40 | -------------------------------------------------------------------------------- /deepsecrets/core/helpers/entropy.py: -------------------------------------------------------------------------------- 1 | import math 2 | import regex as re 3 | from enum import Enum 4 | from typing import Optional 5 | 6 | 7 | class IteratorType(Enum): 8 | BASE64 = 'base64' 9 | HEX = 'hex' 10 | 11 | 12 | class EntropyHelper: 13 | B64_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=' 14 | B64_REGEX = re.compile(r'[A-Za-z0123456789+/=]{20,}') 15 | 16 | HEX_CHARS = '1234567890abcdefABCDEF' 17 | HEX_REGEX = re.compile(r'[A-Fa-f0123456789]{20,}') 18 | 19 | @classmethod 20 | def get_for_string(cls, str: str, with_iterator: Optional[str] = None) -> float: 21 | iterator = None 22 | i_type = None 23 | if with_iterator is not None: 24 | i_type = IteratorType(with_iterator) 25 | 26 | if i_type == IteratorType.BASE64: 27 | iterator = cls.B64_CHARS 28 | 29 | if i_type == IteratorType.HEX: 30 | iterator = cls.HEX_CHARS 31 | 32 | return cls._shannon_entropy(str, iterator) 33 | 34 | @classmethod 35 | def _shannon_entropy(cls, data: str, iterator: Optional[str] = None) -> float: 36 | """ 37 | Borrowed from http://blog.dkbza.org/2007/05/scanning-data-for-entropy-anomalies.html 38 | """ 39 | if not data: 40 | return 0 41 | entropy = 0 42 | if iterator: 43 | for x in iterator: 44 | p_x = float(data.count(x)) / len(data) 45 | if p_x > 0: 46 | entropy += -p_x * math.log(p_x, 2) 47 | return entropy 48 | 49 | unique_base = set(data) 50 | M = len(data) 51 | entropy_list = [] 52 | # Number of residues in column 53 | for base in unique_base: 54 | n_i = data.count(base) # Number of residues of type i 55 | P_i = n_i / float(M) # n_i(Number of residues of type i) / M(Number of residues in column) 56 | entropy_i = P_i * (math.log(P_i, 2)) 57 | entropy_list.append(entropy_i) 58 | 59 | entropy = -(sum(entropy_list)) 60 | 61 | return entropy 62 | -------------------------------------------------------------------------------- /deepsecrets/core/model/__init__.py: -------------------------------------------------------------------------------- 1 | from deepsecrets.core.model.finding import Finding 2 | 3 | Finding.update_forward_refs() 4 | -------------------------------------------------------------------------------- /deepsecrets/core/model/file.py: -------------------------------------------------------------------------------- 1 | import regex as re 2 | from typing import Dict, Optional, Tuple 3 | 4 | from deepsecrets import logger 5 | from deepsecrets.core.utils.fs import get_abspath 6 | 7 | 8 | 
class File: 9 | relative_path: str 10 | path: str 11 | content: str = '' 12 | length: int 13 | line_offsets: Dict[int, Tuple[int, int]] = {} 14 | line_contents_cache: Dict[int, str] = {} 15 | empty: bool 16 | name: str 17 | extension: Optional[str] 18 | 19 | def __init__( 20 | self, 21 | path: str, 22 | relative_path: Optional[str] = None, 23 | content: Optional[str] = None, 24 | offsets: Optional[Dict] = None, 25 | ) -> None: 26 | self.line_offsets = {} 27 | self.line_contents_cache = {} 28 | 29 | if path is not None: 30 | self.path = get_abspath(path) 31 | 32 | self.relative_path = relative_path if relative_path is not None else self.path 33 | 34 | if content is not None: 35 | self.content = content 36 | else: 37 | try: 38 | self.content = self._get_contents() 39 | except Exception as e: 40 | logger.error('Error during fetching file contents') 41 | 42 | self.length = len(self.content) 43 | 44 | self.name = self._get_name() 45 | self.extension = self._get_extension() 46 | self.empty = True if self.length == 0 else False 47 | 48 | if offsets is not None: 49 | self.line_offsets = offsets 50 | 51 | if not self.empty and len(self.line_offsets) == 0: 52 | self._calc_offsets() 53 | 54 | def _get_name(self) -> str: 55 | by_slash = self.path.split('/') 56 | return by_slash[-1].split('.')[0] 57 | 58 | 59 | def _get_extension(self) -> Optional[str]: 60 | by_dot = self.path.split('.') 61 | if len(by_dot) == 1: 62 | return None 63 | 64 | return by_dot[-1] 65 | 66 | def _calc_offsets(self) -> None: 67 | line_breaks = [i.start() for i in re.finditer('\n', self.content)] 68 | for i, lb in enumerate(line_breaks): 69 | start = line_breaks[i - 1] + 1 if i > 0 else 0 70 | self.line_offsets[i + 1] = (start, lb) 71 | 72 | if len(self.line_offsets) == 0 and self.length > 0: 73 | self.line_offsets[1] = (0, self.length) 74 | 75 | def _get_contents(self) -> str: 76 | with open(self.path) as f: 77 | raw = f.read() 78 | if raw[-1] != '\n': 79 | raw += '\n' 80 | return raw 81 | 82 | def get_line_number(self, position: int) -> Optional[int]: 83 | return self._get_line_number_for_position(position=position) 84 | 85 | def _get_line_number_for_position(self, position: int) -> Optional[int]: 86 | for linum, offsets in self.line_offsets.items(): 87 | if offsets[1] < position: 88 | continue 89 | return linum 90 | return None 91 | 92 | def get_line_contents(self, line_number: int) -> Optional[str]: 93 | if line_number is None: 94 | return 95 | 96 | if line_number not in self.line_contents_cache: 97 | self.line_contents_cache[line_number] = self.content[ 98 | self.line_offsets[line_number][0]:self.line_offsets[line_number][1] 99 | ] 100 | return self.line_contents_cache[line_number] 101 | 102 | def get_full_line_for_position(self, span_end: int) -> Optional[str]: 103 | linum = self._get_line_number_for_position(span_end) 104 | if linum is None: 105 | return None 106 | 107 | return self.get_line_contents(linum) 108 | 109 | def get_span_for_string(self, str: str, between: Optional[Tuple[int, int]] = None) -> Optional[Tuple[int, int]]: 110 | if between is None: 111 | between = (0, self.length) 112 | 113 | search_window = self.content[between[0] : between[1]] 114 | 115 | pattern = re.escape(str) 116 | pattern = pattern.replace('\\\n', '\n').replace('\\\t', '\t') 117 | detects = re.finditer(pattern, search_window) 118 | for detect in detects: 119 | span = detect.span() 120 | return (between[0] + span[0], between[0] + span[1]) 121 | return None 122 | 123 | def __repr__(self) -> str: # pragma: no cover 124 | return self.path 125 | 
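A minimal usage sketch of the `File` model above, assuming content is passed in-memory so nothing is read from disk; it shows how character positions map back to line numbers and line contents:

```python
from deepsecrets.core.model.file import File

# Line offsets are precomputed in _calc_offsets(); positions are resolved
# through get_line_number() / get_full_line_for_position().
f = File(path='example.py', content='token = "abc"\nprint(token)\n')

assert f.get_line_number(5) == 1                            # inside line 1
assert f.get_full_line_for_position(20) == 'print(token)'   # text of line 2
```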
-------------------------------------------------------------------------------- /deepsecrets/core/model/finding.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from hashlib import sha256 4 | from typing import Any, Dict, List, Optional 5 | 6 | from pydantic import BaseModel, Field, PrivateAttr 7 | 8 | from deepsecrets.core.model.file import File 9 | from deepsecrets.core.model.rules.rule import Rule 10 | 11 | 12 | class Finding(BaseModel): 13 | file: Optional['File'] = Field(default=None) 14 | rules: List[Rule] = Field(default=[]) 15 | detection: str 16 | full_line: Optional[str] = Field(default=None) 17 | linum: Optional[int] = Field(default=None) 18 | start_pos: int 19 | end_pos: int 20 | reason: str = Field(default='') 21 | final_rule: Optional[Rule] = Field(default=None) 22 | _mapped_on_file: bool = PrivateAttr(default=False) 23 | 24 | def map_on_file(self, relative_start: int, file: Optional['File'] = None) -> None: 25 | if self._mapped_on_file: 26 | return 27 | 28 | if file is None and self.file is None: 29 | raise Exception('No file to match on') 30 | if self.file is None: 31 | self.file = file 32 | 33 | self.start_pos += relative_start 34 | self.end_pos += relative_start 35 | self.linum = self.file.get_line_number(self.end_pos) 36 | if not self.full_line: 37 | self.full_line = self.file.get_line_contents(self.linum) 38 | self._mapped_on_file = True 39 | 40 | def get_reason(self) -> str: 41 | if self.final_rule is None: 42 | self.choose_final_rule() 43 | 44 | return f'{self.final_rule.name} | {self.get_fingerprint()}' # type: ignore 45 | 46 | def get_fingerprint(self) -> str: 47 | return sha256(self.detection.encode('utf-8')).hexdigest()[23:33] 48 | 49 | class Config: 50 | arbitrary_types_allowed = True 51 | 52 | def choose_final_rule(self) -> None: 53 | self.final_rule = sorted( 54 | self.rules, key=lambda r: r.confidence, 55 | reverse=True 56 | )[0] 57 | 58 | def __hash__(self) -> int: # pragma: nocover 59 | if not self.file: 60 | raise Exception() 61 | 62 | return hash(f'{self.file.path}{self.detection}{self.start_pos}{self.end_pos}') 63 | 64 | def __eq__(self, other: Any) -> bool: 65 | if not isinstance(other, Finding): 66 | return False 67 | 68 | if other.file and self.file: 69 | if other.file.path != self.file.path: 70 | return False 71 | 72 | if other.detection and self.detection: 73 | if other.detection != self.detection: 74 | return False 75 | 76 | if other.start_pos and self.start_pos: 77 | if other.start_pos != self.start_pos: 78 | return False 79 | 80 | if other.end_pos and self.end_pos: 81 | if other.end_pos != self.end_pos: 82 | return False 83 | 84 | return True 85 | 86 | def merge(self, other: Any) -> bool: 87 | if not isinstance(other, Finding): 88 | return False 89 | 90 | if other != self: 91 | return False 92 | 93 | self.rules.extend(other.rules) 94 | self.rules = list(set(self.rules)) 95 | 96 | return True 97 | 98 | 99 | class FindingMerger: 100 | all: List[Finding] 101 | 102 | def __init__(self, full_list: List[Finding]) -> None: 103 | self.all = full_list 104 | 105 | def merge(self) -> List[Finding]: 106 | interm_dict: Dict[int, Finding] = {} 107 | 108 | for elem in self.all: 109 | hash = elem.__hash__() 110 | if hash not in interm_dict: 111 | interm_dict[hash] = elem 112 | 113 | interm_dict[hash].merge(elem) 114 | 115 | return list(interm_dict.values()) 116 | 117 | 118 | class FindingResponse: 119 | @classmethod 120 | def from_list(cls, list: List[Finding]) -> Dict[str, 
List[Dict]]: 121 | resp: Dict[str, List[Dict]] = {} 122 | for finding in list: 123 | if finding.file is None: 124 | continue 125 | 126 | if finding.file.path not in resp: 127 | resp[finding.file.path] = [] 128 | 129 | resp[finding.file.path].append(FindingApiModel.from_finding(finding).dict()) 130 | return resp 131 | 132 | 133 | class FindingApiModel(BaseModel): 134 | line: str 135 | string: str 136 | line_number: int 137 | rule: str 138 | reason: str 139 | confidence: int 140 | fingerprint: str 141 | 142 | @classmethod 143 | def from_finding(cls, finding: Finding) -> FindingApiModel: 144 | finding.choose_final_rule() 145 | return FindingApiModel( 146 | line=finding.full_line, 147 | string=finding.detection, 148 | line_number=finding.linum, 149 | rule=finding.final_rule.id, 150 | reason=finding.get_reason(), 151 | confidence=finding.final_rule.confidence, 152 | fingerprint=finding.get_fingerprint(), 153 | ) 154 | -------------------------------------------------------------------------------- /deepsecrets/core/model/rules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avito-tech/deepsecrets/4afd597d3997a2bdbac8059e405659715faa51d4/deepsecrets/core/model/rules/__init__.py -------------------------------------------------------------------------------- /deepsecrets/core/model/rules/exlcuded_path.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from pydantic import BaseModel 3 | from deepsecrets.core.model.rules.regex import RegexRuleWithoutId 4 | 5 | 6 | class ExcludePathRule(RegexRuleWithoutId): 7 | disabled: bool = False 8 | 9 | 10 | class ExcludePatternsList(BaseModel): 11 | __root__: List[ExcludePathRule] 12 | -------------------------------------------------------------------------------- /deepsecrets/core/model/rules/false_finding.py: -------------------------------------------------------------------------------- 1 | from deepsecrets.core.model.rules.regex import RegexRuleWithoutId 2 | 3 | 4 | class FalseFindingRule(RegexRuleWithoutId): 5 | pass 6 | -------------------------------------------------------------------------------- /deepsecrets/core/model/rules/hashed_secret.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | import mmh3 4 | from pydantic import root_validator 5 | from deepsecrets.core.model.rules.hashing import HashingAlgorithm 6 | 7 | from deepsecrets.core.model.rules.rule import Rule 8 | 9 | 10 | class HashedSecretRule(Rule): 11 | hashed_val: str 12 | token_length: int 13 | algorithm: HashingAlgorithm 14 | 15 | def __eq__(self, other: Any) -> bool: 16 | if not isinstance(other, HashedSecretRule): 17 | return False 18 | 19 | if other.hashed_val == self.hashed_val: 20 | return True 21 | 22 | if other.id == self.id: 23 | return True 24 | 25 | return False 26 | 27 | def __hash__(self) -> int: # pragma: nocover 28 | return hash(self.hashed_val) 29 | 30 | @root_validator(pre=True) 31 | def fill_id(cls, values: Dict) -> Dict: 32 | hashed_val = values.get('hashed_val', None) 33 | if hashed_val is None: 34 | return values 35 | 36 | algorithm = values.get('algorithm', None) 37 | 38 | if algorithm is None: 39 | values['algorithm'] = HashingAlgorithm.SHA_512 40 | else: 41 | try: 42 | values['algorithm'] = HashingAlgorithm(algorithm) 43 | except: 44 | raise Exception(f'Unsupported hashing algorithm: {algorithm}') 45 | 46 | 47 | if values.get('id', None) is None: 
48 | int_hash = abs(mmh3.hash(hashed_val)) 49 | first_3 = str(int_hash)[:3] 50 | last_2 = str(int_hash)[-2:] 51 | values['id'] = f'S{first_3}{last_2}' 52 | 53 | return values 54 | -------------------------------------------------------------------------------- /deepsecrets/core/model/rules/hashing.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class HashingAlgorithm(Enum): 5 | SHA_1 = 'sha1' 6 | SHA_256 = 'sha256' 7 | SHA_512 = 'sha512' 8 | -------------------------------------------------------------------------------- /deepsecrets/core/model/rules/regex.py: -------------------------------------------------------------------------------- 1 | import regex as re 2 | from typing import Dict, ForwardRef, List, Optional, Union 3 | 4 | from pydantic import Field, root_validator 5 | 6 | from deepsecrets.core.helpers.entropy import EntropyHelper 7 | from deepsecrets.core.model.rules.rule import Rule 8 | from deepsecrets.core.model.token import Token 9 | 10 | RegexRule = ForwardRef('RegexRule') 11 | 12 | 13 | class RegexRule(Rule): # type: ignore 14 | pattern: re.Pattern 15 | match_rules: Optional[Dict[int, RegexRule]] = Field(default={}) # type: ignore 16 | target_group: int = Field(default=0) 17 | entropy_settings: Optional[float] = Field(default=None) 18 | escaping_needed: bool = False 19 | 20 | class Config: 21 | arbitrary_types_allowed = True 22 | json_encoders = { 23 | re.Pattern: lambda v: v.pattern, 24 | } 25 | 26 | @root_validator(pre=True) 27 | def build_pattern(cls, values: Dict) -> Dict: 28 | pattern_str = values.get('pattern', None) 29 | if pattern_str is not None and isinstance(pattern_str, str): 30 | escaping_needed = values.get('escaping_needed', False) 31 | if escaping_needed: 32 | pattern_str = re.escape(pattern_str) 33 | 34 | values['pattern'] = re.compile(pattern_str, re.IGNORECASE) 35 | 36 | match_rules = values.get('match_rules', {}) 37 | for _, match_rule in match_rules.items(): 38 | match_rule['id'] = '' 39 | match_rule['confidence'] = 9 40 | 41 | return values 42 | 43 | def __hash__(self) -> int: # pragma: nocover 44 | return hash(self.id) 45 | 46 | def match(self, token: Union[Token, str]) -> List[re.Match]: 47 | good_matches = [] 48 | contents = [] 49 | contents.append(token.content if isinstance(token, Token) else token) 50 | contents.extend(token.uncovered_content if isinstance(token, Token) else []) 51 | 52 | for i, content in enumerate(contents): 53 | matches = re.finditer(self.pattern, content) 54 | 55 | for match in matches: 56 | if not self._verify(match): 57 | continue 58 | 59 | good_matches.append(match.span(self.target_group) if i == 0 else (0, len(contents[0]))) 60 | 61 | return good_matches 62 | 63 | def _verify(self, match: re.Match) -> bool: 64 | match_ok = True 65 | entropy_ok = True 66 | 67 | if self.match_rules is not None: 68 | for group_i, match_rule in self.match_rules.items(): 69 | span = match.span(group_i) 70 | window = match.string[span[0] : span[1]] 71 | if not match_rule.match(window): # type: ignore 72 | match_ok = False 73 | return False 74 | 75 | if self.entropy_settings is not None: 76 | span = match.span(self.target_group) 77 | str_to_check = match.string[span[0] : span[1]] 78 | ent = EntropyHelper.get_for_string(str_to_check) 79 | if ent < self.entropy_settings: 80 | entropy_ok = False 81 | 82 | return match_ok and entropy_ok 83 | 84 | 85 | RegexRule.update_forward_refs() # type: ignore 86 | 87 | 88 | class RegexRuleWithoutId(RegexRule): 89 | id: Optional[str] = 
Field(default=None) 90 | -------------------------------------------------------------------------------- /deepsecrets/core/model/rules/rule.py: -------------------------------------------------------------------------------- 1 | import regex as re 2 | from typing import Dict, List, Optional 3 | 4 | from pydantic import BaseModel, Field, root_validator 5 | 6 | 7 | class Rule(BaseModel): 8 | id: str 9 | name: Optional[str] 10 | confidence: int = Field(default=9) 11 | applicable_file_patterns: List[re.Pattern] = Field(default=[]) 12 | 13 | @root_validator(pre=True) 14 | def fill_confidence(cls, values: Dict) -> Dict: 15 | file_patterns = values.get('applicable_file_patterns', []) 16 | if len(file_patterns) > 0: 17 | pattеrns = [re.compile(p) for p in file_patterns] 18 | values['applicable_file_patterns'] = pattеrns 19 | 20 | if values.get('confidence', None) is None and values.get('id') is not None: 21 | values['confidence'] = 9 22 | 23 | return values 24 | 25 | def __hash__(self) -> int: # pragma: nocover 26 | return hash(self.id) 27 | 28 | class Config: 29 | arbitrary_types_allowed = True 30 | -------------------------------------------------------------------------------- /deepsecrets/core/model/rules/semantic.py: -------------------------------------------------------------------------------- 1 | from deepsecrets.core.model.rules.rule import Rule 2 | 3 | 4 | class SemanticRule(Rule): 5 | pass 6 | -------------------------------------------------------------------------------- /deepsecrets/core/model/semantic.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from deepsecrets.core.model.token import Token 3 | 4 | 5 | class Variable: 6 | name: Token 7 | value: Token 8 | span: List[int] 9 | found_by: 'VaribleDetector' 10 | 11 | 12 | from deepsecrets.core.tokenizers.helpers.semantic.var_detection.detector import ( 13 | VaribleDetector, 14 | ) 15 | -------------------------------------------------------------------------------- /deepsecrets/core/model/token.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from enum import Enum 4 | from typing import List, Optional, Type 5 | 6 | from deepsecrets.core.model.file import File 7 | from deepsecrets.core.model.rules.hashing import HashingAlgorithm 8 | from deepsecrets.core.utils.hashing import get_hash 9 | 10 | 11 | class SemanticType(Enum): 12 | VAR = 1 13 | 14 | 15 | class Semantic: 16 | type: SemanticType 17 | name: str 18 | creds_probability: int 19 | 20 | def __init__(self, type: SemanticType, name: str, creds_probability: int = 0) -> None: 21 | self.type = type 22 | self.name = name 23 | self.creds_probability = creds_probability 24 | 25 | 26 | class Token: 27 | content: str 28 | uncovered_content: List[str] 29 | span: List[int] 30 | file: 'File' 31 | type: List[Type] 32 | length: int 33 | hashed_value: Optional[str] 34 | semantic: Optional[Semantic] 35 | previous: Optional['Token'] 36 | next: Optional['Token'] 37 | 38 | def __init__(self, file: File, content: Optional[str] = None, span: Optional[List[int]] = None) -> None: 39 | self.file = file 40 | self.content = content 41 | self.span = span 42 | self.length = len(content) if self.content else 0 43 | self.hashed_value = None 44 | self.previous = None 45 | self.next = None 46 | self.type: List[Type] = [] # type: ignore 47 | self.semantic = None 48 | self.uncovered_content = [] 49 | 50 | def set_type(self, type: List[Type]) -> None: 51 | 
self.type = type # type: ignore 52 | 53 | def val_hash(self) -> int: 54 | return hash(self.content) 55 | 56 | def calculate_hashed_value(self, algorithm: HashingAlgorithm) -> None: 57 | if self.hashed_value: 58 | return 59 | 60 | self.hashed_value = get_hash(payload=self.content, algorithm=algorithm) 61 | 62 | def __repr__(self) -> str: # pragma: no cover 63 | if self.semantic is None and self.type is not None: 64 | return f'{self.content} | {self.type[0]}\n' 65 | 66 | out = f'======== VAR: {self.semantic.name} = {self.content}' # type: ignore 67 | if self.type is not None: 68 | out += f' | {self.type[0]}\n' 69 | 70 | return out 71 | -------------------------------------------------------------------------------- /deepsecrets/core/modes/iscan_mode.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from multiprocessing import get_context 3 | import os 4 | from abc import abstractmethod, abstractstaticmethod 5 | from datetime import datetime 6 | from functools import partial 7 | from multiprocessing.pool import Pool 8 | from typing import Any, Callable, List, Optional, Type 9 | import regex as re 10 | 11 | from dotwiz import DotWiz 12 | 13 | from deepsecrets import PLATFORM, PROFILER_ON, logger 14 | from deepsecrets.config import Config 15 | from deepsecrets.core.model.finding import Finding, FindingMerger 16 | from deepsecrets.core.model.rules.exlcuded_path import ExcludePathRule 17 | from deepsecrets.core.rulesets.excluded_paths import ExcludedPathsBuilder 18 | from deepsecrets.core.rulesets.false_findings import FalseFindingsBuilder 19 | from deepsecrets.core.utils.file_analyzer import FileAnalyzer 20 | from deepsecrets.core.utils.fs import get_abspath 21 | 22 | 23 | class ScanMode: 24 | config: Config 25 | filepaths: List[str] 26 | path_exclusion_rules: List[ExcludePathRule] = [] 27 | file_analyzer: FileAnalyzer 28 | pool_engine: Type 29 | 30 | def __init__(self, config: Config, pool_engine: Optional[Any] = None) -> None: 31 | if pool_engine is None: 32 | if PLATFORM == 'Darwin': 33 | self.pool_engine = get_context('fork').Pool 34 | else: 35 | self.pool_engine = Pool 36 | else: 37 | self.pool_engine = pool_engine 38 | 39 | self.config = config 40 | self.filepaths = self._get_files_list() 41 | self.prepare_for_scan() 42 | 43 | def _get_process_count_for_runner(self) -> int: 44 | limit = self.config.process_count 45 | 46 | file_count = len(self.filepaths) 47 | if file_count == 0: 48 | return 0 49 | return limit if file_count >= limit else file_count 50 | 51 | def run(self) -> List[Finding]: 52 | final: List[Finding] = [] 53 | 54 | bundle = self.analyzer_bundle() 55 | proc_count = self._get_process_count_for_runner() 56 | if proc_count == 0: 57 | return final 58 | 59 | if PROFILER_ON: 60 | for file in self.filepaths: 61 | final.extend(self._per_file_analyzer(file=file, bundle=bundle)) 62 | else: 63 | with self.pool_engine(processes=proc_count) as pool: 64 | per_file_findings: List[List[Finding]] = pool.map( 65 | partial(pool_wrapper, bundle, self._per_file_analyzer), 66 | self.filepaths, 67 | ) # type: ignore 68 | 69 | for file_findings in list(per_file_findings): 70 | if not file_findings: 71 | continue 72 | final.extend(file_findings) 73 | 74 | fin = FindingMerger(final).merge() 75 | fin = self.filter_false_positives(fin) 76 | return fin 77 | 78 | def _get_files_list(self) -> List[str]: 79 | flist = [] 80 | if not self.path_exclusion_rules: 81 | excl_paths_builder = ExcludedPathsBuilder() 82 | for path in 
self.config.global_exclusion_paths: 83 | excl_paths_builder.with_rules_from_file(path) 84 | 85 | self.path_exclusion_rules = excl_paths_builder.rules 86 | 87 | for fpath, _, files in os.walk(get_abspath(self.config.workdir_path)): 88 | for filename in files: 89 | full_path = os.path.join(fpath, filename) 90 | rel_path = full_path.replace(f'{self.config.workdir_path}/', '') 91 | if not self._path_included(rel_path): 92 | continue 93 | 94 | flist.append(full_path) 95 | 96 | return flist 97 | 98 | def _path_included(self, path: str) -> bool: 99 | if self.path_exclusion_rules is None or len(self.path_exclusion_rules) == 0: 100 | return True 101 | 102 | if any(excl_rule.match(path) for excl_rule in self.path_exclusion_rules): 103 | return False 104 | return True 105 | 106 | @abstractmethod 107 | def prepare_for_scan(self) -> None: 108 | pass 109 | 110 | def analyzer_bundle(self) -> DotWiz: 111 | return DotWiz( 112 | workdir=self.config.workdir_path, 113 | path_exclusion_rules=self.path_exclusion_rules, 114 | engines={} 115 | ) 116 | 117 | @abstractstaticmethod 118 | def _per_file_analyzer(bundle, file: Any) -> List[Finding]: # type: ignore 119 | pass 120 | 121 | def filter_false_positives(self, results: List[Finding]) -> List[Finding]: 122 | false_finding_rules = self.rulesets.get(FalseFindingsBuilder.ruleset_name) 123 | if false_finding_rules is None: 124 | return results 125 | 126 | 127 | final: List[Finding] = [] 128 | for result in results: 129 | good_result = True 130 | for false_pattern in false_finding_rules: 131 | if re.match(false_pattern.pattern, result.detection) is not None: 132 | good_result = False 133 | break 134 | if not good_result: 135 | continue 136 | 137 | final.append(result) 138 | 139 | return final 140 | 141 | 142 | def pool_wrapper(bundle: DotWiz, runner: Callable, file: str) -> List[Finding]: # pragma: nocover 143 | start_ts = datetime.now() 144 | result = runner(bundle, file) 145 | if logger.level == logging.DEBUG: 146 | logger.debug( 147 | f' ✓ [{file}] {(datetime.now() - start_ts).total_seconds()}s elapsed \t {len(result)} potential findings' 148 | ) 149 | else: 150 | logger.info(f' ✓ [{file}] \t {len(result)} potential findings') 151 | return result 152 | -------------------------------------------------------------------------------- /deepsecrets/core/rulesets/excluded_paths.py: -------------------------------------------------------------------------------- 1 | from deepsecrets.core.model.rules.exlcuded_path import ExcludePathRule 2 | from deepsecrets.core.rulesets.ibuilder import IRulesetBuilder 3 | 4 | 5 | class ExcludedPathsBuilder(IRulesetBuilder): 6 | rule_model = ExcludePathRule 7 | ruleset_name = 'excluded_paths' 8 | -------------------------------------------------------------------------------- /deepsecrets/core/rulesets/false_findings.py: -------------------------------------------------------------------------------- 1 | from deepsecrets.core.model.rules.false_finding import FalseFindingRule 2 | from deepsecrets.core.rulesets.ibuilder import IRulesetBuilder 3 | 4 | 5 | class FalseFindingsBuilder(IRulesetBuilder): 6 | rule_model = FalseFindingRule 7 | ruleset_name = 'false_findings' 8 | -------------------------------------------------------------------------------- /deepsecrets/core/rulesets/hashed_secrets.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tarfile 3 | from os.path import exists 4 | 5 | from deepsecrets.core.model.rules.hashed_secret import HashedSecretRule 6 | from 
deepsecrets.core.rulesets.ibuilder import IRulesetBuilder 7 | 8 | 9 | class HashedSecretsRulesetBuilder(IRulesetBuilder): 10 | rule_model = HashedSecretRule 11 | ruleset_name = 'hashed' 12 | 13 | def with_rules_from_file(self, file: str, compressed: bool = False) -> object: 14 | rules_raw = None 15 | true_file = file 16 | if compressed: 17 | if not exists(file): 18 | return 19 | 20 | with tarfile.open(file, 'r:gz') as tar: 21 | true_file = tar.extractfile('secrets').read() 22 | 23 | with open(true_file) as sec: 24 | rules_raw = json.load(sec) 25 | 26 | rules_set = set() 27 | for secret in rules_raw: 28 | path = secret.get('path') 29 | if path is not None: 30 | path = '/'.join(path.split('/')[1:3]) 31 | if 'non-prod' in path: 32 | continue 33 | 34 | rules_set.add( 35 | HashedSecretRule( 36 | id=None, # calculated inside the constructor # type: ignore 37 | name=f'{path}:{secret["name"]}', 38 | hashed_val=secret['hash'], 39 | algorithm=secret['algorithm'], 40 | token_length=secret['length'], 41 | confidence=9, 42 | ) 43 | ) 44 | 45 | self.rules = list(rules_set) 46 | return self 47 | -------------------------------------------------------------------------------- /deepsecrets/core/rulesets/ibuilder.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import List, Type 3 | 4 | from deepsecrets.core.model.rules.rule import Rule 5 | 6 | 7 | class IRulesetBuilder: 8 | rules: List[Rule] 9 | rule_model: Type 10 | ruleset_name = 'rules' 11 | 12 | def __init__(self) -> None: 13 | self.rules = [] 14 | 15 | def with_rules_from_file(self, file: str) -> object: 16 | rules_raw = None 17 | with open(file) as f: 18 | rules_raw = json.load(f) 19 | 20 | self.rules.extend([self.rule_model(**rule) for rule in rules_raw]) 21 | return self 22 | 23 | @property 24 | def high_confidence_rules(self) -> List[Rule]: 25 | return [rule for rule in self.rules if rule.confidence == 9] 26 | -------------------------------------------------------------------------------- /deepsecrets/core/rulesets/regex.py: -------------------------------------------------------------------------------- 1 | from deepsecrets.core.model.rules.regex import RegexRule 2 | from deepsecrets.core.rulesets.ibuilder import IRulesetBuilder 3 | 4 | 5 | class RegexRulesetBuilder(IRulesetBuilder): 6 | rule_model = RegexRule 7 | ruleset_name = 'regex' 8 | -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/__init__.py: -------------------------------------------------------------------------------- 1 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 2 | from deepsecrets.core.tokenizers.per_line import PerLineTokenizer 3 | 4 | fallback_ladder = {LexerTokenizer: PerLineTokenizer} 5 | -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/full_content.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from deepsecrets.core.tokenizers.itokenizer import Tokenizer 4 | from deepsecrets.core.model.file import File 5 | from deepsecrets.core.model.token import Token 6 | 7 | 8 | class FullContentTokenizer(Tokenizer): 9 | def tokenize(self, file: File) -> List[Token]: 10 | return [Token(file=file, content=file.content, span=[0, file.length])] 11 | -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/helpers/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/avito-tech/deepsecrets/4afd597d3997a2bdbac8059e405659715faa51d4/deepsecrets/core/tokenizers/helpers/__init__.py -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/helpers/semantic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avito-tech/deepsecrets/4afd597d3997a2bdbac8059e405659715faa51d4/deepsecrets/core/tokenizers/helpers/semantic/__init__.py -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/helpers/semantic/language.py: -------------------------------------------------------------------------------- 1 | from aenum import MultiValueEnum 2 | 3 | 4 | class Language(MultiValueEnum): 5 | PYTHON = 'py' 6 | GOLANG = 'go' 7 | PHP = 'php' 8 | JS = 'js','jsx' 9 | TOML = 'toml' 10 | JSON = 'json' 11 | YAML = 'yaml' 12 | INI = 'ini' 13 | PUPPET = 'pp' 14 | SHELL = 'sh' 15 | CSHARP = 'cs' 16 | JAVA = 'java' 17 | KOTLIN = 'kt' 18 | SWIFT = 'swift' 19 | 20 | ANY = 'any' 21 | UNKNOWN = 'unknown' 22 | 23 | @classmethod 24 | def from_text(cls, text: str) -> object: 25 | ext = text.split('.')[-1] 26 | return cls(ext) 27 | 28 | @classmethod 29 | def _missing_(cls, value: str) -> object: 30 | return Language.UNKNOWN 31 | -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/helpers/semantic/var_detection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avito-tech/deepsecrets/4afd597d3997a2bdbac8059e405659715faa51d4/deepsecrets/core/tokenizers/helpers/semantic/var_detection/__init__.py -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/helpers/semantic/var_detection/detector.py: -------------------------------------------------------------------------------- 1 | import regex as re 2 | from typing import Any, Dict, List, Optional 3 | 4 | from pydantic import BaseModel, Field, validator 5 | 6 | from deepsecrets.core.model.token import Token 7 | from deepsecrets.core.tokenizers.helpers.semantic.language import Language 8 | 9 | 10 | class Match(BaseModel): 11 | types: List[Any] = Field(default_factory=list) 12 | values: List[re.Pattern] = Field(default_factory=list) 13 | not_values: List[re.Pattern] = Field(default_factory=list) 14 | 15 | def check(self, tokens: List[Token]) -> bool: 16 | 17 | types_match = self._check_types(tokens) 18 | values_match = self._check_values(tokens) 19 | not_values_match = self._check_not_values(tokens) 20 | 21 | if not types_match: 22 | return False 23 | 24 | if not values_match: 25 | return False 26 | 27 | if not_values_match: 28 | return False 29 | 30 | return True 31 | 32 | 33 | 34 | def _check_types(self, tokens): 35 | if len(self.types) == 0: 36 | return True # should match any type 37 | 38 | for token in tokens: 39 | if token.type[0] in self.types: 40 | return True 41 | return False 42 | 43 | 44 | def _check_values(self, tokens): 45 | if len(self.values) == 0: 46 | return True # should match any value 47 | 48 | for token in tokens: 49 | for pattern in self.values: 50 | if re.match(pattern, token.content) is not None: 51 | return True 52 | return False 53 | 54 | 55 | def _check_not_values(self, tokens): 56 | if len(self.not_values) == 0: 57 | return False 58 | 59 | for token 
in tokens: 60 | for pattern in self.not_values: 61 | if re.match(pattern, token.content) is not None: 62 | return True 63 | return False 64 | 65 | 66 | 67 | 68 | 69 | 70 | @validator('values', 'not_values', pre=True) 71 | def regexify_values(cls, values: Dict) -> List[re.Pattern]: 72 | if values is None: 73 | return values 74 | 75 | if not isinstance(values, list): 76 | raise Exception('value must be an array') 77 | 78 | patterns = [] 79 | for val in values: 80 | if isinstance(val, re.Pattern): 81 | patterns.append(val) 82 | continue 83 | 84 | patterns.append(re.compile(re.escape(val), re.IGNORECASE)) 85 | 86 | return patterns 87 | 88 | class Config: 89 | arbitrary_types_allowed = True 90 | 91 | 92 | class VaribleDetector(BaseModel): 93 | language: Optional[Language] = None 94 | stream_pattern: re.Pattern 95 | match_rules: Dict[int, Match] 96 | match_semantics: Dict[int, str] 97 | creds_probability: int = 0 98 | 99 | class Config: 100 | arbitrary_types_allowed = True 101 | 102 | def match(self, tokens: List[Token], token_stream: str) -> List['Variable']: 103 | true_detections = [] 104 | 105 | for match in re.finditer(self.stream_pattern, token_stream, overlapped=True): 106 | if not self._verify(match, tokens): 107 | continue 108 | 109 | var = Variable() 110 | for i, name in self.match_semantics.items(): 111 | setattr(var, name, tokens[match.span(i)[0]]) 112 | var.found_by = self 113 | var.span = [match.span(0)[0], match.span(0)[1]] 114 | 115 | true_detections.append(var) 116 | 117 | return true_detections 118 | 119 | def _verify(self, match: re.Match, tokens: List[Token]) -> bool: 120 | for group_i, match_rule in self.match_rules.items(): 121 | span = match.span(group_i) 122 | window = tokens[span[0] : span[1]] 123 | 124 | if not match_rule.check(window): 125 | return False 126 | 127 | return True 128 | 129 | 130 | class VaribleSuppressor(VaribleDetector): 131 | 132 | def match(self, tokens: List[Token], token_stream: str) -> List['Variable']: 133 | detections = super().match(tokens, token_stream) 134 | spans = [] 135 | for detection in detections: 136 | spans.append(detection.span) 137 | 138 | return spans 139 | 140 | 141 | 142 | from deepsecrets.core.model.semantic import Variable 143 | -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/helpers/semantic/var_detection/rules.py: -------------------------------------------------------------------------------- 1 | import regex as re 2 | from typing import List 3 | 4 | from deepsecrets.core.tokenizers.helpers.semantic.language import Language 5 | from deepsecrets.core.tokenizers.helpers.semantic.var_detection.detector import Match, VaribleDetector, VaribleSuppressor 6 | from pygments.token import Token as PygmentsToken 7 | 8 | 9 | class VariableDetectionRules: 10 | rules = [ 11 | VaribleDetector( 12 | language=Language.PYTHON, 13 | stream_pattern=re.compile('(n)(o|p)(?:\n?)(L)(?:\n|p|\?)'), # noqa 14 | match_rules={2: Match(values=[ 15 | re.compile('^=$'), 16 | re.compile('^:$') 17 | ])}, 18 | match_semantics={1: 'name', 3: 'value'}, 19 | ), 20 | VaribleDetector( 21 | language=Language.PYTHON, 22 | stream_pattern=re.compile('(L)(p)(L)(?:p|\n)'), 23 | match_rules={2: Match(values=[':'])}, 24 | match_semantics={1: 'name', 3: 'value'}, 25 | ), 26 | VaribleDetector( 27 | language=Language.PYTHON, 28 | stream_pattern=re.compile('(L)(p)(o)(L)'), 29 | match_rules={2: Match(values=[']']), 3: Match(values=['='])}, 30 | match_semantics={1: 'name', 4: 'value'}, 31 | ), 32 | # GOLANG 33 | 
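        # The stream_pattern regexes in this list run against the single-character
        # "type stream" built by helpers/type_stream.py, one character per token:
        #   n = Name (incl. Tag/Constant/Attribute), v = Name.Variable,
        #   o = Operator, p = Punctuation (and backticks), L = Literal/String/Text,
        #   k = Keyword.Constant, u = a filtered-out token type, ? = unmapped type.
        # So '(n)(p)(L)' just below reads: a Name, then a Punctuation, then a Literal.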
VaribleDetector( 34 | language=Language.GOLANG, 35 | stream_pattern=re.compile('(n)(p)(L)(?:p|\n)?'), 36 | match_rules={2: Match(values=[':', '='])}, 37 | match_semantics={1: 'name', 3: 'value'}, 38 | ), 39 | VaribleDetector( 40 | language=Language.GOLANG, 41 | stream_pattern=re.compile('(n)(p)(L)(?:p|\n)?(L)(p)'), 42 | match_rules={ 43 | 1: Match(values=['Setenv', 'Getenv']), 44 | 2: Match(values=['(']), 45 | 5: Match(values=[')']), 46 | }, 47 | match_semantics={3: 'name', 4: 'value'}, 48 | ), 49 | 50 | VaribleDetector( 51 | language=Language.GOLANG, 52 | stream_pattern=re.compile('(n)(?:p|n|u){0,3}?(o).*(n)(p)(L)'), 53 | match_rules={ 54 | 2: Match(values=[':=']), 55 | 3: Match(not_values=['Getenv', 'Setenv', 'Format']), 56 | }, 57 | match_semantics={1: 'name', 5: 'value'}, 58 | ), 59 | 60 | VaribleDetector( 61 | language=Language.GOLANG, 62 | stream_pattern=re.compile('(n)(?:o|p){1,3}(\?|u)p(L)p'), # noqa 63 | match_rules={2: Match(values=['byte', 'string'])}, 64 | match_semantics={1: 'name', 3: 'value'}, 65 | ), 66 | # PHP 67 | VaribleDetector( 68 | language=Language.PHP, 69 | stream_pattern=re.compile('(n|v|L)(o)(L)'), 70 | match_rules={2: Match(values=['=', '=>'])}, 71 | match_semantics={1: 'name', 3: 'value'}, 72 | ), 73 | VaribleDetector( 74 | language=Language.PHP, 75 | stream_pattern=re.compile('(L)(o)(n)(p)Lp(L)p'), 76 | match_rules={ 77 | 2: Match(values=['=>']), 78 | 3: Match(values=['env']), 79 | 4: Match(values=['(']), 80 | }, 81 | match_semantics={1: 'name', 5: 'value'}, 82 | ), 83 | # CONFIGS AND FORMATS 84 | VaribleDetector( 85 | language=Language.TOML, 86 | stream_pattern=re.compile('(n)(o)(L)\n'), 87 | match_rules={2: Match(values=['='])}, 88 | match_semantics={1: 'name', 3: 'value'}, 89 | ), 90 | VaribleDetector( 91 | language=Language.YAML, 92 | stream_pattern=re.compile('(L)(p)(L)'), 93 | match_rules={2: Match(values=[':'])}, 94 | match_semantics={1: 'name', 3: 'value'}, 95 | ), 96 | VaribleDetector( 97 | language=Language.INI, 98 | stream_pattern=re.compile('(n)(o)(L)'), 99 | match_rules={2: Match(values=['='])}, 100 | match_semantics={1: 'name', 3: 'value'}, 101 | ), 102 | VaribleDetector( 103 | language=Language.PUPPET, 104 | stream_pattern=re.compile('(v|n)(o)(L)'), 105 | match_rules={2: Match(values=['=>', '='])}, 106 | match_semantics={1: 'name', 3: 'value'}, 107 | ), 108 | VaribleDetector( 109 | language=Language.ANY, 110 | stream_pattern=re.compile('(v|n)(p|o)(L)'), 111 | match_rules={ 112 | 2: Match(values=[ 113 | re.compile('^:$'), 114 | re.compile('^=$'), 115 | ])}, 116 | match_semantics={1: 'name', 3: 'value'}, 117 | ), 118 | VaribleDetector( 119 | language=Language.SHELL, 120 | stream_pattern=re.compile('(L)(L)(L)(L)'), 121 | match_rules={ 122 | 1: Match(values=[re.compile('^curl$')]), 123 | 2: Match(values=[re.compile('^-u$')]), 124 | 4: Match(not_values=[re.compile('^\\$')]), 125 | }, 126 | match_semantics={3: 'name', 4: 'value'}, 127 | creds_probability=9, 128 | ), 129 | 130 | VaribleDetector( 131 | language=Language.CSHARP, 132 | stream_pattern=re.compile('(n).{0,6}(u|L)p(L)(p)'), 133 | match_rules={ 134 | 1: Match(values=[re.compile('^KeyValuePair$')]), 135 | 4: Match(not_values=[re.compile('^}$')]), 136 | }, 137 | match_semantics={2: 'name', 3: 'value'}, 138 | ), 139 | 140 | VaribleDetector( 141 | language=Language.CSHARP, 142 | stream_pattern=re.compile('(p)(.)(p)(L)(p)'), 143 | match_rules={ 144 | 1: Match(values=[re.compile('^{$')]), 145 | 3: Match(values=[re.compile('^,$')]), 146 | 5: Match(values=[re.compile('^}$')]), 147 | }, 148 | 
match_semantics={2: 'name', 4: 'value'}, 149 | ), 150 | 151 | VaribleDetector( 152 | language=Language.JAVA, 153 | stream_pattern=re.compile('(n)(p)(.)(p)(L)'), 154 | match_rules={ 155 | 1: Match(values=[re.compile('^put$')]), 156 | 2: Match(values=[re.compile('^\\($')]), 157 | 4: Match(values=[re.compile('^,$')]), 158 | }, 159 | match_semantics={3: 'name', 5: 'value'}, 160 | ), 161 | 162 | ] 163 | 164 | @classmethod 165 | def for_language(cls, language: Language) -> List[VaribleDetector]: 166 | return list(filter(lambda x: x.language in [language, Language.ANY], cls.rules)) 167 | 168 | 169 | class VariableSuppressionRules(VariableDetectionRules): 170 | rules=[ 171 | VaribleSuppressor( 172 | language=Language.JS, 173 | stream_pattern=re.compile('(p)(n).+?(p)(u|L|\n)'), 174 | match_rules={ 175 | 1: Match(values=[ 176 | re.compile('^<$'), 177 | re.compile('^(}|{)$'), 178 | ]), 179 | 2: Match( 180 | types=[ 181 | PygmentsToken.Name.Tag, 182 | PygmentsToken.Name.Attribute 183 | ] 184 | ), 185 | 3: Match(values=[ 186 | re.compile('^>$'), 187 | re.compile('^(}|{)$'), 188 | ]), 189 | }, 190 | match_semantics={} 191 | ), 192 | 193 | 194 | VaribleSuppressor( 195 | language=Language.JS, 196 | stream_pattern=re.compile('(n)(o)L.{0,4}(?:u|\n)(n)(o)(?:L|u)'), 197 | match_rules={ 198 | 1: Match(values=[ 199 | re.compile('^key$'), 200 | ]), 201 | 2: Match(values=[ 202 | re.compile('^:$'), 203 | ]), 204 | 3: Match(values=[ 205 | re.compile('^value$'), 206 | ]), 207 | 4: Match(values=[ 208 | re.compile('^:$'), 209 | ]), 210 | }, 211 | match_semantics={} 212 | ), 213 | 214 | 215 | 216 | 217 | VaribleSuppressor( 218 | language=Language.SWIFT, 219 | stream_pattern=re.compile('(n)(p)(n)(p)L'), 220 | match_rules={ 221 | 1: Match(values=[ 222 | re.compile('^decode$'), 223 | re.compile('^decodeIfPresent$'), 224 | re.compile('^unbox$') 225 | ]), 226 | 2: Match(values=[re.compile('^\($')]), 227 | 3: Match(values=[re.compile('^(key|keyPath)$')]), 228 | 4: Match(values=[re.compile('^:$')]), 229 | }, 230 | match_semantics={} 231 | ), 232 | 233 | 234 | VaribleSuppressor( 235 | language=Language.GOLANG, 236 | stream_pattern=re.compile('(p)(n)(p)L(p)(n)(p).'), 237 | match_rules={ 238 | 1: Match(values=[ 239 | re.compile('^{$'), 240 | ]), 241 | 2: Match(values=[ 242 | re.compile('^key$', re.IGNORECASE), 243 | ]), 244 | 3: Match(values=[ 245 | re.compile('^:$'), 246 | ]), 247 | 4: Match(values=[ 248 | re.compile('^,$'), 249 | ]), 250 | 5: Match(values=[ 251 | re.compile('^value$', re.IGNORECASE), 252 | ]), 253 | 6: Match(values=[ 254 | re.compile('^:$'), 255 | ]), 256 | }, 257 | match_semantics={} 258 | ) 259 | 260 | 261 | ] -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/helpers/spot_improvements.py: -------------------------------------------------------------------------------- 1 | import regex as re 2 | from typing import Callable, List 3 | 4 | from pygments.token import Token as PygmentsToken 5 | 6 | from deepsecrets.core.model.token import Token 7 | from deepsecrets.core.tokenizers.helpers.semantic.language import Language 8 | from deepsecrets.core.tokenizers.helpers.type_stream import token_to_typestream_item 9 | 10 | 11 | class SpotImprovements: 12 | language: Language 13 | acc: dict[Language, List[Callable]] 14 | 15 | def __init__(self, lang: Language) -> None: 16 | self.language = lang 17 | self.acc = {Language.SHELL: [self._curl_argstring_breakdown]} 18 | 19 | def improve_token(self, so_far_tokens: List[Token], so_far_type_stream: str, current_token: 
Token) -> List[Token]: 20 | tokens = [] 21 | for improvement in self.acc.get(self.language, []): 22 | tokens.extend(improvement(so_far_tokens, so_far_type_stream, current_token)) 23 | 24 | if len(tokens) == 0: 25 | return [current_token] 26 | 27 | return tokens 28 | 29 | def _curl_argstring_breakdown( 30 | self, so_far_tokens: List[Token], so_far_type_stream: str, current_token: Token 31 | ) -> List[Token]: 32 | projected_typestream = so_far_type_stream + token_to_typestream_item(current_token) 33 | rule = {'pattern': re.compile('(L)(L)$'), 'checks': {1: re.compile('^-u$')}} 34 | match: re.Match = rule['pattern'].search(projected_typestream) 35 | if not match: 36 | return [current_token] 37 | 38 | for group_i, pattern in rule['checks'].items(): 39 | span = match.span(group_i) 40 | group_token: Token = so_far_tokens[span[0]] 41 | if not pattern.search(group_token.content): 42 | return [current_token] 43 | 44 | new_parts = current_token.content.split(':') 45 | if new_parts[0] == '' or new_parts[1] == '': 46 | return [current_token] 47 | 48 | final = [] 49 | for part in new_parts: 50 | t = Token( 51 | file=current_token.file, 52 | content=part, 53 | span=current_token.file.get_span_for_string(part, between=current_token.span), 54 | ) 55 | t.set_type([PygmentsToken.Text]) 56 | final.append(t) 57 | 58 | return final 59 | -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/helpers/type_stream.py: -------------------------------------------------------------------------------- 1 | from pygments.token import Token as PygmentsToken 2 | 3 | from deepsecrets.core.model.token import Token 4 | 5 | types_to_filter_before = [ 6 | PygmentsToken.Text.Whitespace, 7 | PygmentsToken.Error, 8 | PygmentsToken.Keyword, 9 | PygmentsToken.Generic, 10 | PygmentsToken.Literal.Date, 11 | PygmentsToken.Literal.Number, 12 | PygmentsToken.Literal.String.Char, 13 | PygmentsToken.Literal.String.Delimiter, 14 | PygmentsToken.Literal.String.Escape, 15 | PygmentsToken.Literal.String.Affix, 16 | PygmentsToken.Literal.String.Interpol, 17 | PygmentsToken.Comment.Hashbang, 18 | PygmentsToken.Name.Namespace, 19 | PygmentsToken.Name.Builtin.Pseudo, 20 | ] 21 | 22 | 23 | types_to_filter_after = [ 24 | PygmentsToken.Punctuation, 25 | PygmentsToken.Operator, 26 | PygmentsToken.Name, 27 | ] 28 | 29 | 30 | acc = { 31 | PygmentsToken.Operator: 'o', 32 | PygmentsToken.Name: 'n', 33 | PygmentsToken.Name.Variable: 'v', 34 | PygmentsToken.Name.Variable.Global: 'v', 35 | PygmentsToken.Name.Variable.Instance: 'v', 36 | PygmentsToken.Name.Variable.Magic: 'v', 37 | PygmentsToken.Name.Other: 'n', 38 | PygmentsToken.Name.Tag: 'n', 39 | PygmentsToken.Name.Constant: 'n', 40 | PygmentsToken.Name.Attribute: 'n', 41 | PygmentsToken.Keyword.Constant: 'k', 42 | PygmentsToken.Punctuation: 'p', 43 | PygmentsToken.Punctuation.Indicator: 'p', 44 | PygmentsToken.Literal: 'L', 45 | PygmentsToken.Literal.Scalar.Plain: 'L', 46 | PygmentsToken.Literal.String: 'L', 47 | PygmentsToken.String: 'L', 48 | PygmentsToken.String.Single: 'L', 49 | PygmentsToken.String.Double: 'L', 50 | PygmentsToken.Text: 'L', 51 | PygmentsToken.Literal.String.Backtick: 'p', # technically it's a punc 52 | } 53 | 54 | 55 | def token_to_typestream_item(token: Token) -> str: 56 | if token.content == '\n': 57 | return '\n' 58 | 59 | if any(type in token.type for type in types_to_filter_before): # type: ignore 60 | return 'u' 61 | 62 | return acc.get(token.type[0], '?') # type: ignore 63 | 
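A quick illustration of the type-stream idea above: the standalone sketch below is not part of the repository. It assumes only that pygments is installed, and it uses a trimmed copy of the `acc` mapping (not an import from deepsecrets) to fold a lexed snippet into the single-character stream that the detection regexes in var_detection/rules.py are written against.

# Hypothetical, self-contained sketch of the type-stream construction.
# The mapping is a reduced copy of the `acc` table above; quote-only tokens
# are dropped to mirror the `empty_tokens` handling in lexer.py.
from pygments.lexers import PythonLexer
from pygments.token import Token as T

reduced_acc = {T.Operator: 'o', T.Name: 'n', T.Punctuation: 'p',
               T.Literal.String: 'L', T.Text: 'L'}

def to_type_stream(code: str) -> str:
    stream = ''
    for ttype, value in PythonLexer().get_tokens(code):
        if value == '\n':
            stream += '\n'
            continue
        if value.strip() == '' or value in ('"', "'", '""', "''"):
            continue  # whitespace and bare quotes carry no signal
        mapped = '?'
        t = ttype
        while t is not None:          # walk up the pygments type hierarchy
            if t in reduced_acc:
                mapped = reduced_acc[t]
                break
            t = t.parent
        stream += mapped
    return stream

# 'password = "hunter2"' lexes to Name, Operator, String -> 'noL' (plus a
# trailing newline marker), which is what a detection pattern such as
# '(n)(o)(L)' keys on.
print(to_type_stream('password = "hunter2"'))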
-------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/itokenizer.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from collections import namedtuple 3 | from typing import List, NamedTuple 4 | 5 | from deepsecrets.core.model.file import File 6 | from deepsecrets.core.model.token import Token 7 | 8 | 9 | class Tokenizer: 10 | tokens: List[Token] 11 | settings: NamedTuple 12 | 13 | def __init__(self, **kwargs) -> None: 14 | self.tokens = [] 15 | Settings = namedtuple('Settings', kwargs.keys()) # type: ignore 16 | self.settings = Settings._make(kwargs.values()) # type: ignore 17 | 18 | @abstractmethod 19 | def tokenize(self, file: File) -> List[Token]: 20 | pass 21 | 22 | def __hash__(self) -> int: # pragma: nocover 23 | return hash(type(self)) 24 | 25 | def __repr__(self) -> str: # pragma: no cover 26 | return self.__class__.__name__ 27 | -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/lexer.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Sequence, Set, Type, Union 2 | 3 | from deepsecrets import logger 4 | 5 | from ordered_set import OrderedSet 6 | from pygments import highlight 7 | from pygments.formatters import RawTokenFormatter 8 | from pygments.lexers.special import Lexer, RawTokenLexer 9 | from pygments.token import Token as PygmentsToken 10 | 11 | from deepsecrets.core.model.file import File 12 | from deepsecrets.core.model.semantic import Variable 13 | from deepsecrets.core.model.token import Semantic, SemanticType, Token 14 | from deepsecrets.core.tokenizers.helpers.semantic.language import Language 15 | from deepsecrets.core.tokenizers.helpers.semantic.var_detection.rules import VariableDetectionRules, VariableSuppressionRules 16 | from deepsecrets.core.tokenizers.helpers.spot_improvements import SpotImprovements 17 | from deepsecrets.core.tokenizers.helpers.type_stream import ( 18 | token_to_typestream_item, 19 | types_to_filter_after, 20 | types_to_filter_before, 21 | ) 22 | from deepsecrets.core.tokenizers.itokenizer import Tokenizer 23 | from deepsecrets.core.utils.lexer_finder import LexerFinder 24 | 25 | empty_tokens = ['\n', '\t', "'", "''", '"', '""'] 26 | 27 | 28 | class LexerTokenizer(Tokenizer): 29 | token_stream: str 30 | lexer: Lexer 31 | language: Language 32 | 33 | def _get_types_for_token(self, token: PygmentsToken) -> List[Type]: # type: ignore 34 | types = [] 35 | types.append(token) 36 | if token.parent is not None: 37 | if token.parent == PygmentsToken: 38 | return types 39 | deep = self._get_types_for_token(token.parent) 40 | types.extend(deep) 41 | return types 42 | 43 | def sanitize(self, content: str) -> Union[str, bool]: 44 | quotes = ["'", "''", '"', '""'] 45 | 46 | whitespace_cleaned = content.replace(' ', '') 47 | if 0 <= len(whitespace_cleaned) == 0: 48 | return False 49 | 50 | # some lexers (eq. 
TextLexer) leave \n 51 | # at the end of a Token 52 | if len(content) > 1 and content[-1] == '\n': 53 | content = content[:-1] 54 | 55 | if content[0] == content[-1]: 56 | if content[0] in quotes: 57 | content = content[1:-1] 58 | 59 | if content in quotes: 60 | return False 61 | 62 | return content 63 | 64 | def _find_lexer_for_file(self, file: File): 65 | lexer = LexerFinder().find(file=file) 66 | if lexer is not None and lexer.name == 'Text only': 67 | return None 68 | return lexer 69 | 70 | 71 | def tokenize(self, file: File, post_filter=True) -> List[Token]: 72 | self.token_stream = '' 73 | # TODO: don't trust the extension, use 'file' utility ? 74 | 75 | self.lexer = self._find_lexer_for_file(file) 76 | if not self.lexer: 77 | return self.tokens 78 | 79 | try: 80 | self.language: Language = Language.from_text(self.lexer.filenames[0]) 81 | except (ValueError, IndexError): 82 | self.language: Language = Language.from_text(file.extension) 83 | except Exception as e: 84 | logger.exception(e) 85 | 86 | result = highlight(file.content, self.lexer, RawTokenFormatter()) 87 | raw_tokens = list(RawTokenLexer().get_tokens(result)) 88 | token_improver = SpotImprovements(lang=self.language) 89 | 90 | current_position = 0 91 | 92 | for i, raw_token in enumerate(raw_tokens): 93 | content: str = raw_token[1] 94 | types: List[Type] = self._get_types_for_token(raw_token[0]) 95 | start = current_position 96 | end = start + len(content) 97 | current_position = end 98 | 99 | try: 100 | content = self.sanitize(content) 101 | if not content: 102 | continue 103 | 104 | span = file.get_span_for_string(content, between=[start - 1, end + 1]) 105 | token = Token(file=file, content=content, span=span) 106 | token.set_type(types) 107 | 108 | improved_tokens = token_improver.improve_token(self.tokens, self.token_stream, token) 109 | 110 | self.tokens.extend(improved_tokens) 111 | self.add_to_token_stream(improved_tokens) 112 | except Exception as e: 113 | str(e) 114 | 115 | tokens_to_be_excluded = [] 116 | if self.settings.deep_token_inspection is True: # type: ignore 117 | tokens_to_be_excluded = self.deep_analyze() 118 | 119 | return self.final_cleanup(self.tokens, tokens_to_be_excluded) if post_filter else list(self.tokens) 120 | 121 | def add_to_token_stream(self, tokens: List[Token]) -> None: 122 | for token in tokens: 123 | self.token_stream += token_to_typestream_item(token=token) 124 | 125 | def final_cleanup(self, tokens_all: Sequence[Token], tokens_to_be_excluded: Sequence[Token]) -> List[Token]: 126 | if not isinstance(tokens_all, OrderedSet): 127 | tokens_all = OrderedSet(tokens_all) 128 | 129 | tokens_all = tokens_all - tokens_to_be_excluded 130 | final = [] 131 | for token in tokens_all: 132 | if any(type in token.type for type in types_to_filter_before): # type: ignore 133 | continue 134 | 135 | if any(type in token.type for type in types_to_filter_after): # type: ignore 136 | continue 137 | 138 | if token.content.replace(' ', '') in empty_tokens: 139 | continue 140 | 141 | final.append(token) 142 | 143 | return final 144 | 145 | def deep_analyze(self) -> Set[Token]: 146 | tokens_all = OrderedSet(self.tokens) 147 | if self.language is None: 148 | return tokens_all 149 | 150 | exclude_after = set() 151 | 152 | true_detections: List[Variable] = [] 153 | suppression_regions: List[List[int]] = [] 154 | 155 | detection_rules = VariableDetectionRules.for_language(self.language) 156 | suppression_rules = VariableSuppressionRules.for_language(self.language) 157 | 158 | for rule in detection_rules: 159 | 
true_detections.extend(rule.match(self.tokens, self.token_stream)) 160 | 161 | for rule in suppression_rules: 162 | suppression_regions.extend(rule.match(self.tokens, self.token_stream)) 163 | 164 | suppression_regions = self._collapse_suppression_regions(suppression_regions) 165 | 166 | for var in true_detections: 167 | suppressed = self._if_suppressed(var, suppression_regions) 168 | if suppressed: 169 | exclude_after.update([var.name, var.value]) 170 | continue 171 | 172 | var.value.semantic = Semantic( 173 | type=SemanticType.VAR, 174 | name=var.name.content, 175 | creds_probability=var.found_by.creds_probability, 176 | ) 177 | exclude_after.add(var.name) 178 | 179 | return exclude_after 180 | 181 | def _if_suppressed(self, var: Variable, regions): 182 | for reg in regions: 183 | if var.span[0] >= reg[0] and var.span[1] <= reg[1]: 184 | return True 185 | return False 186 | 187 | 188 | def get_variables(self, tokens: Optional[List[Token]] = None) -> List[Token]: 189 | tokens = tokens if tokens is not None else self.tokens 190 | vars = [] 191 | if len(tokens) == 0: 192 | return [] 193 | 194 | for token in tokens: 195 | if token.semantic is None: 196 | continue 197 | 198 | if token.semantic.type != SemanticType.VAR: 199 | continue 200 | 201 | vars.append(token) 202 | 203 | return vars 204 | 205 | def print_token_type_stream(self) -> None: 206 | print(self.token_stream) 207 | 208 | def _collapse_suppression_regions(self, suppression_regions): 209 | regions = [] 210 | if len(suppression_regions) == 0: 211 | return regions 212 | 213 | for i, reg in enumerate(suppression_regions): 214 | if i == 0: 215 | regions.append(suppression_regions[0]) 216 | continue 217 | 218 | if reg[0] == regions[-1][1]: 219 | regions[-1][1] = reg[1] 220 | else: 221 | regions.append(reg) 222 | 223 | return regions 224 | 225 | 226 | -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/per_line.py: -------------------------------------------------------------------------------- 1 | import regex as re 2 | from typing import List 3 | 4 | from deepsecrets.core.model.file import File 5 | from deepsecrets.core.model.token import Token 6 | from deepsecrets.core.tokenizers.itokenizer import Tokenizer 7 | 8 | separator = re.compile(r'\n') 9 | 10 | 11 | class PerLineTokenizer(Tokenizer): 12 | def tokenize(self, file: File) -> List[Token]: 13 | separs = separator.finditer(file.content) 14 | prev_end = 0 15 | for sep in separs: 16 | s, e = sep.span() 17 | self.tokens.append(Token(file=file, content=file.content[prev_end:s], span=[prev_end, s])) 18 | prev_end = e 19 | 20 | return self.tokens 21 | -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/per_word.py: -------------------------------------------------------------------------------- 1 | import regex as re 2 | from typing import List, Optional 3 | 4 | from deepsecrets.core.model.file import File 5 | from deepsecrets.core.model.token import Token 6 | from deepsecrets.core.tokenizers.itokenizer import Tokenizer 7 | 8 | separator = re.compile(r'[ ,"\'\n:={}\[\]\+]+') 9 | 10 | 11 | class PerWordTokenizer(Tokenizer): 12 | def tokenize(self, file: File, content: Optional[str] = None) -> List[Token]: 13 | cnt = content if content is not None else file.content 14 | length = len(cnt) 15 | separs = separator.finditer(cnt) 16 | prev_end = 0 17 | 18 | for sep in separs: 19 | s, e = sep.span() 20 | if s == prev_end: 21 | prev_end = e 22 | continue 23 | 24 | token = 
Token(file=file, content=content[prev_end : e - 1], span=[prev_end, e - 1]) 25 | self.tokens.append(token) 26 | prev_end = e 27 | 28 | if prev_end != length: 29 | token = Token(file=file, content=content[prev_end:length], span=[prev_end, length]) 30 | self.tokens.append(token) 31 | 32 | return self.tokens 33 | -------------------------------------------------------------------------------- /deepsecrets/core/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avito-tech/deepsecrets/4afd597d3997a2bdbac8059e405659715faa51d4/deepsecrets/core/utils/__init__.py -------------------------------------------------------------------------------- /deepsecrets/core/utils/cpu.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import cpu_count 2 | 3 | from deepsecrets.core.utils.fs import path_exists 4 | 5 | QUOTA_FILE = '/sys/fs/cgroup/cpu/cpu.cfs_quota_us' 6 | PERIOD_FILE = '/sys/fs/cgroup/cpu/cpu.cfs_period_us' 7 | CGROUP_2_MAX = '/sys/fs/cgroup/cpu.max' 8 | 9 | 10 | class CpuHelper: 11 | 12 | def get_limit(self) -> int: 13 | multiproc_limit = self._by_multiproc() 14 | cgroup = self._by_cgroup() 15 | 16 | final = cgroup if cgroup != -1 else multiproc_limit 17 | return final if final > 0 else 0 18 | 19 | def _by_multiproc(self): 20 | return cpu_count() 21 | 22 | def _by_cgroup(self): 23 | quota = 1 24 | period = -1 25 | 26 | # cgroup 2: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html 27 | if path_exists(CGROUP_2_MAX): 28 | try: 29 | quota, period = self.__cgroup2() 30 | return quota // period 31 | except Exception: 32 | pass 33 | 34 | # cgroup 1: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v1/index.html 35 | if path_exists(QUOTA_FILE) and path_exists(PERIOD_FILE): 36 | try: 37 | quota, period = self.__cgroup1() 38 | return quota // period 39 | except Exception: 40 | pass 41 | 42 | return quota // period 43 | 44 | def __cgroup1(self): 45 | quota = 1 46 | period = -1 47 | 48 | with open(QUOTA_FILE) as f: 49 | quota = int(f.read()) 50 | 51 | with open(PERIOD_FILE) as f: 52 | period = int(f.read()) 53 | 54 | return quota, period 55 | 56 | 57 | def __cgroup2(self): 58 | quota = 1 59 | period = -1 60 | 61 | with open(CGROUP_2_MAX) as f: 62 | str_quota_period = f.read().split(' ') 63 | quota = int(str_quota_period[0]) 64 | period = int(str_quota_period[1]) 65 | 66 | return quota, period 67 | 68 | -------------------------------------------------------------------------------- /deepsecrets/core/utils/exceptions.py: -------------------------------------------------------------------------------- 1 | class FileNotFoundException(Exception): 2 | pass 3 | 4 | 5 | class TokenizationException(Exception): 6 | pass 7 | -------------------------------------------------------------------------------- /deepsecrets/core/utils/file_analyzer.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import RLock 2 | from multiprocessing.pool import Pool 3 | from typing import Dict, List, Optional, Type 4 | 5 | from pydantic import BaseModel 6 | 7 | from deepsecrets import logger 8 | from deepsecrets.core.engines.iengine import IEngine 9 | from deepsecrets.core.model.file import File 10 | from deepsecrets.core.model.finding import Finding 11 | from deepsecrets.core.model.token import Token 12 | from deepsecrets.core.tokenizers.itokenizer import Tokenizer 13 | 14 | 15 | class EngineWithTokenizer(BaseModel): 
16 | engine: IEngine 17 | tokenizer: Tokenizer 18 | 19 | class Config: 20 | arbitrary_types_allowed = True 21 | 22 | 23 | class FileAnalyzer: 24 | file: File 25 | engine_tokenizers: List[EngineWithTokenizer] 26 | tokens: Dict[Type, List[Token]] 27 | pool_class: Type 28 | 29 | def __init__(self, file: File, pool_class: Optional[Type] = None): 30 | if pool_class is not None: 31 | self.pool_class = Pool 32 | else: 33 | self.pool_class = pool_class 34 | 35 | self.engine_tokenizers = [] 36 | self.file = file 37 | self.tokens = {} 38 | self.tokenizers_lock = RLock() 39 | 40 | def add_engine(self, engine: IEngine, tokenizers: List[Tokenizer]) -> None: 41 | for tokenizer in tokenizers: 42 | self.engine_tokenizers.append(EngineWithTokenizer(engine=engine, tokenizer=tokenizer)) 43 | 44 | def process(self, threaded: bool = False) -> List[Finding]: 45 | results: List[Finding] = [] 46 | 47 | if threaded: # pragma: nocover 48 | with self.pool_class(2) as pool: 49 | engine_results = pool.imap(self._run_engine, self.engine_tokenizers) 50 | pool.close() 51 | pool.join() 52 | 53 | if engine_results is None: 54 | return results 55 | 56 | for er in engine_results: 57 | if not er: 58 | continue 59 | results.extend(er) 60 | 61 | else: 62 | for et in self.engine_tokenizers: 63 | results.extend(self._run_engine(et)) 64 | 65 | return results 66 | 67 | def _run_engine(self, et: EngineWithTokenizer) -> List[Finding]: 68 | results: List[Finding] = [] 69 | processed_values: Dict[int, bool] = {} 70 | 71 | with self.tokenizers_lock: 72 | if et.tokenizer not in self.tokens: 73 | self.tokens[et.tokenizer] = et.tokenizer.tokenize(self.file) 74 | 75 | tokens: List[Token] = self.tokens[et.tokenizer] 76 | 77 | for token in tokens: 78 | is_known_content = processed_values.get(token.val_hash()) 79 | if is_known_content is not None and is_known_content is False: 80 | continue 81 | 82 | processed_values[token.val_hash()] = False 83 | 84 | try: 85 | findings: List[Finding] = et.engine.search(token) 86 | for finding in findings: 87 | finding.map_on_file(file=self.file, relative_start=token.span[0]) 88 | results.append(finding) 89 | processed_values[token.val_hash()] = True 90 | 91 | except Exception as e: 92 | logger.exception('Unable to process token') 93 | continue 94 | 95 | return results 96 | -------------------------------------------------------------------------------- /deepsecrets/core/utils/fs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from deepsecrets import BASE_DIR, MODULE_NAME 5 | 6 | 7 | def get_abspath(filepath: str) -> str: 8 | if filepath.startswith('/'): 9 | return filepath 10 | 11 | filepath = filepath.replace('./', '') 12 | return os.path.join(BASE_DIR, filepath) 13 | 14 | 15 | def path_exists(filepath: str) -> bool: 16 | abs_path = get_abspath(filepath) 17 | return os.path.exists(abs_path) 18 | 19 | 20 | def get_path_inside_package(filepath: str) -> str: 21 | pkg_root = sys.modules[MODULE_NAME].__path__[0] 22 | return os.path.join(pkg_root, filepath) 23 | -------------------------------------------------------------------------------- /deepsecrets/core/utils/guess_filetype.py: -------------------------------------------------------------------------------- 1 | from configparser import ConfigParser 2 | import json 3 | import tomllib 4 | from typing import Optional 5 | from puppetparser.parser import parse 6 | 7 | 8 | 9 | class FileTypeGuesser: 10 | 11 | def __init__(self) -> None: 12 | self.probes = { 13 | 'json': self._is_json, 14 | 
'toml': self._is_toml, 15 | 'pp': self._is_puppet, 16 | 'conf': self._is_conf, 17 | } 18 | 19 | def guess(self, content: str) -> Optional[str]: 20 | for ext, probe in self.probes.items(): 21 | if probe(content): 22 | return ext 23 | 24 | # TODO: Guesslang 25 | ''' 26 | ml_guesser = Guess() 27 | guess = ml_guesser.language_name(content) 28 | if not guess: 29 | return None 30 | 31 | for ext, name in ml_guesser._extension_map.items(): 32 | if name == guess: 33 | return ext 34 | ''' 35 | return None 36 | 37 | 38 | 39 | def _is_json(self, content: str): 40 | try: 41 | json.loads(content) 42 | except Exception: 43 | return False 44 | 45 | return True 46 | 47 | def _is_toml(self, content: str): 48 | try: 49 | tomllib.loads(content) 50 | except Exception: 51 | return False 52 | 53 | return True 54 | 55 | def _is_puppet(self, content: str): 56 | try: 57 | _, _ = parse(content) 58 | except Exception: 59 | return False 60 | 61 | return True 62 | 63 | def _is_conf(self, content): 64 | try: 65 | conf = ConfigParser().read_string(content) 66 | except Exception as e: 67 | return False 68 | return True -------------------------------------------------------------------------------- /deepsecrets/core/utils/hashing.py: -------------------------------------------------------------------------------- 1 | from hashlib import sha1, sha256, sha512 2 | 3 | from deepsecrets.core.model.rules.hashing import HashingAlgorithm 4 | 5 | 6 | def c_sha1(payload: str) -> str: 7 | return sha1(payload.encode('UTF-8')).hexdigest() 8 | 9 | 10 | def c_sha256(payload: str) -> str: 11 | return sha256(payload.encode('UTF-8')).hexdigest() 12 | 13 | 14 | def c_sha512(payload: str) -> str: 15 | return sha512(payload.encode('UTF-8')).hexdigest() 16 | 17 | 18 | algorithm_to_method = { 19 | HashingAlgorithm.SHA_512: c_sha512, 20 | HashingAlgorithm.SHA_256: c_sha256, 21 | HashingAlgorithm.SHA_1: c_sha1 22 | } 23 | 24 | 25 | def get_hash(payload: str, algorithm: HashingAlgorithm) -> str: 26 | method = algorithm_to_method.get(algorithm) 27 | if method is None: 28 | raise Exception(f'Unable to calculate hash for algorithm {algorithm.name}') 29 | 30 | return method(payload) -------------------------------------------------------------------------------- /deepsecrets/core/utils/lexer_finder.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.utils.guess_filetype import FileTypeGuesser 5 | from pygments.lexers import load_lexer_from_file, get_lexer_for_filename, get_lexer_by_name 6 | from pygments.util import ClassNotFound 7 | from jsx import lexer as lexer_mod 8 | 9 | 10 | class LexerFinder: 11 | 12 | file: File 13 | extension: str 14 | distinguishing_feature: List[str] 15 | 16 | alias_exceptions: Dict 17 | probes: Dict 18 | 19 | def __init__(self) -> None: 20 | self._init_custom_lexers() 21 | self._init_alias_exceptions() 22 | self._init_probes() 23 | 24 | def _init_custom_lexers(self): 25 | load_lexer_from_file(lexer_mod.__file__, "JsxLexer") 26 | 27 | def _init_alias_exceptions(self): 28 | self.alias_exceptions = { 29 | 'js+react': 'react' 30 | } 31 | 32 | def _init_probes(self): 33 | self.probes = { 34 | 'js': [ 35 | _probe_react 36 | ] 37 | } 38 | 39 | def find(self, file: File): 40 | self.file = file 41 | self.extension = self._determine_extension() 42 | self.distinguishing_feature = self._determine_distinguishing_feature() 43 | 44 | filename = self._projected_filename() 45 | alias = 
self._projected_alias() 46 | lexer = None 47 | 48 | try: 49 | lexer = get_lexer_by_name(alias) 50 | return lexer 51 | except ClassNotFound as e: 52 | pass 53 | 54 | try: 55 | lexer = get_lexer_for_filename(filename, file.content) 56 | return lexer 57 | except ClassNotFound as e: 58 | pass 59 | 60 | return lexer 61 | 62 | def _determine_extension(self): 63 | if self.file.extension is None: 64 | return self._try_guess_extension() 65 | 66 | return self.file.extension 67 | 68 | def _try_guess_extension(self) -> Optional[str]: 69 | return FileTypeGuesser().guess(self.file.content) 70 | 71 | def _determine_distinguishing_feature(self): 72 | applicable_strategies = self.probes.get(self.extension, []) 73 | for strategy in applicable_strategies: 74 | f = strategy(self.file) 75 | if f is None: 76 | continue 77 | return f 78 | 79 | def _projected_alias(self): 80 | alias = self.extension 81 | if self.distinguishing_feature is not None: 82 | alias += f'+{self.distinguishing_feature}' 83 | 84 | return self.alias_exceptions.get(alias, alias) 85 | 86 | def _projected_filename(self): 87 | filename = self.file.name 88 | if self.extension is not None: 89 | filename += f'.{self.extension}' 90 | 91 | return filename 92 | 93 | 94 | def _probe_react(file: File): 95 | # very simple approach at the moment 96 | evidences = [ 97 | 'import React', 98 | 'ReactDOM', 99 | ] 100 | if any(evidence in file.content for evidence in evidences): 101 | return 'react' 102 | return None -------------------------------------------------------------------------------- /deepsecrets/rules/excluded_paths.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "Path excluded", 4 | "pattern": "\\.git" 5 | }, 6 | { 7 | "name": "Path excluded", 8 | "pattern": ".*package-lock\\.json.*" 9 | }, 10 | { 11 | "name": "Path excluded", 12 | "pattern": ".*-requirements.txt" 13 | }, 14 | { 15 | "name": "Path excluded", 16 | "pattern": ".*Pipfile\\.lock$" 17 | }, 18 | { 19 | "name": "Path excluded", 20 | "pattern": ".*package.json.*" 21 | }, 22 | { 23 | "name": "Images", 24 | "pattern": ".*\\.(jpg|png|bmp|gif|tiff)$" 25 | }, 26 | { 27 | "name": "Executives", 28 | "pattern": ".*\\.(exe|dll)$" 29 | }, 30 | { 31 | "name": "Path excluded", 32 | "pattern": "vendor/" 33 | }, 34 | { 35 | "name": "Path excluded", 36 | "pattern": ".*Gopkg\\.lock.*" 37 | }, 38 | { 39 | "name": "Path excluded", 40 | "pattern": "venv/" 41 | }, 42 | { 43 | "name": "Path excluded", 44 | "pattern": "requirements.txt$" 45 | }, 46 | { 47 | "name": "Path excluded", 48 | "pattern": ".*Gopkg\\.lck$" 49 | }, 50 | { 51 | "name": "Path excluded", 52 | "pattern": ".*Podfile\\.lock$" 53 | }, 54 | { 55 | "name": "Path excluded", 56 | "pattern": "\\.gitignore" 57 | }, 58 | { 59 | "name": "Path excluded", 60 | "pattern": ".*xcodeproj.*" 61 | }, 62 | { 63 | "name": "Path excluded", 64 | "pattern": ".*__snapshots__.*" 65 | }, 66 | { 67 | "name": "Path excluded", 68 | "pattern": "internal/generated/" 69 | }, 70 | { 71 | "name": "Path excluded", 72 | "pattern": "npm-shrinkwrap.json" 73 | }, 74 | { 75 | "name": "Path excluded", 76 | "pattern": ".*composer.json.*" 77 | }, 78 | { 79 | "name": "Path excluded", 80 | "pattern": ".*brief" 81 | }, 82 | { 83 | "name": "Path excluded", 84 | "pattern": ".*Godeps\\.json$" 85 | }, 86 | { 87 | "name": "Path excluded", 88 | "pattern": ".*composer\\.lock.*" 89 | }, 90 | { 91 | "name": "Path excluded", 92 | "pattern": "src/Generated/" 93 | }, 94 | { 95 | "name": "Path excluded", 96 | "pattern": 
".*yarn\\.lock$" 97 | }, 98 | { 99 | "name": "Path excluded", 100 | "pattern": ".*node_modules\\/.*" 101 | }, 102 | { 103 | "name": "Path excluded", 104 | "pattern": ".*symfony\\.lock$" 105 | }, 106 | { 107 | "name": "Path excluded", 108 | "pattern": "Gopkg.toml" 109 | }, 110 | { 111 | "name": "Path excluded", 112 | "pattern": "lib/generated/" 113 | }, 114 | { 115 | "name": "Path excluded", 116 | "pattern": ".*/vendor\\/.*" 117 | }, 118 | { 119 | "name": "Path excluded", 120 | "pattern": ".*pbxproj$" 121 | }, 122 | { 123 | "name": "Path excluded", 124 | "pattern": ".*go\\.sum$" 125 | }, 126 | { 127 | "name": "Postman collection files", 128 | "pattern": ".*postman_collection\\.json$" 129 | } 130 | ] -------------------------------------------------------------------------------- /deepsecrets/rules/regexes.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "S0", 4 | "name": "Slack Token", 5 | "confidence": 9, 6 | "pattern": "xox(?:a|b|p|o|s|r)-(?:\\d+-)+[a-z0-9]+" 7 | }, 8 | { 9 | "id": "S1", 10 | "name": "RSA private key", 11 | "confidence": 9, 12 | "pattern": "-----BEGIN RSA PRIVATE KEY-----[\\S\\s]{15,}?-----END RSA PRIVATE KEY-----" 13 | }, 14 | { 15 | "id": "S2", 16 | "name": "SSH (OPENSSH) private key", 17 | "confidence": 9, 18 | "pattern": "-----BEGIN OPENSSH PRIVATE KEY-----[\\S\\s]{15,}?-----END OPENSSH PRIVATE KEY-----" 19 | }, 20 | { 21 | "id": "S3", 22 | "name": "SSH (DSA) private key", 23 | "confidence": 9, 24 | "pattern": "-----BEGIN DSA PRIVATE KEY-----[\\S\\s]{15,}?-----END DSA PRIVATE KEY-----" 25 | }, 26 | { 27 | "id": "S4", 28 | "name": "SSH (EC) private key", 29 | "confidence": 9, 30 | "pattern": "-----BEGIN EC PRIVATE KEY-----[\\S\\s]{15,}?-----END EC PRIVATE KEY-----" 31 | }, 32 | { 33 | "id": "S5", 34 | "name": "PGP private key block", 35 | "confidence": 9, 36 | "pattern": "-----BEGIN PGP PRIVATE KEY BLOCK-----" 37 | }, 38 | { 39 | "id": "S7", 40 | "name": "Facebook Oauth", 41 | "confidence": 9, 42 | "pattern": "facebook.*['|\"][0-9a-f]{32}['|\"]" 43 | }, 44 | { 45 | "id": "S8", 46 | "name": "Twitter Oauth", 47 | "confidence": 9, 48 | "pattern": "twitter.*['|\"][0-9a-zA-Z]{35,44}['|\"]" 49 | }, 50 | { 51 | "id": "S10", 52 | "name": "Google Oauth", 53 | "confidence": 9, 54 | "pattern": "(\"client_secret\":\"[a-zA-Z0-9-_]{24}\")" 55 | }, 56 | { 57 | "id": "S12", 58 | "name": "Heroku API Key", 59 | "confidence": 9, 60 | "pattern": "heroku.*[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}" 61 | }, 62 | { 63 | "id": "S17", 64 | "name": "Slack Webhook", 65 | "confidence": 9, 66 | "pattern": "https://hooks.slack.com/services/T[a-zA-Z0-9_]+/B[a-zA-Z0-9_]+/[a-zA-Z0-9_]+" 67 | }, 68 | { 69 | "id": "S18", 70 | "name": "Google (GCP) Service-account", 71 | "confidence": 9, 72 | "pattern": "\"type\": \"service_account\"" 73 | }, 74 | { 75 | "id": "S19", 76 | "name": "Password in URL", 77 | "confidence": 9, 78 | "pattern": "://([^.\"]+):([^.\"]+)@\\S+", 79 | "target_group": 2, 80 | "match_rules": { 81 | "2": { 82 | "pattern": "^[^\\$|{|%|<].+[^\\$|}|%|>]$" 83 | } 84 | } 85 | }, 86 | { 87 | "id": "S20", 88 | "name": "BAuth", 89 | "confidence": 9, 90 | "pattern": "Basic @[a-zA-Z0-9+/]+={0,2}" 91 | }, 92 | { 93 | "id": "S22", 94 | "name": "Tableau PATS", 95 | "confidence": 0, 96 | "pattern": "\\bpersonalAccessTokenSecret\\b" 97 | }, 98 | { 99 | "id": "S23", 100 | "name": "Tableau PAT", 101 | "confidence": 0, 102 | "pattern": "\\bpersonalAccessToken\\b" 103 | }, 104 | { 105 | "id": "S25", 106 | "name": "Slack App 
Token", 107 | "confidence": 9, 108 | "pattern": "xapp-[0-9]+-[A-Za-z0-9_]+-[0-9]+-[a-f0-9]+" 109 | }, 110 | { 111 | "id": "S26", 112 | "name": "Custom private key", 113 | "confidence": 9, 114 | "pattern": "-----BEGIN PRIVATE KEY-----[\\S\\s]{15,}?-----END PRIVATE KEY-----" 115 | }, 116 | { 117 | "id": "S28", 118 | "name": "Suspicious password declaration", 119 | "pattern": "\\b(pass|password|pwd|passwd)\\b(\\W+)([A-Za-z0-9()$]*)\\b", 120 | "confidence": 0, 121 | "match_rules": { 122 | "2": { 123 | "pattern": "^\\s*(?:'|:|=)*\\s*$" 124 | } 125 | }, 126 | "target_group": 3, 127 | "entropy_settings": 3.72 128 | }, 129 | { 130 | "id": "S29", 131 | "name": "Ansible vault", 132 | "confidence": 9, 133 | "pattern": "\\$ANSIBLE_VAULT;[0-9]\\.[0-9];AES256" 134 | }, 135 | { 136 | "id": "S30", 137 | "name": "AWS MWS", 138 | "confidence": 9, 139 | "applicable_file_patterns": [ 140 | ".*.txt$" 141 | ], 142 | "pattern": "amzn\\.mws\\.[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}" 143 | } 144 | 145 | ] -------------------------------------------------------------------------------- /deepsecrets/scan_modes/cli.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from typing import Any, Dict, List, Type 4 | 5 | from dotwiz import DotWiz 6 | 7 | from deepsecrets import PROFILER_ON, logger 8 | from deepsecrets.core.engines.hashed_secret import HashedSecretEngine 9 | from deepsecrets.core.engines.regex import RegexEngine 10 | from deepsecrets.core.engines.semantic import SemanticEngine 11 | from deepsecrets.core.model.file import File 12 | from deepsecrets.core.model.finding import Finding 13 | from deepsecrets.core.modes.iscan_mode import ScanMode 14 | from deepsecrets.core.rulesets.hashed_secrets import HashedSecretsRulesetBuilder 15 | from deepsecrets.core.rulesets.regex import RegexRulesetBuilder 16 | from deepsecrets.core.tokenizers.full_content import FullContentTokenizer 17 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 18 | from deepsecrets.core.utils.file_analyzer import FileAnalyzer 19 | 20 | 21 | class CliScanMode(ScanMode): 22 | engines_enabled: Dict[Type, bool] = {} 23 | rulesets: Dict[str, List] = {} 24 | 25 | def prepare_for_scan(self) -> None: 26 | logger.info(f'Found {len(self.filepaths)} applicable files for the scan') 27 | if len(self.filepaths) == 0: 28 | return 29 | 30 | for engine in self.config.engines: 31 | self.engines_enabled[engine.name] = True 32 | 33 | for ruleset_builder, paths in self.config.rulesets.items(): 34 | builder = ruleset_builder() 35 | for path in paths: 36 | builder.with_rules_from_file(os.path.abspath(path)) 37 | self.rulesets[builder.ruleset_name] = builder.rules 38 | 39 | 40 | def analyzer_bundle(self) -> DotWiz: 41 | bundle = super().analyzer_bundle() 42 | bundle.update( 43 | workdir=self.config.workdir_path, 44 | engines=self.engines_enabled, 45 | rulesets=self.rulesets, 46 | ) 47 | return bundle 48 | 49 | 50 | @staticmethod 51 | def _per_file_analyzer(bundle, file: Any) -> List[Finding]: 52 | if logger.level == logging.DEBUG: 53 | logger.debug(f'Starting analysis for {file}') 54 | 55 | results: List[Finding] = [] 56 | 57 | if not isinstance(file, str): 58 | raise Exception('Filepath as str expected') 59 | 60 | file = File(path=file, relative_path=file.replace(f'{bundle.workdir}/', '')) 61 | if file.length == 0: 62 | return results 63 | 64 | file_analyzer = FileAnalyzer(file) 65 | fct = FullContentTokenizer() 66 | lex = LexerTokenizer(deep_token_inspection=True) 67 
| 68 | regex_engine = RegexEngine( 69 | ruleset=bundle.rulesets.get(RegexRulesetBuilder.ruleset_name, []), 70 | ) 71 | 72 | for eng, enabled in bundle.engines.items(): 73 | if not enabled: 74 | continue 75 | 76 | if eng == RegexEngine.name: 77 | file_analyzer.add_engine(regex_engine, [fct]) 78 | 79 | if eng == HashedSecretEngine.name: 80 | hashed_secret_engine = HashedSecretEngine( 81 | ruleset=bundle.ruleset.get(HashedSecretsRulesetBuilder.ruleset_name, []) 82 | ) 83 | file_analyzer.add_engine(hashed_secret_engine, [lex]) 84 | 85 | if eng == SemanticEngine.name: 86 | semantic_engine = SemanticEngine(regex_engine) 87 | file_analyzer.add_engine(semantic_engine, [lex]) 88 | 89 | try: 90 | results = file_analyzer.process(threaded=False) 91 | except Exception as e: 92 | logger.exception(e) 93 | 94 | if PROFILER_ON: 95 | pass 96 | 97 | return results 98 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "deepsecrets" 3 | version = "1.1.3" 4 | description = "A better tool for secrets search" 5 | license = "MIT" 6 | authors = [ 7 | "Nikolai Khechumov ", 8 | ] 9 | keywords = ["security", "secrets", "credentials", "scanning", "appsec"] 10 | packages = [{include = "deepsecrets"}] 11 | 12 | 13 | readme = "README.md" 14 | classifiers = [ 15 | "Programming Language :: Python :: 3", 16 | "License :: OSI Approved :: MIT License", 17 | "Operating System :: OS Independent", 18 | "Environment :: Console", 19 | "Topic :: Security" 20 | 21 | ] 22 | 23 | [tool.poetry.urls] 24 | "Homepage" = "https://github.com/avito-tech/deepsecrets" 25 | "Bug Tracker" = "https://github.com/avito-tech/deepsecrets/issues" 26 | 27 | [tool.poetry.scripts] 28 | deepsecrets = "deepsecrets:__main__" 29 | 30 | 31 | [tool.poetry.dependencies] 32 | python = ">=3.9,<3.12" 33 | pydantic = "^1.10.4" 34 | pyyaml = "^6.0.0" 35 | pygments = "^2.14.0" 36 | ordered-set = "^4.1.0" 37 | dotwiz = "^0.4.0" 38 | mmh3 = "^3.0.0" 39 | regex = "^2023.3.23" 40 | jsx-lexer = "^2.0.1" 41 | aenum = "^3.1.15" 42 | puppetparser = "^0.2.0" 43 | 44 | 45 | [tool.poetry.group.test.dependencies] 46 | pytest = "^7.2.1" 47 | coverage = "^7.2.0" 48 | pytest-cov = "^4.0.0" 49 | 50 | [tool.poetry.group.dev.dependencies] 51 | black = "^23.1.0" 52 | 53 | 54 | [build-system] 55 | requires = ["poetry-core"] 56 | build-backend = "poetry.core.masonry.api" 57 | 58 | [tool.mypy] 59 | packages=["deepsecrets"] 60 | disallow_untyped_defs = true 61 | disallow_any_unimported = true 62 | no_implicit_optional = true 63 | check_untyped_defs = true 64 | warn_return_any = true 65 | show_error_codes = true 66 | 67 | exclude = [ 68 | '^tests/*', # TOML literal string (single-quotes, no escaping necessary) 69 | ] 70 | plugins = [ 71 | "pydantic.mypy" 72 | ] 73 | 74 | [tool.pytest.ini_options] 75 | pythonpath = [ 76 | "." 
77 | ] 78 | 79 | [tool.black] 80 | line-length = 120 81 | skip-string-normalization = 1 82 | 83 | [tool.coverage] 84 | run.omit = ["deepsecrets/__main__.py"] 85 | 86 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [pycodestyle] 2 | show-source = 1 3 | max-line-length = 120 4 | ignore = E402, W605 5 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avito-tech/deepsecrets/4afd597d3997a2bdbac8059e405659715faa51d4/tests/__init__.py -------------------------------------------------------------------------------- /tests/cli/test_cli.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.cli import DeepSecretsCliTool 4 | 5 | 6 | @pytest.fixture(scope='module') 7 | def args_1(): 8 | return [ 9 | '', 10 | '--target-dir', 11 | '/app/tests/fixtures/', 12 | '--false-findings', 13 | '/app/tests/fixtures/false_findings.json', 14 | '--outfile', 15 | './fdsafad.json', 16 | '--verbose', 17 | ] 18 | 19 | @pytest.fixture(scope='module') 20 | def args_2(): 21 | return [ 22 | '', 23 | '--target-dir', 24 | '/app/tests/fixtures/', 25 | '--false-findings', 26 | '/app/tests/fixtures/false_findings.json', 27 | '--excluded-paths', 28 | 'built-in', 29 | '/app/tests/fixtures/false_findings.json', 30 | '--outfile', 31 | './fdsafad.json', 32 | ] 33 | 34 | 35 | def test_1_cli(args_1): 36 | tool = DeepSecretsCliTool(args=args_1) 37 | tool.parse_arguments() 38 | 39 | assert tool.config is not None 40 | assert len(tool.config.rulesets) == 2 41 | assert len(tool.config.engines) == 2 42 | assert len(tool.config.global_exclusion_paths) == 1 43 | 44 | assert tool.config.output.path == './fdsafad.json' 45 | assert tool.config.workdir_path == '/app/tests/fixtures/' 46 | 47 | 48 | def test_2_cli(args_2): 49 | tool = DeepSecretsCliTool(args=args_2) 50 | tool.parse_arguments() 51 | 52 | assert tool.config is not None 53 | assert len(tool.config.global_exclusion_paths) == 2 -------------------------------------------------------------------------------- /tests/config/test_config.py: -------------------------------------------------------------------------------- 1 | from deepsecrets.config import Config, Output 2 | from deepsecrets.core.engines.regex import RegexEngine 3 | from deepsecrets.core.rulesets.regex import RegexRulesetBuilder 4 | from deepsecrets.core.utils.exceptions import FileNotFoundException 5 | 6 | 7 | def test_config(): 8 | config = Config() 9 | config.set_workdir('tests') 10 | config.engines.append(RegexEngine) 11 | config.add_ruleset(RegexRulesetBuilder, ['tests/fixtures/1.conf']) 12 | config.output = Output(type='json', path='tests/1.json') 13 | config.set_global_exclusion_paths(['tests/fixtures/1.conf']) 14 | 15 | exception = None 16 | try: 17 | config.add_ruleset(RegexRulesetBuilder, ['tests/fixtures/0.conf']) 18 | except FileNotFoundException as e: 19 | exception = e 20 | 21 | assert exception is not None 22 | 23 | assert config.workdir_path == '/app/tests' 24 | assert len(config.engines) == 1 25 | assert len(config.rulesets) == 1 26 | assert config.rulesets[RegexRulesetBuilder] == ['/app/tests/fixtures/1.conf'] -------------------------------------------------------------------------------- /tests/core/engines/hashed_secret/test_hs.py: 
-------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import pytest 4 | 5 | from deepsecrets.core.engines.hashed_secret import HashedSecretEngine 6 | from deepsecrets.core.engines.regex import RegexEngine 7 | from deepsecrets.core.model.file import File 8 | from deepsecrets.core.model.finding import Finding 9 | from deepsecrets.core.model.rules.hashed_secret import HashedSecretRule 10 | from deepsecrets.core.model.token import Token 11 | from deepsecrets.core.rulesets.hashed_secrets import HashedSecretsRulesetBuilder 12 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 13 | 14 | 15 | @pytest.fixture(scope='module') 16 | def file(): 17 | path = 'tests/fixtures/1.py' 18 | return File(path=path, relative_path=path) 19 | 20 | 21 | @pytest.fixture(scope='module') 22 | def engine(): 23 | builder = HashedSecretsRulesetBuilder() 24 | builder.with_rules_from_file('tests/fixtures/hashed_secrets.json') 25 | return HashedSecretEngine(ruleset=builder.rules) 26 | 27 | 28 | def test_1(file: File, engine: RegexEngine): 29 | findings: List[Finding] = [] 30 | tokens: List[Token] = LexerTokenizer(deep_token_inspection=True).tokenize(file) 31 | for token in tokens: 32 | findings.extend(engine.search(token)) 33 | 34 | assert len(findings) == 1 35 | assert isinstance(findings[0].rules[0], HashedSecretRule) 36 | assert findings[0].rules[0].hashed_val == '8c535f99d6d0fa55b64af0fae6e3b6829eda413b' 37 | 38 | 39 | def test_2(engine: HashedSecretEngine): 40 | rules = engine.ruleset 41 | 42 | assert rules[0] == rules[0] 43 | assert rules[1] == rules[1] 44 | assert rules[1] != rules[0] 45 | -------------------------------------------------------------------------------- /tests/core/engines/regex/test_regex.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import pytest 4 | 5 | from deepsecrets.core.engines.regex import RegexEngine 6 | from deepsecrets.core.model.file import File 7 | from deepsecrets.core.model.finding import Finding, FindingMerger, FindingResponse 8 | from deepsecrets.core.rulesets.regex import RegexRulesetBuilder 9 | from deepsecrets.core.tokenizers.full_content import FullContentTokenizer 10 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 11 | from deepsecrets.core.utils.fs import get_path_inside_package 12 | 13 | 14 | @pytest.fixture(scope='module') 15 | def file(): 16 | path = 'tests/fixtures/regex_checks.txt' 17 | return File(path=path, relative_path=path) 18 | 19 | @pytest.fixture(scope='module') 20 | def file_extless(): 21 | path = 'tests/fixtures/extless/radius' 22 | return File(path=path, relative_path=path) 23 | 24 | @pytest.fixture(scope='module') 25 | def file_go_7(): 26 | path = 'tests/fixtures/7.go' 27 | return File(path=path, relative_path=path) 28 | 29 | 30 | @pytest.fixture(scope='module') 31 | def regex_engine(): 32 | builder = RegexRulesetBuilder() 33 | builder.with_rules_from_file(get_path_inside_package('rules/regexes.json')) 34 | return RegexEngine(ruleset=builder.rules) 35 | 36 | 37 | def test_1(file: File, regex_engine: RegexEngine): 38 | findings: List[Finding] = [] 39 | tokens = FullContentTokenizer().tokenize(file) 40 | for token in tokens: 41 | token_findings = regex_engine.search(token) 42 | for finding in token_findings: 43 | finding.map_on_file(file=file, relative_start=token.span[0]) 44 | findings.append(finding) 45 | 46 | for finding in findings: 47 | finding.map_on_file(file=file, 
relative_start=finding.start_pos) 48 | finding.choose_final_rule() 49 | 50 | assert len(findings) == 9 51 | assert findings[0].rules[0].id == 'S0' 52 | assert findings[1].rules[0].id == 'S0' 53 | assert findings[2].rules[0].id == 'S1' 54 | assert findings[3].rules[0].id == 'S2' 55 | assert findings[4].rules[0].id == 'S3' 56 | assert findings[5].rules[0].id == 'S4' 57 | assert findings[6].rules[0].id == 'S5' 58 | assert findings[7].rules[0].id == 'S19' 59 | assert findings[8].rules[0].id == 'S19' 60 | 61 | findings = FindingMerger(findings).merge() 62 | assert len(findings) == 9 63 | 64 | response = FindingResponse.from_list(findings) 65 | 66 | 67 | def test_extless(file_extless: File, regex_engine: RegexEngine): 68 | findings: List[Finding] = [] 69 | tokens = FullContentTokenizer().tokenize(file_extless) 70 | tokens_lex = LexerTokenizer(deep_token_inspection=True).tokenize(file_extless) 71 | 72 | for token in tokens: 73 | token_findings = regex_engine.search(token) 74 | for finding in token_findings: 75 | finding.map_on_file(file=file_extless, relative_start=token.span[0]) 76 | findings.append(finding) 77 | 78 | for finding in findings: 79 | finding.map_on_file(file=file_extless, relative_start=finding.start_pos) 80 | finding.choose_final_rule() 81 | 82 | assert len(findings) == 1 83 | assert findings[0].rules[0].id == 'S28' 84 | 85 | 86 | 87 | def test_go_7(file_go_7: File, regex_engine: RegexEngine): 88 | findings: List[Finding] = [] 89 | tokens = FullContentTokenizer().tokenize(file_go_7) 90 | 91 | for token in tokens: 92 | token_findings = regex_engine.search(token) 93 | for finding in token_findings: 94 | finding.map_on_file(file=file_go_7, relative_start=token.span[0]) 95 | findings.append(finding) 96 | 97 | for finding in findings: 98 | finding.map_on_file(file=file_go_7, relative_start=finding.start_pos) 99 | finding.choose_final_rule() 100 | 101 | assert len(findings) == 0 102 | -------------------------------------------------------------------------------- /tests/core/engines/semantic/test_semantic.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import pytest 3 | 4 | from deepsecrets.core.engines.semantic import SemanticEngine 5 | from deepsecrets.core.model.file import File 6 | from deepsecrets.core.model.finding import Finding, FindingMerger 7 | from deepsecrets.core.model.token import SemanticType 8 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 9 | 10 | 11 | @pytest.fixture(scope='module') 12 | def file() -> File: 13 | path = 'tests/fixtures/4.py' 14 | return File(path=path, relative_path=path) 15 | 16 | @pytest.fixture(scope='module') 17 | def file_json_2() -> File: 18 | path = 'tests/fixtures/2.json' 19 | return File(path=path, relative_path=path) 20 | 21 | @pytest.fixture(scope='module') 22 | def file_toml_1() -> File: 23 | path = 'tests/fixtures/1.toml' 24 | return File(path=path, relative_path=path) 25 | 26 | @pytest.fixture(scope='module') 27 | def file_toml_2() -> File: 28 | path = 'tests/fixtures/2.toml' 29 | return File(path=path, relative_path=path) 30 | 31 | @pytest.fixture(scope='module') 32 | def file_sh_2() -> File: 33 | path = 'tests/fixtures/2.sh' 34 | return File(path=path, relative_path=path) 35 | 36 | @pytest.fixture(scope='module') 37 | def file_html_1() -> File: 38 | path = 'tests/fixtures/1.html' 39 | return File(path=path, relative_path=path) 40 | 41 | 42 | def test_1_semantic_engine(file: File): 43 | tokens = LexerTokenizer(deep_token_inspection=True).tokenize(file) 
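# Added note (not part of the original test file): with deep_token_inspection=True the
# lexer-based tokenizer appears to attach Semantic metadata (variable name and type) to
# value tokens. The assertions below rely on that: tokens[3] is expected to carry the
# variable name 'pass' taken from tests/fixtures/4.py, which SemanticEngine then flags
# via its 'Var naming' rule.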
44 | assert len(tokens) == 13 45 | 46 | assert tokens[3].semantic.type == SemanticType.VAR 47 | assert tokens[3].semantic.name == 'pass' 48 | 49 | engine = SemanticEngine(subengine=None) 50 | findings = engine.search(tokens[3]) 51 | assert len(findings) == 1 52 | assert findings[0].rules[0].name == 'Var naming' 53 | 54 | 55 | def test_2_semantic_engine(file_json_2: File): 56 | tokens = LexerTokenizer(deep_token_inspection=True).tokenize(file_json_2) 57 | assert len(tokens) == 6 58 | 59 | assert tokens[0].semantic.type == SemanticType.VAR 60 | assert tokens[0].semantic.name == 'access_Token' 61 | 62 | assert tokens[1].semantic.type == SemanticType.VAR 63 | assert tokens[1].semantic.name == 'accessToken' 64 | 65 | engine = SemanticEngine(subengine=None) 66 | 67 | findings = [] 68 | for token in tokens: 69 | findings.extend(engine.search(token)) 70 | 71 | assert len(findings) == 3 72 | assert findings[0].rules[0].name == 'Entropy+Var naming' 73 | assert findings[1].rules[0].name == 'Entropy+Var naming' 74 | assert findings[2].rules[0].name == 'Var naming' 75 | 76 | 77 | 78 | def test_3_semantic_engine(file_toml_1: File): 79 | tokens = LexerTokenizer(deep_token_inspection=True).tokenize(file_toml_1) 80 | assert len(tokens) == 51 81 | 82 | assert tokens[50].semantic.type == SemanticType.VAR 83 | assert tokens[50].semantic.name == 'MATTERMOST_BOT_TOKEN' 84 | 85 | engine = SemanticEngine(subengine=None) 86 | 87 | findings = [] 88 | for token in tokens: 89 | findings.extend(engine.search(token)) 90 | 91 | assert len(findings) == 2 92 | assert findings[0].rules[0].name == 'Var naming' 93 | assert findings[1].rules[0].name == 'Var naming' 94 | 95 | 96 | 97 | def test_4_semantic_engine(file_toml_2: File): 98 | tokens = LexerTokenizer(deep_token_inspection=True).tokenize(file_toml_2) 99 | assert len(tokens) == 13 100 | 101 | engine = SemanticEngine(subengine=None) 102 | 103 | findings = [] 104 | findings.extend(engine.search(tokens[4])) 105 | findings.extend(engine.search(tokens[10])) 106 | findings.extend(engine.search(tokens[12])) 107 | 108 | assert len(findings) == 1 109 | assert findings[0].rules[0].name == 'Var naming' 110 | 111 | 112 | def test_5_semantic_engine(file_sh_2: File): 113 | tokens = LexerTokenizer(deep_token_inspection=True).tokenize(file_sh_2) 114 | assert len(tokens) == 16 115 | 116 | engine = SemanticEngine(subengine=None) 117 | 118 | findings: List[Finding] = [] 119 | for token in tokens: 120 | findings.extend(engine.search(token)) 121 | 122 | for finding in findings: 123 | finding.map_on_file(file=file_sh_2, relative_start=finding.start_pos) 124 | finding.choose_final_rule() 125 | 126 | 127 | findings = FindingMerger(findings).merge() 128 | assert len(findings) == 1 129 | assert findings[0].final_rule.name == 'Dangerous condition' 130 | 131 | 132 | def test_6_semantic_engine(file_html_1: File): 133 | tokens = LexerTokenizer(deep_token_inspection=True).tokenize(file_html_1) 134 | #assert len(tokens) == 16 135 | 136 | engine = SemanticEngine(subengine=None) 137 | 138 | findings: List[Finding] = [] 139 | for token in tokens: 140 | findings.extend(engine.search(token)) 141 | 142 | for finding in findings: 143 | finding.map_on_file(file=file_html_1, relative_start=finding.start_pos) 144 | finding.choose_final_rule() 145 | 146 | 147 | findings = FindingMerger(findings).merge() 148 | assert len(findings) == 0 -------------------------------------------------------------------------------- /tests/core/helpers/test_content_analyzer.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.helpers.content_analyzer import ContentAnalyzer 4 | from deepsecrets.core.model.file import File 5 | from deepsecrets.core.tokenizers.full_content import FullContentTokenizer 6 | 7 | STR = 'hellofrominsidethebase64' 8 | BASE_64_STR = 'aGVsbG9mcm9taW5zaWRldGhlYmFzZTY0' 9 | 10 | 11 | @pytest.fixture(scope='module') 12 | def file() -> File: 13 | path = 'test.txt' 14 | return File(path=path, relative_path=path, content=BASE_64_STR) 15 | 16 | 17 | def test_semantic_engine(file: File): 18 | tokens = FullContentTokenizer().tokenize(file) 19 | assert len(tokens) == 1 20 | 21 | token = tokens[0] 22 | assert len(token.uncovered_content) == 0 23 | 24 | ContentAnalyzer(engine=None).analyze(token) 25 | assert len(token.uncovered_content) == 1 26 | assert token.uncovered_content[0] == STR 27 | -------------------------------------------------------------------------------- /tests/core/helpers/test_entropy.py: -------------------------------------------------------------------------------- 1 | from deepsecrets.core.helpers.entropy import EntropyHelper 2 | 3 | 4 | def test_high_entropy(): 5 | test_string = 'qwertyuiopasdfghjklzxcvbnm,123456789' 6 | entropy = EntropyHelper.get_for_string(test_string) 7 | 8 | assert 5.16 <= entropy <= 5.17 9 | 10 | 11 | def test_some_entropy(): 12 | test_string = 'hello and very warm welcome, let\'s get the party started' 13 | entropy = EntropyHelper.get_for_string(test_string) 14 | 15 | assert 3.91 <= entropy <= 3.92 16 | 17 | 18 | def test_password_entropy(): 19 | test_string = 'v3ry$tongp@ssw0rd' 20 | entropy = EntropyHelper.get_for_string(test_string) 21 | 22 | assert 3.85 <= entropy <= 3.86 23 | 24 | 25 | # Oops, it seems like the password has less entropy than a statement 26 | -------------------------------------------------------------------------------- /tests/core/model/test_file.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | 5 | LINE_BREAK = '\n' 6 | 7 | 8 | @pytest.fixture(scope='module') 9 | def model() -> File: 10 | path = 'tests/fixtures/4.go' 11 | return File(path=path, relative_path=path) 12 | 13 | 14 | def test_basic_info(model): 15 | assert model.path == '/app/tests/fixtures/4.go' 16 | assert model.relative_path == 'tests/fixtures/4.go' 17 | assert model.extension == 'go' 18 | assert model.length == 395 19 | assert len(model.line_offsets) == 15 20 | 21 | 22 | def test_line_offsets(model): 23 | assert model.line_offsets[1] == (0, 48) 24 | assert model.content[48] == LINE_BREAK 25 | 26 | assert model.line_offsets[2] == (49, 152) 27 | assert model.content[152] == LINE_BREAK 28 | 29 | assert model.line_offsets[3] == (153, 154) 30 | assert model.content[154] == LINE_BREAK 31 | 32 | assert model.line_offsets[4] == (155, 194) 33 | assert model.content[194] == LINE_BREAK 34 | 35 | assert model.line_offsets[5] == (195, 240) 36 | assert model.content[240] == LINE_BREAK 37 | 38 | assert model.line_offsets[6] == (241, 293) 39 | assert model.content[293] == LINE_BREAK 40 | 41 | assert model.line_offsets[7] == (294, 294) 42 | assert model.content[294] == LINE_BREAK 43 | 44 | assert model.line_offsets[8] == (295, 311) 45 | assert model.content[311] == LINE_BREAK 46 | 47 | assert model.line_offsets[9] == (312, 325) 48 | assert model.content[325] == LINE_BREAK 49 | 50 | assert model.line_offsets[10] == (326, 328) 51 | assert
model.content[328] == LINE_BREAK 52 | 53 | assert model.line_offsets[11] == (329, 358) 54 | assert model.content[358] == LINE_BREAK 55 | 56 | assert model.line_offsets[12] == (359, 375) 57 | assert model.content[375] == LINE_BREAK 58 | 59 | assert model.line_offsets[13] == (376, 389) 60 | assert model.content[389] == LINE_BREAK 61 | 62 | assert model.line_offsets[14] == (390, 392) 63 | assert model.content[392] == LINE_BREAK 64 | 65 | assert model.line_offsets[15] == (393, 394) 66 | assert model.content[394] == LINE_BREAK 67 | 68 | assert ( 69 | model.content[-1] 70 | == model.content[model.length - 1] 71 | == model.content[394] 72 | == '\n' 73 | ) 74 | 75 | 76 | def test_caching(model: File): 77 | LINUM = 4 78 | line_contents = model.get_line_contents(LINUM) 79 | assert line_contents == '''\ttest2 := os.Getenv(`TEST_TEST`, "lol")''' 80 | assert model.line_contents_cache[LINUM] == line_contents 81 | 82 | 83 | def test_get_full_line_for_position(model: File): 84 | POSITION = 94 85 | projected_line_number = 2 86 | line_contents = model.get_full_line_for_position(POSITION) 87 | assert ( 88 | line_contents 89 | == '\tos.Setenv("RABBITMQ_URL", "amqp://fake_user:TESTSECRET1234@rabbitmq-esp01.miami.example.com:5672/esp")' 90 | ) 91 | assert projected_line_number in model.line_contents_cache.keys() 92 | 93 | 94 | def test_get_line_number(model: File): 95 | POSITION = 94 96 | projected_line_number = 2 97 | line_number = model.get_line_number(POSITION) 98 | assert line_number == projected_line_number 99 | 100 | 101 | def test_1_span_for_string(model: File): 102 | looking_for = 'rabbitmq-esp01' 103 | span = model.get_span_for_string(looking_for) 104 | assert span == (109, 123) 105 | 106 | 107 | def test_2_span_for_string(model: File): 108 | looking_for = 'rabbitmq-esp01' 109 | span = model.get_span_for_string(looking_for, between=(130, 150)) 110 | assert span is None 111 | -------------------------------------------------------------------------------- /tests/core/model/test_finding.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.model.finding import Finding 5 | from deepsecrets.core.model.rules.rule import Rule 6 | from deepsecrets.core.model.token import Token 7 | 8 | TEST_TOKEN_CONTENTS = ( 9 | '"amqp://fake_user:TESTSECRET1234@rabbitmq-esp01.miami.example.com:5672/esp"' 10 | ) 11 | TOKEN_SPAN = (76, 151) 12 | 13 | FINDING_CONTENT = 'TESTSECRET1234' 14 | FINDING_SPAN_INSIDE_TOKEN = (18, 32) 15 | 16 | 17 | @pytest.fixture(scope='module') 18 | def file() -> File: 19 | path = 'tests/fixtures/4.go' 20 | return File(path=path, relative_path=path) 21 | 22 | 23 | @pytest.fixture(scope='module') 24 | def rule() -> Rule: 25 | return Rule(id='test') 26 | 27 | 28 | @pytest.fixture(scope='module') 29 | def token(file: File) -> Token: 30 | return Token( 31 | file=file, 32 | content=TEST_TOKEN_CONTENTS, 33 | span=file.get_span_for_string(TEST_TOKEN_CONTENTS), 34 | ) 35 | 36 | 37 | def test_1_finding(file: File, token: Token, rule: Rule): 38 | assert file.content[token.span[0] : token.span[1]] == TEST_TOKEN_CONTENTS 39 | 40 | new_finding = Finding( 41 | file=file, 42 | rules=[rule], 43 | start_pos=FINDING_SPAN_INSIDE_TOKEN[0], 44 | end_pos=FINDING_SPAN_INSIDE_TOKEN[1], 45 | detection=token.content[ 46 | FINDING_SPAN_INSIDE_TOKEN[0] : FINDING_SPAN_INSIDE_TOKEN[1] 47 | ], 48 | ) 49 | 50 | assert new_finding.detection == FINDING_CONTENT 51 | 
new_finding.map_on_file(relative_start=token.span[0]) 52 | 53 | assert new_finding.start_pos == TOKEN_SPAN[0] + FINDING_SPAN_INSIDE_TOKEN[0] 54 | assert new_finding.end_pos == TOKEN_SPAN[0] + FINDING_SPAN_INSIDE_TOKEN[1] 55 | -------------------------------------------------------------------------------- /tests/core/model/test_token.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.model.semantic import Variable 5 | from deepsecrets.core.model.token import Semantic, SemanticType, Token 6 | 7 | TEST_TOKEN_CONTENTS = ( 8 | '"amqp://fake_user:TESTSECRET1234@rabbitmq-esp01.miami.example.com:5672/esp"' 9 | ) 10 | TOKEN_SPAN = (76, 151) 11 | 12 | 13 | @pytest.fixture(scope='module') 14 | def file() -> File: 15 | path = 'tests/fixtures/4.go' 16 | return File(path=path, relative_path=path) 17 | 18 | 19 | def test_token(file: File): 20 | token = Token( 21 | file=file, 22 | content=TEST_TOKEN_CONTENTS, 23 | span=file.get_span_for_string(TEST_TOKEN_CONTENTS), 24 | ) 25 | 26 | assert token.span == TOKEN_SPAN 27 | assert token.length == 75 28 | assert token.semantic is None 29 | assert len(token.type) == 0 30 | 31 | 32 | def test_semantic_token(file: File): 33 | token = Token( 34 | file=file, 35 | content=TEST_TOKEN_CONTENTS, 36 | span=file.get_span_for_string(TEST_TOKEN_CONTENTS), 37 | ) 38 | 39 | token.set_type(['Variable']) 40 | variable = Variable() 41 | variable.name = token 42 | variable.value = token 43 | 44 | token.semantic = Semantic(type=SemanticType.VAR, name=variable.name.content) 45 | 46 | assert token.span == TOKEN_SPAN 47 | assert token.length == 75 48 | assert token.semantic is not None 49 | assert len(token.type) == 1 50 | -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_conf.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_toml_1(): 9 | path = 'tests/fixtures/1.toml' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | @pytest.fixture(scope='module') 14 | def file_json_1(): 15 | path = 'tests/fixtures/1.json' 16 | return File(path=path, relative_path=path) 17 | 18 | @pytest.fixture(scope='module') 19 | def file_json_2_broken(): 20 | path = 'tests/fixtures/2.json' 21 | return File(path=path, relative_path=path) 22 | 23 | 24 | @pytest.fixture(scope='module') 25 | def file_yaml_1(): 26 | path = 'tests/fixtures/1.yaml' 27 | return File(path=path, relative_path=path) 28 | 29 | @pytest.fixture(scope='module') 30 | def file_yml_1(): 31 | path = 'tests/fixtures/1.yml' 32 | return File(path=path, relative_path=path) 33 | 34 | 35 | @pytest.fixture(scope='module') 36 | def file_ini_1(): 37 | path = 'tests/fixtures/1.ini' 38 | return File(path=path, relative_path=path) 39 | 40 | 41 | @pytest.fixture(scope='module') 42 | def file_pp_1(): 43 | path = 'tests/fixtures/1.pp' 44 | return File(path=path, relative_path=path) 45 | 46 | 47 | def test_1(file_toml_1): 48 | lex = LexerTokenizer(deep_token_inspection=True) 49 | lex.tokenize(file_toml_1, post_filter=False) 50 | 51 | variables = lex.get_variables() 52 | assert len(variables) == 50 53 | 54 | 55 | def test_2(file_json_1): 56 | lex = LexerTokenizer(deep_token_inspection=True) 57 | 
lex.tokenize(file_json_1, post_filter=False) 58 | 59 | variables = lex.get_variables() 60 | assert len(variables) == 1 61 | 62 | 63 | def test_3(file_yaml_1): 64 | lex = LexerTokenizer(deep_token_inspection=True) 65 | lex.tokenize(file_yaml_1, post_filter=False) 66 | 67 | variables = lex.get_variables() 68 | assert len(variables) == 4 69 | 70 | 71 | def test_4(file_ini_1): 72 | lex = LexerTokenizer(deep_token_inspection=True) 73 | lex.tokenize(file_ini_1, post_filter=False) 74 | 75 | variables = lex.get_variables() 76 | assert len(variables) == 9 77 | 78 | 79 | def test_5(file_pp_1): 80 | lex = LexerTokenizer(deep_token_inspection=True) 81 | lex.tokenize(file_pp_1, post_filter=False) 82 | 83 | variables = lex.get_variables() 84 | assert len(variables) == 37 85 | 86 | 87 | def test_6(file_json_2_broken): 88 | lex = LexerTokenizer(deep_token_inspection=True) 89 | lex.tokenize(file_json_2_broken, post_filter=False) 90 | 91 | variables = lex.get_variables() 92 | assert len(variables) == 6 93 | 94 | 95 | def test_7(file_yml_1): 96 | lex = LexerTokenizer(deep_token_inspection=True) 97 | lex.tokenize(file_yml_1, post_filter=False) 98 | 99 | variables = lex.get_variables() 100 | assert len(variables) == 1 101 | -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_cs.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_cs_1(): 9 | path = 'tests/fixtures/1.cs' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | def test_1(file_cs_1): 14 | lex = LexerTokenizer(deep_token_inspection=True) 15 | lex.tokenize(file_cs_1, post_filter=False) 16 | 17 | variables = lex.get_variables() 18 | assert len(variables) == 9 19 | -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_go.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_go_1(): 9 | path = 'tests/fixtures/1.go' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | @pytest.fixture(scope='module') 14 | def file_go_2(): 15 | path = 'tests/fixtures/2.go' 16 | return File(path=path, relative_path=path) 17 | 18 | 19 | @pytest.fixture(scope='module') 20 | def file_go_3(): 21 | path = 'tests/fixtures/3.go' 22 | return File(path=path, relative_path=path) 23 | 24 | 25 | @pytest.fixture(scope='module') 26 | def file_go_4(): 27 | path = 'tests/fixtures/4.go' 28 | return File(path=path, relative_path=path) 29 | 30 | 31 | @pytest.fixture(scope='module') 32 | def file_go_5(): 33 | path = 'tests/fixtures/5.go' 34 | return File(path=path, relative_path=path) 35 | 36 | 37 | @pytest.fixture(scope='module') 38 | def file_go_6(): 39 | path = 'tests/fixtures/6.go' 40 | return File(path=path, relative_path=path) 41 | 42 | 43 | @pytest.fixture(scope='module') 44 | def file_go_7(): 45 | path = 'tests/fixtures/7.go' 46 | return File(path=path, relative_path=path) 47 | 48 | 49 | def test_1(file_go_1): 50 | lex = LexerTokenizer(deep_token_inspection=True) 51 | tokens = lex.tokenize(file_go_1, post_filter=False) 52 | variables = lex.get_variables(tokens) 53 | 
assert len(variables) == 65 54 | 55 | 56 | def test_2(file_go_2): 57 | lex = LexerTokenizer(deep_token_inspection=True) 58 | lex.tokenize(file_go_2, post_filter=False) 59 | variables = lex.get_variables() 60 | assert len(variables) == 86 61 | 62 | 63 | def test_3(file_go_3): 64 | lex = LexerTokenizer(deep_token_inspection=True) 65 | lex.tokenize(file_go_3, post_filter=False) 66 | variables = lex.get_variables() 67 | assert len(variables) == 2 68 | 69 | 70 | def test_4(file_go_4): 71 | lex = LexerTokenizer(deep_token_inspection=True) 72 | lex.tokenize(file_go_4, post_filter=False) 73 | variables = lex.get_variables() 74 | assert len(variables) == 2 75 | 76 | 77 | def test_5(file_go_5): 78 | lex = LexerTokenizer(deep_token_inspection=True) 79 | lex.tokenize(file_go_5, post_filter=False) 80 | variables = lex.get_variables() 81 | assert len(variables) == 1 82 | 83 | 84 | def test_6(file_go_6): 85 | lex = LexerTokenizer(deep_token_inspection=True) 86 | lex.tokenize(file_go_6, post_filter=False) 87 | variables = lex.get_variables() 88 | assert len(variables) == 4 89 | 90 | 91 | def test_7(file_go_7): 92 | lex = LexerTokenizer(deep_token_inspection=True) 93 | lex.tokenize(file_go_7, post_filter=False) 94 | variables = lex.get_variables() 95 | assert len(variables) == 1 96 | -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_html.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_html_1(): 9 | path = 'tests/fixtures/1.html' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | def test_1(file_html_1): 14 | lex = LexerTokenizer(deep_token_inspection=True) 15 | lex.tokenize(file_html_1, post_filter=False) 16 | 17 | variables = lex.get_variables() 18 | assert len(variables) == 32 19 | -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_java.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_java_1(): 9 | path = 'tests/fixtures/1.java' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | def test_1(file_java_1): 14 | lex = LexerTokenizer(deep_token_inspection=True) 15 | lex.tokenize(file_java_1, post_filter=False) 16 | 17 | variables = lex.get_variables() 18 | assert len(variables) == 2 19 | -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_js.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_js_3(): 9 | path = 'tests/fixtures/3.js' 10 | return File(path=path, relative_path=path) 11 | 12 | @pytest.fixture(scope='module') 13 | def file_jsx_1(): 14 | path = 'tests/fixtures/1.jsx' 15 | return File(path=path, relative_path=path) 16 | 17 | @pytest.fixture(scope='module') 18 | def file_jsx_2(): 19 | path = 'tests/fixtures/2.jsx' 20 | return File(path=path, relative_path=path) 
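# Added note (not part of the original test file): the JSX fixtures below are presumably
# routed through the 'react' lexer (provided by the jsx-lexer package listed in
# pyproject.toml); the tests further down assert lex.lexer.name == 'react' before counting
# the variables that were detected.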
21 | 22 | @pytest.fixture(scope='module') 23 | def file_jsx_3(): 24 | path = 'tests/fixtures/3.jsx' 25 | return File(path=path, relative_path=path) 26 | 27 | @pytest.fixture(scope='module') 28 | def file_js_4(): 29 | path = 'tests/fixtures/4.js' 30 | return File(path=path, relative_path=path) 31 | 32 | 33 | 34 | def test_1(file_js_3): 35 | lex = LexerTokenizer(deep_token_inspection=True) 36 | tokens = lex.tokenize(file_js_3, post_filter=True) 37 | assert lex.lexer.name == 'react' 38 | 39 | variables = lex.get_variables(tokens) 40 | assert len(variables) == 2 41 | 42 | 43 | def test_2_jsx(file_jsx_1): 44 | lex = LexerTokenizer(deep_token_inspection=True) 45 | tokens = lex.tokenize(file_jsx_1, post_filter=True) 46 | assert lex.lexer.name == 'react' 47 | 48 | variables = lex.get_variables(tokens) 49 | assert len(variables) == 1 50 | 51 | 52 | def test_3_jsx(file_jsx_2): 53 | lex = LexerTokenizer(deep_token_inspection=True) 54 | tokens = lex.tokenize(file_jsx_2, post_filter=True) 55 | assert lex.lexer.name == 'react' 56 | 57 | variables = lex.get_variables(tokens) 58 | assert len(variables) == 0 59 | 60 | 61 | def test_4_jsx(file_jsx_3): 62 | lex = LexerTokenizer(deep_token_inspection=True) 63 | tokens = lex.tokenize(file_jsx_3, post_filter=True) 64 | assert lex.lexer.name == 'react' 65 | 66 | variables = lex.get_variables(tokens) 67 | assert len(variables) == 0 68 | 69 | 70 | def test_5_js(file_js_4): 71 | lex = LexerTokenizer(deep_token_inspection=True) 72 | tokens = lex.tokenize(file_js_4, post_filter=True) 73 | 74 | variables = lex.get_variables(tokens) 75 | assert len(variables) == 0 -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_php.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_php_1(): 9 | path = 'tests/fixtures/1.php' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | def test_1(file_php_1): 14 | lex = LexerTokenizer(deep_token_inspection=True) 15 | lex.tokenize(file_php_1, post_filter=False) 16 | 17 | variables = lex.get_variables() 18 | assert len(variables) == 12 19 | -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_py.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_py_1(): 9 | path = 'tests/fixtures/1.py' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | @pytest.fixture(scope='module') 14 | def file_py_2(): 15 | path = 'tests/fixtures/2.py' 16 | return File(path=path, relative_path=path) 17 | 18 | 19 | @pytest.fixture(scope='module') 20 | def file_py_3(): 21 | path = 'tests/fixtures/3.py' 22 | return File(path=path, relative_path=path) 23 | 24 | 25 | @pytest.fixture(scope='module') 26 | def file_py_4(): 27 | path = 'tests/fixtures/4.py' 28 | return File(path=path, relative_path=path) 29 | 30 | 31 | def test_1(file_py_1): 32 | lex = LexerTokenizer(deep_token_inspection=True) 33 | lex.tokenize(file_py_1, post_filter=False) 34 | variables = lex.get_variables() 35 | assert len(variables) == 5 36 | 37 | 38 | def test_2(file_py_2): 39 | lex 
= LexerTokenizer(deep_token_inspection=True) 40 | lex.tokenize(file_py_2, post_filter=False) 41 | variables = lex.get_variables() 42 | assert len(variables) == 92 43 | 44 | 45 | def test_3(file_py_3): 46 | lex = LexerTokenizer(deep_token_inspection=True) 47 | lex.tokenize(file_py_3, post_filter=False) 48 | 49 | variables = lex.get_variables() 50 | assert len(variables) == 3 51 | assert variables[1].semantic.name == 'password' 52 | assert variables[1].content == 'TESTSECRET1234' 53 | 54 | assert variables[2].semantic.name == 'pwd' 55 | assert variables[2].content == '2TESTSECRET1234' 56 | 57 | 58 | def test_4(file_py_4): 59 | lex = LexerTokenizer(deep_token_inspection=True) 60 | lex.tokenize(file_py_4, post_filter=False) 61 | 62 | variables = lex.get_variables() 63 | assert len(variables) == 11 64 | -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_sh.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_sh_1(): 9 | path = 'tests/fixtures/1.sh' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | def test_1(file_sh_1): 14 | lex = LexerTokenizer(deep_token_inspection=True) 15 | lex.tokenize(file_sh_1, post_filter=False) 16 | 17 | variables = lex.get_variables() 18 | assert len(variables) == 7 19 | -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_swift.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_swift_1(): 9 | path = 'tests/fixtures/1.swift' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | def test_suppress(file_swift_1): 14 | lex = LexerTokenizer(deep_token_inspection=True) 15 | vars = lex.tokenize(file_swift_1, post_filter=True) 16 | 17 | assert len(vars) == 0 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /tests/core/tokenizers/test_full_content.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.full_content import FullContentTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_toml_1(): 9 | path = 'tests/fixtures/1.toml' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | def test_full_content(file_toml_1: File): 14 | tokenizer = FullContentTokenizer() 15 | tokens = tokenizer.tokenize(file=file_toml_1) 16 | assert len(tokens) == 1 17 | assert tokens[0].content == file_toml_1.content 18 | -------------------------------------------------------------------------------- /tests/core/tokenizers/test_per_line.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers import PerLineTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_toml_1(): 9 | path = 'tests/fixtures/1.toml' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | def test_per_line(file_toml_1: File): 14 | tokenizer = 
PerLineTokenizer() 15 | tokens = tokenizer.tokenize(file=file_toml_1) 16 | assert len(tokens) == 76 17 | -------------------------------------------------------------------------------- /tests/core/utils/test_file_analyzer.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.engines.semantic import SemanticEngine 4 | from deepsecrets.core.model.file import File 5 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 6 | from deepsecrets.core.utils.file_analyzer import FileAnalyzer 7 | 8 | 9 | @pytest.fixture(scope='module') 10 | def file_toml_1(): 11 | path = 'tests/fixtures/1.toml' 12 | return File(path=path, relative_path=path) 13 | 14 | 15 | def test_file_analyzer(file_toml_1): 16 | file_analyzer = FileAnalyzer(file_toml_1) 17 | lex = LexerTokenizer(deep_token_inspection=True) 18 | semantic_engine = SemanticEngine(subengine=None) 19 | file_analyzer.add_engine(engine=semantic_engine, tokenizers=[lex]) 20 | 21 | findings = file_analyzer.process() 22 | assert findings is not None 23 | -------------------------------------------------------------------------------- /tests/core/utils/test_fs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avito-tech/deepsecrets/4afd597d3997a2bdbac8059e405659715faa51d4/tests/core/utils/test_fs.py -------------------------------------------------------------------------------- /tests/core/utils/test_lexer_finder.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from deepsecrets.core.model.file import File 3 | from deepsecrets.core.utils.lexer_finder import LexerFinder 4 | 5 | 6 | @pytest.fixture(scope='module') 7 | def file_extless_json(): 8 | path = 'tests/fixtures/extless/json' 9 | return File(path=path, relative_path=path) 10 | 11 | @pytest.fixture(scope='module') 12 | def file_js_react(): 13 | path = 'tests/fixtures/3.js' 14 | return File(path=path, relative_path=path) 15 | 16 | 17 | 18 | def test_extless_json(file_extless_json): 19 | lf = LexerFinder() 20 | lexer = lf.find(file_extless_json) 21 | 22 | assert lexer.name == 'JSON' 23 | 24 | def test_js_react(file_js_react): 25 | lf = LexerFinder() 26 | lexer = lf.find(file_js_react) 27 | 28 | assert lexer.name == 'react' -------------------------------------------------------------------------------- /tests/fixtures/1.conf: -------------------------------------------------------------------------------- 1 | imap = { 2 | 'host': 'example.test.com', 3 | 'username': 'loremipsum@test.com', 4 | 'password': 'passwordstrongbutleaked' 5 | } -------------------------------------------------------------------------------- /tests/fixtures/1.cs: -------------------------------------------------------------------------------- 1 | string name = "John"; 2 | int myNum = 15; 3 | string myText; 4 | myText = "Hello"; 5 | 6 | var people = new Dictionary() 7 | { 8 | { 5, "Tom"}, 9 | { 3, "Sam"}, 10 | { 11, "Bob"} 11 | }; 12 | 13 | var people = new Dictionary() 14 | { 15 | [5] = "Tom", // to be covered 16 | [6] = "Sam", // to be covered 17 | [7] = "Bob" // to be covered 18 | }; 19 | 20 | 21 | var mike = new KeyValuePair(56, "Mike"); // to be covered 22 | -------------------------------------------------------------------------------- /tests/fixtures/1.erb: -------------------------------------------------------------------------------- 1 | <% 2 | require 'digest/sha1' 3 | actual_password = Digest::SHA1.hexdigest 
[scope['bareos::secret'], @director_password].join('') 4 | -%> 5 | Director { # define myself 6 | Name = "<%= scope['bareos::director'] %>" 7 | QueryFile = "/usr/lib/bareos/scripts/query.sql" 8 | Maximum Concurrent Jobs = 80 9 | Maximum Console Connections = 48 10 | Plugin Directory = /usr/lib/bareos/plugins 11 | Plugin Names = "python" 12 | Password = "<%= actual_password %>" # Console password 13 | Messages = "bareos:messages:daemon" 14 | Auditing = yes 15 | Optimize For Speed = yes 16 | } 17 | 18 | Catalog { 19 | Name = "<%= scope['bareos::catalog'] %>" 20 | dbdriver = "postgresql" 21 | dbname = "bareos" 22 | dbuser = "service_bareos_production_01" 23 | dbpassword = "asdfasdf" 24 | dbaddress = db19 25 | dbport = 6432 26 | Reconnect = yes 27 | } -------------------------------------------------------------------------------- /tests/fixtures/1.html: -------------------------------------------------------------------------------- 1 | {% extends 'labels/base.html' %} 2 | {% load static %} 3 | 4 | {% block contentcss %} 5 | 6 | {% endblock %} 7 | 8 | {% block content %} 9 |
10 | {% if session and not user.is_authenticated %} 11 | session: {{ session }} 12 | {% endif %} 13 |
14 |
15 |
16 |
17 |
Запрос 18 | 19 | {{ task.query_text }} 20 |
21 |
22 |
23 | {{ task.item_title }} 24 |
25 |
{{ task.item_category }}
26 | {% if task.item_img %} 27 | item image 28 | {% endif %} 29 |
{{ task.item_description|safe }}
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 | 38 | 39 | 40 |
41 |
42 | {% endblock %} 43 | 44 | {% block contentjs %} 45 | 109 | {% endblock %} -------------------------------------------------------------------------------- /tests/fixtures/1.ini: -------------------------------------------------------------------------------- 1 | [auth] 2 | login = cchecker 3 | password = fbyuihqwjlkfr 4 | 5 | [api] 6 | jira = https://jr.example.com/rest 7 | cf = https://cf.example.com/rest 8 | 9 | [mail] 10 | to = security@example.com 11 | from = logs@example.com 12 | subject = JR|CF Keyword Alert 13 | send_debug = nice@example.com 14 | sender = http://prod.example.com:8888/service-email-sender 15 | -------------------------------------------------------------------------------- /tests/fixtures/1.java: -------------------------------------------------------------------------------- 1 | String name = "John"; 2 | 3 | Map map = new HashMap(); 4 | map.put("dog", "type of animal"); 5 | -------------------------------------------------------------------------------- /tests/fixtures/1.js: -------------------------------------------------------------------------------- 1 | const ATLASSIAN_USERNAME = process.env.ATLASSIAN_USERNAME || 'user'; 2 | const ATLASSIAN_PASSWORD = process.env.ATLASSIAN_PASSWORD || 'TESTSECRET1234'; -------------------------------------------------------------------------------- /tests/fixtures/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "queue": { 3 | "connection": "amqp://guest:guest@10.10.11.104:32769/" 4 | } 5 | } -------------------------------------------------------------------------------- /tests/fixtures/1.jsx: -------------------------------------------------------------------------------- 1 | import Link from "@/components/Link"; 2 | import PageHeader from "@/components/PageHeader"; 3 | import Menu from "antd/lib/menu"; 4 | import PropTypes from "prop-types"; 5 | import React from "react"; 6 | 7 | import "./layout.less"; 8 | 9 | export default function Layout({ activeTab, children }) { 10 | return ( 11 |
12 |
13 | 14 |
15 | 16 | 17 | System Status 18 | 19 | 20 | RQ Status 21 | 22 | 23 | Outdated Queries 24 | 25 | 26 | {children} 27 |
28 |
29 |
30 | ); 31 | } 32 | 33 | Layout.propTypes = { 34 | activeTab: PropTypes.string, 35 | children: PropTypes.node, 36 | }; 37 | 38 | Layout.defaultProps = { 39 | activeTab: "system_status", 40 | children: null, 41 | }; -------------------------------------------------------------------------------- /tests/fixtures/1.php: -------------------------------------------------------------------------------- 1 | "bar", 33 | "bar" => "foo", 34 | ]; 35 | 36 | 37 | $array["foo"] = "test"; 38 | 39 | $array2 = array( 40 | "foo" => "bar", 41 | "bar" => "foo", 42 | ); 43 | 44 | $today = 'tuesday'; 45 | 46 | class CSP 47 | { 48 | public const GRAFANA_CSP_METRIC_BASE = 'products.example.security.csp.'; 49 | 50 | } 51 | 52 | 'smtp' => [ 53 | 'transport' => 'smtp', 54 | 'host' => env('MAIL_HOST', 'smtp.mailtrap.io'), 55 | 'port' => env('MAIL_PORT', 2525), 56 | 'encryption' => env('MAIL_ENCRYPTION', 'tls'), 57 | 'username' => env('MAIL_USERNAME', 'mailer'), 58 | 'password' => env('MAIL_PASSWORD', 'fjanflkdsanfkjdsanf'), 59 | 'timeout' => null, 60 | ] -------------------------------------------------------------------------------- /tests/fixtures/1.pp: -------------------------------------------------------------------------------- 1 | 2 | ### Users and groups ### 3 | # Each user should have primary group as username 4 | # Additional groups can be passed over $groups var 5 | 6 | ### 7 | # NOTICE for $sshaccess: 8 | # hash keys must specify full hostnames and must not contain ^ and $ - they are kind of added automatically 9 | # Example: 10 | # sshaccess => { 'deployer' => ['app00'] } ### regexp is /^deployer$/ and matches only deployer and not deployer-jessie 11 | # sshaccess => { '^deployer' => ['app00'] } ### regexp is /^^deployer$/ AND WILL MATCH NOTHING 12 | # sshaccess => { 'deployer.*' => ['app00'] } ### regexp is /^deployer.*$/ and matches both deployer and deployer-jessie. Also deployer-test, deployer-killallhumans and deployer-blah-blah-blah. 
13 | ### 14 | 15 | define add_user_sshaccess ( 16 | $sshaccess, 17 | $usersshaccess='', 18 | $home='' 19 | ) { 20 | $username = $title 21 | if $home != '' { 22 | $realhome=$home 23 | } else { 24 | $realhome="/home/${username}" 25 | } 26 | include concat::setup 27 | unless defined(Concat["${realhome}/.ssh/authorized_keys"]) { 28 | concat { "${realhome}/.ssh/authorized_keys": 29 | owner => $username, 30 | group => $username, 31 | mode => '0600', 32 | } 33 | concat::fragment { "ssh_authorized_keys::${username}::header": 34 | target => "${realhome}/.ssh/authorized_keys", 35 | order => '00', 36 | content => "# This file is managed by Puppet\n", 37 | } 38 | } 39 | if ( $sshaccess != '' ) { 40 | if regexp_key_in_hash($sshaccess, $::hostname)==true { 41 | file {"${realhome}/.ssh/id_rsa": 42 | ensure => present, 43 | owner => $title, 44 | group => $title, 45 | mode => '0600', 46 | content => generate('/usr/local/sbin/generate-ssh-key', $::hostname, $username), 47 | } 48 | file {"${realhome}/.ssh/id_rsa.pub": 49 | ensure => present, 50 | owner => $title, 51 | group => $title, 52 | mode => '0644', 53 | content => generate('/usr/local/sbin/generate-ssh-key', $::hostname, $username, 1), 54 | } 55 | } else { 56 | file { "${realhome}/.ssh/id_rsa": 57 | ensure => absent 58 | } 59 | file { "${realhome}/.ssh/id_rsa.pub": 60 | ensure => absent 61 | } 62 | } 63 | concat::fragment { "ssh_authorized_keys::${username}::sshaccess": 64 | target => "${realhome}/.ssh/authorized_keys", 65 | order => '50', 66 | content => ssh_get_public_keys($sshaccess, $::hostname, $username), 67 | } 68 | } else { 69 | file { "${realhome}/.ssh/id_rsa": 70 | ensure => absent 71 | } 72 | file { "${realhome}/.ssh/id_rsa.pub": 73 | ensure => absent 74 | } 75 | } 76 | if ( $usersshaccess != '' ) { 77 | concat::fragment { "ssh_authorized_keys::${username}::usersshaccess": 78 | target => "${realhome}/.ssh/authorized_keys", 79 | order => '75', 80 | content => get_user_public_keys($usersshaccess, $::hostname, $username), 81 | } 82 | } 83 | } 84 | 85 | ## Adds a user and manages its keys and password 86 | define add_user ( 87 | $name, 88 | $password, 89 | $shell, 90 | $groups, 91 | $sshkeytype, 92 | $sshkey, 93 | $sshaccess='', 94 | $usersshaccess='', 95 | $home='', 96 | $sudoers='', 97 | $uid = undef, 98 | $managehome=true, 99 | $homemode='0751' 100 | ) { 101 | include concat::setup 102 | $username = $title 103 | if $groups == 'UNSET' { 104 | $real_groups = [$username,] 105 | } else { 106 | $real_groups = $groups 107 | } 108 | 109 | if $home == '' { 110 | $homedir = "/home/${username}" 111 | } else { 112 | $homedir = $home 113 | } 114 | 115 | group { $username: 116 | ensure => present, 117 | gid => $uid, 118 | } 119 | user { $username: 120 | comment => $name, 121 | home => $homedir, 122 | shell => $shell, 123 | uid => $uid, 124 | gid => $username, 125 | managehome => true, 126 | password => $password, 127 | groups => $real_groups, 128 | membership => inclusive, 129 | password_min_age => 99999, 130 | } 131 | if $managehome and ! defined(File[$homedir]){ 132 | file { $homedir: 133 | ensure => directory, 134 | mode => $homemode, 135 | owner => $username, 136 | group => $username, 137 | } 138 | } 139 | 140 | if ! 
defined(File["${homedir}/.ssh"]) { 141 | file { "${homedir}/.ssh": 142 | ensure => directory, 143 | mode => '0700', 144 | owner => $username, 145 | group => $username, 146 | } 147 | } 148 | 149 | unless defined(Concat["${homedir}/.ssh/authorized_keys"]) { 150 | concat { "${homedir}/.ssh/authorized_keys": 151 | owner => $username, 152 | group => $username, 153 | mode => '0600', 154 | } 155 | concat::fragment { "ssh_authorized_keys::${username}::header": 156 | target => "${homedir}/.ssh/authorized_keys", 157 | order => '00', 158 | content => "# This file is managed by Puppet\n", 159 | } 160 | } 161 | if ( $sshkey != '' ) { 162 | concat::fragment { "ssh_authorized_keys::${username}::personal": 163 | target => "${homedir}/.ssh/authorized_keys", 164 | order => '25', 165 | content => "${sshkeytype} ${sshkey} ${username}\n", 166 | } 167 | if ( $sshaccess != '' ) { 168 | notify {'You can use only one of $sshkey or $sshaccess variables in user_add class!': loglevel => error } 169 | } 170 | } else { 171 | if ( $sshaccess != '') { 172 | add_user_sshaccess{$username: 173 | home => $homedir, 174 | sshaccess => $sshaccess, 175 | usersshaccess => $usersshaccess, 176 | } 177 | } 178 | } 179 | if ( $sudoers != '' ) { 180 | sudoers::rules{$username: rules => $sudoers, } 181 | } 182 | } 183 | 184 | # Deletes user 185 | define del_user { 186 | $username = $title 187 | exec { "pkill -u ${username}": 188 | onlyif => "getent passwd ${username}", 189 | returns => [0, 1], 190 | } 191 | -> user { $username: 192 | ensure => absent, 193 | } 194 | -> group { $username: 195 | ensure => absent, 196 | } 197 | file { "/home/${username}/.ssh": 198 | ensure => absent, 199 | force => true, 200 | } 201 | } 202 | 203 | 204 | # Manages user 205 | class user_one( $groups = 'UNSET' ) { 206 | add_user { 'userone': 207 | name => 'John Doe', 208 | uid => '7005', 209 | password => '*', 210 | shell => '/bin/bash', 211 | groups => $groups, 212 | sshkeytype => 'ssh-rsa', 213 | sshkey => get_pubkey('pubkeys/jdoe.pub'), 214 | sudoers => [{command => 'ALL', nopasswd => true, user => 'exacron', nodes => 'crons', }, 215 | {command => '/usr/bin/puppet agent -t', nopasswd => true, nodes => ['crons']}, 216 | {command => 'ALL', user => 'postgres', nopasswd => true, nodes =>['crons','^app\d+', 'exa-sql','exa-sql-indexer[0-9]+','deployer','pg-int']}, 217 | {command => 'ALL', nopasswd => true, user => 'root', nodes => ['app00','exa-dwh15'],}, 218 | {command => '/usr/bin/strace', nopasswd => true, user => 'root', nodes => ['crons','^app\d+']}, 219 | {command => '/bin/netstat', nopasswd => true, user => 'root', nodes => ['crons','^app\d+']}, 220 | {command => '/bin/ss', nopasswd => true, user => 'root', nodes => ['crons','^app\d+']}, 221 | {command => '/usr/bin/lsof', nopasswd => true, user => 'root', nodes => ['crons','^app\d+']}, 222 | {command => 'ALL', user => 'sampler', nodes => 'sql-sample01'},], 223 | } 224 | } -------------------------------------------------------------------------------- /tests/fixtures/1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | LICENSE_KEY='TESTSECRET1234' 4 | 5 | jira_final_file="jira_access_`date +'%Y-%m-%d'`.csv" 6 | cf_final_file="cf_access_`date +'%Y-%m-%d'`.csv" 7 | stash_final_file="stash_access_`date +'%Y-%m-%d'`.csv" 8 | 9 | curl -u 'login01:password01' -s "https://example.com/test" | jq -r '.space.name + " (" + .space.key + "): " + .title' 10 | 11 | # should not be matched as a variable 12 | curl -u 'login01:$password_var' -s 
"https://example.com/test" 13 | 14 | 15 | printf "\nCleaning logs remotely...\n" 16 | ssh prx-cf "sudo rm confluence_logs_*.tar.gz" 17 | ssh prx-stash "sudo rm bitbucket_logs_*.tar.gz" 18 | ssh prx-jira "sudo rm jira_logs_*.tar.gz" 19 | printf "\nDone.\n" 20 | 21 | 22 | while IFS= read -r line 23 | do 24 | task_number=`echo $line | grep -oP '[A-Z]*\-[0-9]*'` 25 | count_number=`echo $line | tr -s " " | cut -d " " -f1` 26 | long_link=`echo $line | tr -s " " | cut -d " " -f2` 27 | task_summary=`curl -u 'login02:password02' -s "https://example.com/test" | jq -r '.key + ": " + .fields.summary'` 28 | done < accessed_jira_tasks_and_searches.txt 29 | printf "Done.\n" 30 | 31 | while IFS= read -r line 32 | do 33 | content_id=`echo $line | cut -d "=" -f2 | cut -d "&" -f1` 34 | count_number=`echo $line | tr -s " " | cut -d " " -f1` 35 | long_link=`echo $line | tr -s " " | cut -d " " -f2` 36 | page_title=`curl -u 'login03:password03' -s "https://example.com/test" | jq -r '.space.name + " (" + .space.key + "): " + .title'` 37 | printf "%s, \"%s\", %s\n" "$count_number" "$page_title" "$long_link" >> ${cf_final_file} 38 | done < accessed_cf_pages.txt 39 | printf "Done.\n" 40 | -------------------------------------------------------------------------------- /tests/fixtures/1.swift: -------------------------------------------------------------------------------- 1 | extraParameters = decoder.decode(key: "extraParameters", fallbackValue: [:]) 2 | mapSettings = try decoder.decodeIfPresent(key: "mapSettings") 3 | pointListRequest = RequestComponents(from: try decoder.decode(key: "pointListRequest")) 4 | pointInfoRequest = RequestComponents(from: try decoder.decode(key: "pointInfoRequest")) 5 | filtersInfoRequest = try decoder.decodeIfPresent(key: "filtersInfoRequest").map { RequestComponents(from: $0) } 6 | onOpenEvent = try decoder.decodeIfPresent(key: "onOpenEvent") 7 | onInitActions = decoder.decode(key: "onInitActions", fallbackValue: []) 8 | 9 | let result = Unboxer(dictionary: try unboxer.unbox(key: "result")) 10 | title = try result.unbox(key: "title") 11 | description = result.unbox(key: "description") 12 | actionTitle = try? result.unbox(keyPath: "actionTitle") 13 | action = try? 
result.unbox(keyPath: "action") -------------------------------------------------------------------------------- /tests/fixtures/1.toml: -------------------------------------------------------------------------------- 1 | name = "ab-lol" 2 | kind = "unknown" 3 | 4 | [engine] 5 | name = "python" 6 | 7 | [redis] 8 | enabled = true 9 | size = "small" 10 | version = "4.0" 11 | 12 | [env_vars] 13 | USE_JSON_FORMATTER = "false" 14 | DEBUG = "false" 15 | METRICS_ENABLED = "false" 16 | SENTRY_ENABLED = "false" 17 | REQUEST_LOGGING_ENABLED = "true" 18 | HTTPS_ENABLED = "true" 19 | S3_HOST = "ceph-com.miami.example.com" 20 | S3_BUCKET = "lol-1234" 21 | 22 | 23 | [envs.prod.env_vars] 24 | USE_JSON_FORMATTER = "true" 25 | DEBUG = "false" 26 | METRICS_ENABLED = "true" 27 | SENTRY_ENABLED = "true" 28 | SENTRY_DSN = "http://hello:TESTSECRET1234@sentry.miami.example.com/251" 29 | AUTH_ENABLED = "true" 30 | VERTICA_HOST = 'vertica' 31 | VERTICA_PORT = '5437' 32 | VSQL_DATABASE = "DWH" 33 | VSQL_HOST = "vertica.miami.example.com" 34 | VSQL_PORT = "5437" 35 | AUTH_CONFIG_URL = 'https://oauth2.example.com/.well-known/openid-configuration' 36 | CLICKHOUSE_HOSTS="clickhouse-tcp-clickhouse-abcentral-production-rs-rs01.db.example-sd" 37 | CLICKHOUSE_USER="ab_loader" 38 | DWH_CLICKHOUSE_HOSTS="clickhouse-tcp-clickhouse-dwh-cs-production-rs-rs01.db.example-sd" 39 | S3_BUCKET = "configurator-attachments" 40 | 41 | 42 | # CRONS 43 | [[crons]] 44 | name = "status_updater" 45 | enabled = true 46 | schedule = "*/1 * * * *" 47 | command = "python /app/scripts/status_updater.py" 48 | 49 | name = "scrum-poker" 50 | description = "Утилита для оценки задач" 51 | kind = "infrastructure" 52 | replicas = 1 53 | 54 | [engine] 55 | name = "node" 56 | version = "12.13" 57 | size = "small" 58 | 59 | [env_vars] 60 | NODE_ENV = "production" 61 | SESSION_TTL = "2592" 62 | SESSION_SECRET = "CHANGE ME PLZ!!!" 
63 | SESSION_COLLECTION = "sessions" 64 | 65 | [envs.local.env_vars] 66 | NODE_ENV = "development" 67 | BUILD_ENV = "local" 68 | 69 | [envs.dev] 70 | host = "test.host" 71 | [envs.dev.env_vars] 72 | NODE_ENV = "development" 73 | BUILD_ENV = "dev" 74 | JIRA_USERNAME = "test-stest" 75 | JIRA_PASSWORD = "iufkdhsafhiuwehf8qw4oifjh9w4ioafja" 76 | MATTERMOST_BOT_TOKEN = "fjaipu4iwhpfgj8eosiruhjfkea,sjflksea" 77 | -------------------------------------------------------------------------------- /tests/fixtures/1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /tests/fixtures/1.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | kubernetes::kube_kubeconfig_token: chAng3m3 4 | kubernetes::proxy_kubeconfig_token: chAng3m3 5 | kubernetes::kubelet_kubeconfig_token: chAng3m3 6 | kubernetes::calico_kubeconfig_token: chAng3m3 7 | -------------------------------------------------------------------------------- /tests/fixtures/1.yml: -------------------------------------------------------------------------------- 1 | osd_deep_scrub_large_omap_object_key_threshold: 2000000 -------------------------------------------------------------------------------- /tests/fixtures/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "result": true, 3 | "access_Token": "eyJ0eXfdasfdsa5OGRhNmUzYjI0NzkwYTE2ZDg0YmVjYzQwYzhlIn0.eyJpYXQiOjE2Nzk0MDgwNDYsIm5iZiI6MTY3OTQwODA0NiwiZXhwIjoxNjc5NDE0MDQ2LCJzZXR0aW5ncyI6eyJzdWJkaXZpc2lvbl9pZCI6MTAxODE2LCJjcmVkaXRfbGluZV9jaGFubmVsIjoiUFJFU0NPUkUiLCJ0cmFuY2hlX2NoYW5uZWwiOiJFQ09NTSIsInBlcmlvZCI6MzY0LCJjcmVkaXRfbGluZV9wcm9kdWN0X2lkIjoxMDEzNTQsInRyYW5jaGVfcHJvZHVjdF9pZCI6MTAxMzUzLCJwYXltZW50X3R5cGVfaWQiOjEwMjM5NiwiZGRzX3BheW1lbnRfdHlwZV9pZCI6MTAxMTcxLCJjb25zdW1wdGlvbl9pZCI6MTAxNjczLCJjb21pbmdfaWQiOjEwMTY3NiwiYnNfY2hlY2tfZnJlcXVlbmN5IjoxMCwiYnNfYWN0aXZlIjoxLCJic19pc190ZXN0IjowLCJzc3ViX2NoZWNrX2ZyZXF1ZW5jeSI6MTAsInNzdWJfYWN0aXZlIjoxLCJzc3ViX2lzX3Rlc3QiOjEsImlkeF9jaGVja19mcmVxdWVuY3kiOjEwLCJpZHhfbWluX2NvbmZpZGVuY2UiOiIxIiwiaWR4X2FjdGl2ZSI6MCwiaWR4X2lzX3Rlc3QiOjEsInZlcmlmeV9pc190ZXN0IjoxLCJibGF6ZV9jaGVja19mcmVxdWVuY3kiOjEwLCJibGF6ZV9hY3RpdmUiOjEsImJsYXplX2lzX3Rlc3QiOjEsInNtc19pc190ZXN0IjoxLCJzbXNfY29kZV9sZW5ndGgiOjR9LCJ1aWQiOiJhdml0byIsImF1a2V5IjoiYTkxYjQwNjAxZDYwM2U2ZmFhNzhiOTc0ZmFlZDgxM2MiLCJqdGkiOiI4NmEyOThkYTZlM2IyNDc5MGExNmQ4NGJlY2M0MGM4ZSJ9.n1sBlFLMs0dGW_gNp2gXRjhjYrdgfBeVZZ58wdPUPkhMt1fdsafasdfasdfdsafasdmotpcnmkZCr5XxYB0dO6nPxxLwhbZuD5-Eb8nT_kjL6JeVpHG5kNynRkGGTZoV00s8nmE-2X7t24cg96fdsafsdafdgfddfsgdsfk9LnAoamaE_jp0EQ480WBo_ZeDfAdPuNQg7nUwFrEgoIq33YHHzYkksPORKuycYouNXb2eB-jx1Bg7mXuGOuyLX3fdsafdfudsahiufhainEkFKV3S_4FwX734LksQee1DbSXgr3ugWVaoigaUbBD3jGWXQjVQr_E5Svcjtc4gFTRDfpF08x4NofA", 4 | "accessToken": 
"eyJ0eXfdasfdsa5OGRhNmUzYjI0NzkwYTE2ZDg0YmVjYzQwYzhlIn0.eyJpYXQiOjE2Nzk0MDgwNDYsIm5iZiI6MTY3OTQwODA0NiwiZXhwIjoxNjc5NDE0MDQ2LCJzZXR0aW5ncyI6eyJzdWJkaXZpc2lvbl9pZCI6MTAxODE2LCJjcmVkaXRfbGluZV9jaGFubmVsIjoiUFJFU0NPUkUiLCJ0cmFuY2hlX2NoYW5uZWwiOiJFQ09NTSIsInBlcmlvZCI6MzY0LCJjcmVkaXRfbGluZV9wcm9kdWN0X2lkIjoxMDEzNTQsInRyYW5jaGVfcHJvZHVjdF9pZCI6MTAxMzUzLCJwYXltZW50X3R5cGVfaWQiOjEwMjM5NiwiZGRzX3BheW1lbnRfdHlwZV9pZCI6MTAxMTcxLCJjb25zdW1wdGlvbl9pZCI6MTAxNjczLCJjb21pbmdfaWQiOjEwMTY3NiwiYnNfY2hlY2tfZnJlcXVlbmN5IjoxMCwiYnNfYWN0aXZlIjoxLCJic19pc190ZXN0IjowLCJzc3ViX2NoZWNrX2ZyZXF1ZW5jeSI6MTAsInNzdWJfYWN0aXZlIjoxLCJzc3ViX2lzX3Rlc3QiOjEsImlkeF9jaGVja19mcmVxdWVuY3kiOjEwLCJpZHhfbWluX2NvbmZpZGVuY2UiOiIxIiwiaWR4X2FjdGl2ZSI6MCwiaWR4X2lzX3Rlc3QiOjEsInZlcmlmeV9pc190ZXN0IjoxLCJibGF6ZV9jaGVja19mcmVxdWVuY3kiOjEwLCJibGF6ZV9hY3RpdmUiOjEsImJsYXplX2lzX3Rlc3QiOjEsInNtc19pc190ZXN0IjoxLCJzbXNfY29kZV9sZW5ndGgiOjR9LCJ1aWQiOiJhdml0byIsImF1a2V5IjoiYTkxYjQwNjAxZDYwM2U2ZmFhNzhiOTc0ZmFlZDgxM2MiLCJqdGkiOiI4NmEyOThkYTZlM2IyNDc5MGExNmQ4NGJlY2M0MGM4ZSJ9.n1sBlFLMs0dGW_gNp2gXRjhjYrdgfBeVZZ58wdPUPkhMt1fdsafasdfasdfdsafasdmotpcnmkZCr5XxYB0dO6nPxxLwhbZuD5-Eb8nT_kjL6JeVpHG5kNynRkGGTZoV00s8nmE-2X7t24cg96fdsafsdafdgfddfsgdsfk9LnAoamaE_jp0EQ480WBo_ZeDfAdPuNQg7nUwFrEgoIq33YHHzYkksPORKuycYouNXb2eB-jx1Bg7mXuGOuyLX3fdsafdfudsahiufhainEkFKV3S_4FwX734LksQee1DbSXgr3ugWVaoigaUbBD3jGWXQjVQr_E5Svcjtc4gFTRDfpF08x4NofA", 5 | "exp": {{now '1h' 'unix'}} , 6 | "nbf": {{now '' 'unix'}} , 7 | "iat": {{now '1h' 'unix'}} , 8 | "jti": "gggg", 9 | "authkey": "ggg", 10 | "uid": "avito", 11 | "scope": "client_credentials", 12 | "time": {{now '' 'unix'}} 13 | } -------------------------------------------------------------------------------- /tests/fixtures/2.jsx: -------------------------------------------------------------------------------- 1 | import logoUrl from "@/assets/images/redash_icon_small.png"; 2 | import { useCurrentRoute } from "@/components/ApplicationArea/Router"; 3 | import HelpTrigger from "@/components/HelpTrigger"; 4 | import Link from "@/components/Link"; 5 | import PlainButton from "@/components/PlainButton"; 6 | import CreateDashboardDialog from "@/components/dashboards/CreateDashboardDialog"; 7 | import CreateWorkbookDialog from "@/components/workbooks/CreateWorkbookDialog"; 8 | import { Auth } from "@/services/auth"; 9 | import settingsMenu from "@/services/settingsMenu"; 10 | import AlertOutlinedIcon from "@ant-design/icons/AlertOutlined"; 11 | import WorkbookIcon from "@ant-design/icons/BookOutlined"; 12 | import CodeOutlinedIcon from "@ant-design/icons/CodeOutlined"; 13 | import DesktopOutlinedIcon from "@ant-design/icons/DesktopOutlined"; 14 | import FolderOutlined from "@ant-design/icons/FolderOutlined"; 15 | import LineChartOutlined from "@ant-design/icons/LineChartOutlined"; 16 | import PlusOutlinedIcon from "@ant-design/icons/PlusOutlined"; 17 | import QuestionCircleOutlinedIcon from "@ant-design/icons/QuestionCircleOutlined"; 18 | import SettingOutlinedIcon from "@ant-design/icons/SettingOutlined"; 19 | import Menu from "antd/lib/menu"; 20 | import { first, includes } from "lodash"; 21 | import React, { useMemo } from "react"; 22 | import "./DesktopNavbar.less"; 23 | import VersionInfo from "./VersionInfo"; 24 | 25 | function NavbarSection({ children, ...props }) { 26 | return ( 27 | 28 | {children} 29 | 30 | ); 31 | } 32 | 33 | function useNavbarActiveState() { 34 | const currentRoute = useCurrentRoute(); 35 | 36 | return useMemo( 37 | () => ({ 38 | workbooks: includes( 39 | [ 40 | "Workbooks.Favorites", 41 | "Workbooks.List", 42 | 
"Workbooks.My", 43 | "Workbooks.ViewOrEdit", 44 | "Workbooks.Presets", 45 | "Workbooks.Presets.View", 46 | ], 47 | currentRoute.id 48 | ), 49 | dashboards: includes( 50 | [ 51 | "Dashboards.Favorites", 52 | "Dashboards.LegacyViewOrEdit", 53 | "Dashboards.List", 54 | "Dashboards.My", 55 | "Dashboards.ViewOrEdit", 56 | "Dashboards.Presets", 57 | "Dashboards.Presets.View", 58 | ], 59 | currentRoute.id 60 | ), 61 | queries: includes( 62 | [ 63 | "Queries.List", 64 | "Queries.Favorites", 65 | "Queries.Archived", 66 | "Queries.My", 67 | "Queries.View", 68 | "Queries.New", 69 | "Queries.Edit", 70 | ], 71 | currentRoute.id 72 | ), 73 | charts: includes(["charts"], currentRoute.id), 74 | dataSources: includes(["DataSources.List"], currentRoute.id), 75 | alerts: includes(["Alerts.List", "Alerts.New", "Alerts.View", "Alerts.Edit"], currentRoute.id), 76 | projects: includes(["Project.View", "Projects.Explore", "Projects.Explore.Favorites"], currentRoute.id), 77 | }), 78 | [currentRoute.id] 79 | ); 80 | } 81 | 82 | export default function DesktopNavbar() { 83 | const firstSettingsTab = first(settingsMenu.getAvailableItems()); 84 | 85 | const activeState = useNavbarActiveState(); 86 | 87 | const canCreateQuery = Auth.hasPermission("create_query"); 88 | const canCreateDashboard = Auth.hasPermission("create_dashboard"); 89 | const canCreateAlert = Auth.hasPermission("list_alerts"); 90 | 91 | return ( 92 | 245 | ); 246 | } -------------------------------------------------------------------------------- /tests/fixtures/2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | curl -u 'login01:password01' -s "https://example.com/test" 4 | # should not be matched as a variable 5 | curl -u 'login01:$password_var' -s "https://example.com/test" 6 | curl -u "qauser:$password" $URL > $FILENAME -------------------------------------------------------------------------------- /tests/fixtures/2.toml: -------------------------------------------------------------------------------- 1 | [env_vars] 2 | ALLOW_ORIGINS = "*" 3 | SERVICE_OAUTH = "vhpn6mbsvhpn6mbsvhpn6mbsvhpn6mbs" 4 | S3_SECRET = "SFRcQOuJllDaDlnL7BhcwbjdYQDkjeM8PDDae0y6" 5 | S3_URL = "http://test.com" 6 | STT_KEY = "vhpn6mbsvhpn6mbsvhpn6mbsvhpn6mbsvhpn6mbsvhpn6mbs" 7 | STT_TIMEOUT = "100" 8 | STT_URL = "https://api.test.com" 9 | TTS_KEY = "vhpn6mbsvhpn6mbsvhpn6mbsvhpn6mbsvhpn6mbsvhpn6mbs" 10 | TTS_TIMEOUT = "50" 11 | TTS_URL = "http://test1234.com" 12 | 13 | KEYLOGGER_KEYS_KEYRING_SUBKEY = "1" 14 | 15 | # should not be detected 16 | SECRET_KEY_PATH = "/fdsa/fdsa/fdsa" -------------------------------------------------------------------------------- /tests/fixtures/2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | Empty 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /tests/fixtures/3.go: -------------------------------------------------------------------------------- 1 | const ( 2 | testKeyPEM = ` 3 | asdf 4 | df 5 | sad 6 | ` 7 | 8 | bla = "lol" 9 | ) 10 | 11 | func init() { 12 | testKey = parseRSA(testKeyPEM, "TESTSECRET1234") 13 | } 14 | 15 | claims := struct{ Msg string }{"TESTSECRET1234"} 16 | key := time.Now().Format("01.02.2006") -------------------------------------------------------------------------------- /tests/fixtures/3.js: -------------------------------------------------------------------------------- 1 | import { Button, Modal, Radio, Row } from 'antd'; 2 | import PropTypes from 
'prop-types'; 3 | import React, { useState } from 'react'; 4 | import { useSelector } from 'react-redux'; 5 | 6 | import { AppstoreOutlined, OrderedListOutlined } from '@ant-design/icons'; 7 | 8 | import { getRoot } from '../../store/selectors'; 9 | import SortableGrid from './components/sortable-grid'; 10 | import SortableList from './components/sortable-list'; 11 | 12 | import styles from './styles.css'; 13 | 14 | const reorder = (list, startIndex, endIndex) => { 15 | const result = Array.from(list); 16 | const [removed] = result.splice(startIndex, 1); 17 | 18 | result.splice(endIndex, 0, removed); 19 | 20 | return result; 21 | }; 22 | 23 | const SortModal = ({ stories, isVisible, onClose, onConfirm }) => { 24 | const [cloneStories, setCloneStories] = useState(stories); 25 | const [viewType, setViewType] = useState('grid'); 26 | const loading = useSelector(getRoot).orderLoading; 27 | const changeViewTypeHandler = (event) => setViewType(event.target.value); 28 | const changeSortHandler = (oldIndex, newIndex) => { 29 | setCloneStories(reorder(cloneStories, oldIndex, newIndex)); 30 | }; 31 | 32 | const confirmHandler = () => { 33 | const storiesIds = cloneStories.map(s => s.id); 34 | 35 | onConfirm(storiesIds); 36 | }; 37 | 38 | return ( 39 | 48 | Отмена 49 | , 50 | , 53 | ]} 54 | onCancel={onClose}> 55 | 56 | 57 | 58 | 59 | 60 | 61 |
62 | { 63 | viewType === 'table' 64 | ? 65 | : 66 | } 67 |
68 |
69 | ); 70 | }; 71 | 72 | SortModal.propTypes = { 73 | stories: PropTypes.array, 74 | isVisible: PropTypes.bool, 75 | onClose: PropTypes.func, 76 | onConfirm: PropTypes.func, 77 | }; 78 | 79 | export default SortModal; -------------------------------------------------------------------------------- /tests/fixtures/3.jsx: -------------------------------------------------------------------------------- 1 | {BRAND_TRANSFER_LOCALE.TRANSFER_TITLES_RIGHT} -------------------------------------------------------------------------------- /tests/fixtures/3.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # fmt: off 3 | 4 | from ldap3 import ( 5 | ALL, 6 | Connection, 7 | Server, 8 | ) 9 | 10 | s = Server('ldaps://ldap-main.miami.example.com:636', get_info=ALL, use_ssl=True) 11 | c = Connection(s, user='uid=openvpn,ou=services,dc=example,dc=us', password='TESTSECRET1234') 12 | 13 | c = Connection(s, pwd='2TESTSECRET1234') 14 | 15 | c.bind() 16 | 17 | if key == 'setuptools': 18 | print() 19 | -------------------------------------------------------------------------------- /tests/fixtures/4.go: -------------------------------------------------------------------------------- 1 | func TestNilConnGetChannel_Error(t *testing.T) { 2 | os.Setenv("RABBITMQ_URL", "amqp://fake_user:TESTSECRET1234@rabbitmq-esp01.miami.example.com:5672/esp") 3 | 4 | test2 := os.Getenv(`TEST_TEST`, "lol") 5 | secret := os.Getenv("S3_STORAGE_SECRET_KEY") 6 | b, err := newMQBroker(nil, &AutoAckDisableStrategy) 7 | 8 | if err == nil { 9 | t.FailNow() 10 | } 11 | _, err = b.getChannel(false) 12 | if err == nil { 13 | t.FailNow() 14 | } 15 | } -------------------------------------------------------------------------------- /tests/fixtures/4.js: -------------------------------------------------------------------------------- 1 | const result3 = reducer(result2, changeObjectPageDraftFilters({ 2 | key: 'id-3', 3 | value: '789' 4 | })); 5 | 6 | var PDFRenderingQueue = function () { 7 | 8 | _createClass(PDFRenderingQueue, [{ 9 | key: "setViewer", 10 | value: function setViewer(pdfViewer) { 11 | this.pdfViewer = pdfViewer; 12 | } 13 | }, { 14 | key: "setThumbnailViewer", 15 | value: function setThumbnailViewer(pdfThumbnailViewer) { 16 | this.pdfThumbnailViewer = pdfThumbnailViewer; 17 | } 18 | }, { 19 | key: "isHighestPriority", 20 | value: function isHighestPriority(view) { 21 | return this.highestPriorityPage === view.renderingId; 22 | } 23 | }, { 24 | key: "renderHighestPriority", 25 | value: function renderHighestPriority(currentlyVisiblePages) { 26 | if (this.idleTimeout) { 27 | clearTimeout(this.idleTimeout); 28 | this.idleTimeout = null; 29 | } 30 | if (this.pdfViewer.forceRendering(currentlyVisiblePages)) { 31 | return; 32 | } 33 | if (this.pdfThumbnailViewer && this.isThumbnailViewEnabled) { 34 | if (this.pdfThumbnailViewer.forceRendering()) { 35 | return; 36 | } 37 | } 38 | if (this.printing) { 39 | return; 40 | } 41 | if (this.onIdle) { 42 | this.idleTimeout = setTimeout(this.onIdle.bind(this), CLEANUP_TIMEOUT); 43 | } 44 | } 45 | }, { 46 | key: "getHighestPriority", 47 | value: function getHighestPriority(visible, views, scrolledDown) { 48 | var visibleViews = visible.views; 49 | var numVisible = visibleViews.length; 50 | if (numVisible === 0) { 51 | return false; 52 | } 53 | for (var i = 0; i < numVisible; ++i) { 54 | var view = visibleViews[i].view; 55 | if (!this.isViewFinished(view)) { 56 | return view; 57 | } 58 | } 59 | if (scrolledDown) { 60 | var nextPageIndex = 
visible.last.id; 61 | if (views[nextPageIndex] && !this.isViewFinished(views[nextPageIndex])) { 62 | return views[nextPageIndex]; 63 | } 64 | } else { 65 | var previousPageIndex = visible.first.id - 2; 66 | if (views[previousPageIndex] && !this.isViewFinished(views[previousPageIndex])) { 67 | return views[previousPageIndex]; 68 | } 69 | } 70 | return null; 71 | } 72 | }, { 73 | key: "isViewFinished", 74 | value: function isViewFinished(view) { 75 | return view.renderingState === RenderingStates.FINISHED; 76 | } 77 | }, { 78 | key: "renderView", 79 | value: function renderView(view) { 80 | var _this = this; 81 | 82 | switch (view.renderingState) { 83 | case RenderingStates.FINISHED: 84 | return false; 85 | case RenderingStates.PAUSED: 86 | this.highestPriorityPage = view.renderingId; 87 | view.resume(); 88 | break; 89 | case RenderingStates.RUNNING: 90 | this.highestPriorityPage = view.renderingId; 91 | break; 92 | case RenderingStates.INITIAL: 93 | this.highestPriorityPage = view.renderingId; 94 | var continueRendering = function continueRendering() { 95 | _this.renderHighestPriority(); 96 | }; 97 | view.draw().then(continueRendering, continueRendering); 98 | break; 99 | } 100 | return true; 101 | } 102 | }]); 103 | 104 | return PDFRenderingQueue; 105 | }(); 106 | -------------------------------------------------------------------------------- /tests/fixtures/4.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # fmt: off 3 | 4 | CFAuthData = {'username': 'fdfsdfdsf-cf', 'pass': 'TESTSECRET1234', 'host': 'https://cf.example.com'} 5 | 6 | CFAuthData['lol'] = 'valentin' 7 | 8 | result = {'line': 'fdsfdsafdsafdsagfds'} 9 | print('') 10 | 11 | 12 | class SlackApproveTypes(Enum): 13 | USER_MARKED_AS_FALSE = 'user_marked_false' 14 | USER_MARKED_AS_FALSE_BAD_RULE = 'user_marked_false_bad_rule' 15 | USER_MARKED_AS_FALSE_UNAPPLICABLE = 'user_marked_false_unapplicable' 16 | USER_MARKED_AS_FALSE_OTHER = 'user_marked_false_other' 17 | 18 | ADMIN_MARKED_AS_FALSE = 'adm_marked_false' 19 | ADMIN_APPROVED = 'adm_approved' 20 | -------------------------------------------------------------------------------- /tests/fixtures/5.go: -------------------------------------------------------------------------------- 1 | conn, err := amqp.Dial("amqp://guest:guest@localhost:5672/") 2 | aes_key := os.Getenv("AES_KEY") -------------------------------------------------------------------------------- /tests/fixtures/6.go: -------------------------------------------------------------------------------- 1 | func TestPostgresStringer(t *testing.T) { 2 | t.Run("Normal", func(t *testing.T) { 3 | testPostgresStringer( 4 | t, 5 | Postgres{ 6 | Host: "localhost", 7 | Port: 5432, 8 | User: "user", 9 | Password: "password", 10 | DBName: "main", 11 | }, 12 | "postgres://user:password@localhost:5432/main?sslmode=disable&binary_parameters=yes", 13 | ) 14 | }) 15 | } 16 | -------------------------------------------------------------------------------- /tests/fixtures/7.go: -------------------------------------------------------------------------------- 1 | var ( 2 | caKeyPEM = []byte(`TESTSECRET1234 3 | `) 4 | ) 5 | 6 | bsonFilters = append(bsonFilters, bson.M{ 7 | "start": bson.D{{Key: "$gte", Value: in.ActiveAt.Gte}}, 8 | }) 9 | 10 | // unix://:@?db= 11 | -------------------------------------------------------------------------------- /tests/fixtures/excluded_paths.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": 
"Path excluded", 4 | "pattern": "\\.git" 5 | }, 6 | { 7 | "name": "Path excluded", 8 | "pattern": ".*package-lock\\.json.*" 9 | }, 10 | { 11 | "name": "Path excluded", 12 | "pattern": ".*-requirements.txt" 13 | }, 14 | { 15 | "name": "Path excluded", 16 | "pattern": ".*Pipfile\\.lock$" 17 | }, 18 | { 19 | "name": "Path excluded", 20 | "pattern": ".*package.json.*" 21 | }, 22 | { 23 | "name": "Images", 24 | "pattern": ".*\\.(jpg|png|bmp|gif|tiff)$" 25 | }, 26 | { 27 | "name": "Executives", 28 | "pattern": ".*\\.(exe|dll)$" 29 | }, 30 | { 31 | "name": "Path excluded", 32 | "pattern": "vendor/" 33 | }, 34 | { 35 | "name": "Path excluded", 36 | "pattern": ".*Gopkg\\.lock.*" 37 | }, 38 | { 39 | "name": "Path excluded", 40 | "pattern": "venv/" 41 | }, 42 | { 43 | "name": "Path excluded", 44 | "pattern": "requirements.txt$" 45 | }, 46 | { 47 | "name": "Path excluded", 48 | "pattern": ".*Gopkg\\.lck$" 49 | }, 50 | { 51 | "name": "Path excluded", 52 | "pattern": ".*Podfile\\.lock$" 53 | }, 54 | { 55 | "name": "Path excluded", 56 | "pattern": "\\.gitignore" 57 | }, 58 | { 59 | "name": "Path excluded", 60 | "pattern": ".*xcodeproj.*" 61 | }, 62 | { 63 | "name": "Path excluded", 64 | "pattern": ".*__snapshots__.*" 65 | }, 66 | { 67 | "name": "Path excluded", 68 | "pattern": "internal/generated/" 69 | }, 70 | { 71 | "name": "Path excluded", 72 | "pattern": "npm-shrinkwrap.json" 73 | }, 74 | { 75 | "name": "Path excluded", 76 | "pattern": ".*composer.json.*" 77 | }, 78 | { 79 | "name": "Path excluded", 80 | "pattern": ".*brief" 81 | }, 82 | { 83 | "name": "Path excluded", 84 | "pattern": ".*Godeps\\.json$" 85 | }, 86 | { 87 | "name": "Path excluded", 88 | "pattern": ".*composer\\.lock.*" 89 | }, 90 | { 91 | "name": "Path excluded", 92 | "pattern": "src/Generated/" 93 | }, 94 | { 95 | "name": "Path excluded", 96 | "pattern": ".*yarn\\.lock$" 97 | }, 98 | { 99 | "name": "Path excluded", 100 | "pattern": ".*node_modules\\/.*" 101 | }, 102 | { 103 | "name": "Path excluded", 104 | "pattern": ".*symfony\\.lock$" 105 | }, 106 | { 107 | "name": "Path excluded", 108 | "pattern": "Gopkg.toml" 109 | }, 110 | { 111 | "name": "Path excluded", 112 | "pattern": "lib/generated/" 113 | }, 114 | { 115 | "name": "Path excluded", 116 | "pattern": ".*/vendor\\/.*" 117 | }, 118 | { 119 | "name": "Path excluded", 120 | "pattern": ".*pbxproj$" 121 | }, 122 | { 123 | "name": "Path excluded", 124 | "pattern": ".*go\\.sum$" 125 | }, 126 | { 127 | "name": "Postman collection files", 128 | "pattern": ".*postman_collection\\.json$" 129 | } 130 | ] -------------------------------------------------------------------------------- /tests/fixtures/extless/json: -------------------------------------------------------------------------------- 1 | { 2 | "queue": { 3 | "connection": "amqp://guest:guest@10.10.11.104:32769/" 4 | } 5 | } -------------------------------------------------------------------------------- /tests/fixtures/false_findings.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "False", 4 | "pattern": "^.*-.*i1Wbhr6G.*eg$" 5 | } 6 | ] -------------------------------------------------------------------------------- /tests/fixtures/hashed_secrets.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "Some password", 4 | "hash": "8c535f99d6d0fa55b64af0fae6e3b6829eda413b", 5 | "length": 12, 6 | "algorithm": "sha1" 7 | }, 8 | { 9 | "name": "Another rule", 10 | "hash": "fakjsdfiudsajfndsjkafka", 11 | "length": 5, 12 
| "algorithm": "sha1" 13 | } 14 | ] 15 | -------------------------------------------------------------------------------- /tests/fixtures/regex_checks.txt: -------------------------------------------------------------------------------- 1 | # S0 2 | xoxb-278549377329-i1Wbhr6Gd7nMMMvKgElv5Neg 3 | xoxb-675660624544-675670263056-vCSNM7pKAGgcBKOwvW2Kmi2K 4 | 5 | # S1 6 | -----BEGIN RSA PRIVATE KEY----- 7 | MIIBOgIBAAJBAKj34GkxFhD90vcNLYLInFEX6Ppy1tPf9Cnzj4p4WGeKLs1Pt8Qu 8 | KUpRKfFLfRYC9AIKjbJTWit+CqvjWYzvQwECAwEAAQJAIJLixBy2qpFoS4DSmoEm 9 | o3qGy0t6z09AIJtH+5OeRV1be+N4cDYJKffGzDa88vQENZiRm0GRq6a+HPGQMd2k 10 | TQIhAKMSvzIBnni7ot/OSie2TmJLY4SwTQAevXysE2RbFDYdAiEBCUEaRQnMnbp7 11 | 9mxDXDf6AU0cN/RPBjb9qSHDcWZHGzUCIG2Es59z8ugGrDY+pxLQnwfotadxd+Uy 12 | v/Ow5T0q5gIJAiEAyS4RaI9YG8EWx/2w0T67ZUVAw8eOMB6BIUg0Xcu+3okCIBOs 13 | /5OiPgoTdSy7bcF9IGpSE8ZgGKzgYQVZeN97YE00 14 | -----END RSA PRIVATE KEY----- 15 | 16 | # S2 17 | -----BEGIN OPENSSH PRIVATE KEY----- 18 | MIIBOgIBAAJBAKj34GkxFhD90vcNLYLInFEX6Ppy1tPf9Cnzj4p4WGeKLs1Pt8Qu 19 | KUpRKfFLfRYC9AIKjbJTWit+CqvjWYzvQwECAwEAAQJAIJLixBy2qpFoS4DSmoEm 20 | o3qGy0t6z09AIJtH+5OeRV1be+N4cDYJKffGzDa88vQENZiRm0GRq6a+HPGQMd2k 21 | TQIhAKMSvzIBnni7ot/OSie2TmJLY4SwTQAevXysE2RbFDYdAiEBCUEaRQnMnbp7 22 | 9mxDXDf6AU0cN/RPBjb9qSHDcWZHGzUCIG2Es59z8ugGrDY+pxLQnwfotadxd+Uy 23 | v/Ow5T0q5gIJAiEAyS4RaI9YG8EWx/2w0T67ZUVAw8eOMB6BIUg0Xcu+3okCIBOs 24 | /5OiPgoTdSy7bcF9IGpSE8ZgGKzgYQVZeN97YE00 25 | -----END OPENSSH PRIVATE KEY----- 26 | 27 | # S3 28 | -----BEGIN DSA PRIVATE KEY----- 29 | MIIBOgIBAAJBAKj34GkxFhD90vcNLYLInFEX6Ppy1tPf9Cnzj4p4WGeKLs1Pt8Qu 30 | KUpRKfFLfRYC9AIKjbJTWit+CqvjWYzvQwECAwEAAQJAIJLixBy2qpFoS4DSmoEm 31 | o3qGy0t6z09AIJtH+5OeRV1be+N4cDYJKffGzDa88vQENZiRm0GRq6a+HPGQMd2k 32 | TQIhAKMSvzIBnni7ot/OSie2TmJLY4SwTQAevXysE2RbFDYdAiEBCUEaRQnMnbp7 33 | 9mxDXDf6AU0cN/RPBjb9qSHDcWZHGzUCIG2Es59z8ugGrDY+pxLQnwfotadxd+Uy 34 | v/Ow5T0q5gIJAiEAyS4RaI9YG8EWx/2w0T67ZUVAw8eOMB6BIUg0Xcu+3okCIBOs 35 | /5OiPgoTdSy7bcF9IGpSE8ZgGKzgYQVZeN97YE00 36 | -----END DSA PRIVATE KEY----- 37 | 38 | # S4 39 | -----BEGIN EC PRIVATE KEY----- 40 | MIIBOgIBAAJBAKj34GkxFhD90vcNLYLInFEX6Ppy1tPf9Cnzj4p4WGeKLs1Pt8Qu 41 | KUpRKfFLfRYC9AIKjbJTWit+CqvjWYzvQwECAwEAAQJAIJLixBy2qpFoS4DSmoEm 42 | o3qGy0t6z09AIJtH+5OeRV1be+N4cDYJKffGzDa88vQENZiRm0GRq6a+HPGQMd2k 43 | TQIhAKMSvzIBnni7ot/OSie2TmJLY4SwTQAevXysE2RbFDYdAiEBCUEaRQnMnbp7 44 | 9mxDXDf6AU0cN/RPBjb9qSHDcWZHGzUCIG2Es59z8ugGrDY+pxLQnwfotadxd+Uy 45 | v/Ow5T0q5gIJAiEAyS4RaI9YG8EWx/2w0T67ZUVAw8eOMB6BIUg0Xcu+3okCIBOs 46 | /5OiPgoTdSy7bcF9IGpSE8ZgGKzgYQVZeN97YE00 47 | -----END EC PRIVATE KEY----- 48 | 49 | # S5 50 | -----BEGIN PGP PRIVATE KEY BLOCK----- 51 | MIIBOgIBAAJBAKj34GkxFhD90vcNLYLInFEX6Ppy1tPf9Cnzj4p4WGeKLs1Pt8Qu 52 | KUpRKfFLfRYC9AIKjbJTWit+CqvjWYzvQwECAwEAAQJAIJLixBy2qpFoS4DSmoEm 53 | o3qGy0t6z09AIJtH+5OeRV1be+N4cDYJKffGzDa88vQENZiRm0GRq6a+HPGQMd2k 54 | TQIhAKMSvzIBnni7ot/OSie2TmJLY4SwTQAevXysE2RbFDYdAiEBCUEaRQnMnbp7 55 | 9mxDXDf6AU0cN/RPBjb9qSHDcWZHGzUCIG2Es59z8ugGrDY+pxLQnwfotadxd+Uy 56 | v/Ow5T0q5gIJAiEAyS4RaI9YG8EWx/2w0T67ZUVAw8eOMB6BIUg0Xcu+3okCIBOs 57 | /5OiPgoTdSy7bcF9IGpSE8ZgGKzgYQVZeN97YE00 58 | -----END PGP PRIVATE KEY BLOCK----- 59 | 60 | # S19 61 | https://login:password@example.com 62 | ftp://login:password@example.com 63 | amqp://login:$password@example.com 64 | amqp://login:${password}@example.com 65 | amqp://login:%password%@example.com 66 | // redis://:@: 67 | -------------------------------------------------------------------------------- /tests/fixtures/regexes.json: 
-------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "S0", 4 | "name": "Slack Token", 5 | "confidence": 9, 6 | "pattern": "xox(?:a|b|p|o|s|r)-(?:\\d+-)+[a-z0-9]+" 7 | }, 8 | { 9 | "id": "S1", 10 | "name": "RSA private key", 11 | "confidence": 9, 12 | "pattern": "-----BEGIN RSA PRIVATE KEY-----[\\S\\s]{15,}?-----END RSA PRIVATE KEY-----" 13 | } 14 | ] -------------------------------------------------------------------------------- /tests/fixtures/service.postman_collection.json: -------------------------------------------------------------------------------- 1 | { 2 | 3 | } -------------------------------------------------------------------------------- /tests/scan_modes/test_cli_scan_mode.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.config import Config, Output 4 | from deepsecrets.core.engines.regex import RegexEngine 5 | from deepsecrets.core.rulesets.false_findings import FalseFindingsBuilder 6 | from deepsecrets.core.rulesets.regex import RegexRulesetBuilder 7 | from deepsecrets.scan_modes.cli import CliScanMode 8 | 9 | FP_TO_BE_EXCLUDED = '/app/tests/fixtures/service.postman_collection.json' 10 | 11 | @pytest.fixture(scope='module') 12 | def config(): 13 | config = Config() 14 | config.set_workdir('tests/fixtures') 15 | config.engines.append(RegexEngine) 16 | config.add_ruleset(RegexRulesetBuilder, ['tests/fixtures/regexes.json']) 17 | config.add_ruleset(FalseFindingsBuilder, ['tests/fixtures/false_findings.json']) 18 | config.output = Output(type='json', path='tests/1.json') 19 | return config 20 | 21 | 22 | def test_cli_scan_mode(config: Config): 23 | mode = CliScanMode(config=config) 24 | assert FP_TO_BE_EXCLUDED in mode.filepaths 25 | 26 | config.set_global_exclusion_paths(['tests/fixtures/excluded_paths.json']) 27 | mode = CliScanMode(config=config) 28 | assert FP_TO_BE_EXCLUDED not in mode.filepaths 29 | 30 | findings = [] 31 | for file in mode.filepaths: 32 | findings.extend(mode._per_file_analyzer(mode.analyzer_bundle(), file)) 33 | 34 | assert len(findings) == 3 35 | 36 | # checking through the 'run' method 37 | # false findings checked at the end 38 | findings = [] 39 | findings = mode.run() 40 | 41 | assert len(findings) == 2 42 | --------------------------------------------------------------------------------