├── .devcontainer └── devcontainer.json ├── .github └── workflows │ └── python-package.yml ├── .gitignore ├── .vscode ├── launch.json └── settings.json ├── Dockerfile ├── LICENSE ├── README.md ├── deepsecrets ├── __init__.py ├── __main__.py ├── cli.py ├── config.py ├── core │ ├── engines │ │ ├── __init__.py │ │ ├── hashed_secret.py │ │ ├── iengine.py │ │ ├── regex.py │ │ └── semantic.py │ ├── helpers │ │ ├── content_analyzer.py │ │ └── entropy.py │ ├── model │ │ ├── __init__.py │ │ ├── file.py │ │ ├── finding.py │ │ ├── rules │ │ │ ├── __init__.py │ │ │ ├── exlcuded_path.py │ │ │ ├── false_finding.py │ │ │ ├── hashed_secret.py │ │ │ ├── hashing.py │ │ │ ├── regex.py │ │ │ ├── rule.py │ │ │ └── semantic.py │ │ ├── semantic.py │ │ └── token.py │ ├── modes │ │ └── iscan_mode.py │ ├── rulesets │ │ ├── excluded_paths.py │ │ ├── false_findings.py │ │ ├── hashed_secrets.py │ │ ├── ibuilder.py │ │ └── regex.py │ ├── tokenizers │ │ ├── __init__.py │ │ ├── full_content.py │ │ ├── helpers │ │ │ ├── __init__.py │ │ │ ├── semantic │ │ │ │ ├── __init__.py │ │ │ │ ├── language.py │ │ │ │ └── var_detection │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── detector.py │ │ │ │ │ └── rules.py │ │ │ ├── spot_improvements.py │ │ │ └── type_stream.py │ │ ├── itokenizer.py │ │ ├── lexer.py │ │ ├── per_line.py │ │ └── per_word.py │ └── utils │ │ ├── __init__.py │ │ ├── cpu.py │ │ ├── exceptions.py │ │ ├── file_analyzer.py │ │ ├── fs.py │ │ ├── guess_filetype.py │ │ ├── hashing.py │ │ └── lexer_finder.py ├── rules │ ├── excluded_paths.json │ └── regexes.json └── scan_modes │ └── cli.py ├── poetry.lock ├── pyproject.toml ├── setup.cfg └── tests ├── __init__.py ├── cli └── test_cli.py ├── config └── test_config.py ├── core ├── engines │ ├── hashed_secret │ │ └── test_hs.py │ ├── regex │ │ └── test_regex.py │ └── semantic │ │ └── test_semantic.py ├── helpers │ ├── test_content_analyzer.py │ └── test_entropy.py ├── model │ ├── test_file.py │ ├── test_finding.py │ └── test_token.py ├── tokenizers │ ├── lexer │ │ └── variable_detection │ │ │ ├── test_conf.py │ │ │ ├── test_cs.py │ │ │ ├── test_go.py │ │ │ ├── test_html.py │ │ │ ├── test_java.py │ │ │ ├── test_js.py │ │ │ ├── test_php.py │ │ │ ├── test_py.py │ │ │ ├── test_sh.py │ │ │ └── test_swift.py │ ├── test_full_content.py │ └── test_per_line.py └── utils │ ├── test_file_analyzer.py │ ├── test_fs.py │ └── test_lexer_finder.py ├── fixtures ├── 1.conf ├── 1.cs ├── 1.erb ├── 1.go ├── 1.html ├── 1.ini ├── 1.java ├── 1.js ├── 1.json ├── 1.jsx ├── 1.php ├── 1.pp ├── 1.py ├── 1.sh ├── 1.swift ├── 1.toml ├── 1.xml ├── 1.yaml ├── 1.yml ├── 2.go ├── 2.js ├── 2.json ├── 2.jsx ├── 2.py ├── 2.sh ├── 2.toml ├── 2.xml ├── 3.go ├── 3.js ├── 3.jsx ├── 3.py ├── 4.go ├── 4.js ├── 4.py ├── 5.go ├── 6.go ├── 7.go ├── excluded_paths.json ├── extless │ ├── json │ └── radius ├── false_findings.json ├── hashed_secrets.json ├── regex_checks.txt ├── regexes.json └── service.postman_collection.json └── scan_modes └── test_cli_scan_mode.py /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "DeepSecrets Devcontainer", 3 | "build": { 4 | "context": "..", 5 | "dockerfile": "../Dockerfile" 6 | }, 7 | 8 | "workspaceMount": "source=${localWorkspaceFolder},target=/app,type=bind,consistency=delegated", 9 | "workspaceFolder": "/app", 10 | "customizations": { 11 | "vscode": { 12 | "extensions": [ 13 | "ms-python.python", 14 | "ms-python.vscode-pylance" 15 | ] 16 | } 17 | }, 18 | "postCreateCommand": "poetry install --no-root --with 
test,dev" 19 | } 20 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | name: Build and run tests 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | steps: 13 | 14 | - name: Checkout (GitHub) 15 | uses: actions/checkout@v3 16 | 17 | - name: Build and run dev container task 18 | uses: devcontainers/ci@v0.3 19 | with: 20 | runCmd: | 21 | pytest --junitxml=pytest.xml --cov-report=term-missing:skip-covered --cov=deepsecrets 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | *.egg-info/ 21 | .installed.cfg 22 | *.egg 23 | 24 | # Installer logs 25 | pip-log.txt 26 | pip-delete-this-directory.txt 27 | 28 | # Unit test / coverage reports 29 | .pytest_cache/ 30 | htmlcov/ 31 | .tox/ 32 | .coverage 33 | .coverage.* 34 | .cache 35 | nosetests.xml 36 | coverage.xml 37 | *,cover 38 | .hypothesis/ 39 | 40 | # dotenv 41 | .env 42 | 43 | # virtualenv 44 | venv/ 45 | 46 | # intellij ide 47 | .idea/ 48 | 49 | # auto generated docs 50 | doc.html 51 | 52 | # misc 53 | /workdir 54 | *.zip 55 | playground/ 56 | .mypy* 57 | *.db 58 | pytest* -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | 5 | { 6 | "name": "Python: Current File", 7 | "type": "python", 8 | "request": "launch", 9 | "program": "${file}", 10 | "console": "integratedTerminal", 11 | "justMyCode": true, 12 | "env": { 13 | "PYTHONPATH": "/app/deepsecrets/" 14 | } 15 | }, 16 | { 17 | "name": "Python: Module", 18 | "type": "python", 19 | "request": "launch", 20 | "module": "deepsecrets", 21 | "args": [ 22 | "--outfile", 23 | "test.json", 24 | "--target-dir", 25 | "/app/tests/fixtures/", 26 | "--outfile", 27 | "./fdsafad.json", 28 | "--verbose", 29 | "--reflect-findings-in-return-code" 30 | ], 31 | "justMyCode": true 32 | }, 33 | { 34 | "name": "Python: File", 35 | "type": "python", 36 | "request": "launch", 37 | "program": "${file}", 38 | "justMyCode": true 39 | } 40 | ] 41 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.testing.unittestEnabled": false, 3 | "python.testing.pytestEnabled": true, 4 | "python.testing.pytestArgs": [ 5 | "tests" 6 | ], 7 | "python.formatting.provider": "black", 8 | "editor.codeActionsOnSave": { 9 | "source.organizeImports": true, 10 | "source.unusedImports": true 11 | }, 12 | "python.linting.enabled": true, 13 | "python.linting.mypyEnabled": true, 14 | "python.linting.flake8Enabled": true 15 | } -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.4-slim-bullseye 2 | 3 | ENV 
PROJECT_ROOT /app 4 | WORKDIR $PROJECT_ROOT 5 | 6 | RUN apt update && apt install -y gcc g++ 7 | RUN pip install poetry 8 | 9 | COPY pyproject.toml $PROJECT_ROOT/ 10 | COPY *.lock $PROJECT_ROOT/ 11 | 12 | RUN poetry config virtualenvs.create false 13 | RUN poetry update && poetry install --no-root 14 | 15 | COPY . $PROJECT_ROOT 16 | 17 | ENV PATH="$PROJECT_ROOT/bin:$PATH" 18 | ENV PYTHONPATH="$PROJECT_ROOT:$PYTHONPATH" -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Avito 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepSecrets - a better tool for secret scanning 2 | 3 | > [!WARNING] 4 | > Active development was switched to the creator's fork at https://github.com/ntoskernel/deepsecrets. This repository will not receive any updates. 
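For context, the scanner is driven by the `DeepSecretsCliTool` class in `deepsecrets/cli.py` below. A minimal sketch of a programmatic invocation, assuming the package is installed and using only the two required arguments (`--target-dir` and `--outfile`, with the target path shown as a placeholder), could look like this:

```python
from deepsecrets.cli import DeepSecretsCliTool

# The first list element plays the role of argv[0]; parse_arguments() reads self.args[1:].
# --target-dir and --outfile are both required by the argument parser;
# regex, semantic and excluded-path checks default to the built-in rulesets.
DeepSecretsCliTool([
    'deepsecrets',
    '--target-dir', '/path/to/code',   # placeholder path
    '--outfile', 'report.json',
]).start()
```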
-------------------------------------------------------------------------------- /deepsecrets/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import platform 4 | 5 | MODULE_NAME = 'deepsecrets' 6 | 7 | 8 | def build_logger() -> logging.Logger: 9 | logging.basicConfig(format=' %(message)s', level=logging.INFO) 10 | logger = logging.getLogger(MODULE_NAME) 11 | return logger 12 | 13 | 14 | logger = build_logger() 15 | 16 | 17 | def set_logging_level(level: int) -> None: 18 | logger.setLevel(level) 19 | for handler in logger.handlers: 20 | if isinstance(handler, type(logging.StreamHandler())): 21 | handler.setLevel(level) 22 | handler.setFormatter(logging.Formatter('DS-%(levelname)s: %(message)s')) 23 | 24 | logger.debug('Debug logging enabled') 25 | 26 | 27 | PROFILER_ON = False 28 | BASE_DIR = os.getcwd() 29 | 30 | PLATFORM = platform.system() -------------------------------------------------------------------------------- /deepsecrets/__main__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from deepsecrets import logger 3 | 4 | message = \ 5 | '\n'\ 6 | '=================== REPOSITORY MOVED ===================\n' \ 7 | 'Active development was switched to the creator\'s fork at\n' \ 8 | ' https://github.com/ntoskernel/deepsecrets\n\n' \ 9 | ' This repository will no longer receive updates \n'\ 10 | '=========================================================\n' 11 | 12 | 13 | logger.error(message) 14 | 15 | sys.exit() 16 | -------------------------------------------------------------------------------- /deepsecrets/cli.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import sys 5 | from argparse import RawTextHelpFormatter 6 | from typing import List 7 | 8 | from deepsecrets import MODULE_NAME, logger, set_logging_level 9 | from deepsecrets.config import Config, Output 10 | from deepsecrets.core.engines.regex import RegexEngine 11 | from deepsecrets.core.engines.semantic import SemanticEngine 12 | from deepsecrets.core.model.finding import Finding, FindingResponse 13 | from deepsecrets.core.rulesets.false_findings import FalseFindingsBuilder 14 | from deepsecrets.core.rulesets.hashed_secrets import HashedSecretsRulesetBuilder 15 | from deepsecrets.core.rulesets.regex import RegexRulesetBuilder 16 | from deepsecrets.core.utils.fs import get_abspath, get_path_inside_package 17 | from deepsecrets.scan_modes.cli import CliScanMode 18 | 19 | DISABLED = 'disabled' 20 | FINDINGS_DETECTED_RETURN_CODE = 66 21 | 22 | 23 | class DeepSecretsCliTool: 24 | argparser: argparse.ArgumentParser 25 | 26 | def __init__(self, args: List[str]): 27 | self.args = args 28 | self._build_argparser() 29 | 30 | def say_hello(self) -> None: 31 | bar = '-' 32 | logger.info('') 33 | logger.info(f'{" "*8}{bar*25} DeepSecrets {bar*25}') 34 | logger.info(f'{" "*10}A better tool for secret scanning') 35 | logger.info(f'{" "*10}version 1.1') 36 | logger.info(f'') 37 | logger.info(f'{" "*8}{bar*63}') 38 | 39 | 40 | def _build_argparser(self) -> None: 41 | parser = argparse.ArgumentParser( 42 | prog=MODULE_NAME, 43 | description='DeepSecrets - a better tool for secrets search', 44 | formatter_class=RawTextHelpFormatter, 45 | ) 46 | 47 | parser.add_argument( 48 | '--target-dir', 49 | required=True, 50 | type=str, 51 | help="Path to the directory with code you'd like to analyze", 52 | ) 53 | 54 | 
parser.add_argument( 55 | '--regex-rules', 56 | nargs='*', 57 | type=str, 58 | help='Paths to your Regex Rulesets.\n' 59 | "- Set 'disable' to turn off regex checks\n" 60 | '- Ignore this argument to use the built-in ruleset.\n' 61 | "- Using your own rulesets disables the default one. Add 'built-in' to the args list to enable it\n" 62 | 'eq. --regex-rules built-in /root/my_regex_rules.json\n', 63 | default=['built-in'], 64 | ) 65 | 66 | parser.add_argument( 67 | '--hashed-values', 68 | nargs='*', 69 | type=str, 70 | help='Path to your Hashed Values set.\n' "Don't set any value to disable this checks\n", 71 | ) 72 | 73 | parser.add_argument( 74 | '--semantic-analysis', 75 | nargs='*', 76 | type=str, 77 | help='Controls semantic checks (enabled by default)\n' 78 | "- Set 'disable' to turn off semantic checks (not recommended)\n" 79 | 'eq. --semantic-analysis disable', 80 | default=['built-in'], 81 | ) 82 | 83 | parser.add_argument( 84 | '--excluded-paths', 85 | nargs='*', 86 | type=str, 87 | help='Paths to your Excluded Paths file.\n' 88 | "- Set 'disable' to scan everything (may affect performance)\n" 89 | '- Ignore this argument to use the built-in ruleset.\n' 90 | "- Using your own rulesets disables the default one. Add 'built-in' to the args list to enable it\n" 91 | 'eq. --excluded-paths built-in /root/my_excluded_paths.json\n', 92 | default=['built-in'], 93 | ) 94 | 95 | parser.add_argument( 96 | '--false-findings', 97 | nargs='*', 98 | type=str, 99 | help='Paths to your False Findings file.\n' 100 | 'Use to filter findings you sure are false positives\n' 101 | 'File syntax is the same as in regex rules\n' 102 | 'eq. --false-findings /root/my_false_findings.json\n', 103 | ) 104 | 105 | parser.add_argument( 106 | '-v', 107 | '--verbose', 108 | action='store_true', 109 | help='Verbose mode', 110 | ) 111 | 112 | parser.add_argument( 113 | '--reflect-findings-in-return-code', 114 | action='store_true', 115 | help='Return code of 66 if any findings are detected during scan', 116 | ) 117 | 118 | parser.add_argument( 119 | '--process-count', 120 | type=int, 121 | default=0, 122 | help='Number of processes in a pool for file analysis (one process per file)\n' 123 | 'Default: number of processor cores of your machine or cpu limit of your container from cgroup.\n' 124 | 'If all checks are failed the fallback value is 4' 125 | ) 126 | 127 | parser.add_argument('--outfile', required=True, type=str) 128 | parser.add_argument('--outformat', default='json', type=str, choices=['json']) 129 | self.argparser = parser 130 | 131 | def parse_arguments(self) -> None: 132 | user_args = self.argparser.parse_args(args=self.args[1:]) 133 | self.say_hello() 134 | 135 | if user_args.verbose: 136 | set_logging_level(logging.DEBUG) 137 | 138 | self.config = Config() 139 | self.config.set_workdir(user_args.target_dir) 140 | self.config.set_process_count(user_args.process_count) 141 | self.config.output = Output(type=user_args.outformat, path=user_args.outfile) 142 | 143 | if user_args.reflect_findings_in_return_code: 144 | self.config.return_code_if_findings = True 145 | 146 | EXCLUDE_PATHS_BUILTIN = get_path_inside_package('rules/excluded_paths.json') 147 | if user_args.excluded_paths is not None: 148 | rules = [rule.replace('built-in', EXCLUDE_PATHS_BUILTIN) for rule in user_args.excluded_paths] 149 | self.config.set_global_exclusion_paths(rules) 150 | 151 | self.config.engines = [] 152 | 153 | REGEX_BUILTIN_RULESET = get_path_inside_package('rules/regexes.json') 154 | if user_args.regex_rules is not None: 155 | 
rules = [rule.replace('built-in', REGEX_BUILTIN_RULESET) for rule in user_args.regex_rules] 156 | self.config.engines.append(RegexEngine) 157 | self.config.add_ruleset(RegexRulesetBuilder, rules) 158 | 159 | conf_semantic_analysis = user_args.semantic_analysis 160 | if conf_semantic_analysis is not None and conf_semantic_analysis != DISABLED: 161 | self.config.engines.append(SemanticEngine) 162 | 163 | conf_hashed_ruleset = user_args.hashed_values 164 | if conf_hashed_ruleset is not None and conf_hashed_ruleset != DISABLED: 165 | self.config.engines.append(RegexEngine) 166 | self.config.add_ruleset(HashedSecretsRulesetBuilder, conf_hashed_ruleset) 167 | 168 | conf_false_findings_ruleset = user_args.false_findings 169 | if conf_false_findings_ruleset is not None: 170 | self.config.add_ruleset(FalseFindingsBuilder, conf_false_findings_ruleset) 171 | 172 | def start(self) -> None: # pragma: nocover 173 | try: 174 | self.parse_arguments() 175 | except Exception as e: 176 | logger.exception(e) 177 | sys.exit(1) 178 | 179 | logger.info(f'Starting scan against {self.config.workdir_path} using {self.config.process_count} processes...') 180 | if self.config.return_code_if_findings is True: 181 | logger.info(f'[!] The tool will return code of {FINDINGS_DETECTED_RETURN_CODE} if any findings are detected\n') 182 | 183 | logger.info(80 * '=') 184 | findings: List[Finding] = CliScanMode(config=self.config).run() 185 | logger.info(80 * '=') 186 | logger.info('Scanning finished') 187 | logger.info(f'{len(findings)} potential secrets found') 188 | report_path = get_abspath(self.config.output.path) 189 | 190 | logger.info(f'Writing report to {report_path}') 191 | with open(report_path, 'w+') as f: 192 | json.dump(FindingResponse.from_list(findings), f) 193 | 194 | logger.info('Done') 195 | 196 | if len(findings) > 0 and self.config.return_code_if_findings: 197 | sys.exit(FINDINGS_DETECTED_RETURN_CODE) -------------------------------------------------------------------------------- /deepsecrets/config.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Type 2 | 3 | from pydantic import BaseModel 4 | from deepsecrets.core.utils.cpu import CpuHelper 5 | 6 | from deepsecrets.core.utils.exceptions import FileNotFoundException 7 | from deepsecrets.core.utils.fs import get_abspath, path_exists 8 | 9 | FALLBACK_PROCESS_COUNT = 4 10 | 11 | class Output(BaseModel): 12 | type: str 13 | path: str 14 | 15 | 16 | class Config: 17 | workdir_path: str 18 | engines: List[Type] = [] 19 | rulesets: Dict[Type, List[str]] = {} 20 | global_exclusion_paths: List[str] = [] 21 | output: Output 22 | process_count: int 23 | return_code_if_findings: bool 24 | 25 | def __init__(self) -> None: 26 | self.engines = [] 27 | self.rulesets = {} 28 | self.global_exclusion_paths = [] 29 | self.return_code_if_findings = False 30 | # equals to CPU count 31 | self.process_count = FALLBACK_PROCESS_COUNT 32 | 33 | def _set_path(self, path: str, field: str) -> None: 34 | if not path_exists(path): 35 | raise FileNotFoundException(f'{field} path does not exist ({path})') 36 | setattr(self, field, get_abspath(path)) 37 | 38 | def set_workdir(self, path: str) -> None: 39 | self._set_path(path, 'workdir_path') 40 | 41 | def set_process_count(self, count: int): 42 | if count > 0: 43 | self.process_count = count 44 | return 45 | 46 | count = CpuHelper().get_limit() 47 | if count > 0: 48 | self.process_count = count 49 | return 50 | 51 | self.process_count = FALLBACK_PROCESS_COUNT 52 | 53 | 
54 | def set_global_exclusion_paths(self, paths: List[str]) -> None: 55 | for path in paths: 56 | if not path_exists(path): 57 | raise FileNotFoundException(f'global_exclusion_path does not exist ({path})') 58 | self.global_exclusion_paths.append(path) 59 | 60 | self.global_exclusion_paths = list(set(self.global_exclusion_paths)) 61 | 62 | def add_ruleset(self, type: Type, paths: List[str] = []) -> None: 63 | self._validate_paths(paths) 64 | self.rulesets[type] = [get_abspath(path) for path in paths] 65 | 66 | def _validate_paths(self, paths: List[str]) -> None: 67 | if paths is None: 68 | return 69 | 70 | for path in paths: 71 | if path_exists(path): 72 | continue 73 | raise FileNotFoundException(f'File {path} does not exist') 74 | 75 | return 76 | -------------------------------------------------------------------------------- /deepsecrets/core/engines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avito-tech/deepsecrets/4afd597d3997a2bdbac8059e405659715faa51d4/deepsecrets/core/engines/__init__.py -------------------------------------------------------------------------------- /deepsecrets/core/engines/hashed_secret.py: -------------------------------------------------------------------------------- 1 | from typing import List, Sequence 2 | 3 | from deepsecrets.core.engines.iengine import IEngine 4 | from deepsecrets.core.model.finding import Finding 5 | from deepsecrets.core.model.rules.hashed_secret import HashedSecretRule 6 | from deepsecrets.core.model.token import Token 7 | 8 | 9 | class HashedSecretEngine(IEngine): 10 | name = 'hashed' 11 | description = 'Scans by regex patterns provided by HashedSecretRules' 12 | ruleset: Sequence[HashedSecretRule] 13 | 14 | def search(self, token: Token) -> List[Finding]: 15 | results = [] 16 | for rule in self.ruleset: 17 | if not self.is_rule_applicable(token=token, rule=rule): 18 | continue 19 | 20 | token.calculate_hashed_value(rule.algorithm) 21 | results.extend(self._check_rule(token, rule)) 22 | 23 | return results 24 | 25 | def is_rule_applicable(self, token: Token, rule: HashedSecretRule) -> bool: 26 | if rule.token_length != token.length: 27 | return False 28 | return super().is_rule_applicable(token=token, rule=rule) 29 | 30 | def _check_rule(self, token: Token, rule: HashedSecretRule) -> List[Finding]: 31 | findings: List[Finding] = [] 32 | 33 | if token.hashed_value != rule.hashed_val: 34 | return findings 35 | 36 | findings.append( 37 | Finding( 38 | rules=[rule], 39 | detection=token.content, 40 | start_pos=0, 41 | end_pos=token.length, 42 | file=None, # filled higher 43 | final_rule=None, # filled higher, 44 | full_line=None, # filled higher 45 | linum=None, # filled higher 46 | ) 47 | ) 48 | 49 | return findings 50 | -------------------------------------------------------------------------------- /deepsecrets/core/engines/iengine.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from typing import List 3 | 4 | from deepsecrets.core.model.finding import Finding 5 | from deepsecrets.core.model.rules.rule import Rule 6 | from deepsecrets.core.model.token import Token 7 | 8 | 9 | class IEngine: 10 | name: str 11 | ruleset: List[Rule] 12 | 13 | def __init__(self, ruleset: List = []) -> None: 14 | self.ruleset = ruleset 15 | 16 | @abstractmethod 17 | def search(self, token: Token) -> List[Finding]: 18 | pass 19 | 20 | def is_rule_applicable(self, token: Token, rule: Rule) -> bool: 21 
| file_path = token.file.path 22 | if len(rule.applicable_file_patterns) == 0: 23 | return True 24 | 25 | for file_pattern in rule.applicable_file_patterns: 26 | matches = file_pattern.search(file_path) 27 | if matches is not None: 28 | return True 29 | 30 | return False 31 | 32 | def is_token_false_positive(self, token: Token) -> bool: 33 | for false_token in self.false_tokens: 34 | if len(false_token.match(token.content)) > 0: 35 | return True 36 | return False 37 | 38 | def __hash__(self) -> int: # pragma: nocover 39 | return hash(type(self)) 40 | 41 | def __repr__(self) -> str: # pragma: no cover 42 | return self.__class__.__name__ 43 | -------------------------------------------------------------------------------- /deepsecrets/core/engines/regex.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | 3 | from deepsecrets.core.engines.iengine import IEngine 4 | from deepsecrets.core.model.finding import Finding 5 | from deepsecrets.core.model.rules.regex import RegexRule 6 | from deepsecrets.core.model.token import Token 7 | 8 | 9 | class RegexEngine(IEngine): 10 | name = 'regex' 11 | description = 'Scans by regex patterns provided by RegexRules' 12 | 13 | def search(self, token: Token) -> List[Finding]: 14 | results = [] 15 | 16 | for rule in self.ruleset: 17 | if not self.is_rule_applicable(token=token, rule=rule): 18 | continue 19 | 20 | results.extend(self._check_rule(token, rule)) # type: ignore 21 | return results 22 | 23 | def _check_rule(self, token: Token, rule: RegexRule) -> List[Finding]: 24 | findings: List[Finding] = [] 25 | 26 | # rule.match returns an array of (start, end) spans 27 | detects: List[Tuple[int, int]] = rule.match(token) 28 | 29 | for start, end in detects: 30 | findings.append( 31 | Finding( 32 | rules=[rule], 33 | detection=token.content[start:end], 34 | start_pos=start, 35 | end_pos=end, 36 | ) 37 | ) 38 | 39 | return findings -------------------------------------------------------------------------------- /deepsecrets/core/engines/semantic.py: -------------------------------------------------------------------------------- 1 | import regex as re 2 | from typing import List 3 | 4 | from deepsecrets import logger 5 | from deepsecrets.core.engines.iengine import IEngine 6 | from deepsecrets.core.helpers.content_analyzer import ContentAnalyzer 7 | from deepsecrets.core.helpers.entropy import EntropyHelper 8 | from deepsecrets.core.model.finding import Finding 9 | from deepsecrets.core.model.rules.rule import Rule 10 | from deepsecrets.core.model.token import Token 11 | 12 | filenames_ignorelist = [ 13 | 'package-lock.json', 14 | 'package.json', 15 | ] 16 | 17 | false_starting_sequences = [ 18 | '${', 19 | 'true', 20 | '%env', 21 | ] 22 | 23 | useless_values = [ 24 | 'null', 25 | 'bearer', 26 | 'restore_password', 27 | ] 28 | 29 | var_name_showstoppers = [ 30 | 'public', 31 | 'path', 32 | 'location', 33 | 'field', 34 | 'data' 35 | ] 36 | 37 | 38 | class SemanticEngine(IEngine): 39 | name = 'semantic' 40 | entropy_threshold = 4.15 41 | dangerous_variable_regex = re.compile( 42 | r'(secret|passw|\bpass\b|\btoken\b|\baccess\b|\bpwd\b|rivateke|cesstoke|authkey|\bsecret\b|\bkey\b).{0,15}', 43 | re.IGNORECASE, 44 | ) 45 | useless_value_regex = re.compile(r'^[^A-Za-z0-9]*$|^%.*%$|^\[.*\]$|^{.*}$', re.IGNORECASE) 46 | subengine: IEngine 47 | 48 | def __init__(self, subengine: IEngine, **kwargs) -> None: 49 | super().__init__(**kwargs) 50 | self.subengine = subengine 51 | 52 | # token is a STRING 
with potential 'semantic' extension 53 | def search(self, token: Token) -> List[Finding]: 54 | findings: List[Finding] = [] 55 | 56 | if token.length == token.file.length: 57 | return findings 58 | 59 | for fname in filenames_ignorelist: 60 | if fname in token.file.path: 61 | return findings 62 | 63 | if token.semantic is not None and token.semantic.creds_probability == 9: 64 | findings.append( 65 | Finding( 66 | detection=token.content, 67 | start_pos=0, 68 | end_pos=len(token.content), 69 | rules=[Rule(id='S107', name='Dangerous condition', confidence=9)], 70 | ) 71 | ) 72 | 73 | try: 74 | dangerous_variable = self._if_dangerous_variable(token) 75 | 76 | if self.subengine is not None: # pragma: nocover 77 | content_findings = ContentAnalyzer(self.subengine).analyze(token) 78 | if content_findings is not None: 79 | findings.extend(content_findings) 80 | 81 | if not dangerous_variable: 82 | return findings 83 | 84 | if len(token.content) == 1: 85 | return findings 86 | 87 | if len(token.content.split(' ')) > 1: 88 | return findings 89 | 90 | if token.content in useless_values: 91 | return findings 92 | 93 | if len(re.findall(self.useless_value_regex, token.content)) > 0: 94 | return findings 95 | 96 | entropy = EntropyHelper.get_for_string(token.content) 97 | if self._is_high_entropy(entropy): 98 | findings.append( 99 | Finding( 100 | detection=token.content, 101 | start_pos=0, 102 | end_pos=len(token.content), 103 | rules=[Rule(id='S105', name='Entropy+Var naming', confidence=-1)], 104 | ) 105 | ) 106 | else: 107 | for fss in false_starting_sequences: 108 | if token.content.startswith(fss): 109 | return findings 110 | 111 | findings.append( 112 | Finding( 113 | detection=token.content, 114 | start_pos=0, 115 | end_pos=len(token.content), 116 | rules=[Rule(id='S106', name='Var naming', confidence=-1)], 117 | ) 118 | ) 119 | 120 | except Exception as e: 121 | logger.error('Problem during Entropy check on token') 122 | 123 | return findings 124 | 125 | def _is_high_entropy(self, entropy: float) -> bool: 126 | return True if entropy > self.entropy_threshold else False 127 | 128 | def _if_dangerous_variable(self, token: Token) -> bool: 129 | if token.semantic is None: 130 | return False 131 | 132 | if token.semantic.creds_probability == 9: 133 | return True 134 | 135 | cleaned_up_varname, name_parts = self.normalize_punctuation(token.semantic.name) 136 | badvar = self.dangerous_variable_regex.findall(cleaned_up_varname) 137 | if len(badvar) == 0: 138 | return False 139 | 140 | if any(part in var_name_showstoppers for part in name_parts): 141 | return False 142 | 143 | return True 144 | 145 | def normalize_punctuation(self, string: str): 146 | normalized = string.replace(' ', '_').replace('-', ' ').replace('_', ' ') 147 | parts = self.__camel_case_divide(normalized).split(' ') 148 | return normalized.lower(), parts 149 | 150 | def __camel_case_divide(self, string: str): 151 | final = '' 152 | for i, _ in enumerate(string): 153 | final += string[i].lower() 154 | if i == len(string) - 1: 155 | continue 156 | 157 | if string[i].islower() and string[i+1].isupper(): 158 | final += ' ' 159 | return final -------------------------------------------------------------------------------- /deepsecrets/core/helpers/content_analyzer.py: -------------------------------------------------------------------------------- 1 | import base64 2 | from typing import Callable, List, Optional 3 | 4 | from deepsecrets.core.engines.iengine import IEngine 5 | from deepsecrets.core.model.finding import Finding 6 | from 
deepsecrets.core.model.token import Token 7 | 8 | 9 | class ContentAnalyzer: 10 | engine: IEngine 11 | flags: dict[str, bool] 12 | token: Token 13 | uncover_tactics: List[Callable] 14 | 15 | def __init__(self, engine: IEngine) -> None: 16 | self.engine = engine 17 | self.uncover_tactics = [self._check_by_base64] 18 | 19 | def analyze(self, token: Token) -> List[Finding]: 20 | self.token = token 21 | self.uncover() 22 | return self.engine.search(self.token) if self.engine is not None else [] 23 | 24 | def uncover(self) -> None: 25 | for tactic in self.uncover_tactics: 26 | uncovered_str = tactic() 27 | if uncovered_str is None: 28 | continue 29 | 30 | if len(uncovered_str) < 5: 31 | continue 32 | 33 | self.token.uncovered_content.append(uncovered_str) 34 | 35 | def _check_by_base64(self) -> Optional[str]: 36 | try: 37 | return base64.b64decode(self.token.content).decode('UTF-8') 38 | except Exception: 39 | return None 40 | -------------------------------------------------------------------------------- /deepsecrets/core/helpers/entropy.py: -------------------------------------------------------------------------------- 1 | import math 2 | import regex as re 3 | from enum import Enum 4 | from typing import Optional 5 | 6 | 7 | class IteratorType(Enum): 8 | BASE64 = 'base64' 9 | HEX = 'hex' 10 | 11 | 12 | class EntropyHelper: 13 | B64_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=' 14 | B64_REGEX = re.compile(r'[A-Za-z0123456789+/=]{20,}') 15 | 16 | HEX_CHARS = '1234567890abcdefABCDEF' 17 | HEX_REGEX = re.compile(r'[A-Fa-f0123456789]{20,}') 18 | 19 | @classmethod 20 | def get_for_string(cls, str: str, with_iterator: Optional[str] = None) -> float: 21 | iterator = None 22 | i_type = None 23 | if with_iterator is not None: 24 | i_type = IteratorType(with_iterator) 25 | 26 | if i_type == IteratorType.BASE64: 27 | iterator = cls.B64_CHARS 28 | 29 | if i_type == IteratorType.HEX: 30 | iterator = cls.HEX_CHARS 31 | 32 | return cls._shannon_entropy(str, iterator) 33 | 34 | @classmethod 35 | def _shannon_entropy(cls, data: str, iterator: Optional[str] = None) -> float: 36 | """ 37 | Borrowed from http://blog.dkbza.org/2007/05/scanning-data-for-entropy-anomalies.html 38 | """ 39 | if not data: 40 | return 0 41 | entropy = 0 42 | if iterator: 43 | for x in iterator: 44 | p_x = float(data.count(x)) / len(data) 45 | if p_x > 0: 46 | entropy += -p_x * math.log(p_x, 2) 47 | return entropy 48 | 49 | unique_base = set(data) 50 | M = len(data) 51 | entropy_list = [] 52 | # Number of residues in column 53 | for base in unique_base: 54 | n_i = data.count(base) # Number of residues of type i 55 | P_i = n_i / float(M) # n_i(Number of residues of type i) / M(Number of residues in column) 56 | entropy_i = P_i * (math.log(P_i, 2)) 57 | entropy_list.append(entropy_i) 58 | 59 | entropy = -(sum(entropy_list)) 60 | 61 | return entropy 62 | -------------------------------------------------------------------------------- /deepsecrets/core/model/__init__.py: -------------------------------------------------------------------------------- 1 | from deepsecrets.core.model.finding import Finding 2 | 3 | Finding.update_forward_refs() 4 | -------------------------------------------------------------------------------- /deepsecrets/core/model/file.py: -------------------------------------------------------------------------------- 1 | import regex as re 2 | from typing import Dict, Optional, Tuple 3 | 4 | from deepsecrets import logger 5 | from deepsecrets.core.utils.fs import get_abspath 6 | 7 | 8 | 
class File: 9 | relative_path: str 10 | path: str 11 | content: str = '' 12 | length: int 13 | line_offsets: Dict[int, Tuple[int, int]] = {} 14 | line_contents_cache: Dict[int, str] = {} 15 | empty: bool 16 | name: str 17 | extension: Optional[str] 18 | 19 | def __init__( 20 | self, 21 | path: str, 22 | relative_path: Optional[str] = None, 23 | content: Optional[str] = None, 24 | offsets: Optional[Dict] = None, 25 | ) -> None: 26 | self.line_offsets = {} 27 | self.line_contents_cache = {} 28 | 29 | if path is not None: 30 | self.path = get_abspath(path) 31 | 32 | self.relative_path = relative_path if relative_path is not None else self.path 33 | 34 | if content is not None: 35 | self.content = content 36 | else: 37 | try: 38 | self.content = self._get_contents() 39 | except Exception as e: 40 | logger.error('Error during fetching file contents') 41 | 42 | self.length = len(self.content) 43 | 44 | self.name = self._get_name() 45 | self.extension = self._get_extension() 46 | self.empty = True if self.length == 0 else False 47 | 48 | if offsets is not None: 49 | self.line_offsets = offsets 50 | 51 | if not self.empty and len(self.line_offsets) == 0: 52 | self._calc_offsets() 53 | 54 | def _get_name(self) -> str: 55 | by_slash = self.path.split('/') 56 | return by_slash[-1].split('.')[0] 57 | 58 | 59 | def _get_extension(self) -> Optional[str]: 60 | by_dot = self.path.split('.') 61 | if len(by_dot) == 1: 62 | return None 63 | 64 | return by_dot[-1] 65 | 66 | def _calc_offsets(self) -> None: 67 | line_breaks = [i.start() for i in re.finditer('\n', self.content)] 68 | for i, lb in enumerate(line_breaks): 69 | start = line_breaks[i - 1] + 1 if i > 0 else 0 70 | self.line_offsets[i + 1] = (start, lb) 71 | 72 | if len(self.line_offsets) == 0 and self.length > 0: 73 | self.line_offsets[1] = (0, self.length) 74 | 75 | def _get_contents(self) -> str: 76 | with open(self.path) as f: 77 | raw = f.read() 78 | if raw[-1] != '\n': 79 | raw += '\n' 80 | return raw 81 | 82 | def get_line_number(self, position: int) -> Optional[int]: 83 | return self._get_line_number_for_position(position=position) 84 | 85 | def _get_line_number_for_position(self, position: int) -> Optional[int]: 86 | for linum, offsets in self.line_offsets.items(): 87 | if offsets[1] < position: 88 | continue 89 | return linum 90 | return None 91 | 92 | def get_line_contents(self, line_number: int) -> Optional[str]: 93 | if line_number is None: 94 | return 95 | 96 | if line_number not in self.line_contents_cache: 97 | self.line_contents_cache[line_number] = self.content[ 98 | self.line_offsets[line_number][0]:self.line_offsets[line_number][1] 99 | ] 100 | return self.line_contents_cache[line_number] 101 | 102 | def get_full_line_for_position(self, span_end: int) -> Optional[str]: 103 | linum = self._get_line_number_for_position(span_end) 104 | if linum is None: 105 | return None 106 | 107 | return self.get_line_contents(linum) 108 | 109 | def get_span_for_string(self, str: str, between: Optional[Tuple[int, int]] = None) -> Optional[Tuple[int, int]]: 110 | if between is None: 111 | between = (0, self.length) 112 | 113 | search_window = self.content[between[0] : between[1]] 114 | 115 | pattern = re.escape(str) 116 | pattern = pattern.replace('\\\n', '\n').replace('\\\t', '\t') 117 | detects = re.finditer(pattern, search_window) 118 | for detect in detects: 119 | span = detect.span() 120 | return (between[0] + span[0], between[0] + span[1]) 121 | return None 122 | 123 | def __repr__(self) -> str: # pragma: no cover 124 | return self.path 125 | 
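A minimal usage sketch of the `File` model above, assuming content is passed in-memory so nothing is read from disk; it shows how character positions map back to line numbers and line contents:

```python
from deepsecrets.core.model.file import File

# Line offsets are precomputed in _calc_offsets(); positions are resolved
# through get_line_number() / get_full_line_for_position().
f = File(path='example.py', content='token = "abc"\nprint(token)\n')

assert f.get_line_number(5) == 1                            # inside line 1
assert f.get_full_line_for_position(20) == 'print(token)'   # text of line 2
```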
-------------------------------------------------------------------------------- /deepsecrets/core/model/finding.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from hashlib import sha256 4 | from typing import Any, Dict, List, Optional 5 | 6 | from pydantic import BaseModel, Field, PrivateAttr 7 | 8 | from deepsecrets.core.model.file import File 9 | from deepsecrets.core.model.rules.rule import Rule 10 | 11 | 12 | class Finding(BaseModel): 13 | file: Optional['File'] = Field(default=None) 14 | rules: List[Rule] = Field(default=[]) 15 | detection: str 16 | full_line: Optional[str] = Field(default=None) 17 | linum: Optional[int] = Field(default=None) 18 | start_pos: int 19 | end_pos: int 20 | reason: str = Field(default='') 21 | final_rule: Optional[Rule] = Field(default=None) 22 | _mapped_on_file: bool = PrivateAttr(default=False) 23 | 24 | def map_on_file(self, relative_start: int, file: Optional['File'] = None) -> None: 25 | if self._mapped_on_file: 26 | return 27 | 28 | if file is None and self.file is None: 29 | raise Exception('No file to match on') 30 | if self.file is None: 31 | self.file = file 32 | 33 | self.start_pos += relative_start 34 | self.end_pos += relative_start 35 | self.linum = self.file.get_line_number(self.end_pos) 36 | if not self.full_line: 37 | self.full_line = self.file.get_line_contents(self.linum) 38 | self._mapped_on_file = True 39 | 40 | def get_reason(self) -> str: 41 | if self.final_rule is None: 42 | self.choose_final_rule() 43 | 44 | return f'{self.final_rule.name} | {self.get_fingerprint()}' # type: ignore 45 | 46 | def get_fingerprint(self) -> str: 47 | return sha256(self.detection.encode('utf-8')).hexdigest()[23:33] 48 | 49 | class Config: 50 | arbitrary_types_allowed = True 51 | 52 | def choose_final_rule(self) -> None: 53 | self.final_rule = sorted( 54 | self.rules, key=lambda r: r.confidence, 55 | reverse=True 56 | )[0] 57 | 58 | def __hash__(self) -> int: # pragma: nocover 59 | if not self.file: 60 | raise Exception() 61 | 62 | return hash(f'{self.file.path}{self.detection}{self.start_pos}{self.end_pos}') 63 | 64 | def __eq__(self, other: Any) -> bool: 65 | if not isinstance(other, Finding): 66 | return False 67 | 68 | if other.file and self.file: 69 | if other.file.path != self.file.path: 70 | return False 71 | 72 | if other.detection and self.detection: 73 | if other.detection != self.detection: 74 | return False 75 | 76 | if other.start_pos and self.start_pos: 77 | if other.start_pos != self.start_pos: 78 | return False 79 | 80 | if other.end_pos and self.end_pos: 81 | if other.end_pos != self.end_pos: 82 | return False 83 | 84 | return True 85 | 86 | def merge(self, other: Any) -> bool: 87 | if not isinstance(other, Finding): 88 | return False 89 | 90 | if other != self: 91 | return False 92 | 93 | self.rules.extend(other.rules) 94 | self.rules = list(set(self.rules)) 95 | 96 | return True 97 | 98 | 99 | class FindingMerger: 100 | all: List[Finding] 101 | 102 | def __init__(self, full_list: List[Finding]) -> None: 103 | self.all = full_list 104 | 105 | def merge(self) -> List[Finding]: 106 | interm_dict: Dict[int, Finding] = {} 107 | 108 | for elem in self.all: 109 | hash = elem.__hash__() 110 | if hash not in interm_dict: 111 | interm_dict[hash] = elem 112 | 113 | interm_dict[hash].merge(elem) 114 | 115 | return list(interm_dict.values()) 116 | 117 | 118 | class FindingResponse: 119 | @classmethod 120 | def from_list(cls, list: List[Finding]) -> Dict[str, 
List[Dict]]: 121 | resp: Dict[str, List[Dict]] = {} 122 | for finding in list: 123 | if finding.file is None: 124 | continue 125 | 126 | if finding.file.path not in resp: 127 | resp[finding.file.path] = [] 128 | 129 | resp[finding.file.path].append(FindingApiModel.from_finding(finding).dict()) 130 | return resp 131 | 132 | 133 | class FindingApiModel(BaseModel): 134 | line: str 135 | string: str 136 | line_number: int 137 | rule: str 138 | reason: str 139 | confidence: int 140 | fingerprint: str 141 | 142 | @classmethod 143 | def from_finding(cls, finding: Finding) -> FindingApiModel: 144 | finding.choose_final_rule() 145 | return FindingApiModel( 146 | line=finding.full_line, 147 | string=finding.detection, 148 | line_number=finding.linum, 149 | rule=finding.final_rule.id, 150 | reason=finding.get_reason(), 151 | confidence=finding.final_rule.confidence, 152 | fingerprint=finding.get_fingerprint(), 153 | ) 154 | -------------------------------------------------------------------------------- /deepsecrets/core/model/rules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avito-tech/deepsecrets/4afd597d3997a2bdbac8059e405659715faa51d4/deepsecrets/core/model/rules/__init__.py -------------------------------------------------------------------------------- /deepsecrets/core/model/rules/exlcuded_path.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from pydantic import BaseModel 3 | from deepsecrets.core.model.rules.regex import RegexRuleWithoutId 4 | 5 | 6 | class ExcludePathRule(RegexRuleWithoutId): 7 | disabled: bool = False 8 | 9 | 10 | class ExcludePatternsList(BaseModel): 11 | __root__: List[ExcludePathRule] 12 | -------------------------------------------------------------------------------- /deepsecrets/core/model/rules/false_finding.py: -------------------------------------------------------------------------------- 1 | from deepsecrets.core.model.rules.regex import RegexRuleWithoutId 2 | 3 | 4 | class FalseFindingRule(RegexRuleWithoutId): 5 | pass 6 | -------------------------------------------------------------------------------- /deepsecrets/core/model/rules/hashed_secret.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | import mmh3 4 | from pydantic import root_validator 5 | from deepsecrets.core.model.rules.hashing import HashingAlgorithm 6 | 7 | from deepsecrets.core.model.rules.rule import Rule 8 | 9 | 10 | class HashedSecretRule(Rule): 11 | hashed_val: str 12 | token_length: int 13 | algorithm: HashingAlgorithm 14 | 15 | def __eq__(self, other: Any) -> bool: 16 | if not isinstance(other, HashedSecretRule): 17 | return False 18 | 19 | if other.hashed_val == self.hashed_val: 20 | return True 21 | 22 | if other.id == self.id: 23 | return True 24 | 25 | return False 26 | 27 | def __hash__(self) -> int: # pragma: nocover 28 | return hash(self.hashed_val) 29 | 30 | @root_validator(pre=True) 31 | def fill_id(cls, values: Dict) -> Dict: 32 | hashed_val = values.get('hashed_val', None) 33 | if hashed_val is None: 34 | return values 35 | 36 | algorithm = values.get('algorithm', None) 37 | 38 | if algorithm is None: 39 | values['algorithm'] = HashingAlgorithm.SHA_512 40 | else: 41 | try: 42 | values['algorithm'] = HashingAlgorithm(algorithm) 43 | except: 44 | raise Exception(f'Unsupported hashing algorithm: {algorithm}') 45 | 46 | 47 | if values.get('id', None) is None: 
48 | int_hash = abs(mmh3.hash(hashed_val)) 49 | first_3 = str(int_hash)[:3] 50 | last_2 = str(int_hash)[-2:] 51 | values['id'] = f'S{first_3}{last_2}' 52 | 53 | return values 54 | -------------------------------------------------------------------------------- /deepsecrets/core/model/rules/hashing.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class HashingAlgorithm(Enum): 5 | SHA_1 = 'sha1' 6 | SHA_256 = 'sha256' 7 | SHA_512 = 'sha512' 8 | -------------------------------------------------------------------------------- /deepsecrets/core/model/rules/regex.py: -------------------------------------------------------------------------------- 1 | import regex as re 2 | from typing import Dict, ForwardRef, List, Optional, Union 3 | 4 | from pydantic import Field, root_validator 5 | 6 | from deepsecrets.core.helpers.entropy import EntropyHelper 7 | from deepsecrets.core.model.rules.rule import Rule 8 | from deepsecrets.core.model.token import Token 9 | 10 | RegexRule = ForwardRef('RegexRule') 11 | 12 | 13 | class RegexRule(Rule): # type: ignore 14 | pattern: re.Pattern 15 | match_rules: Optional[Dict[int, RegexRule]] = Field(default={}) # type: ignore 16 | target_group: int = Field(default=0) 17 | entropy_settings: Optional[float] = Field(default=None) 18 | escaping_needed: bool = False 19 | 20 | class Config: 21 | arbitrary_types_allowed = True 22 | json_encoders = { 23 | re.Pattern: lambda v: v.pattern, 24 | } 25 | 26 | @root_validator(pre=True) 27 | def build_pattern(cls, values: Dict) -> Dict: 28 | pattern_str = values.get('pattern', None) 29 | if pattern_str is not None and isinstance(pattern_str, str): 30 | escaping_needed = values.get('escaping_needed', False) 31 | if escaping_needed: 32 | pattern_str = re.escape(pattern_str) 33 | 34 | values['pattern'] = re.compile(pattern_str, re.IGNORECASE) 35 | 36 | match_rules = values.get('match_rules', {}) 37 | for _, match_rule in match_rules.items(): 38 | match_rule['id'] = '' 39 | match_rule['confidence'] = 9 40 | 41 | return values 42 | 43 | def __hash__(self) -> int: # pragma: nocover 44 | return hash(self.id) 45 | 46 | def match(self, token: Union[Token, str]) -> List[re.Match]: 47 | good_matches = [] 48 | contents = [] 49 | contents.append(token.content if isinstance(token, Token) else token) 50 | contents.extend(token.uncovered_content if isinstance(token, Token) else []) 51 | 52 | for i, content in enumerate(contents): 53 | matches = re.finditer(self.pattern, content) 54 | 55 | for match in matches: 56 | if not self._verify(match): 57 | continue 58 | 59 | good_matches.append(match.span(self.target_group) if i == 0 else (0, len(contents[0]))) 60 | 61 | return good_matches 62 | 63 | def _verify(self, match: re.Match) -> bool: 64 | match_ok = True 65 | entropy_ok = True 66 | 67 | if self.match_rules is not None: 68 | for group_i, match_rule in self.match_rules.items(): 69 | span = match.span(group_i) 70 | window = match.string[span[0] : span[1]] 71 | if not match_rule.match(window): # type: ignore 72 | match_ok = False 73 | return False 74 | 75 | if self.entropy_settings is not None: 76 | span = match.span(self.target_group) 77 | str_to_check = match.string[span[0] : span[1]] 78 | ent = EntropyHelper.get_for_string(str_to_check) 79 | if ent < self.entropy_settings: 80 | entropy_ok = False 81 | 82 | return match_ok and entropy_ok 83 | 84 | 85 | RegexRule.update_forward_refs() # type: ignore 86 | 87 | 88 | class RegexRuleWithoutId(RegexRule): 89 | id: Optional[str] = 
Field(default=None) 90 | -------------------------------------------------------------------------------- /deepsecrets/core/model/rules/rule.py: -------------------------------------------------------------------------------- 1 | import regex as re 2 | from typing import Dict, List, Optional 3 | 4 | from pydantic import BaseModel, Field, root_validator 5 | 6 | 7 | class Rule(BaseModel): 8 | id: str 9 | name: Optional[str] 10 | confidence: int = Field(default=9) 11 | applicable_file_patterns: List[re.Pattern] = Field(default=[]) 12 | 13 | @root_validator(pre=True) 14 | def fill_confidence(cls, values: Dict) -> Dict: 15 | file_patterns = values.get('applicable_file_patterns', []) 16 | if len(file_patterns) > 0: 17 | pattеrns = [re.compile(p) for p in file_patterns] 18 | values['applicable_file_patterns'] = pattеrns 19 | 20 | if values.get('confidence', None) is None and values.get('id') is not None: 21 | values['confidence'] = 9 22 | 23 | return values 24 | 25 | def __hash__(self) -> int: # pragma: nocover 26 | return hash(self.id) 27 | 28 | class Config: 29 | arbitrary_types_allowed = True 30 | -------------------------------------------------------------------------------- /deepsecrets/core/model/rules/semantic.py: -------------------------------------------------------------------------------- 1 | from deepsecrets.core.model.rules.rule import Rule 2 | 3 | 4 | class SemanticRule(Rule): 5 | pass 6 | -------------------------------------------------------------------------------- /deepsecrets/core/model/semantic.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from deepsecrets.core.model.token import Token 3 | 4 | 5 | class Variable: 6 | name: Token 7 | value: Token 8 | span: List[int] 9 | found_by: 'VaribleDetector' 10 | 11 | 12 | from deepsecrets.core.tokenizers.helpers.semantic.var_detection.detector import ( 13 | VaribleDetector, 14 | ) 15 | -------------------------------------------------------------------------------- /deepsecrets/core/model/token.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from enum import Enum 4 | from typing import List, Optional, Type 5 | 6 | from deepsecrets.core.model.file import File 7 | from deepsecrets.core.model.rules.hashing import HashingAlgorithm 8 | from deepsecrets.core.utils.hashing import get_hash 9 | 10 | 11 | class SemanticType(Enum): 12 | VAR = 1 13 | 14 | 15 | class Semantic: 16 | type: SemanticType 17 | name: str 18 | creds_probability: int 19 | 20 | def __init__(self, type: SemanticType, name: str, creds_probability: int = 0) -> None: 21 | self.type = type 22 | self.name = name 23 | self.creds_probability = creds_probability 24 | 25 | 26 | class Token: 27 | content: str 28 | uncovered_content: List[str] 29 | span: List[int] 30 | file: 'File' 31 | type: List[Type] 32 | length: int 33 | hashed_value: Optional[str] 34 | semantic: Optional[Semantic] 35 | previous: Optional['Token'] 36 | next: Optional['Token'] 37 | 38 | def __init__(self, file: File, content: Optional[str] = None, span: Optional[List[int]] = None) -> None: 39 | self.file = file 40 | self.content = content 41 | self.span = span 42 | self.length = len(content) if self.content else 0 43 | self.hashed_value = None 44 | self.previous = None 45 | self.next = None 46 | self.type: List[Type] = [] # type: ignore 47 | self.semantic = None 48 | self.uncovered_content = [] 49 | 50 | def set_type(self, type: List[Type]) -> None: 51 | 
self.type = type # type: ignore 52 | 53 | def val_hash(self) -> int: 54 | return hash(self.content) 55 | 56 | def calculate_hashed_value(self, algorithm: HashingAlgorithm) -> None: 57 | if self.hashed_value: 58 | return 59 | 60 | self.hashed_value = get_hash(payload=self.content, algorithm=algorithm) 61 | 62 | def __repr__(self) -> str: # pragma: no cover 63 | if self.semantic is None and self.type is not None: 64 | return f'{self.content} | {self.type[0]}\n' 65 | 66 | out = f'======== VAR: {self.semantic.name} = {self.content}' # type: ignore 67 | if self.type is not None: 68 | out += f' | {self.type[0]}\n' 69 | 70 | return out 71 | -------------------------------------------------------------------------------- /deepsecrets/core/modes/iscan_mode.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from multiprocessing import get_context 3 | import os 4 | from abc import abstractmethod, abstractstaticmethod 5 | from datetime import datetime 6 | from functools import partial 7 | from multiprocessing.pool import Pool 8 | from typing import Any, Callable, List, Optional, Type 9 | import regex as re 10 | 11 | from dotwiz import DotWiz 12 | 13 | from deepsecrets import PLATFORM, PROFILER_ON, logger 14 | from deepsecrets.config import Config 15 | from deepsecrets.core.model.finding import Finding, FindingMerger 16 | from deepsecrets.core.model.rules.exlcuded_path import ExcludePathRule 17 | from deepsecrets.core.rulesets.excluded_paths import ExcludedPathsBuilder 18 | from deepsecrets.core.rulesets.false_findings import FalseFindingsBuilder 19 | from deepsecrets.core.utils.file_analyzer import FileAnalyzer 20 | from deepsecrets.core.utils.fs import get_abspath 21 | 22 | 23 | class ScanMode: 24 | config: Config 25 | filepaths: List[str] 26 | path_exclusion_rules: List[ExcludePathRule] = [] 27 | file_analyzer: FileAnalyzer 28 | pool_engine: Type 29 | 30 | def __init__(self, config: Config, pool_engine: Optional[Any] = None) -> None: 31 | if pool_engine is None: 32 | if PLATFORM == 'Darwin': 33 | self.pool_engine = get_context('fork').Pool 34 | else: 35 | self.pool_engine = Pool 36 | else: 37 | self.pool_engine = pool_engine 38 | 39 | self.config = config 40 | self.filepaths = self._get_files_list() 41 | self.prepare_for_scan() 42 | 43 | def _get_process_count_for_runner(self) -> int: 44 | limit = self.config.process_count 45 | 46 | file_count = len(self.filepaths) 47 | if file_count == 0: 48 | return 0 49 | return limit if file_count >= limit else file_count 50 | 51 | def run(self) -> List[Finding]: 52 | final: List[Finding] = [] 53 | 54 | bundle = self.analyzer_bundle() 55 | proc_count = self._get_process_count_for_runner() 56 | if proc_count == 0: 57 | return final 58 | 59 | if PROFILER_ON: 60 | for file in self.filepaths: 61 | final.extend(self._per_file_analyzer(file=file, bundle=bundle)) 62 | else: 63 | with self.pool_engine(processes=proc_count) as pool: 64 | per_file_findings: List[List[Finding]] = pool.map( 65 | partial(pool_wrapper, bundle, self._per_file_analyzer), 66 | self.filepaths, 67 | ) # type: ignore 68 | 69 | for file_findings in list(per_file_findings): 70 | if not file_findings: 71 | continue 72 | final.extend(file_findings) 73 | 74 | fin = FindingMerger(final).merge() 75 | fin = self.filter_false_positives(fin) 76 | return fin 77 | 78 | def _get_files_list(self) -> List[str]: 79 | flist = [] 80 | if not self.path_exclusion_rules: 81 | excl_paths_builder = ExcludedPathsBuilder() 82 | for path in 
self.config.global_exclusion_paths: 83 | excl_paths_builder.with_rules_from_file(path) 84 | 85 | self.path_exclusion_rules = excl_paths_builder.rules 86 | 87 | for fpath, _, files in os.walk(get_abspath(self.config.workdir_path)): 88 | for filename in files: 89 | full_path = os.path.join(fpath, filename) 90 | rel_path = full_path.replace(f'{self.config.workdir_path}/', '') 91 | if not self._path_included(rel_path): 92 | continue 93 | 94 | flist.append(full_path) 95 | 96 | return flist 97 | 98 | def _path_included(self, path: str) -> bool: 99 | if self.path_exclusion_rules is None or len(self.path_exclusion_rules) == 0: 100 | return True 101 | 102 | if any(excl_rule.match(path) for excl_rule in self.path_exclusion_rules): 103 | return False 104 | return True 105 | 106 | @abstractmethod 107 | def prepare_for_scan(self) -> None: 108 | pass 109 | 110 | def analyzer_bundle(self) -> DotWiz: 111 | return DotWiz( 112 | workdir=self.config.workdir_path, 113 | path_exclusion_rules=self.path_exclusion_rules, 114 | engines={} 115 | ) 116 | 117 | @abstractstaticmethod 118 | def _per_file_analyzer(bundle, file: Any) -> List[Finding]: # type: ignore 119 | pass 120 | 121 | def filter_false_positives(self, results: List[Finding]) -> List[Finding]: 122 | false_finding_rules = self.rulesets.get(FalseFindingsBuilder.ruleset_name) 123 | if false_finding_rules is None: 124 | return results 125 | 126 | 127 | final: List[Finding] = [] 128 | for result in results: 129 | good_result = True 130 | for false_pattern in false_finding_rules: 131 | if re.match(false_pattern.pattern, result.detection) is not None: 132 | good_result = False 133 | break 134 | if not good_result: 135 | continue 136 | 137 | final.append(result) 138 | 139 | return final 140 | 141 | 142 | def pool_wrapper(bundle: DotWiz, runner: Callable, file: str) -> List[Finding]: # pragma: nocover 143 | start_ts = datetime.now() 144 | result = runner(bundle, file) 145 | if logger.level == logging.DEBUG: 146 | logger.debug( 147 | f' ✓ [{file}] {(datetime.now() - start_ts).total_seconds()}s elapsed \t {len(result)} potential findings' 148 | ) 149 | else: 150 | logger.info(f' ✓ [{file}] \t {len(result)} potential findings') 151 | return result 152 | -------------------------------------------------------------------------------- /deepsecrets/core/rulesets/excluded_paths.py: -------------------------------------------------------------------------------- 1 | from deepsecrets.core.model.rules.exlcuded_path import ExcludePathRule 2 | from deepsecrets.core.rulesets.ibuilder import IRulesetBuilder 3 | 4 | 5 | class ExcludedPathsBuilder(IRulesetBuilder): 6 | rule_model = ExcludePathRule 7 | ruleset_name = 'excluded_paths' 8 | -------------------------------------------------------------------------------- /deepsecrets/core/rulesets/false_findings.py: -------------------------------------------------------------------------------- 1 | from deepsecrets.core.model.rules.false_finding import FalseFindingRule 2 | from deepsecrets.core.rulesets.ibuilder import IRulesetBuilder 3 | 4 | 5 | class FalseFindingsBuilder(IRulesetBuilder): 6 | rule_model = FalseFindingRule 7 | ruleset_name = 'false_findings' 8 | -------------------------------------------------------------------------------- /deepsecrets/core/rulesets/hashed_secrets.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tarfile 3 | from os.path import exists 4 | 5 | from deepsecrets.core.model.rules.hashed_secret import HashedSecretRule 6 | from 
deepsecrets.core.rulesets.ibuilder import IRulesetBuilder 7 | 8 | 9 | class HashedSecretsRulesetBuilder(IRulesetBuilder): 10 | rule_model = HashedSecretRule 11 | ruleset_name = 'hashed' 12 | 13 | def with_rules_from_file(self, file: str, compressed: bool = False) -> object: 14 | rules_raw = None 15 | true_file = file 16 | if compressed: 17 | if not exists(file): 18 | return 19 | 20 | with tarfile.open(file, 'r:gz') as tar: 21 | true_file = tar.extractfile('secrets').read() 22 | 23 | with open(true_file) as sec: 24 | rules_raw = json.load(sec) 25 | 26 | rules_set = set() 27 | for secret in rules_raw: 28 | path = secret.get('path') 29 | if path is not None: 30 | path = '/'.join(path.split('/')[1:3]) 31 | if 'non-prod' in path: 32 | continue 33 | 34 | rules_set.add( 35 | HashedSecretRule( 36 | id=None, # calculated inside the constructor # type: ignore 37 | name=f'{path}:{secret["name"]}', 38 | hashed_val=secret['hash'], 39 | algorithm=secret['algorithm'], 40 | token_length=secret['length'], 41 | confidence=9, 42 | ) 43 | ) 44 | 45 | self.rules = list(rules_set) 46 | return self 47 | -------------------------------------------------------------------------------- /deepsecrets/core/rulesets/ibuilder.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import List, Type 3 | 4 | from deepsecrets.core.model.rules.rule import Rule 5 | 6 | 7 | class IRulesetBuilder: 8 | rules: List[Rule] 9 | rule_model: Type 10 | ruleset_name = 'rules' 11 | 12 | def __init__(self) -> None: 13 | self.rules = [] 14 | 15 | def with_rules_from_file(self, file: str) -> object: 16 | rules_raw = None 17 | with open(file) as f: 18 | rules_raw = json.load(f) 19 | 20 | self.rules.extend([self.rule_model(**rule) for rule in rules_raw]) 21 | return self 22 | 23 | @property 24 | def high_confidence_rules(self) -> List[Rule]: 25 | return [rule for rule in self.rules if rule.confidence == 9] 26 | -------------------------------------------------------------------------------- /deepsecrets/core/rulesets/regex.py: -------------------------------------------------------------------------------- 1 | from deepsecrets.core.model.rules.regex import RegexRule 2 | from deepsecrets.core.rulesets.ibuilder import IRulesetBuilder 3 | 4 | 5 | class RegexRulesetBuilder(IRulesetBuilder): 6 | rule_model = RegexRule 7 | ruleset_name = 'regex' 8 | -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/__init__.py: -------------------------------------------------------------------------------- 1 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 2 | from deepsecrets.core.tokenizers.per_line import PerLineTokenizer 3 | 4 | fallback_ladder = {LexerTokenizer: PerLineTokenizer} 5 | -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/full_content.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from deepsecrets.core.tokenizers.itokenizer import Tokenizer 4 | from deepsecrets.core.model.file import File 5 | from deepsecrets.core.model.token import Token 6 | 7 | 8 | class FullContentTokenizer(Tokenizer): 9 | def tokenize(self, file: File) -> List[Token]: 10 | return [Token(file=file, content=file.content, span=[0, file.length])] 11 | -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/helpers/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/avito-tech/deepsecrets/4afd597d3997a2bdbac8059e405659715faa51d4/deepsecrets/core/tokenizers/helpers/__init__.py -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/helpers/semantic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avito-tech/deepsecrets/4afd597d3997a2bdbac8059e405659715faa51d4/deepsecrets/core/tokenizers/helpers/semantic/__init__.py -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/helpers/semantic/language.py: -------------------------------------------------------------------------------- 1 | from aenum import MultiValueEnum 2 | 3 | 4 | class Language(MultiValueEnum): 5 | PYTHON = 'py' 6 | GOLANG = 'go' 7 | PHP = 'php' 8 | JS = 'js','jsx' 9 | TOML = 'toml' 10 | JSON = 'json' 11 | YAML = 'yaml' 12 | INI = 'ini' 13 | PUPPET = 'pp' 14 | SHELL = 'sh' 15 | CSHARP = 'cs' 16 | JAVA = 'java' 17 | KOTLIN = 'kt' 18 | SWIFT = 'swift' 19 | 20 | ANY = 'any' 21 | UNKNOWN = 'unknown' 22 | 23 | @classmethod 24 | def from_text(cls, text: str) -> object: 25 | ext = text.split('.')[-1] 26 | return cls(ext) 27 | 28 | @classmethod 29 | def _missing_(cls, value: str) -> object: 30 | return Language.UNKNOWN 31 | -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/helpers/semantic/var_detection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avito-tech/deepsecrets/4afd597d3997a2bdbac8059e405659715faa51d4/deepsecrets/core/tokenizers/helpers/semantic/var_detection/__init__.py -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/helpers/semantic/var_detection/detector.py: -------------------------------------------------------------------------------- 1 | import regex as re 2 | from typing import Any, Dict, List, Optional 3 | 4 | from pydantic import BaseModel, Field, validator 5 | 6 | from deepsecrets.core.model.token import Token 7 | from deepsecrets.core.tokenizers.helpers.semantic.language import Language 8 | 9 | 10 | class Match(BaseModel): 11 | types: List[Any] = Field(default_factory=list) 12 | values: List[re.Pattern] = Field(default_factory=list) 13 | not_values: List[re.Pattern] = Field(default_factory=list) 14 | 15 | def check(self, tokens: List[Token]) -> bool: 16 | 17 | types_match = self._check_types(tokens) 18 | values_match = self._check_values(tokens) 19 | not_values_match = self._check_not_values(tokens) 20 | 21 | if not types_match: 22 | return False 23 | 24 | if not values_match: 25 | return False 26 | 27 | if not_values_match: 28 | return False 29 | 30 | return True 31 | 32 | 33 | 34 | def _check_types(self, tokens): 35 | if len(self.types) == 0: 36 | return True # should match any type 37 | 38 | for token in tokens: 39 | if token.type[0] in self.types: 40 | return True 41 | return False 42 | 43 | 44 | def _check_values(self, tokens): 45 | if len(self.values) == 0: 46 | return True # should match any value 47 | 48 | for token in tokens: 49 | for pattern in self.values: 50 | if re.match(pattern, token.content) is not None: 51 | return True 52 | return False 53 | 54 | 55 | def _check_not_values(self, tokens): 56 | if len(self.not_values) == 0: 57 | return False 58 | 59 | for token 
in tokens: 60 | for pattern in self.not_values: 61 | if re.match(pattern, token.content) is not None: 62 | return True 63 | return False 64 | 65 | 66 | 67 | 68 | 69 | 70 | @validator('values', 'not_values', pre=True) 71 | def regexify_values(cls, values: Dict) -> List[re.Pattern]: 72 | if values is None: 73 | return values 74 | 75 | if not isinstance(values, list): 76 | raise Exception('value must be an array') 77 | 78 | patterns = [] 79 | for val in values: 80 | if isinstance(val, re.Pattern): 81 | patterns.append(val) 82 | continue 83 | 84 | patterns.append(re.compile(re.escape(val), re.IGNORECASE)) 85 | 86 | return patterns 87 | 88 | class Config: 89 | arbitrary_types_allowed = True 90 | 91 | 92 | class VaribleDetector(BaseModel): 93 | language: Optional[Language] = None 94 | stream_pattern: re.Pattern 95 | match_rules: Dict[int, Match] 96 | match_semantics: Dict[int, str] 97 | creds_probability: int = 0 98 | 99 | class Config: 100 | arbitrary_types_allowed = True 101 | 102 | def match(self, tokens: List[Token], token_stream: str) -> List['Variable']: 103 | true_detections = [] 104 | 105 | for match in re.finditer(self.stream_pattern, token_stream, overlapped=True): 106 | if not self._verify(match, tokens): 107 | continue 108 | 109 | var = Variable() 110 | for i, name in self.match_semantics.items(): 111 | setattr(var, name, tokens[match.span(i)[0]]) 112 | var.found_by = self 113 | var.span = [match.span(0)[0], match.span(0)[1]] 114 | 115 | true_detections.append(var) 116 | 117 | return true_detections 118 | 119 | def _verify(self, match: re.Match, tokens: List[Token]) -> bool: 120 | for group_i, match_rule in self.match_rules.items(): 121 | span = match.span(group_i) 122 | window = tokens[span[0] : span[1]] 123 | 124 | if not match_rule.check(window): 125 | return False 126 | 127 | return True 128 | 129 | 130 | class VaribleSuppressor(VaribleDetector): 131 | 132 | def match(self, tokens: List[Token], token_stream: str) -> List['Variable']: 133 | detections = super().match(tokens, token_stream) 134 | spans = [] 135 | for detection in detections: 136 | spans.append(detection.span) 137 | 138 | return spans 139 | 140 | 141 | 142 | from deepsecrets.core.model.semantic import Variable 143 | -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/helpers/semantic/var_detection/rules.py: -------------------------------------------------------------------------------- 1 | import regex as re 2 | from typing import List 3 | 4 | from deepsecrets.core.tokenizers.helpers.semantic.language import Language 5 | from deepsecrets.core.tokenizers.helpers.semantic.var_detection.detector import Match, VaribleDetector, VaribleSuppressor 6 | from pygments.token import Token as PygmentsToken 7 | 8 | 9 | class VariableDetectionRules: 10 | rules = [ 11 | VaribleDetector( 12 | language=Language.PYTHON, 13 | stream_pattern=re.compile('(n)(o|p)(?:\n?)(L)(?:\n|p|\?)'), # noqa 14 | match_rules={2: Match(values=[ 15 | re.compile('^=$'), 16 | re.compile('^:$') 17 | ])}, 18 | match_semantics={1: 'name', 3: 'value'}, 19 | ), 20 | VaribleDetector( 21 | language=Language.PYTHON, 22 | stream_pattern=re.compile('(L)(p)(L)(?:p|\n)'), 23 | match_rules={2: Match(values=[':'])}, 24 | match_semantics={1: 'name', 3: 'value'}, 25 | ), 26 | VaribleDetector( 27 | language=Language.PYTHON, 28 | stream_pattern=re.compile('(L)(p)(o)(L)'), 29 | match_rules={2: Match(values=[']']), 3: Match(values=['='])}, 30 | match_semantics={1: 'name', 4: 'value'}, 31 | ), 32 | # GOLANG 33 | 
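        # The stream_pattern regexes in this list run against the single-character
        # "type stream" built by helpers/type_stream.py, one character per token:
        #   n = Name (incl. Tag/Constant/Attribute), v = Name.Variable,
        #   o = Operator, p = Punctuation (and backticks), L = Literal/String/Text,
        #   k = Keyword.Constant, u = a filtered-out token type, ? = unmapped type.
        # So '(n)(p)(L)' just below reads: a Name, then a Punctuation, then a Literal.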
VaribleDetector( 34 | language=Language.GOLANG, 35 | stream_pattern=re.compile('(n)(p)(L)(?:p|\n)?'), 36 | match_rules={2: Match(values=[':', '='])}, 37 | match_semantics={1: 'name', 3: 'value'}, 38 | ), 39 | VaribleDetector( 40 | language=Language.GOLANG, 41 | stream_pattern=re.compile('(n)(p)(L)(?:p|\n)?(L)(p)'), 42 | match_rules={ 43 | 1: Match(values=['Setenv', 'Getenv']), 44 | 2: Match(values=['(']), 45 | 5: Match(values=[')']), 46 | }, 47 | match_semantics={3: 'name', 4: 'value'}, 48 | ), 49 | 50 | VaribleDetector( 51 | language=Language.GOLANG, 52 | stream_pattern=re.compile('(n)(?:p|n|u){0,3}?(o).*(n)(p)(L)'), 53 | match_rules={ 54 | 2: Match(values=[':=']), 55 | 3: Match(not_values=['Getenv', 'Setenv', 'Format']), 56 | }, 57 | match_semantics={1: 'name', 5: 'value'}, 58 | ), 59 | 60 | VaribleDetector( 61 | language=Language.GOLANG, 62 | stream_pattern=re.compile('(n)(?:o|p){1,3}(\?|u)p(L)p'), # noqa 63 | match_rules={2: Match(values=['byte', 'string'])}, 64 | match_semantics={1: 'name', 3: 'value'}, 65 | ), 66 | # PHP 67 | VaribleDetector( 68 | language=Language.PHP, 69 | stream_pattern=re.compile('(n|v|L)(o)(L)'), 70 | match_rules={2: Match(values=['=', '=>'])}, 71 | match_semantics={1: 'name', 3: 'value'}, 72 | ), 73 | VaribleDetector( 74 | language=Language.PHP, 75 | stream_pattern=re.compile('(L)(o)(n)(p)Lp(L)p'), 76 | match_rules={ 77 | 2: Match(values=['=>']), 78 | 3: Match(values=['env']), 79 | 4: Match(values=['(']), 80 | }, 81 | match_semantics={1: 'name', 5: 'value'}, 82 | ), 83 | # CONFIGS AND FORMATS 84 | VaribleDetector( 85 | language=Language.TOML, 86 | stream_pattern=re.compile('(n)(o)(L)\n'), 87 | match_rules={2: Match(values=['='])}, 88 | match_semantics={1: 'name', 3: 'value'}, 89 | ), 90 | VaribleDetector( 91 | language=Language.YAML, 92 | stream_pattern=re.compile('(L)(p)(L)'), 93 | match_rules={2: Match(values=[':'])}, 94 | match_semantics={1: 'name', 3: 'value'}, 95 | ), 96 | VaribleDetector( 97 | language=Language.INI, 98 | stream_pattern=re.compile('(n)(o)(L)'), 99 | match_rules={2: Match(values=['='])}, 100 | match_semantics={1: 'name', 3: 'value'}, 101 | ), 102 | VaribleDetector( 103 | language=Language.PUPPET, 104 | stream_pattern=re.compile('(v|n)(o)(L)'), 105 | match_rules={2: Match(values=['=>', '='])}, 106 | match_semantics={1: 'name', 3: 'value'}, 107 | ), 108 | VaribleDetector( 109 | language=Language.ANY, 110 | stream_pattern=re.compile('(v|n)(p|o)(L)'), 111 | match_rules={ 112 | 2: Match(values=[ 113 | re.compile('^:$'), 114 | re.compile('^=$'), 115 | ])}, 116 | match_semantics={1: 'name', 3: 'value'}, 117 | ), 118 | VaribleDetector( 119 | language=Language.SHELL, 120 | stream_pattern=re.compile('(L)(L)(L)(L)'), 121 | match_rules={ 122 | 1: Match(values=[re.compile('^curl$')]), 123 | 2: Match(values=[re.compile('^-u$')]), 124 | 4: Match(not_values=[re.compile('^\\$')]), 125 | }, 126 | match_semantics={3: 'name', 4: 'value'}, 127 | creds_probability=9, 128 | ), 129 | 130 | VaribleDetector( 131 | language=Language.CSHARP, 132 | stream_pattern=re.compile('(n).{0,6}(u|L)p(L)(p)'), 133 | match_rules={ 134 | 1: Match(values=[re.compile('^KeyValuePair$')]), 135 | 4: Match(not_values=[re.compile('^}$')]), 136 | }, 137 | match_semantics={2: 'name', 3: 'value'}, 138 | ), 139 | 140 | VaribleDetector( 141 | language=Language.CSHARP, 142 | stream_pattern=re.compile('(p)(.)(p)(L)(p)'), 143 | match_rules={ 144 | 1: Match(values=[re.compile('^{$')]), 145 | 3: Match(values=[re.compile('^,$')]), 146 | 5: Match(values=[re.compile('^}$')]), 147 | }, 148 | 
match_semantics={2: 'name', 4: 'value'}, 149 | ), 150 | 151 | VaribleDetector( 152 | language=Language.JAVA, 153 | stream_pattern=re.compile('(n)(p)(.)(p)(L)'), 154 | match_rules={ 155 | 1: Match(values=[re.compile('^put$')]), 156 | 2: Match(values=[re.compile('^\\($')]), 157 | 4: Match(values=[re.compile('^,$')]), 158 | }, 159 | match_semantics={3: 'name', 5: 'value'}, 160 | ), 161 | 162 | ] 163 | 164 | @classmethod 165 | def for_language(cls, language: Language) -> List[VaribleDetector]: 166 | return list(filter(lambda x: x.language in [language, Language.ANY], cls.rules)) 167 | 168 | 169 | class VariableSuppressionRules(VariableDetectionRules): 170 | rules=[ 171 | VaribleSuppressor( 172 | language=Language.JS, 173 | stream_pattern=re.compile('(p)(n).+?(p)(u|L|\n)'), 174 | match_rules={ 175 | 1: Match(values=[ 176 | re.compile('^<$'), 177 | re.compile('^(}|{)$'), 178 | ]), 179 | 2: Match( 180 | types=[ 181 | PygmentsToken.Name.Tag, 182 | PygmentsToken.Name.Attribute 183 | ] 184 | ), 185 | 3: Match(values=[ 186 | re.compile('^>$'), 187 | re.compile('^(}|{)$'), 188 | ]), 189 | }, 190 | match_semantics={} 191 | ), 192 | 193 | 194 | VaribleSuppressor( 195 | language=Language.JS, 196 | stream_pattern=re.compile('(n)(o)L.{0,4}(?:u|\n)(n)(o)(?:L|u)'), 197 | match_rules={ 198 | 1: Match(values=[ 199 | re.compile('^key$'), 200 | ]), 201 | 2: Match(values=[ 202 | re.compile('^:$'), 203 | ]), 204 | 3: Match(values=[ 205 | re.compile('^value$'), 206 | ]), 207 | 4: Match(values=[ 208 | re.compile('^:$'), 209 | ]), 210 | }, 211 | match_semantics={} 212 | ), 213 | 214 | 215 | 216 | 217 | VaribleSuppressor( 218 | language=Language.SWIFT, 219 | stream_pattern=re.compile('(n)(p)(n)(p)L'), 220 | match_rules={ 221 | 1: Match(values=[ 222 | re.compile('^decode$'), 223 | re.compile('^decodeIfPresent$'), 224 | re.compile('^unbox$') 225 | ]), 226 | 2: Match(values=[re.compile('^\($')]), 227 | 3: Match(values=[re.compile('^(key|keyPath)$')]), 228 | 4: Match(values=[re.compile('^:$')]), 229 | }, 230 | match_semantics={} 231 | ), 232 | 233 | 234 | VaribleSuppressor( 235 | language=Language.GOLANG, 236 | stream_pattern=re.compile('(p)(n)(p)L(p)(n)(p).'), 237 | match_rules={ 238 | 1: Match(values=[ 239 | re.compile('^{$'), 240 | ]), 241 | 2: Match(values=[ 242 | re.compile('^key$', re.IGNORECASE), 243 | ]), 244 | 3: Match(values=[ 245 | re.compile('^:$'), 246 | ]), 247 | 4: Match(values=[ 248 | re.compile('^,$'), 249 | ]), 250 | 5: Match(values=[ 251 | re.compile('^value$', re.IGNORECASE), 252 | ]), 253 | 6: Match(values=[ 254 | re.compile('^:$'), 255 | ]), 256 | }, 257 | match_semantics={} 258 | ) 259 | 260 | 261 | ] -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/helpers/spot_improvements.py: -------------------------------------------------------------------------------- 1 | import regex as re 2 | from typing import Callable, List 3 | 4 | from pygments.token import Token as PygmentsToken 5 | 6 | from deepsecrets.core.model.token import Token 7 | from deepsecrets.core.tokenizers.helpers.semantic.language import Language 8 | from deepsecrets.core.tokenizers.helpers.type_stream import token_to_typestream_item 9 | 10 | 11 | class SpotImprovements: 12 | language: Language 13 | acc: dict[Language, List[Callable]] 14 | 15 | def __init__(self, lang: Language) -> None: 16 | self.language = lang 17 | self.acc = {Language.SHELL: [self._curl_argstring_breakdown]} 18 | 19 | def improve_token(self, so_far_tokens: List[Token], so_far_type_stream: str, current_token: 
Token) -> List[Token]: 20 | tokens = [] 21 | for improvement in self.acc.get(self.language, []): 22 | tokens.extend(improvement(so_far_tokens, so_far_type_stream, current_token)) 23 | 24 | if len(tokens) == 0: 25 | return [current_token] 26 | 27 | return tokens 28 | 29 | def _curl_argstring_breakdown( 30 | self, so_far_tokens: List[Token], so_far_type_stream: str, current_token: Token 31 | ) -> List[Token]: 32 | projected_typestream = so_far_type_stream + token_to_typestream_item(current_token) 33 | rule = {'pattern': re.compile('(L)(L)$'), 'checks': {1: re.compile('^-u$')}} 34 | match: re.Match = rule['pattern'].search(projected_typestream) 35 | if not match: 36 | return [current_token] 37 | 38 | for group_i, pattern in rule['checks'].items(): 39 | span = match.span(group_i) 40 | group_token: Token = so_far_tokens[span[0]] 41 | if not pattern.search(group_token.content): 42 | return [current_token] 43 | 44 | new_parts = current_token.content.split(':') 45 | if new_parts[0] == '' or new_parts[1] == '': 46 | return [current_token] 47 | 48 | final = [] 49 | for part in new_parts: 50 | t = Token( 51 | file=current_token.file, 52 | content=part, 53 | span=current_token.file.get_span_for_string(part, between=current_token.span), 54 | ) 55 | t.set_type([PygmentsToken.Text]) 56 | final.append(t) 57 | 58 | return final 59 | -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/helpers/type_stream.py: -------------------------------------------------------------------------------- 1 | from pygments.token import Token as PygmentsToken 2 | 3 | from deepsecrets.core.model.token import Token 4 | 5 | types_to_filter_before = [ 6 | PygmentsToken.Text.Whitespace, 7 | PygmentsToken.Error, 8 | PygmentsToken.Keyword, 9 | PygmentsToken.Generic, 10 | PygmentsToken.Literal.Date, 11 | PygmentsToken.Literal.Number, 12 | PygmentsToken.Literal.String.Char, 13 | PygmentsToken.Literal.String.Delimiter, 14 | PygmentsToken.Literal.String.Escape, 15 | PygmentsToken.Literal.String.Affix, 16 | PygmentsToken.Literal.String.Interpol, 17 | PygmentsToken.Comment.Hashbang, 18 | PygmentsToken.Name.Namespace, 19 | PygmentsToken.Name.Builtin.Pseudo, 20 | ] 21 | 22 | 23 | types_to_filter_after = [ 24 | PygmentsToken.Punctuation, 25 | PygmentsToken.Operator, 26 | PygmentsToken.Name, 27 | ] 28 | 29 | 30 | acc = { 31 | PygmentsToken.Operator: 'o', 32 | PygmentsToken.Name: 'n', 33 | PygmentsToken.Name.Variable: 'v', 34 | PygmentsToken.Name.Variable.Global: 'v', 35 | PygmentsToken.Name.Variable.Instance: 'v', 36 | PygmentsToken.Name.Variable.Magic: 'v', 37 | PygmentsToken.Name.Other: 'n', 38 | PygmentsToken.Name.Tag: 'n', 39 | PygmentsToken.Name.Constant: 'n', 40 | PygmentsToken.Name.Attribute: 'n', 41 | PygmentsToken.Keyword.Constant: 'k', 42 | PygmentsToken.Punctuation: 'p', 43 | PygmentsToken.Punctuation.Indicator: 'p', 44 | PygmentsToken.Literal: 'L', 45 | PygmentsToken.Literal.Scalar.Plain: 'L', 46 | PygmentsToken.Literal.String: 'L', 47 | PygmentsToken.String: 'L', 48 | PygmentsToken.String.Single: 'L', 49 | PygmentsToken.String.Double: 'L', 50 | PygmentsToken.Text: 'L', 51 | PygmentsToken.Literal.String.Backtick: 'p', # technically it's a punc 52 | } 53 | 54 | 55 | def token_to_typestream_item(token: Token) -> str: 56 | if token.content == '\n': 57 | return '\n' 58 | 59 | if any(type in token.type for type in types_to_filter_before): # type: ignore 60 | return 'u' 61 | 62 | return acc.get(token.type[0], '?') # type: ignore 63 | 
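A quick illustration of the type-stream idea above: the standalone sketch below is not part of the repository. It assumes only that pygments is installed, and it uses a trimmed copy of the `acc` mapping (not an import from deepsecrets) to fold a lexed snippet into the single-character stream that the detection regexes in var_detection/rules.py are written against.

# Hypothetical, self-contained sketch of the type-stream construction.
# The mapping is a reduced copy of the `acc` table above; quote-only tokens
# are dropped to mirror the `empty_tokens` handling in lexer.py.
from pygments.lexers import PythonLexer
from pygments.token import Token as T

reduced_acc = {T.Operator: 'o', T.Name: 'n', T.Punctuation: 'p',
               T.Literal.String: 'L', T.Text: 'L'}

def to_type_stream(code: str) -> str:
    stream = ''
    for ttype, value in PythonLexer().get_tokens(code):
        if value == '\n':
            stream += '\n'
            continue
        if value.strip() == '' or value in ('"', "'", '""', "''"):
            continue  # whitespace and bare quotes carry no signal
        mapped = '?'
        t = ttype
        while t is not None:          # walk up the pygments type hierarchy
            if t in reduced_acc:
                mapped = reduced_acc[t]
                break
            t = t.parent
        stream += mapped
    return stream

# 'password = "hunter2"' lexes to Name, Operator, String -> 'noL' (plus a
# trailing newline marker), which is what a detection pattern such as
# '(n)(o)(L)' keys on.
print(to_type_stream('password = "hunter2"'))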
-------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/itokenizer.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from collections import namedtuple 3 | from typing import List, NamedTuple 4 | 5 | from deepsecrets.core.model.file import File 6 | from deepsecrets.core.model.token import Token 7 | 8 | 9 | class Tokenizer: 10 | tokens: List[Token] 11 | settings: NamedTuple 12 | 13 | def __init__(self, **kwargs) -> None: 14 | self.tokens = [] 15 | Settings = namedtuple('Settings', kwargs.keys()) # type: ignore 16 | self.settings = Settings._make(kwargs.values()) # type: ignore 17 | 18 | @abstractmethod 19 | def tokenize(self, file: File) -> List[Token]: 20 | pass 21 | 22 | def __hash__(self) -> int: # pragma: nocover 23 | return hash(type(self)) 24 | 25 | def __repr__(self) -> str: # pragma: no cover 26 | return self.__class__.__name__ 27 | -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/lexer.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Sequence, Set, Type, Union 2 | 3 | from deepsecrets import logger 4 | 5 | from ordered_set import OrderedSet 6 | from pygments import highlight 7 | from pygments.formatters import RawTokenFormatter 8 | from pygments.lexers.special import Lexer, RawTokenLexer 9 | from pygments.token import Token as PygmentsToken 10 | 11 | from deepsecrets.core.model.file import File 12 | from deepsecrets.core.model.semantic import Variable 13 | from deepsecrets.core.model.token import Semantic, SemanticType, Token 14 | from deepsecrets.core.tokenizers.helpers.semantic.language import Language 15 | from deepsecrets.core.tokenizers.helpers.semantic.var_detection.rules import VariableDetectionRules, VariableSuppressionRules 16 | from deepsecrets.core.tokenizers.helpers.spot_improvements import SpotImprovements 17 | from deepsecrets.core.tokenizers.helpers.type_stream import ( 18 | token_to_typestream_item, 19 | types_to_filter_after, 20 | types_to_filter_before, 21 | ) 22 | from deepsecrets.core.tokenizers.itokenizer import Tokenizer 23 | from deepsecrets.core.utils.lexer_finder import LexerFinder 24 | 25 | empty_tokens = ['\n', '\t', "'", "''", '"', '""'] 26 | 27 | 28 | class LexerTokenizer(Tokenizer): 29 | token_stream: str 30 | lexer: Lexer 31 | language: Language 32 | 33 | def _get_types_for_token(self, token: PygmentsToken) -> List[Type]: # type: ignore 34 | types = [] 35 | types.append(token) 36 | if token.parent is not None: 37 | if token.parent == PygmentsToken: 38 | return types 39 | deep = self._get_types_for_token(token.parent) 40 | types.extend(deep) 41 | return types 42 | 43 | def sanitize(self, content: str) -> Union[str, bool]: 44 | quotes = ["'", "''", '"', '""'] 45 | 46 | whitespace_cleaned = content.replace(' ', '') 47 | if 0 <= len(whitespace_cleaned) == 0: 48 | return False 49 | 50 | # some lexers (eq. 
TextLexer) leave \n 51 | # at the end of a Token 52 | if len(content) > 1 and content[-1] == '\n': 53 | content = content[:-1] 54 | 55 | if content[0] == content[-1]: 56 | if content[0] in quotes: 57 | content = content[1:-1] 58 | 59 | if content in quotes: 60 | return False 61 | 62 | return content 63 | 64 | def _find_lexer_for_file(self, file: File): 65 | lexer = LexerFinder().find(file=file) 66 | if lexer is not None and lexer.name == 'Text only': 67 | return None 68 | return lexer 69 | 70 | 71 | def tokenize(self, file: File, post_filter=True) -> List[Token]: 72 | self.token_stream = '' 73 | # TODO: don't trust the extension, use 'file' utility ? 74 | 75 | self.lexer = self._find_lexer_for_file(file) 76 | if not self.lexer: 77 | return self.tokens 78 | 79 | try: 80 | self.language: Language = Language.from_text(self.lexer.filenames[0]) 81 | except (ValueError, IndexError): 82 | self.language: Language = Language.from_text(file.extension) 83 | except Exception as e: 84 | logger.exception(e) 85 | 86 | result = highlight(file.content, self.lexer, RawTokenFormatter()) 87 | raw_tokens = list(RawTokenLexer().get_tokens(result)) 88 | token_improver = SpotImprovements(lang=self.language) 89 | 90 | current_position = 0 91 | 92 | for i, raw_token in enumerate(raw_tokens): 93 | content: str = raw_token[1] 94 | types: List[Type] = self._get_types_for_token(raw_token[0]) 95 | start = current_position 96 | end = start + len(content) 97 | current_position = end 98 | 99 | try: 100 | content = self.sanitize(content) 101 | if not content: 102 | continue 103 | 104 | span = file.get_span_for_string(content, between=[start - 1, end + 1]) 105 | token = Token(file=file, content=content, span=span) 106 | token.set_type(types) 107 | 108 | improved_tokens = token_improver.improve_token(self.tokens, self.token_stream, token) 109 | 110 | self.tokens.extend(improved_tokens) 111 | self.add_to_token_stream(improved_tokens) 112 | except Exception as e: 113 | str(e) 114 | 115 | tokens_to_be_excluded = [] 116 | if self.settings.deep_token_inspection is True: # type: ignore 117 | tokens_to_be_excluded = self.deep_analyze() 118 | 119 | return self.final_cleanup(self.tokens, tokens_to_be_excluded) if post_filter else list(self.tokens) 120 | 121 | def add_to_token_stream(self, tokens: List[Token]) -> None: 122 | for token in tokens: 123 | self.token_stream += token_to_typestream_item(token=token) 124 | 125 | def final_cleanup(self, tokens_all: Sequence[Token], tokens_to_be_excluded: Sequence[Token]) -> List[Token]: 126 | if not isinstance(tokens_all, OrderedSet): 127 | tokens_all = OrderedSet(tokens_all) 128 | 129 | tokens_all = tokens_all - tokens_to_be_excluded 130 | final = [] 131 | for token in tokens_all: 132 | if any(type in token.type for type in types_to_filter_before): # type: ignore 133 | continue 134 | 135 | if any(type in token.type for type in types_to_filter_after): # type: ignore 136 | continue 137 | 138 | if token.content.replace(' ', '') in empty_tokens: 139 | continue 140 | 141 | final.append(token) 142 | 143 | return final 144 | 145 | def deep_analyze(self) -> Set[Token]: 146 | tokens_all = OrderedSet(self.tokens) 147 | if self.language is None: 148 | return tokens_all 149 | 150 | exclude_after = set() 151 | 152 | true_detections: List[Variable] = [] 153 | suppression_regions: List[List[int]] = [] 154 | 155 | detection_rules = VariableDetectionRules.for_language(self.language) 156 | suppression_rules = VariableSuppressionRules.for_language(self.language) 157 | 158 | for rule in detection_rules: 159 | 
true_detections.extend(rule.match(self.tokens, self.token_stream)) 160 | 161 | for rule in suppression_rules: 162 | suppression_regions.extend(rule.match(self.tokens, self.token_stream)) 163 | 164 | suppression_regions = self._collapse_suppression_regions(suppression_regions) 165 | 166 | for var in true_detections: 167 | suppressed = self._if_suppressed(var, suppression_regions) 168 | if suppressed: 169 | exclude_after.update([var.name, var.value]) 170 | continue 171 | 172 | var.value.semantic = Semantic( 173 | type=SemanticType.VAR, 174 | name=var.name.content, 175 | creds_probability=var.found_by.creds_probability, 176 | ) 177 | exclude_after.add(var.name) 178 | 179 | return exclude_after 180 | 181 | def _if_suppressed(self, var: Variable, regions): 182 | for reg in regions: 183 | if var.span[0] >= reg[0] and var.span[1] <= reg[1]: 184 | return True 185 | return False 186 | 187 | 188 | def get_variables(self, tokens: Optional[List[Token]] = None) -> List[Token]: 189 | tokens = tokens if tokens is not None else self.tokens 190 | vars = [] 191 | if len(tokens) == 0: 192 | return [] 193 | 194 | for token in tokens: 195 | if token.semantic is None: 196 | continue 197 | 198 | if token.semantic.type != SemanticType.VAR: 199 | continue 200 | 201 | vars.append(token) 202 | 203 | return vars 204 | 205 | def print_token_type_stream(self) -> None: 206 | print(self.token_stream) 207 | 208 | def _collapse_suppression_regions(self, suppression_regions): 209 | regions = [] 210 | if len(suppression_regions) == 0: 211 | return regions 212 | 213 | for i, reg in enumerate(suppression_regions): 214 | if i == 0: 215 | regions.append(suppression_regions[0]) 216 | continue 217 | 218 | if reg[0] == regions[-1][1]: 219 | regions[-1][1] = reg[1] 220 | else: 221 | regions.append(reg) 222 | 223 | return regions 224 | 225 | 226 | -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/per_line.py: -------------------------------------------------------------------------------- 1 | import regex as re 2 | from typing import List 3 | 4 | from deepsecrets.core.model.file import File 5 | from deepsecrets.core.model.token import Token 6 | from deepsecrets.core.tokenizers.itokenizer import Tokenizer 7 | 8 | separator = re.compile(r'\n') 9 | 10 | 11 | class PerLineTokenizer(Tokenizer): 12 | def tokenize(self, file: File) -> List[Token]: 13 | separs = separator.finditer(file.content) 14 | prev_end = 0 15 | for sep in separs: 16 | s, e = sep.span() 17 | self.tokens.append(Token(file=file, content=file.content[prev_end:s], span=[prev_end, s])) 18 | prev_end = e 19 | 20 | return self.tokens 21 | -------------------------------------------------------------------------------- /deepsecrets/core/tokenizers/per_word.py: -------------------------------------------------------------------------------- 1 | import regex as re 2 | from typing import List, Optional 3 | 4 | from deepsecrets.core.model.file import File 5 | from deepsecrets.core.model.token import Token 6 | from deepsecrets.core.tokenizers.itokenizer import Tokenizer 7 | 8 | separator = re.compile(r'[ ,"\'\n:={}\[\]\+]+') 9 | 10 | 11 | class PerWordTokenizer(Tokenizer): 12 | def tokenize(self, file: File, content: Optional[str] = None) -> List[Token]: 13 | cnt = content if content is not None else file.content 14 | length = len(cnt) 15 | separs = separator.finditer(cnt) 16 | prev_end = 0 17 | 18 | for sep in separs: 19 | s, e = sep.span() 20 | if s == prev_end: 21 | prev_end = e 22 | continue 23 | 24 | token = 
Token(file=file, content=content[prev_end : e - 1], span=[prev_end, e - 1]) 25 | self.tokens.append(token) 26 | prev_end = e 27 | 28 | if prev_end != length: 29 | token = Token(file=file, content=content[prev_end:length], span=[prev_end, length]) 30 | self.tokens.append(token) 31 | 32 | return self.tokens 33 | -------------------------------------------------------------------------------- /deepsecrets/core/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avito-tech/deepsecrets/4afd597d3997a2bdbac8059e405659715faa51d4/deepsecrets/core/utils/__init__.py -------------------------------------------------------------------------------- /deepsecrets/core/utils/cpu.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import cpu_count 2 | 3 | from deepsecrets.core.utils.fs import path_exists 4 | 5 | QUOTA_FILE = '/sys/fs/cgroup/cpu/cpu.cfs_quota_us' 6 | PERIOD_FILE = '/sys/fs/cgroup/cpu/cpu.cfs_period_us' 7 | CGROUP_2_MAX = '/sys/fs/cgroup/cpu.max' 8 | 9 | 10 | class CpuHelper: 11 | 12 | def get_limit(self) -> int: 13 | multiproc_limit = self._by_multiproc() 14 | cgroup = self._by_cgroup() 15 | 16 | final = cgroup if cgroup != -1 else multiproc_limit 17 | return final if final > 0 else 0 18 | 19 | def _by_multiproc(self): 20 | return cpu_count() 21 | 22 | def _by_cgroup(self): 23 | quota = 1 24 | period = -1 25 | 26 | # cgroup 2: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html 27 | if path_exists(CGROUP_2_MAX): 28 | try: 29 | quota, period = self.__cgroup2() 30 | return quota // period 31 | except Exception: 32 | pass 33 | 34 | # cgroup 1: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v1/index.html 35 | if path_exists(QUOTA_FILE) and path_exists(PERIOD_FILE): 36 | try: 37 | quota, period = self.__cgroup1() 38 | return quota // period 39 | except Exception: 40 | pass 41 | 42 | return quota // period 43 | 44 | def __cgroup1(self): 45 | quota = 1 46 | period = -1 47 | 48 | with open(QUOTA_FILE) as f: 49 | quota = int(f.read()) 50 | 51 | with open(PERIOD_FILE) as f: 52 | period = int(f.read()) 53 | 54 | return quota, period 55 | 56 | 57 | def __cgroup2(self): 58 | quota = 1 59 | period = -1 60 | 61 | with open(CGROUP_2_MAX) as f: 62 | str_quota_period = f.read().split(' ') 63 | quota = int(str_quota_period[0]) 64 | period = int(str_quota_period[1]) 65 | 66 | return quota, period 67 | 68 | -------------------------------------------------------------------------------- /deepsecrets/core/utils/exceptions.py: -------------------------------------------------------------------------------- 1 | class FileNotFoundException(Exception): 2 | pass 3 | 4 | 5 | class TokenizationException(Exception): 6 | pass 7 | -------------------------------------------------------------------------------- /deepsecrets/core/utils/file_analyzer.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import RLock 2 | from multiprocessing.pool import Pool 3 | from typing import Dict, List, Optional, Type 4 | 5 | from pydantic import BaseModel 6 | 7 | from deepsecrets import logger 8 | from deepsecrets.core.engines.iengine import IEngine 9 | from deepsecrets.core.model.file import File 10 | from deepsecrets.core.model.finding import Finding 11 | from deepsecrets.core.model.token import Token 12 | from deepsecrets.core.tokenizers.itokenizer import Tokenizer 13 | 14 | 15 | class EngineWithTokenizer(BaseModel): 
16 | engine: IEngine 17 | tokenizer: Tokenizer 18 | 19 | class Config: 20 | arbitrary_types_allowed = True 21 | 22 | 23 | class FileAnalyzer: 24 | file: File 25 | engine_tokenizers: List[EngineWithTokenizer] 26 | tokens: Dict[Type, List[Token]] 27 | pool_class: Type 28 | 29 | def __init__(self, file: File, pool_class: Optional[Type] = None): 30 | if pool_class is not None: 31 | self.pool_class = Pool 32 | else: 33 | self.pool_class = pool_class 34 | 35 | self.engine_tokenizers = [] 36 | self.file = file 37 | self.tokens = {} 38 | self.tokenizers_lock = RLock() 39 | 40 | def add_engine(self, engine: IEngine, tokenizers: List[Tokenizer]) -> None: 41 | for tokenizer in tokenizers: 42 | self.engine_tokenizers.append(EngineWithTokenizer(engine=engine, tokenizer=tokenizer)) 43 | 44 | def process(self, threaded: bool = False) -> List[Finding]: 45 | results: List[Finding] = [] 46 | 47 | if threaded: # pragma: nocover 48 | with self.pool_class(2) as pool: 49 | engine_results = pool.imap(self._run_engine, self.engine_tokenizers) 50 | pool.close() 51 | pool.join() 52 | 53 | if engine_results is None: 54 | return results 55 | 56 | for er in engine_results: 57 | if not er: 58 | continue 59 | results.extend(er) 60 | 61 | else: 62 | for et in self.engine_tokenizers: 63 | results.extend(self._run_engine(et)) 64 | 65 | return results 66 | 67 | def _run_engine(self, et: EngineWithTokenizer) -> List[Finding]: 68 | results: List[Finding] = [] 69 | processed_values: Dict[int, bool] = {} 70 | 71 | with self.tokenizers_lock: 72 | if et.tokenizer not in self.tokens: 73 | self.tokens[et.tokenizer] = et.tokenizer.tokenize(self.file) 74 | 75 | tokens: List[Token] = self.tokens[et.tokenizer] 76 | 77 | for token in tokens: 78 | is_known_content = processed_values.get(token.val_hash()) 79 | if is_known_content is not None and is_known_content is False: 80 | continue 81 | 82 | processed_values[token.val_hash()] = False 83 | 84 | try: 85 | findings: List[Finding] = et.engine.search(token) 86 | for finding in findings: 87 | finding.map_on_file(file=self.file, relative_start=token.span[0]) 88 | results.append(finding) 89 | processed_values[token.val_hash()] = True 90 | 91 | except Exception as e: 92 | logger.exception('Unable to process token') 93 | continue 94 | 95 | return results 96 | -------------------------------------------------------------------------------- /deepsecrets/core/utils/fs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from deepsecrets import BASE_DIR, MODULE_NAME 5 | 6 | 7 | def get_abspath(filepath: str) -> str: 8 | if filepath.startswith('/'): 9 | return filepath 10 | 11 | filepath = filepath.replace('./', '') 12 | return os.path.join(BASE_DIR, filepath) 13 | 14 | 15 | def path_exists(filepath: str) -> bool: 16 | abs_path = get_abspath(filepath) 17 | return os.path.exists(abs_path) 18 | 19 | 20 | def get_path_inside_package(filepath: str) -> str: 21 | pkg_root = sys.modules[MODULE_NAME].__path__[0] 22 | return os.path.join(pkg_root, filepath) 23 | -------------------------------------------------------------------------------- /deepsecrets/core/utils/guess_filetype.py: -------------------------------------------------------------------------------- 1 | from configparser import ConfigParser 2 | import json 3 | import tomllib 4 | from typing import Optional 5 | from puppetparser.parser import parse 6 | 7 | 8 | 9 | class FileTypeGuesser: 10 | 11 | def __init__(self) -> None: 12 | self.probes = { 13 | 'json': self._is_json, 14 | 
'toml': self._is_toml, 15 | 'pp': self._is_puppet, 16 | 'conf': self._is_conf, 17 | } 18 | 19 | def guess(self, content: str) -> Optional[str]: 20 | for ext, probe in self.probes.items(): 21 | if probe(content): 22 | return ext 23 | 24 | # TODO: Guesslang 25 | ''' 26 | ml_guesser = Guess() 27 | guess = ml_guesser.language_name(content) 28 | if not guess: 29 | return None 30 | 31 | for ext, name in ml_guesser._extension_map.items(): 32 | if name == guess: 33 | return ext 34 | ''' 35 | return None 36 | 37 | 38 | 39 | def _is_json(self, content: str): 40 | try: 41 | json.loads(content) 42 | except Exception: 43 | return False 44 | 45 | return True 46 | 47 | def _is_toml(self, content: str): 48 | try: 49 | tomllib.loads(content) 50 | except Exception: 51 | return False 52 | 53 | return True 54 | 55 | def _is_puppet(self, content: str): 56 | try: 57 | _, _ = parse(content) 58 | except Exception: 59 | return False 60 | 61 | return True 62 | 63 | def _is_conf(self, content): 64 | try: 65 | conf = ConfigParser().read_string(content) 66 | except Exception as e: 67 | return False 68 | return True -------------------------------------------------------------------------------- /deepsecrets/core/utils/hashing.py: -------------------------------------------------------------------------------- 1 | from hashlib import sha1, sha256, sha512 2 | 3 | from deepsecrets.core.model.rules.hashing import HashingAlgorithm 4 | 5 | 6 | def c_sha1(payload: str) -> str: 7 | return sha1(payload.encode('UTF-8')).hexdigest() 8 | 9 | 10 | def c_sha256(payload: str) -> str: 11 | return sha256(payload.encode('UTF-8')).hexdigest() 12 | 13 | 14 | def c_sha512(payload: str) -> str: 15 | return sha512(payload.encode('UTF-8')).hexdigest() 16 | 17 | 18 | algorithm_to_method = { 19 | HashingAlgorithm.SHA_512: c_sha512, 20 | HashingAlgorithm.SHA_256: c_sha256, 21 | HashingAlgorithm.SHA_1: c_sha1 22 | } 23 | 24 | 25 | def get_hash(payload: str, algorithm: HashingAlgorithm) -> str: 26 | method = algorithm_to_method.get(algorithm) 27 | if method is None: 28 | raise Exception(f'Unable to calculate hash for algorithm {algorithm.name}') 29 | 30 | return method(payload) -------------------------------------------------------------------------------- /deepsecrets/core/utils/lexer_finder.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.utils.guess_filetype import FileTypeGuesser 5 | from pygments.lexers import load_lexer_from_file, get_lexer_for_filename, get_lexer_by_name 6 | from pygments.util import ClassNotFound 7 | from jsx import lexer as lexer_mod 8 | 9 | 10 | class LexerFinder: 11 | 12 | file: File 13 | extension: str 14 | distinguishing_feature: List[str] 15 | 16 | alias_exceptions: Dict 17 | probes: Dict 18 | 19 | def __init__(self) -> None: 20 | self._init_custom_lexers() 21 | self._init_alias_exceptions() 22 | self._init_probes() 23 | 24 | def _init_custom_lexers(self): 25 | load_lexer_from_file(lexer_mod.__file__, "JsxLexer") 26 | 27 | def _init_alias_exceptions(self): 28 | self.alias_exceptions = { 29 | 'js+react': 'react' 30 | } 31 | 32 | def _init_probes(self): 33 | self.probes = { 34 | 'js': [ 35 | _probe_react 36 | ] 37 | } 38 | 39 | def find(self, file: File): 40 | self.file = file 41 | self.extension = self._determine_extension() 42 | self.distinguishing_feature = self._determine_distinguishing_feature() 43 | 44 | filename = self._projected_filename() 45 | alias = 
self._projected_alias() 46 | lexer = None 47 | 48 | try: 49 | lexer = get_lexer_by_name(alias) 50 | return lexer 51 | except ClassNotFound as e: 52 | pass 53 | 54 | try: 55 | lexer = get_lexer_for_filename(filename, file.content) 56 | return lexer 57 | except ClassNotFound as e: 58 | pass 59 | 60 | return lexer 61 | 62 | def _determine_extension(self): 63 | if self.file.extension is None: 64 | return self._try_guess_extension() 65 | 66 | return self.file.extension 67 | 68 | def _try_guess_extension(self) -> Optional[str]: 69 | return FileTypeGuesser().guess(self.file.content) 70 | 71 | def _determine_distinguishing_feature(self): 72 | applicable_strategies = self.probes.get(self.extension, []) 73 | for strategy in applicable_strategies: 74 | f = strategy(self.file) 75 | if f is None: 76 | continue 77 | return f 78 | 79 | def _projected_alias(self): 80 | alias = self.extension 81 | if self.distinguishing_feature is not None: 82 | alias += f'+{self.distinguishing_feature}' 83 | 84 | return self.alias_exceptions.get(alias, alias) 85 | 86 | def _projected_filename(self): 87 | filename = self.file.name 88 | if self.extension is not None: 89 | filename += f'.{self.extension}' 90 | 91 | return filename 92 | 93 | 94 | def _probe_react(file: File): 95 | # very simple approach at the moment 96 | evidences = [ 97 | 'import React', 98 | 'ReactDOM', 99 | ] 100 | if any(evidence in file.content for evidence in evidences): 101 | return 'react' 102 | return None -------------------------------------------------------------------------------- /deepsecrets/rules/excluded_paths.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "Path excluded", 4 | "pattern": "\\.git" 5 | }, 6 | { 7 | "name": "Path excluded", 8 | "pattern": ".*package-lock\\.json.*" 9 | }, 10 | { 11 | "name": "Path excluded", 12 | "pattern": ".*-requirements.txt" 13 | }, 14 | { 15 | "name": "Path excluded", 16 | "pattern": ".*Pipfile\\.lock$" 17 | }, 18 | { 19 | "name": "Path excluded", 20 | "pattern": ".*package.json.*" 21 | }, 22 | { 23 | "name": "Images", 24 | "pattern": ".*\\.(jpg|png|bmp|gif|tiff)$" 25 | }, 26 | { 27 | "name": "Executives", 28 | "pattern": ".*\\.(exe|dll)$" 29 | }, 30 | { 31 | "name": "Path excluded", 32 | "pattern": "vendor/" 33 | }, 34 | { 35 | "name": "Path excluded", 36 | "pattern": ".*Gopkg\\.lock.*" 37 | }, 38 | { 39 | "name": "Path excluded", 40 | "pattern": "venv/" 41 | }, 42 | { 43 | "name": "Path excluded", 44 | "pattern": "requirements.txt$" 45 | }, 46 | { 47 | "name": "Path excluded", 48 | "pattern": ".*Gopkg\\.lck$" 49 | }, 50 | { 51 | "name": "Path excluded", 52 | "pattern": ".*Podfile\\.lock$" 53 | }, 54 | { 55 | "name": "Path excluded", 56 | "pattern": "\\.gitignore" 57 | }, 58 | { 59 | "name": "Path excluded", 60 | "pattern": ".*xcodeproj.*" 61 | }, 62 | { 63 | "name": "Path excluded", 64 | "pattern": ".*__snapshots__.*" 65 | }, 66 | { 67 | "name": "Path excluded", 68 | "pattern": "internal/generated/" 69 | }, 70 | { 71 | "name": "Path excluded", 72 | "pattern": "npm-shrinkwrap.json" 73 | }, 74 | { 75 | "name": "Path excluded", 76 | "pattern": ".*composer.json.*" 77 | }, 78 | { 79 | "name": "Path excluded", 80 | "pattern": ".*brief" 81 | }, 82 | { 83 | "name": "Path excluded", 84 | "pattern": ".*Godeps\\.json$" 85 | }, 86 | { 87 | "name": "Path excluded", 88 | "pattern": ".*composer\\.lock.*" 89 | }, 90 | { 91 | "name": "Path excluded", 92 | "pattern": "src/Generated/" 93 | }, 94 | { 95 | "name": "Path excluded", 96 | "pattern": 
".*yarn\\.lock$" 97 | }, 98 | { 99 | "name": "Path excluded", 100 | "pattern": ".*node_modules\\/.*" 101 | }, 102 | { 103 | "name": "Path excluded", 104 | "pattern": ".*symfony\\.lock$" 105 | }, 106 | { 107 | "name": "Path excluded", 108 | "pattern": "Gopkg.toml" 109 | }, 110 | { 111 | "name": "Path excluded", 112 | "pattern": "lib/generated/" 113 | }, 114 | { 115 | "name": "Path excluded", 116 | "pattern": ".*/vendor\\/.*" 117 | }, 118 | { 119 | "name": "Path excluded", 120 | "pattern": ".*pbxproj$" 121 | }, 122 | { 123 | "name": "Path excluded", 124 | "pattern": ".*go\\.sum$" 125 | }, 126 | { 127 | "name": "Postman collection files", 128 | "pattern": ".*postman_collection\\.json$" 129 | } 130 | ] -------------------------------------------------------------------------------- /deepsecrets/rules/regexes.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "S0", 4 | "name": "Slack Token", 5 | "confidence": 9, 6 | "pattern": "xox(?:a|b|p|o|s|r)-(?:\\d+-)+[a-z0-9]+" 7 | }, 8 | { 9 | "id": "S1", 10 | "name": "RSA private key", 11 | "confidence": 9, 12 | "pattern": "-----BEGIN RSA PRIVATE KEY-----[\\S\\s]{15,}?-----END RSA PRIVATE KEY-----" 13 | }, 14 | { 15 | "id": "S2", 16 | "name": "SSH (OPENSSH) private key", 17 | "confidence": 9, 18 | "pattern": "-----BEGIN OPENSSH PRIVATE KEY-----[\\S\\s]{15,}?-----END OPENSSH PRIVATE KEY-----" 19 | }, 20 | { 21 | "id": "S3", 22 | "name": "SSH (DSA) private key", 23 | "confidence": 9, 24 | "pattern": "-----BEGIN DSA PRIVATE KEY-----[\\S\\s]{15,}?-----END DSA PRIVATE KEY-----" 25 | }, 26 | { 27 | "id": "S4", 28 | "name": "SSH (EC) private key", 29 | "confidence": 9, 30 | "pattern": "-----BEGIN EC PRIVATE KEY-----[\\S\\s]{15,}?-----END EC PRIVATE KEY-----" 31 | }, 32 | { 33 | "id": "S5", 34 | "name": "PGP private key block", 35 | "confidence": 9, 36 | "pattern": "-----BEGIN PGP PRIVATE KEY BLOCK-----" 37 | }, 38 | { 39 | "id": "S7", 40 | "name": "Facebook Oauth", 41 | "confidence": 9, 42 | "pattern": "facebook.*['|\"][0-9a-f]{32}['|\"]" 43 | }, 44 | { 45 | "id": "S8", 46 | "name": "Twitter Oauth", 47 | "confidence": 9, 48 | "pattern": "twitter.*['|\"][0-9a-zA-Z]{35,44}['|\"]" 49 | }, 50 | { 51 | "id": "S10", 52 | "name": "Google Oauth", 53 | "confidence": 9, 54 | "pattern": "(\"client_secret\":\"[a-zA-Z0-9-_]{24}\")" 55 | }, 56 | { 57 | "id": "S12", 58 | "name": "Heroku API Key", 59 | "confidence": 9, 60 | "pattern": "heroku.*[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}" 61 | }, 62 | { 63 | "id": "S17", 64 | "name": "Slack Webhook", 65 | "confidence": 9, 66 | "pattern": "https://hooks.slack.com/services/T[a-zA-Z0-9_]+/B[a-zA-Z0-9_]+/[a-zA-Z0-9_]+" 67 | }, 68 | { 69 | "id": "S18", 70 | "name": "Google (GCP) Service-account", 71 | "confidence": 9, 72 | "pattern": "\"type\": \"service_account\"" 73 | }, 74 | { 75 | "id": "S19", 76 | "name": "Password in URL", 77 | "confidence": 9, 78 | "pattern": "://([^.\"]+):([^.\"]+)@\\S+", 79 | "target_group": 2, 80 | "match_rules": { 81 | "2": { 82 | "pattern": "^[^\\$|{|%|<].+[^\\$|}|%|>]$" 83 | } 84 | } 85 | }, 86 | { 87 | "id": "S20", 88 | "name": "BAuth", 89 | "confidence": 9, 90 | "pattern": "Basic @[a-zA-Z0-9+/]+={0,2}" 91 | }, 92 | { 93 | "id": "S22", 94 | "name": "Tableau PATS", 95 | "confidence": 0, 96 | "pattern": "\\bpersonalAccessTokenSecret\\b" 97 | }, 98 | { 99 | "id": "S23", 100 | "name": "Tableau PAT", 101 | "confidence": 0, 102 | "pattern": "\\bpersonalAccessToken\\b" 103 | }, 104 | { 105 | "id": "S25", 106 | "name": "Slack App 
Token", 107 | "confidence": 9, 108 | "pattern": "xapp-[0-9]+-[A-Za-z0-9_]+-[0-9]+-[a-f0-9]+" 109 | }, 110 | { 111 | "id": "S26", 112 | "name": "Custom private key", 113 | "confidence": 9, 114 | "pattern": "-----BEGIN PRIVATE KEY-----[\\S\\s]{15,}?-----END PRIVATE KEY-----" 115 | }, 116 | { 117 | "id": "S28", 118 | "name": "Suspicious password declaration", 119 | "pattern": "\\b(pass|password|pwd|passwd)\\b(\\W+)([A-Za-z0-9()$]*)\\b", 120 | "confidence": 0, 121 | "match_rules": { 122 | "2": { 123 | "pattern": "^\\s*(?:'|:|=)*\\s*$" 124 | } 125 | }, 126 | "target_group": 3, 127 | "entropy_settings": 3.72 128 | }, 129 | { 130 | "id": "S29", 131 | "name": "Ansible vault", 132 | "confidence": 9, 133 | "pattern": "\\$ANSIBLE_VAULT;[0-9]\\.[0-9];AES256" 134 | }, 135 | { 136 | "id": "S30", 137 | "name": "AWS MWS", 138 | "confidence": 9, 139 | "applicable_file_patterns": [ 140 | ".*.txt$" 141 | ], 142 | "pattern": "amzn\\.mws\\.[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}" 143 | } 144 | 145 | ] -------------------------------------------------------------------------------- /deepsecrets/scan_modes/cli.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from typing import Any, Dict, List, Type 4 | 5 | from dotwiz import DotWiz 6 | 7 | from deepsecrets import PROFILER_ON, logger 8 | from deepsecrets.core.engines.hashed_secret import HashedSecretEngine 9 | from deepsecrets.core.engines.regex import RegexEngine 10 | from deepsecrets.core.engines.semantic import SemanticEngine 11 | from deepsecrets.core.model.file import File 12 | from deepsecrets.core.model.finding import Finding 13 | from deepsecrets.core.modes.iscan_mode import ScanMode 14 | from deepsecrets.core.rulesets.hashed_secrets import HashedSecretsRulesetBuilder 15 | from deepsecrets.core.rulesets.regex import RegexRulesetBuilder 16 | from deepsecrets.core.tokenizers.full_content import FullContentTokenizer 17 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 18 | from deepsecrets.core.utils.file_analyzer import FileAnalyzer 19 | 20 | 21 | class CliScanMode(ScanMode): 22 | engines_enabled: Dict[Type, bool] = {} 23 | rulesets: Dict[str, List] = {} 24 | 25 | def prepare_for_scan(self) -> None: 26 | logger.info(f'Found {len(self.filepaths)} applicable files for the scan') 27 | if len(self.filepaths) == 0: 28 | return 29 | 30 | for engine in self.config.engines: 31 | self.engines_enabled[engine.name] = True 32 | 33 | for ruleset_builder, paths in self.config.rulesets.items(): 34 | builder = ruleset_builder() 35 | for path in paths: 36 | builder.with_rules_from_file(os.path.abspath(path)) 37 | self.rulesets[builder.ruleset_name] = builder.rules 38 | 39 | 40 | def analyzer_bundle(self) -> DotWiz: 41 | bundle = super().analyzer_bundle() 42 | bundle.update( 43 | workdir=self.config.workdir_path, 44 | engines=self.engines_enabled, 45 | rulesets=self.rulesets, 46 | ) 47 | return bundle 48 | 49 | 50 | @staticmethod 51 | def _per_file_analyzer(bundle, file: Any) -> List[Finding]: 52 | if logger.level == logging.DEBUG: 53 | logger.debug(f'Starting analysis for {file}') 54 | 55 | results: List[Finding] = [] 56 | 57 | if not isinstance(file, str): 58 | raise Exception('Filepath as str expected') 59 | 60 | file = File(path=file, relative_path=file.replace(f'{bundle.workdir}/', '')) 61 | if file.length == 0: 62 | return results 63 | 64 | file_analyzer = FileAnalyzer(file) 65 | fct = FullContentTokenizer() 66 | lex = LexerTokenizer(deep_token_inspection=True) 67 
| 68 | regex_engine = RegexEngine( 69 | ruleset=bundle.rulesets.get(RegexRulesetBuilder.ruleset_name, []), 70 | ) 71 | 72 | for eng, enabled in bundle.engines.items(): 73 | if not enabled: 74 | continue 75 | 76 | if eng == RegexEngine.name: 77 | file_analyzer.add_engine(regex_engine, [fct]) 78 | 79 | if eng == HashedSecretEngine.name: 80 | hashed_secret_engine = HashedSecretEngine( 81 | ruleset=bundle.ruleset.get(HashedSecretsRulesetBuilder.ruleset_name, []) 82 | ) 83 | file_analyzer.add_engine(hashed_secret_engine, [lex]) 84 | 85 | if eng == SemanticEngine.name: 86 | semantic_engine = SemanticEngine(regex_engine) 87 | file_analyzer.add_engine(semantic_engine, [lex]) 88 | 89 | try: 90 | results = file_analyzer.process(threaded=False) 91 | except Exception as e: 92 | logger.exception(e) 93 | 94 | if PROFILER_ON: 95 | pass 96 | 97 | return results 98 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "deepsecrets" 3 | version = "1.1.3" 4 | description = "A better tool for secrets search" 5 | license = "MIT" 6 | authors = [ 7 | "Nikolai Khechumov ", 8 | ] 9 | keywords = ["security", "secrets", "credentials", "scanning", "appsec"] 10 | packages = [{include = "deepsecrets"}] 11 | 12 | 13 | readme = "README.md" 14 | classifiers = [ 15 | "Programming Language :: Python :: 3", 16 | "License :: OSI Approved :: MIT License", 17 | "Operating System :: OS Independent", 18 | "Environment :: Console", 19 | "Topic :: Security" 20 | 21 | ] 22 | 23 | [tool.poetry.urls] 24 | "Homepage" = "https://github.com/avito-tech/deepsecrets" 25 | "Bug Tracker" = "https://github.com/avito-tech/deepsecrets/issues" 26 | 27 | [tool.poetry.scripts] 28 | deepsecrets = "deepsecrets:__main__" 29 | 30 | 31 | [tool.poetry.dependencies] 32 | python = ">=3.9,<3.12" 33 | pydantic = "^1.10.4" 34 | pyyaml = "^6.0.0" 35 | pygments = "^2.14.0" 36 | ordered-set = "^4.1.0" 37 | dotwiz = "^0.4.0" 38 | mmh3 = "^3.0.0" 39 | regex = "^2023.3.23" 40 | jsx-lexer = "^2.0.1" 41 | aenum = "^3.1.15" 42 | puppetparser = "^0.2.0" 43 | 44 | 45 | [tool.poetry.group.test.dependencies] 46 | pytest = "^7.2.1" 47 | coverage = "^7.2.0" 48 | pytest-cov = "^4.0.0" 49 | 50 | [tool.poetry.group.dev.dependencies] 51 | black = "^23.1.0" 52 | 53 | 54 | [build-system] 55 | requires = ["poetry-core"] 56 | build-backend = "poetry.core.masonry.api" 57 | 58 | [tool.mypy] 59 | packages=["deepsecrets"] 60 | disallow_untyped_defs = true 61 | disallow_any_unimported = true 62 | no_implicit_optional = true 63 | check_untyped_defs = true 64 | warn_return_any = true 65 | show_error_codes = true 66 | 67 | exclude = [ 68 | '^tests/*', # TOML literal string (single-quotes, no escaping necessary) 69 | ] 70 | plugins = [ 71 | "pydantic.mypy" 72 | ] 73 | 74 | [tool.pytest.ini_options] 75 | pythonpath = [ 76 | "." 
77 | ] 78 | 79 | [tool.black] 80 | line-length = 120 81 | skip-string-normalization = 1 82 | 83 | [tool.coverage] 84 | run.omit = ["deepsecrets/__main__.py"] 85 | 86 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [pycodestyle] 2 | show-source = 1 3 | max-line-length = 120 4 | ignore = E402, W605 5 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avito-tech/deepsecrets/4afd597d3997a2bdbac8059e405659715faa51d4/tests/__init__.py -------------------------------------------------------------------------------- /tests/cli/test_cli.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.cli import DeepSecretsCliTool 4 | 5 | 6 | @pytest.fixture(scope='module') 7 | def args_1(): 8 | return [ 9 | '', 10 | '--target-dir', 11 | '/app/tests/fixtures/', 12 | '--false-findings', 13 | '/app/tests/fixtures/false_findings.json', 14 | '--outfile', 15 | './fdsafad.json', 16 | '--verbose', 17 | ] 18 | 19 | @pytest.fixture(scope='module') 20 | def args_2(): 21 | return [ 22 | '', 23 | '--target-dir', 24 | '/app/tests/fixtures/', 25 | '--false-findings', 26 | '/app/tests/fixtures/false_findings.json', 27 | '--excluded-paths', 28 | 'built-in', 29 | '/app/tests/fixtures/false_findings.json', 30 | '--outfile', 31 | './fdsafad.json', 32 | ] 33 | 34 | 35 | def test_1_cli(args_1): 36 | tool = DeepSecretsCliTool(args=args_1) 37 | tool.parse_arguments() 38 | 39 | assert tool.config is not None 40 | assert len(tool.config.rulesets) == 2 41 | assert len(tool.config.engines) == 2 42 | assert len(tool.config.global_exclusion_paths) == 1 43 | 44 | assert tool.config.output.path == './fdsafad.json' 45 | assert tool.config.workdir_path == '/app/tests/fixtures/' 46 | 47 | 48 | def test_2_cli(args_2): 49 | tool = DeepSecretsCliTool(args=args_2) 50 | tool.parse_arguments() 51 | 52 | assert tool.config is not None 53 | assert len(tool.config.global_exclusion_paths) == 2 -------------------------------------------------------------------------------- /tests/config/test_config.py: -------------------------------------------------------------------------------- 1 | from deepsecrets.config import Config, Output 2 | from deepsecrets.core.engines.regex import RegexEngine 3 | from deepsecrets.core.rulesets.regex import RegexRulesetBuilder 4 | from deepsecrets.core.utils.exceptions import FileNotFoundException 5 | 6 | 7 | def test_config(): 8 | config = Config() 9 | config.set_workdir('tests') 10 | config.engines.append(RegexEngine) 11 | config.add_ruleset(RegexRulesetBuilder, ['tests/fixtures/1.conf']) 12 | config.output = Output(type='json', path='tests/1.json') 13 | config.set_global_exclusion_paths(['tests/fixtures/1.conf']) 14 | 15 | exception = None 16 | try: 17 | config.add_ruleset(RegexRulesetBuilder, ['tests/fixtures/0.conf']) 18 | except FileNotFoundException as e: 19 | exception = e 20 | 21 | assert exception is not None 22 | 23 | assert config.workdir_path == '/app/tests' 24 | assert len(config.engines) == 1 25 | assert len(config.rulesets) == 1 26 | assert config.rulesets[RegexRulesetBuilder] == ['/app/tests/fixtures/1.conf'] -------------------------------------------------------------------------------- /tests/core/engines/hashed_secret/test_hs.py: 
-------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import pytest 4 | 5 | from deepsecrets.core.engines.hashed_secret import HashedSecretEngine 6 | from deepsecrets.core.engines.regex import RegexEngine 7 | from deepsecrets.core.model.file import File 8 | from deepsecrets.core.model.finding import Finding 9 | from deepsecrets.core.model.rules.hashed_secret import HashedSecretRule 10 | from deepsecrets.core.model.token import Token 11 | from deepsecrets.core.rulesets.hashed_secrets import HashedSecretsRulesetBuilder 12 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 13 | 14 | 15 | @pytest.fixture(scope='module') 16 | def file(): 17 | path = 'tests/fixtures/1.py' 18 | return File(path=path, relative_path=path) 19 | 20 | 21 | @pytest.fixture(scope='module') 22 | def engine(): 23 | builder = HashedSecretsRulesetBuilder() 24 | builder.with_rules_from_file('tests/fixtures/hashed_secrets.json') 25 | return HashedSecretEngine(ruleset=builder.rules) 26 | 27 | 28 | def test_1(file: File, engine: RegexEngine): 29 | findings: List[Finding] = [] 30 | tokens: List[Token] = LexerTokenizer(deep_token_inspection=True).tokenize(file) 31 | for token in tokens: 32 | findings.extend(engine.search(token)) 33 | 34 | assert len(findings) == 1 35 | assert isinstance(findings[0].rules[0], HashedSecretRule) 36 | assert findings[0].rules[0].hashed_val == '8c535f99d6d0fa55b64af0fae6e3b6829eda413b' 37 | 38 | 39 | def test_2(engine: HashedSecretEngine): 40 | rules = engine.ruleset 41 | 42 | assert rules[0] == rules[0] 43 | assert rules[1] == rules[1] 44 | assert rules[1] != rules[0] 45 | -------------------------------------------------------------------------------- /tests/core/engines/regex/test_regex.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import pytest 4 | 5 | from deepsecrets.core.engines.regex import RegexEngine 6 | from deepsecrets.core.model.file import File 7 | from deepsecrets.core.model.finding import Finding, FindingMerger, FindingResponse 8 | from deepsecrets.core.rulesets.regex import RegexRulesetBuilder 9 | from deepsecrets.core.tokenizers.full_content import FullContentTokenizer 10 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 11 | from deepsecrets.core.utils.fs import get_path_inside_package 12 | 13 | 14 | @pytest.fixture(scope='module') 15 | def file(): 16 | path = 'tests/fixtures/regex_checks.txt' 17 | return File(path=path, relative_path=path) 18 | 19 | @pytest.fixture(scope='module') 20 | def file_extless(): 21 | path = 'tests/fixtures/extless/radius' 22 | return File(path=path, relative_path=path) 23 | 24 | @pytest.fixture(scope='module') 25 | def file_go_7(): 26 | path = 'tests/fixtures/7.go' 27 | return File(path=path, relative_path=path) 28 | 29 | 30 | @pytest.fixture(scope='module') 31 | def regex_engine(): 32 | builder = RegexRulesetBuilder() 33 | builder.with_rules_from_file(get_path_inside_package('rules/regexes.json')) 34 | return RegexEngine(ruleset=builder.rules) 35 | 36 | 37 | def test_1(file: File, regex_engine: RegexEngine): 38 | findings: List[Finding] = [] 39 | tokens = FullContentTokenizer().tokenize(file) 40 | for token in tokens: 41 | token_findings = regex_engine.search(token) 42 | for finding in token_findings: 43 | finding.map_on_file(file=file, relative_start=token.span[0]) 44 | findings.append(finding) 45 | 46 | for finding in findings: 47 | finding.map_on_file(file=file, 
relative_start=finding.start_pos) 48 | finding.choose_final_rule() 49 | 50 | assert len(findings) == 9 51 | assert findings[0].rules[0].id == 'S0' 52 | assert findings[1].rules[0].id == 'S0' 53 | assert findings[2].rules[0].id == 'S1' 54 | assert findings[3].rules[0].id == 'S2' 55 | assert findings[4].rules[0].id == 'S3' 56 | assert findings[5].rules[0].id == 'S4' 57 | assert findings[6].rules[0].id == 'S5' 58 | assert findings[7].rules[0].id == 'S19' 59 | assert findings[8].rules[0].id == 'S19' 60 | 61 | findings = FindingMerger(findings).merge() 62 | assert len(findings) == 9 63 | 64 | response = FindingResponse.from_list(findings) 65 | 66 | 67 | def test_extless(file_extless: File, regex_engine: RegexEngine): 68 | findings: List[Finding] = [] 69 | tokens = FullContentTokenizer().tokenize(file_extless) 70 | tokens_lex = LexerTokenizer(deep_token_inspection=True).tokenize(file_extless) 71 | 72 | for token in tokens: 73 | token_findings = regex_engine.search(token) 74 | for finding in token_findings: 75 | finding.map_on_file(file=file_extless, relative_start=token.span[0]) 76 | findings.append(finding) 77 | 78 | for finding in findings: 79 | finding.map_on_file(file=file_extless, relative_start=finding.start_pos) 80 | finding.choose_final_rule() 81 | 82 | assert len(findings) == 1 83 | assert findings[0].rules[0].id == 'S28' 84 | 85 | 86 | 87 | def test_go_7(file_go_7: File, regex_engine: RegexEngine): 88 | findings: List[Finding] = [] 89 | tokens = FullContentTokenizer().tokenize(file_go_7) 90 | 91 | for token in tokens: 92 | token_findings = regex_engine.search(token) 93 | for finding in token_findings: 94 | finding.map_on_file(file=file_go_7, relative_start=token.span[0]) 95 | findings.append(finding) 96 | 97 | for finding in findings: 98 | finding.map_on_file(file=file_go_7, relative_start=finding.start_pos) 99 | finding.choose_final_rule() 100 | 101 | assert len(findings) == 0 102 | -------------------------------------------------------------------------------- /tests/core/engines/semantic/test_semantic.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import pytest 3 | 4 | from deepsecrets.core.engines.semantic import SemanticEngine 5 | from deepsecrets.core.model.file import File 6 | from deepsecrets.core.model.finding import Finding, FindingMerger 7 | from deepsecrets.core.model.token import SemanticType 8 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 9 | 10 | 11 | @pytest.fixture(scope='module') 12 | def file() -> File: 13 | path = 'tests/fixtures/4.py' 14 | return File(path=path, relative_path=path) 15 | 16 | @pytest.fixture(scope='module') 17 | def file_json_2() -> File: 18 | path = 'tests/fixtures/2.json' 19 | return File(path=path, relative_path=path) 20 | 21 | @pytest.fixture(scope='module') 22 | def file_toml_1() -> File: 23 | path = 'tests/fixtures/1.toml' 24 | return File(path=path, relative_path=path) 25 | 26 | @pytest.fixture(scope='module') 27 | def file_toml_2() -> File: 28 | path = 'tests/fixtures/2.toml' 29 | return File(path=path, relative_path=path) 30 | 31 | @pytest.fixture(scope='module') 32 | def file_sh_2() -> File: 33 | path = 'tests/fixtures/2.sh' 34 | return File(path=path, relative_path=path) 35 | 36 | @pytest.fixture(scope='module') 37 | def file_html_1() -> File: 38 | path = 'tests/fixtures/1.html' 39 | return File(path=path, relative_path=path) 40 | 41 | 42 | def test_1_semantic_engine(file: File): 43 | tokens = LexerTokenizer(deep_token_inspection=True).tokenize(file) 
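# Added note (not part of the original test file): with deep_token_inspection=True the
# lexer-based tokenizer appears to attach Semantic metadata (variable name and type) to
# value tokens. The assertions below rely on that: tokens[3] is expected to carry the
# variable name 'pass' taken from tests/fixtures/4.py, which SemanticEngine then flags
# via its 'Var naming' rule.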
44 | assert len(tokens) == 13 45 | 46 | assert tokens[3].semantic.type == SemanticType.VAR 47 | assert tokens[3].semantic.name == 'pass' 48 | 49 | engine = SemanticEngine(subengine=None) 50 | findings = engine.search(tokens[3]) 51 | assert len(findings) == 1 52 | assert findings[0].rules[0].name == 'Var naming' 53 | 54 | 55 | def test_2_semantic_engine(file_json_2: File): 56 | tokens = LexerTokenizer(deep_token_inspection=True).tokenize(file_json_2) 57 | assert len(tokens) == 6 58 | 59 | assert tokens[0].semantic.type == SemanticType.VAR 60 | assert tokens[0].semantic.name == 'access_Token' 61 | 62 | assert tokens[1].semantic.type == SemanticType.VAR 63 | assert tokens[1].semantic.name == 'accessToken' 64 | 65 | engine = SemanticEngine(subengine=None) 66 | 67 | findings = [] 68 | for token in tokens: 69 | findings.extend(engine.search(token)) 70 | 71 | assert len(findings) == 3 72 | assert findings[0].rules[0].name == 'Entropy+Var naming' 73 | assert findings[1].rules[0].name == 'Entropy+Var naming' 74 | assert findings[2].rules[0].name == 'Var naming' 75 | 76 | 77 | 78 | def test_3_semantic_engine(file_toml_1: File): 79 | tokens = LexerTokenizer(deep_token_inspection=True).tokenize(file_toml_1) 80 | assert len(tokens) == 51 81 | 82 | assert tokens[50].semantic.type == SemanticType.VAR 83 | assert tokens[50].semantic.name == 'MATTERMOST_BOT_TOKEN' 84 | 85 | engine = SemanticEngine(subengine=None) 86 | 87 | findings = [] 88 | for token in tokens: 89 | findings.extend(engine.search(token)) 90 | 91 | assert len(findings) == 2 92 | assert findings[0].rules[0].name == 'Var naming' 93 | assert findings[1].rules[0].name == 'Var naming' 94 | 95 | 96 | 97 | def test_4_semantic_engine(file_toml_2: File): 98 | tokens = LexerTokenizer(deep_token_inspection=True).tokenize(file_toml_2) 99 | assert len(tokens) == 13 100 | 101 | engine = SemanticEngine(subengine=None) 102 | 103 | findings = [] 104 | findings.extend(engine.search(tokens[4])) 105 | findings.extend(engine.search(tokens[10])) 106 | findings.extend(engine.search(tokens[12])) 107 | 108 | assert len(findings) == 1 109 | assert findings[0].rules[0].name == 'Var naming' 110 | 111 | 112 | def test_5_semantic_engine(file_sh_2: File): 113 | tokens = LexerTokenizer(deep_token_inspection=True).tokenize(file_sh_2) 114 | assert len(tokens) == 16 115 | 116 | engine = SemanticEngine(subengine=None) 117 | 118 | findings: List[Finding] = [] 119 | for token in tokens: 120 | findings.extend(engine.search(token)) 121 | 122 | for finding in findings: 123 | finding.map_on_file(file=file_sh_2, relative_start=finding.start_pos) 124 | finding.choose_final_rule() 125 | 126 | 127 | findings = FindingMerger(findings).merge() 128 | assert len(findings) == 1 129 | assert findings[0].final_rule.name == 'Dangerous condition' 130 | 131 | 132 | def test_6_semantic_engine(file_html_1: File): 133 | tokens = LexerTokenizer(deep_token_inspection=True).tokenize(file_html_1) 134 | #assert len(tokens) == 16 135 | 136 | engine = SemanticEngine(subengine=None) 137 | 138 | findings: List[Finding] = [] 139 | for token in tokens: 140 | findings.extend(engine.search(token)) 141 | 142 | for finding in findings: 143 | finding.map_on_file(file=file_html_1, relative_start=finding.start_pos) 144 | finding.choose_final_rule() 145 | 146 | 147 | findings = FindingMerger(findings).merge() 148 | assert len(findings) == 0 -------------------------------------------------------------------------------- /tests/core/helpers/test_content_analyzer.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.helpers.content_analyzer import ContentAnalyzer 4 | from deepsecrets.core.model.file import File 5 | from deepsecrets.core.tokenizers.full_content import FullContentTokenizer 6 | 7 | STR = 'hellofrominsidethebase64' 8 | BASE_64_STR = 'aGVsbG9mcm9taW5zaWRldGhlYmFzZTY0' 9 | 10 | 11 | @pytest.fixture(scope='module') 12 | def file() -> File: 13 | path = 'test.txt' 14 | return File(path=path, relative_path=path, content=BASE_64_STR) 15 | 16 | 17 | def test_semantic_engine(file: File): 18 | tokens = FullContentTokenizer().tokenize(file) 19 | assert len(tokens) == 1 20 | 21 | token = tokens[0] 22 | assert len(token.uncovered_content) == 0 23 | 24 | ContentAnalyzer(engine=None).analyze(token) 25 | assert len(token.uncovered_content) == 1 26 | assert token.uncovered_content[0] == STR 27 | -------------------------------------------------------------------------------- /tests/core/helpers/test_entropy.py: -------------------------------------------------------------------------------- 1 | from deepsecrets.core.helpers.entropy import EntropyHelper 2 | 3 | 4 | def test_high_entropy(): 5 | test_string = 'qwertyuiopasdfghjklzxcvbnm,123456789' 6 | entropy = EntropyHelper.get_for_string(test_string) 7 | 8 | assert 5.16 <= entropy <= 5.17 9 | 10 | 11 | def test_some_entropy(): 12 | test_string = 'hello and very warm welcome, let\'s get the party started' 13 | entropy = EntropyHelper.get_for_string(test_string) 14 | 15 | assert 3.91 <= entropy <= 3.92 16 | 17 | 18 | def test_password_entropy(): 19 | test_string = 'v3ry$tongp@ssw0rd' 20 | entropy = EntropyHelper.get_for_string(test_string) 21 | 22 | assert 3.85 <= entropy <= 3.86 23 | 24 | 25 | # Oops, it seems like the password has less entropy than a statement 26 | -------------------------------------------------------------------------------- /tests/core/model/test_file.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | 5 | LINE_BREAK = '\n' 6 | 7 | 8 | @pytest.fixture(scope='module') 9 | def model() -> File: 10 | path = 'tests/fixtures/4.go' 11 | return File(path=path, relative_path=path) 12 | 13 | 14 | def test_basic_info(model): 15 | assert model.path == '/app/tests/fixtures/4.go' 16 | assert model.relative_path == 'tests/fixtures/4.go' 17 | assert model.extension == 'go' 18 | assert model.length == 395 19 | assert len(model.line_offsets) == 15 20 | 21 | 22 | def test_line_offsets(model): 23 | assert model.line_offsets[1] == (0, 48) 24 | assert model.content[48] == LINE_BREAK 25 | 26 | assert model.line_offsets[2] == (49, 152) 27 | assert model.content[152] == LINE_BREAK 28 | 29 | assert model.line_offsets[3] == (153, 154) 30 | assert model.content[154] == LINE_BREAK 31 | 32 | assert model.line_offsets[4] == (155, 194) 33 | assert model.content[194] == LINE_BREAK 34 | 35 | assert model.line_offsets[5] == (195, 240) 36 | assert model.content[240] == LINE_BREAK 37 | 38 | assert model.line_offsets[6] == (241, 293) 39 | assert model.content[293] == LINE_BREAK 40 | 41 | assert model.line_offsets[7] == (294, 294) 42 | assert model.content[294] == LINE_BREAK 43 | 44 | assert model.line_offsets[8] == (295, 311) 45 | assert model.content[311] == LINE_BREAK 46 | 47 | assert model.line_offsets[9] == (312, 325) 48 | assert model.content[325] == LINE_BREAK 49 | 50 | assert model.line_offsets[10] == (326, 328) 51 | assert
model.content[328] == LINE_BREAK 52 | 53 | assert model.line_offsets[11] == (329, 358) 54 | assert model.content[358] == LINE_BREAK 55 | 56 | assert model.line_offsets[12] == (359, 375) 57 | assert model.content[375] == LINE_BREAK 58 | 59 | assert model.line_offsets[13] == (376, 389) 60 | assert model.content[389] == LINE_BREAK 61 | 62 | assert model.line_offsets[14] == (390, 392) 63 | assert model.content[392] == LINE_BREAK 64 | 65 | assert model.line_offsets[15] == (393, 394) 66 | assert model.content[394] == LINE_BREAK 67 | 68 | assert ( 69 | model.content[-1] 70 | == model.content[model.length - 1] 71 | == model.content[394] 72 | == '\n' 73 | ) 74 | 75 | 76 | def test_caching(model: File): 77 | LINUM = 4 78 | line_contents = model.get_line_contents(LINUM) 79 | assert line_contents == '''\ttest2 := os.Getenv(`TEST_TEST`, "lol")''' 80 | assert model.line_contents_cache[LINUM] == line_contents 81 | 82 | 83 | def test_get_full_line_for_position(model: File): 84 | POSITION = 94 85 | projected_line_number = 2 86 | line_contents = model.get_full_line_for_position(POSITION) 87 | assert ( 88 | line_contents 89 | == '\tos.Setenv("RABBITMQ_URL", "amqp://fake_user:TESTSECRET1234@rabbitmq-esp01.miami.example.com:5672/esp")' 90 | ) 91 | assert projected_line_number in model.line_contents_cache.keys() 92 | 93 | 94 | def test_get_line_number(model: File): 95 | POSITION = 94 96 | projected_line_number = 2 97 | line_number = model.get_line_number(POSITION) 98 | assert line_number == projected_line_number 99 | 100 | 101 | def test_1_span_for_string(model: File): 102 | looking_for = 'rabbitmq-esp01' 103 | span = model.get_span_for_string(looking_for) 104 | assert span == (109, 123) 105 | 106 | 107 | def test_2_span_for_string(model: File): 108 | looking_for = 'rabbitmq-esp01' 109 | span = model.get_span_for_string(looking_for, between=(130, 150)) 110 | assert span is None 111 | -------------------------------------------------------------------------------- /tests/core/model/test_finding.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.model.finding import Finding 5 | from deepsecrets.core.model.rules.rule import Rule 6 | from deepsecrets.core.model.token import Token 7 | 8 | TEST_TOKEN_CONTENTS = ( 9 | '"amqp://fake_user:TESTSECRET1234@rabbitmq-esp01.miami.example.com:5672/esp"' 10 | ) 11 | TOKEN_SPAN = (76, 151) 12 | 13 | FINDING_CONTENT = 'TESTSECRET1234' 14 | FINDING_SPAN_INSIDE_TOKEN = (18, 32) 15 | 16 | 17 | @pytest.fixture(scope='module') 18 | def file() -> File: 19 | path = 'tests/fixtures/4.go' 20 | return File(path=path, relative_path=path) 21 | 22 | 23 | @pytest.fixture(scope='module') 24 | def rule() -> Rule: 25 | return Rule(id='test') 26 | 27 | 28 | @pytest.fixture(scope='module') 29 | def token(file: File) -> Token: 30 | return Token( 31 | file=file, 32 | content=TEST_TOKEN_CONTENTS, 33 | span=file.get_span_for_string(TEST_TOKEN_CONTENTS), 34 | ) 35 | 36 | 37 | def test_1_finding(file: File, token: Token, rule: Rule): 38 | assert file.content[token.span[0] : token.span[1]] == TEST_TOKEN_CONTENTS 39 | 40 | new_finding = Finding( 41 | file=file, 42 | rules=[rule], 43 | start_pos=FINDING_SPAN_INSIDE_TOKEN[0], 44 | end_pos=FINDING_SPAN_INSIDE_TOKEN[1], 45 | detection=token.content[ 46 | FINDING_SPAN_INSIDE_TOKEN[0] : FINDING_SPAN_INSIDE_TOKEN[1] 47 | ], 48 | ) 49 | 50 | assert new_finding.detection == FINDING_CONTENT 51 | 
new_finding.map_on_file(relative_start=token.span[0]) 52 | 53 | assert new_finding.start_pos == TOKEN_SPAN[0] + FINDING_SPAN_INSIDE_TOKEN[0] 54 | assert new_finding.end_pos == TOKEN_SPAN[0] + FINDING_SPAN_INSIDE_TOKEN[1] 55 | -------------------------------------------------------------------------------- /tests/core/model/test_token.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.model.semantic import Variable 5 | from deepsecrets.core.model.token import Semantic, SemanticType, Token 6 | 7 | TEST_TOKEN_CONTENTS = ( 8 | '"amqp://fake_user:TESTSECRET1234@rabbitmq-esp01.miami.example.com:5672/esp"' 9 | ) 10 | TOKEN_SPAN = (76, 151) 11 | 12 | 13 | @pytest.fixture(scope='module') 14 | def file() -> File: 15 | path = 'tests/fixtures/4.go' 16 | return File(path=path, relative_path=path) 17 | 18 | 19 | def test_token(file: File): 20 | token = Token( 21 | file=file, 22 | content=TEST_TOKEN_CONTENTS, 23 | span=file.get_span_for_string(TEST_TOKEN_CONTENTS), 24 | ) 25 | 26 | assert token.span == TOKEN_SPAN 27 | assert token.length == 75 28 | assert token.semantic is None 29 | assert len(token.type) == 0 30 | 31 | 32 | def test_semantic_token(file: File): 33 | token = Token( 34 | file=file, 35 | content=TEST_TOKEN_CONTENTS, 36 | span=file.get_span_for_string(TEST_TOKEN_CONTENTS), 37 | ) 38 | 39 | token.set_type(['Variable']) 40 | variable = Variable() 41 | variable.name = token 42 | variable.value = token 43 | 44 | token.semantic = Semantic(type=SemanticType.VAR, name=variable.name.content) 45 | 46 | assert token.span == TOKEN_SPAN 47 | assert token.length == 75 48 | assert token.semantic is not None 49 | assert len(token.type) == 1 50 | -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_conf.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_toml_1(): 9 | path = 'tests/fixtures/1.toml' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | @pytest.fixture(scope='module') 14 | def file_json_1(): 15 | path = 'tests/fixtures/1.json' 16 | return File(path=path, relative_path=path) 17 | 18 | @pytest.fixture(scope='module') 19 | def file_json_2_broken(): 20 | path = 'tests/fixtures/2.json' 21 | return File(path=path, relative_path=path) 22 | 23 | 24 | @pytest.fixture(scope='module') 25 | def file_yaml_1(): 26 | path = 'tests/fixtures/1.yaml' 27 | return File(path=path, relative_path=path) 28 | 29 | @pytest.fixture(scope='module') 30 | def file_yml_1(): 31 | path = 'tests/fixtures/1.yml' 32 | return File(path=path, relative_path=path) 33 | 34 | 35 | @pytest.fixture(scope='module') 36 | def file_ini_1(): 37 | path = 'tests/fixtures/1.ini' 38 | return File(path=path, relative_path=path) 39 | 40 | 41 | @pytest.fixture(scope='module') 42 | def file_pp_1(): 43 | path = 'tests/fixtures/1.pp' 44 | return File(path=path, relative_path=path) 45 | 46 | 47 | def test_1(file_toml_1): 48 | lex = LexerTokenizer(deep_token_inspection=True) 49 | lex.tokenize(file_toml_1, post_filter=False) 50 | 51 | variables = lex.get_variables() 52 | assert len(variables) == 50 53 | 54 | 55 | def test_2(file_json_1): 56 | lex = LexerTokenizer(deep_token_inspection=True) 57 | 
lex.tokenize(file_json_1, post_filter=False) 58 | 59 | variables = lex.get_variables() 60 | assert len(variables) == 1 61 | 62 | 63 | def test_3(file_yaml_1): 64 | lex = LexerTokenizer(deep_token_inspection=True) 65 | lex.tokenize(file_yaml_1, post_filter=False) 66 | 67 | variables = lex.get_variables() 68 | assert len(variables) == 4 69 | 70 | 71 | def test_4(file_ini_1): 72 | lex = LexerTokenizer(deep_token_inspection=True) 73 | lex.tokenize(file_ini_1, post_filter=False) 74 | 75 | variables = lex.get_variables() 76 | assert len(variables) == 9 77 | 78 | 79 | def test_5(file_pp_1): 80 | lex = LexerTokenizer(deep_token_inspection=True) 81 | lex.tokenize(file_pp_1, post_filter=False) 82 | 83 | variables = lex.get_variables() 84 | assert len(variables) == 37 85 | 86 | 87 | def test_6(file_json_2_broken): 88 | lex = LexerTokenizer(deep_token_inspection=True) 89 | lex.tokenize(file_json_2_broken, post_filter=False) 90 | 91 | variables = lex.get_variables() 92 | assert len(variables) == 6 93 | 94 | 95 | def test_7(file_yml_1): 96 | lex = LexerTokenizer(deep_token_inspection=True) 97 | lex.tokenize(file_yml_1, post_filter=False) 98 | 99 | variables = lex.get_variables() 100 | assert len(variables) == 1 101 | -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_cs.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_cs_1(): 9 | path = 'tests/fixtures/1.cs' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | def test_1(file_cs_1): 14 | lex = LexerTokenizer(deep_token_inspection=True) 15 | lex.tokenize(file_cs_1, post_filter=False) 16 | 17 | variables = lex.get_variables() 18 | assert len(variables) == 9 19 | -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_go.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_go_1(): 9 | path = 'tests/fixtures/1.go' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | @pytest.fixture(scope='module') 14 | def file_go_2(): 15 | path = 'tests/fixtures/2.go' 16 | return File(path=path, relative_path=path) 17 | 18 | 19 | @pytest.fixture(scope='module') 20 | def file_go_3(): 21 | path = 'tests/fixtures/3.go' 22 | return File(path=path, relative_path=path) 23 | 24 | 25 | @pytest.fixture(scope='module') 26 | def file_go_4(): 27 | path = 'tests/fixtures/4.go' 28 | return File(path=path, relative_path=path) 29 | 30 | 31 | @pytest.fixture(scope='module') 32 | def file_go_5(): 33 | path = 'tests/fixtures/5.go' 34 | return File(path=path, relative_path=path) 35 | 36 | 37 | @pytest.fixture(scope='module') 38 | def file_go_6(): 39 | path = 'tests/fixtures/6.go' 40 | return File(path=path, relative_path=path) 41 | 42 | 43 | @pytest.fixture(scope='module') 44 | def file_go_7(): 45 | path = 'tests/fixtures/7.go' 46 | return File(path=path, relative_path=path) 47 | 48 | 49 | def test_1(file_go_1): 50 | lex = LexerTokenizer(deep_token_inspection=True) 51 | tokens = lex.tokenize(file_go_1, post_filter=False) 52 | variables = lex.get_variables(tokens) 53 | 
assert len(variables) == 65 54 | 55 | 56 | def test_2(file_go_2): 57 | lex = LexerTokenizer(deep_token_inspection=True) 58 | lex.tokenize(file_go_2, post_filter=False) 59 | variables = lex.get_variables() 60 | assert len(variables) == 86 61 | 62 | 63 | def test_3(file_go_3): 64 | lex = LexerTokenizer(deep_token_inspection=True) 65 | lex.tokenize(file_go_3, post_filter=False) 66 | variables = lex.get_variables() 67 | assert len(variables) == 2 68 | 69 | 70 | def test_4(file_go_4): 71 | lex = LexerTokenizer(deep_token_inspection=True) 72 | lex.tokenize(file_go_4, post_filter=False) 73 | variables = lex.get_variables() 74 | assert len(variables) == 2 75 | 76 | 77 | def test_5(file_go_5): 78 | lex = LexerTokenizer(deep_token_inspection=True) 79 | lex.tokenize(file_go_5, post_filter=False) 80 | variables = lex.get_variables() 81 | assert len(variables) == 1 82 | 83 | 84 | def test_6(file_go_6): 85 | lex = LexerTokenizer(deep_token_inspection=True) 86 | lex.tokenize(file_go_6, post_filter=False) 87 | variables = lex.get_variables() 88 | assert len(variables) == 4 89 | 90 | 91 | def test_7(file_go_7): 92 | lex = LexerTokenizer(deep_token_inspection=True) 93 | lex.tokenize(file_go_7, post_filter=False) 94 | variables = lex.get_variables() 95 | assert len(variables) == 1 96 | -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_html.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_html_1(): 9 | path = 'tests/fixtures/1.html' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | def test_1(file_html_1): 14 | lex = LexerTokenizer(deep_token_inspection=True) 15 | lex.tokenize(file_html_1, post_filter=False) 16 | 17 | variables = lex.get_variables() 18 | assert len(variables) == 32 19 | -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_java.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_java_1(): 9 | path = 'tests/fixtures/1.java' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | def test_1(file_java_1): 14 | lex = LexerTokenizer(deep_token_inspection=True) 15 | lex.tokenize(file_java_1, post_filter=False) 16 | 17 | variables = lex.get_variables() 18 | assert len(variables) == 2 19 | -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_js.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_js_3(): 9 | path = 'tests/fixtures/3.js' 10 | return File(path=path, relative_path=path) 11 | 12 | @pytest.fixture(scope='module') 13 | def file_jsx_1(): 14 | path = 'tests/fixtures/1.jsx' 15 | return File(path=path, relative_path=path) 16 | 17 | @pytest.fixture(scope='module') 18 | def file_jsx_2(): 19 | path = 'tests/fixtures/2.jsx' 20 | return File(path=path, relative_path=path) 
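# Added note (not part of the original test file): the JSX fixtures below are presumably
# routed through the 'react' lexer (provided by the jsx-lexer package listed in
# pyproject.toml); the tests further down assert lex.lexer.name == 'react' before counting
# the variables that were detected.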
21 | 22 | @pytest.fixture(scope='module') 23 | def file_jsx_3(): 24 | path = 'tests/fixtures/3.jsx' 25 | return File(path=path, relative_path=path) 26 | 27 | @pytest.fixture(scope='module') 28 | def file_js_4(): 29 | path = 'tests/fixtures/4.js' 30 | return File(path=path, relative_path=path) 31 | 32 | 33 | 34 | def test_1(file_js_3): 35 | lex = LexerTokenizer(deep_token_inspection=True) 36 | tokens = lex.tokenize(file_js_3, post_filter=True) 37 | assert lex.lexer.name == 'react' 38 | 39 | variables = lex.get_variables(tokens) 40 | assert len(variables) == 2 41 | 42 | 43 | def test_2_jsx(file_jsx_1): 44 | lex = LexerTokenizer(deep_token_inspection=True) 45 | tokens = lex.tokenize(file_jsx_1, post_filter=True) 46 | assert lex.lexer.name == 'react' 47 | 48 | variables = lex.get_variables(tokens) 49 | assert len(variables) == 1 50 | 51 | 52 | def test_3_jsx(file_jsx_2): 53 | lex = LexerTokenizer(deep_token_inspection=True) 54 | tokens = lex.tokenize(file_jsx_2, post_filter=True) 55 | assert lex.lexer.name == 'react' 56 | 57 | variables = lex.get_variables(tokens) 58 | assert len(variables) == 0 59 | 60 | 61 | def test_4_jsx(file_jsx_3): 62 | lex = LexerTokenizer(deep_token_inspection=True) 63 | tokens = lex.tokenize(file_jsx_3, post_filter=True) 64 | assert lex.lexer.name == 'react' 65 | 66 | variables = lex.get_variables(tokens) 67 | assert len(variables) == 0 68 | 69 | 70 | def test_5_js(file_js_4): 71 | lex = LexerTokenizer(deep_token_inspection=True) 72 | tokens = lex.tokenize(file_js_4, post_filter=True) 73 | 74 | variables = lex.get_variables(tokens) 75 | assert len(variables) == 0 -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_php.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_php_1(): 9 | path = 'tests/fixtures/1.php' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | def test_1(file_php_1): 14 | lex = LexerTokenizer(deep_token_inspection=True) 15 | lex.tokenize(file_php_1, post_filter=False) 16 | 17 | variables = lex.get_variables() 18 | assert len(variables) == 12 19 | -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_py.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_py_1(): 9 | path = 'tests/fixtures/1.py' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | @pytest.fixture(scope='module') 14 | def file_py_2(): 15 | path = 'tests/fixtures/2.py' 16 | return File(path=path, relative_path=path) 17 | 18 | 19 | @pytest.fixture(scope='module') 20 | def file_py_3(): 21 | path = 'tests/fixtures/3.py' 22 | return File(path=path, relative_path=path) 23 | 24 | 25 | @pytest.fixture(scope='module') 26 | def file_py_4(): 27 | path = 'tests/fixtures/4.py' 28 | return File(path=path, relative_path=path) 29 | 30 | 31 | def test_1(file_py_1): 32 | lex = LexerTokenizer(deep_token_inspection=True) 33 | lex.tokenize(file_py_1, post_filter=False) 34 | variables = lex.get_variables() 35 | assert len(variables) == 5 36 | 37 | 38 | def test_2(file_py_2): 39 | lex 
= LexerTokenizer(deep_token_inspection=True) 40 | lex.tokenize(file_py_2, post_filter=False) 41 | variables = lex.get_variables() 42 | assert len(variables) == 92 43 | 44 | 45 | def test_3(file_py_3): 46 | lex = LexerTokenizer(deep_token_inspection=True) 47 | lex.tokenize(file_py_3, post_filter=False) 48 | 49 | variables = lex.get_variables() 50 | assert len(variables) == 3 51 | assert variables[1].semantic.name == 'password' 52 | assert variables[1].content == 'TESTSECRET1234' 53 | 54 | assert variables[2].semantic.name == 'pwd' 55 | assert variables[2].content == '2TESTSECRET1234' 56 | 57 | 58 | def test_4(file_py_4): 59 | lex = LexerTokenizer(deep_token_inspection=True) 60 | lex.tokenize(file_py_4, post_filter=False) 61 | 62 | variables = lex.get_variables() 63 | assert len(variables) == 11 64 | -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_sh.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_sh_1(): 9 | path = 'tests/fixtures/1.sh' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | def test_1(file_sh_1): 14 | lex = LexerTokenizer(deep_token_inspection=True) 15 | lex.tokenize(file_sh_1, post_filter=False) 16 | 17 | variables = lex.get_variables() 18 | assert len(variables) == 7 19 | -------------------------------------------------------------------------------- /tests/core/tokenizers/lexer/variable_detection/test_swift.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_swift_1(): 9 | path = 'tests/fixtures/1.swift' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | def test_suppress(file_swift_1): 14 | lex = LexerTokenizer(deep_token_inspection=True) 15 | vars = lex.tokenize(file_swift_1, post_filter=True) 16 | 17 | assert len(vars) == 0 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /tests/core/tokenizers/test_full_content.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers.full_content import FullContentTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_toml_1(): 9 | path = 'tests/fixtures/1.toml' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | def test_full_content(file_toml_1: File): 14 | tokenizer = FullContentTokenizer() 15 | tokens = tokenizer.tokenize(file=file_toml_1) 16 | assert len(tokens) == 1 17 | assert tokens[0].content == file_toml_1.content 18 | -------------------------------------------------------------------------------- /tests/core/tokenizers/test_per_line.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.model.file import File 4 | from deepsecrets.core.tokenizers import PerLineTokenizer 5 | 6 | 7 | @pytest.fixture(scope='module') 8 | def file_toml_1(): 9 | path = 'tests/fixtures/1.toml' 10 | return File(path=path, relative_path=path) 11 | 12 | 13 | def test_per_line(file_toml_1: File): 14 | tokenizer = 
PerLineTokenizer() 15 | tokens = tokenizer.tokenize(file=file_toml_1) 16 | assert len(tokens) == 76 17 | -------------------------------------------------------------------------------- /tests/core/utils/test_file_analyzer.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.core.engines.semantic import SemanticEngine 4 | from deepsecrets.core.model.file import File 5 | from deepsecrets.core.tokenizers.lexer import LexerTokenizer 6 | from deepsecrets.core.utils.file_analyzer import FileAnalyzer 7 | 8 | 9 | @pytest.fixture(scope='module') 10 | def file_toml_1(): 11 | path = 'tests/fixtures/1.toml' 12 | return File(path=path, relative_path=path) 13 | 14 | 15 | def test_file_analyzer(file_toml_1): 16 | file_analyzer = FileAnalyzer(file_toml_1) 17 | lex = LexerTokenizer(deep_token_inspection=True) 18 | semantic_engine = SemanticEngine(subengine=None) 19 | file_analyzer.add_engine(engine=semantic_engine, tokenizers=[lex]) 20 | 21 | findings = file_analyzer.process() 22 | assert findings is not None 23 | -------------------------------------------------------------------------------- /tests/core/utils/test_fs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/avito-tech/deepsecrets/4afd597d3997a2bdbac8059e405659715faa51d4/tests/core/utils/test_fs.py -------------------------------------------------------------------------------- /tests/core/utils/test_lexer_finder.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from deepsecrets.core.model.file import File 3 | from deepsecrets.core.utils.lexer_finder import LexerFinder 4 | 5 | 6 | @pytest.fixture(scope='module') 7 | def file_extless_json(): 8 | path = 'tests/fixtures/extless/json' 9 | return File(path=path, relative_path=path) 10 | 11 | @pytest.fixture(scope='module') 12 | def file_js_react(): 13 | path = 'tests/fixtures/3.js' 14 | return File(path=path, relative_path=path) 15 | 16 | 17 | 18 | def test_extless_json(file_extless_json): 19 | lf = LexerFinder() 20 | lexer = lf.find(file_extless_json) 21 | 22 | assert lexer.name == 'JSON' 23 | 24 | def test_js_react(file_js_react): 25 | lf = LexerFinder() 26 | lexer = lf.find(file_js_react) 27 | 28 | assert lexer.name == 'react' -------------------------------------------------------------------------------- /tests/fixtures/1.conf: -------------------------------------------------------------------------------- 1 | imap = { 2 | 'host': 'example.test.com', 3 | 'username': 'loremipsum@test.com', 4 | 'password': 'passwordstrongbutleaked' 5 | } -------------------------------------------------------------------------------- /tests/fixtures/1.cs: -------------------------------------------------------------------------------- 1 | string name = "John"; 2 | int myNum = 15; 3 | string myText; 4 | myText = "Hello"; 5 | 6 | var people = new Dictionary() 7 | { 8 | { 5, "Tom"}, 9 | { 3, "Sam"}, 10 | { 11, "Bob"} 11 | }; 12 | 13 | var people = new Dictionary() 14 | { 15 | [5] = "Tom", // to be covered 16 | [6] = "Sam", // to be covered 17 | [7] = "Bob" // to be covered 18 | }; 19 | 20 | 21 | var mike = new KeyValuePair(56, "Mike"); // to be covered 22 | -------------------------------------------------------------------------------- /tests/fixtures/1.erb: -------------------------------------------------------------------------------- 1 | <% 2 | require 'digest/sha1' 3 | actual_password = Digest::SHA1.hexdigest 
[scope['bareos::secret'], @director_password].join('') 4 | -%> 5 | Director { # define myself 6 | Name = "<%= scope['bareos::director'] %>" 7 | QueryFile = "/usr/lib/bareos/scripts/query.sql" 8 | Maximum Concurrent Jobs = 80 9 | Maximum Console Connections = 48 10 | Plugin Directory = /usr/lib/bareos/plugins 11 | Plugin Names = "python" 12 | Password = "<%= actual_password %>" # Console password 13 | Messages = "bareos:messages:daemon" 14 | Auditing = yes 15 | Optimize For Speed = yes 16 | } 17 | 18 | Catalog { 19 | Name = "<%= scope['bareos::catalog'] %>" 20 | dbdriver = "postgresql" 21 | dbname = "bareos" 22 | dbuser = "service_bareos_production_01" 23 | dbpassword = "asdfasdf" 24 | dbaddress = db19 25 | dbport = 6432 26 | Reconnect = yes 27 | } -------------------------------------------------------------------------------- /tests/fixtures/1.html: -------------------------------------------------------------------------------- 1 | {% extends 'labels/base.html' %} 2 | {% load static %} 3 | 4 | {% block contentcss %} 5 | 6 | {% endblock %} 7 | 8 | {% block content %} 9 |
10 | {% if session and not user.is_authenticated %} 11 | session: {{ session }} 12 | {% endif %} 13 |
14 |
15 |
16 |
17 |
Запрос 18 | 19 | {{ task.query_text }} 20 |
21 |
22 |
23 | {{ task.item_title }} 24 |
25 |
{{ task.item_category }}
26 | {% if task.item_img %} 27 | item image 28 | {% endif %} 29 |
{{ task.item_description|safe }}
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 | 38 | 39 | 40 |
41 |
42 | {% endblock %} 43 | 44 | {% block contentjs %} 45 | 109 | {% endblock %} -------------------------------------------------------------------------------- /tests/fixtures/1.ini: -------------------------------------------------------------------------------- 1 | [auth] 2 | login = cchecker 3 | password = fbyuihqwjlkfr 4 | 5 | [api] 6 | jira = https://jr.example.com/rest 7 | cf = https://cf.example.com/rest 8 | 9 | [mail] 10 | to = security@example.com 11 | from = logs@example.com 12 | subject = JR|CF Keyword Alert 13 | send_debug = nice@example.com 14 | sender = http://prod.example.com:8888/service-email-sender 15 | -------------------------------------------------------------------------------- /tests/fixtures/1.java: -------------------------------------------------------------------------------- 1 | String name = "John"; 2 | 3 | Map map = new HashMap(); 4 | map.put("dog", "type of animal"); 5 | -------------------------------------------------------------------------------- /tests/fixtures/1.js: -------------------------------------------------------------------------------- 1 | const ATLASSIAN_USERNAME = process.env.ATLASSIAN_USERNAME || 'user'; 2 | const ATLASSIAN_PASSWORD = process.env.ATLASSIAN_PASSWORD || 'TESTSECRET1234'; -------------------------------------------------------------------------------- /tests/fixtures/1.json: -------------------------------------------------------------------------------- 1 | { 2 | "queue": { 3 | "connection": "amqp://guest:guest@10.10.11.104:32769/" 4 | } 5 | } -------------------------------------------------------------------------------- /tests/fixtures/1.jsx: -------------------------------------------------------------------------------- 1 | import Link from "@/components/Link"; 2 | import PageHeader from "@/components/PageHeader"; 3 | import Menu from "antd/lib/menu"; 4 | import PropTypes from "prop-types"; 5 | import React from "react"; 6 | 7 | import "./layout.less"; 8 | 9 | export default function Layout({ activeTab, children }) { 10 | return ( 11 |
12 |
13 | 14 |
15 | 16 | 17 | System Status 18 | 19 | 20 | RQ Status 21 | 22 | 23 | Outdated Queries 24 | 25 | 26 | {children} 27 |
28 |
29 |
30 | ); 31 | } 32 | 33 | Layout.propTypes = { 34 | activeTab: PropTypes.string, 35 | children: PropTypes.node, 36 | }; 37 | 38 | Layout.defaultProps = { 39 | activeTab: "system_status", 40 | children: null, 41 | }; -------------------------------------------------------------------------------- /tests/fixtures/1.php: -------------------------------------------------------------------------------- 1 | "bar", 33 | "bar" => "foo", 34 | ]; 35 | 36 | 37 | $array["foo"] = "test"; 38 | 39 | $array2 = array( 40 | "foo" => "bar", 41 | "bar" => "foo", 42 | ); 43 | 44 | $today = 'tuesday'; 45 | 46 | class CSP 47 | { 48 | public const GRAFANA_CSP_METRIC_BASE = 'products.example.security.csp.'; 49 | 50 | } 51 | 52 | 'smtp' => [ 53 | 'transport' => 'smtp', 54 | 'host' => env('MAIL_HOST', 'smtp.mailtrap.io'), 55 | 'port' => env('MAIL_PORT', 2525), 56 | 'encryption' => env('MAIL_ENCRYPTION', 'tls'), 57 | 'username' => env('MAIL_USERNAME', 'mailer'), 58 | 'password' => env('MAIL_PASSWORD', 'fjanflkdsanfkjdsanf'), 59 | 'timeout' => null, 60 | ] -------------------------------------------------------------------------------- /tests/fixtures/1.pp: -------------------------------------------------------------------------------- 1 | 2 | ### Users and groups ### 3 | # Each user should have primary group as username 4 | # Additional groups can be passed over $groups var 5 | 6 | ### 7 | # NOTICE for $sshaccess: 8 | # hash keys must specify full hostnames and must not contain ^ and $ - they are kind of added automatically 9 | # Example: 10 | # sshaccess => { 'deployer' => ['app00'] } ### regexp is /^deployer$/ and matches only deployer and not deployer-jessie 11 | # sshaccess => { '^deployer' => ['app00'] } ### regexp is /^^deployer$/ AND WILL MATCH NOTHING 12 | # sshaccess => { 'deployer.*' => ['app00'] } ### regexp is /^deployer.*$/ and matches both deployer and deployer-jessie. Also deployer-test, deployer-killallhumans and deployer-blah-blah-blah. 
13 | ### 14 | 15 | define add_user_sshaccess ( 16 | $sshaccess, 17 | $usersshaccess='', 18 | $home='' 19 | ) { 20 | $username = $title 21 | if $home != '' { 22 | $realhome=$home 23 | } else { 24 | $realhome="/home/${username}" 25 | } 26 | include concat::setup 27 | unless defined(Concat["${realhome}/.ssh/authorized_keys"]) { 28 | concat { "${realhome}/.ssh/authorized_keys": 29 | owner => $username, 30 | group => $username, 31 | mode => '0600', 32 | } 33 | concat::fragment { "ssh_authorized_keys::${username}::header": 34 | target => "${realhome}/.ssh/authorized_keys", 35 | order => '00', 36 | content => "# This file is managed by Puppet\n", 37 | } 38 | } 39 | if ( $sshaccess != '' ) { 40 | if regexp_key_in_hash($sshaccess, $::hostname)==true { 41 | file {"${realhome}/.ssh/id_rsa": 42 | ensure => present, 43 | owner => $title, 44 | group => $title, 45 | mode => '0600', 46 | content => generate('/usr/local/sbin/generate-ssh-key', $::hostname, $username), 47 | } 48 | file {"${realhome}/.ssh/id_rsa.pub": 49 | ensure => present, 50 | owner => $title, 51 | group => $title, 52 | mode => '0644', 53 | content => generate('/usr/local/sbin/generate-ssh-key', $::hostname, $username, 1), 54 | } 55 | } else { 56 | file { "${realhome}/.ssh/id_rsa": 57 | ensure => absent 58 | } 59 | file { "${realhome}/.ssh/id_rsa.pub": 60 | ensure => absent 61 | } 62 | } 63 | concat::fragment { "ssh_authorized_keys::${username}::sshaccess": 64 | target => "${realhome}/.ssh/authorized_keys", 65 | order => '50', 66 | content => ssh_get_public_keys($sshaccess, $::hostname, $username), 67 | } 68 | } else { 69 | file { "${realhome}/.ssh/id_rsa": 70 | ensure => absent 71 | } 72 | file { "${realhome}/.ssh/id_rsa.pub": 73 | ensure => absent 74 | } 75 | } 76 | if ( $usersshaccess != '' ) { 77 | concat::fragment { "ssh_authorized_keys::${username}::usersshaccess": 78 | target => "${realhome}/.ssh/authorized_keys", 79 | order => '75', 80 | content => get_user_public_keys($usersshaccess, $::hostname, $username), 81 | } 82 | } 83 | } 84 | 85 | ## Adds a user and manages its keys and password 86 | define add_user ( 87 | $name, 88 | $password, 89 | $shell, 90 | $groups, 91 | $sshkeytype, 92 | $sshkey, 93 | $sshaccess='', 94 | $usersshaccess='', 95 | $home='', 96 | $sudoers='', 97 | $uid = undef, 98 | $managehome=true, 99 | $homemode='0751' 100 | ) { 101 | include concat::setup 102 | $username = $title 103 | if $groups == 'UNSET' { 104 | $real_groups = [$username,] 105 | } else { 106 | $real_groups = $groups 107 | } 108 | 109 | if $home == '' { 110 | $homedir = "/home/${username}" 111 | } else { 112 | $homedir = $home 113 | } 114 | 115 | group { $username: 116 | ensure => present, 117 | gid => $uid, 118 | } 119 | user { $username: 120 | comment => $name, 121 | home => $homedir, 122 | shell => $shell, 123 | uid => $uid, 124 | gid => $username, 125 | managehome => true, 126 | password => $password, 127 | groups => $real_groups, 128 | membership => inclusive, 129 | password_min_age => 99999, 130 | } 131 | if $managehome and ! defined(File[$homedir]){ 132 | file { $homedir: 133 | ensure => directory, 134 | mode => $homemode, 135 | owner => $username, 136 | group => $username, 137 | } 138 | } 139 | 140 | if ! 
defined(File["${homedir}/.ssh"]) { 141 | file { "${homedir}/.ssh": 142 | ensure => directory, 143 | mode => '0700', 144 | owner => $username, 145 | group => $username, 146 | } 147 | } 148 | 149 | unless defined(Concat["${homedir}/.ssh/authorized_keys"]) { 150 | concat { "${homedir}/.ssh/authorized_keys": 151 | owner => $username, 152 | group => $username, 153 | mode => '0600', 154 | } 155 | concat::fragment { "ssh_authorized_keys::${username}::header": 156 | target => "${homedir}/.ssh/authorized_keys", 157 | order => '00', 158 | content => "# This file is managed by Puppet\n", 159 | } 160 | } 161 | if ( $sshkey != '' ) { 162 | concat::fragment { "ssh_authorized_keys::${username}::personal": 163 | target => "${homedir}/.ssh/authorized_keys", 164 | order => '25', 165 | content => "${sshkeytype} ${sshkey} ${username}\n", 166 | } 167 | if ( $sshaccess != '' ) { 168 | notify {'You can use only one of $sshkey or $sshaccess variables in user_add class!': loglevel => error } 169 | } 170 | } else { 171 | if ( $sshaccess != '') { 172 | add_user_sshaccess{$username: 173 | home => $homedir, 174 | sshaccess => $sshaccess, 175 | usersshaccess => $usersshaccess, 176 | } 177 | } 178 | } 179 | if ( $sudoers != '' ) { 180 | sudoers::rules{$username: rules => $sudoers, } 181 | } 182 | } 183 | 184 | # Deletes user 185 | define del_user { 186 | $username = $title 187 | exec { "pkill -u ${username}": 188 | onlyif => "getent passwd ${username}", 189 | returns => [0, 1], 190 | } 191 | -> user { $username: 192 | ensure => absent, 193 | } 194 | -> group { $username: 195 | ensure => absent, 196 | } 197 | file { "/home/${username}/.ssh": 198 | ensure => absent, 199 | force => true, 200 | } 201 | } 202 | 203 | 204 | # Manages user 205 | class user_one( $groups = 'UNSET' ) { 206 | add_user { 'userone': 207 | name => 'John Doe', 208 | uid => '7005', 209 | password => '*', 210 | shell => '/bin/bash', 211 | groups => $groups, 212 | sshkeytype => 'ssh-rsa', 213 | sshkey => get_pubkey('pubkeys/jdoe.pub'), 214 | sudoers => [{command => 'ALL', nopasswd => true, user => 'exacron', nodes => 'crons', }, 215 | {command => '/usr/bin/puppet agent -t', nopasswd => true, nodes => ['crons']}, 216 | {command => 'ALL', user => 'postgres', nopasswd => true, nodes =>['crons','^app\d+', 'exa-sql','exa-sql-indexer[0-9]+','deployer','pg-int']}, 217 | {command => 'ALL', nopasswd => true, user => 'root', nodes => ['app00','exa-dwh15'],}, 218 | {command => '/usr/bin/strace', nopasswd => true, user => 'root', nodes => ['crons','^app\d+']}, 219 | {command => '/bin/netstat', nopasswd => true, user => 'root', nodes => ['crons','^app\d+']}, 220 | {command => '/bin/ss', nopasswd => true, user => 'root', nodes => ['crons','^app\d+']}, 221 | {command => '/usr/bin/lsof', nopasswd => true, user => 'root', nodes => ['crons','^app\d+']}, 222 | {command => 'ALL', user => 'sampler', nodes => 'sql-sample01'},], 223 | } 224 | } -------------------------------------------------------------------------------- /tests/fixtures/1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | LICENSE_KEY='TESTSECRET1234' 4 | 5 | jira_final_file="jira_access_`date +'%Y-%m-%d'`.csv" 6 | cf_final_file="cf_access_`date +'%Y-%m-%d'`.csv" 7 | stash_final_file="stash_access_`date +'%Y-%m-%d'`.csv" 8 | 9 | curl -u 'login01:password01' -s "https://example.com/test" | jq -r '.space.name + " (" + .space.key + "): " + .title' 10 | 11 | # should not be matched as a variable 12 | curl -u 'login01:$password_var' -s 
"https://example.com/test" 13 | 14 | 15 | printf "\nCleaning logs remotely...\n" 16 | ssh prx-cf "sudo rm confluence_logs_*.tar.gz" 17 | ssh prx-stash "sudo rm bitbucket_logs_*.tar.gz" 18 | ssh prx-jira "sudo rm jira_logs_*.tar.gz" 19 | printf "\nDone.\n" 20 | 21 | 22 | while IFS= read -r line 23 | do 24 | task_number=`echo $line | grep -oP '[A-Z]*\-[0-9]*'` 25 | count_number=`echo $line | tr -s " " | cut -d " " -f1` 26 | long_link=`echo $line | tr -s " " | cut -d " " -f2` 27 | task_summary=`curl -u 'login02:password02' -s "https://example.com/test" | jq -r '.key + ": " + .fields.summary'` 28 | done < accessed_jira_tasks_and_searches.txt 29 | printf "Done.\n" 30 | 31 | while IFS= read -r line 32 | do 33 | content_id=`echo $line | cut -d "=" -f2 | cut -d "&" -f1` 34 | count_number=`echo $line | tr -s " " | cut -d " " -f1` 35 | long_link=`echo $line | tr -s " " | cut -d " " -f2` 36 | page_title=`curl -u 'login03:password03' -s "https://example.com/test" | jq -r '.space.name + " (" + .space.key + "): " + .title'` 37 | printf "%s, \"%s\", %s\n" "$count_number" "$page_title" "$long_link" >> ${cf_final_file} 38 | done < accessed_cf_pages.txt 39 | printf "Done.\n" 40 | -------------------------------------------------------------------------------- /tests/fixtures/1.swift: -------------------------------------------------------------------------------- 1 | extraParameters = decoder.decode(key: "extraParameters", fallbackValue: [:]) 2 | mapSettings = try decoder.decodeIfPresent(key: "mapSettings") 3 | pointListRequest = RequestComponents(from: try decoder.decode(key: "pointListRequest")) 4 | pointInfoRequest = RequestComponents(from: try decoder.decode(key: "pointInfoRequest")) 5 | filtersInfoRequest = try decoder.decodeIfPresent(key: "filtersInfoRequest").map { RequestComponents(from: $0) } 6 | onOpenEvent = try decoder.decodeIfPresent(key: "onOpenEvent") 7 | onInitActions = decoder.decode(key: "onInitActions", fallbackValue: []) 8 | 9 | let result = Unboxer(dictionary: try unboxer.unbox(key: "result")) 10 | title = try result.unbox(key: "title") 11 | description = result.unbox(key: "description") 12 | actionTitle = try? result.unbox(keyPath: "actionTitle") 13 | action = try? 
result.unbox(keyPath: "action") -------------------------------------------------------------------------------- /tests/fixtures/1.toml: -------------------------------------------------------------------------------- 1 | name = "ab-lol" 2 | kind = "unknown" 3 | 4 | [engine] 5 | name = "python" 6 | 7 | [redis] 8 | enabled = true 9 | size = "small" 10 | version = "4.0" 11 | 12 | [env_vars] 13 | USE_JSON_FORMATTER = "false" 14 | DEBUG = "false" 15 | METRICS_ENABLED = "false" 16 | SENTRY_ENABLED = "false" 17 | REQUEST_LOGGING_ENABLED = "true" 18 | HTTPS_ENABLED = "true" 19 | S3_HOST = "ceph-com.miami.example.com" 20 | S3_BUCKET = "lol-1234" 21 | 22 | 23 | [envs.prod.env_vars] 24 | USE_JSON_FORMATTER = "true" 25 | DEBUG = "false" 26 | METRICS_ENABLED = "true" 27 | SENTRY_ENABLED = "true" 28 | SENTRY_DSN = "http://hello:TESTSECRET1234@sentry.miami.example.com/251" 29 | AUTH_ENABLED = "true" 30 | VERTICA_HOST = 'vertica' 31 | VERTICA_PORT = '5437' 32 | VSQL_DATABASE = "DWH" 33 | VSQL_HOST = "vertica.miami.example.com" 34 | VSQL_PORT = "5437" 35 | AUTH_CONFIG_URL = 'https://oauth2.example.com/.well-known/openid-configuration' 36 | CLICKHOUSE_HOSTS="clickhouse-tcp-clickhouse-abcentral-production-rs-rs01.db.example-sd" 37 | CLICKHOUSE_USER="ab_loader" 38 | DWH_CLICKHOUSE_HOSTS="clickhouse-tcp-clickhouse-dwh-cs-production-rs-rs01.db.example-sd" 39 | S3_BUCKET = "configurator-attachments" 40 | 41 | 42 | # CRONS 43 | [[crons]] 44 | name = "status_updater" 45 | enabled = true 46 | schedule = "*/1 * * * *" 47 | command = "python /app/scripts/status_updater.py" 48 | 49 | name = "scrum-poker" 50 | description = "Утилита для оценки задач" 51 | kind = "infrastructure" 52 | replicas = 1 53 | 54 | [engine] 55 | name = "node" 56 | version = "12.13" 57 | size = "small" 58 | 59 | [env_vars] 60 | NODE_ENV = "production" 61 | SESSION_TTL = "2592" 62 | SESSION_SECRET = "CHANGE ME PLZ!!!" 
63 | SESSION_COLLECTION = "sessions" 64 | 65 | [envs.local.env_vars] 66 | NODE_ENV = "development" 67 | BUILD_ENV = "local" 68 | 69 | [envs.dev] 70 | host = "test.host" 71 | [envs.dev.env_vars] 72 | NODE_ENV = "development" 73 | BUILD_ENV = "dev" 74 | JIRA_USERNAME = "test-stest" 75 | JIRA_PASSWORD = "iufkdhsafhiuwehf8qw4oifjh9w4ioafja" 76 | MATTERMOST_BOT_TOKEN = "fjaipu4iwhpfgj8eosiruhjfkea,sjflksea" 77 | -------------------------------------------------------------------------------- /tests/fixtures/1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /tests/fixtures/1.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | kubernetes::kube_kubeconfig_token: chAng3m3 4 | kubernetes::proxy_kubeconfig_token: chAng3m3 5 | kubernetes::kubelet_kubeconfig_token: chAng3m3 6 | kubernetes::calico_kubeconfig_token: chAng3m3 7 | -------------------------------------------------------------------------------- /tests/fixtures/1.yml: -------------------------------------------------------------------------------- 1 | osd_deep_scrub_large_omap_object_key_threshold: 2000000 -------------------------------------------------------------------------------- /tests/fixtures/2.json: -------------------------------------------------------------------------------- 1 | { 2 | "result": true, 3 | "access_Token": "eyJ0eXfdasfdsa5OGRhNmUzYjI0NzkwYTE2ZDg0YmVjYzQwYzhlIn0.eyJpYXQiOjE2Nzk0MDgwNDYsIm5iZiI6MTY3OTQwODA0NiwiZXhwIjoxNjc5NDE0MDQ2LCJzZXR0aW5ncyI6eyJzdWJkaXZpc2lvbl9pZCI6MTAxODE2LCJjcmVkaXRfbGluZV9jaGFubmVsIjoiUFJFU0NPUkUiLCJ0cmFuY2hlX2NoYW5uZWwiOiJFQ09NTSIsInBlcmlvZCI6MzY0LCJjcmVkaXRfbGluZV9wcm9kdWN0X2lkIjoxMDEzNTQsInRyYW5jaGVfcHJvZHVjdF9pZCI6MTAxMzUzLCJwYXltZW50X3R5cGVfaWQiOjEwMjM5NiwiZGRzX3BheW1lbnRfdHlwZV9pZCI6MTAxMTcxLCJjb25zdW1wdGlvbl9pZCI6MTAxNjczLCJjb21pbmdfaWQiOjEwMTY3NiwiYnNfY2hlY2tfZnJlcXVlbmN5IjoxMCwiYnNfYWN0aXZlIjoxLCJic19pc190ZXN0IjowLCJzc3ViX2NoZWNrX2ZyZXF1ZW5jeSI6MTAsInNzdWJfYWN0aXZlIjoxLCJzc3ViX2lzX3Rlc3QiOjEsImlkeF9jaGVja19mcmVxdWVuY3kiOjEwLCJpZHhfbWluX2NvbmZpZGVuY2UiOiIxIiwiaWR4X2FjdGl2ZSI6MCwiaWR4X2lzX3Rlc3QiOjEsInZlcmlmeV9pc190ZXN0IjoxLCJibGF6ZV9jaGVja19mcmVxdWVuY3kiOjEwLCJibGF6ZV9hY3RpdmUiOjEsImJsYXplX2lzX3Rlc3QiOjEsInNtc19pc190ZXN0IjoxLCJzbXNfY29kZV9sZW5ndGgiOjR9LCJ1aWQiOiJhdml0byIsImF1a2V5IjoiYTkxYjQwNjAxZDYwM2U2ZmFhNzhiOTc0ZmFlZDgxM2MiLCJqdGkiOiI4NmEyOThkYTZlM2IyNDc5MGExNmQ4NGJlY2M0MGM4ZSJ9.n1sBlFLMs0dGW_gNp2gXRjhjYrdgfBeVZZ58wdPUPkhMt1fdsafasdfasdfdsafasdmotpcnmkZCr5XxYB0dO6nPxxLwhbZuD5-Eb8nT_kjL6JeVpHG5kNynRkGGTZoV00s8nmE-2X7t24cg96fdsafsdafdgfddfsgdsfk9LnAoamaE_jp0EQ480WBo_ZeDfAdPuNQg7nUwFrEgoIq33YHHzYkksPORKuycYouNXb2eB-jx1Bg7mXuGOuyLX3fdsafdfudsahiufhainEkFKV3S_4FwX734LksQee1DbSXgr3ugWVaoigaUbBD3jGWXQjVQr_E5Svcjtc4gFTRDfpF08x4NofA", 4 | "accessToken": 
"eyJ0eXfdasfdsa5OGRhNmUzYjI0NzkwYTE2ZDg0YmVjYzQwYzhlIn0.eyJpYXQiOjE2Nzk0MDgwNDYsIm5iZiI6MTY3OTQwODA0NiwiZXhwIjoxNjc5NDE0MDQ2LCJzZXR0aW5ncyI6eyJzdWJkaXZpc2lvbl9pZCI6MTAxODE2LCJjcmVkaXRfbGluZV9jaGFubmVsIjoiUFJFU0NPUkUiLCJ0cmFuY2hlX2NoYW5uZWwiOiJFQ09NTSIsInBlcmlvZCI6MzY0LCJjcmVkaXRfbGluZV9wcm9kdWN0X2lkIjoxMDEzNTQsInRyYW5jaGVfcHJvZHVjdF9pZCI6MTAxMzUzLCJwYXltZW50X3R5cGVfaWQiOjEwMjM5NiwiZGRzX3BheW1lbnRfdHlwZV9pZCI6MTAxMTcxLCJjb25zdW1wdGlvbl9pZCI6MTAxNjczLCJjb21pbmdfaWQiOjEwMTY3NiwiYnNfY2hlY2tfZnJlcXVlbmN5IjoxMCwiYnNfYWN0aXZlIjoxLCJic19pc190ZXN0IjowLCJzc3ViX2NoZWNrX2ZyZXF1ZW5jeSI6MTAsInNzdWJfYWN0aXZlIjoxLCJzc3ViX2lzX3Rlc3QiOjEsImlkeF9jaGVja19mcmVxdWVuY3kiOjEwLCJpZHhfbWluX2NvbmZpZGVuY2UiOiIxIiwiaWR4X2FjdGl2ZSI6MCwiaWR4X2lzX3Rlc3QiOjEsInZlcmlmeV9pc190ZXN0IjoxLCJibGF6ZV9jaGVja19mcmVxdWVuY3kiOjEwLCJibGF6ZV9hY3RpdmUiOjEsImJsYXplX2lzX3Rlc3QiOjEsInNtc19pc190ZXN0IjoxLCJzbXNfY29kZV9sZW5ndGgiOjR9LCJ1aWQiOiJhdml0byIsImF1a2V5IjoiYTkxYjQwNjAxZDYwM2U2ZmFhNzhiOTc0ZmFlZDgxM2MiLCJqdGkiOiI4NmEyOThkYTZlM2IyNDc5MGExNmQ4NGJlY2M0MGM4ZSJ9.n1sBlFLMs0dGW_gNp2gXRjhjYrdgfBeVZZ58wdPUPkhMt1fdsafasdfasdfdsafasdmotpcnmkZCr5XxYB0dO6nPxxLwhbZuD5-Eb8nT_kjL6JeVpHG5kNynRkGGTZoV00s8nmE-2X7t24cg96fdsafsdafdgfddfsgdsfk9LnAoamaE_jp0EQ480WBo_ZeDfAdPuNQg7nUwFrEgoIq33YHHzYkksPORKuycYouNXb2eB-jx1Bg7mXuGOuyLX3fdsafdfudsahiufhainEkFKV3S_4FwX734LksQee1DbSXgr3ugWVaoigaUbBD3jGWXQjVQr_E5Svcjtc4gFTRDfpF08x4NofA", 5 | "exp": {{now '1h' 'unix'}} , 6 | "nbf": {{now '' 'unix'}} , 7 | "iat": {{now '1h' 'unix'}} , 8 | "jti": "gggg", 9 | "authkey": "ggg", 10 | "uid": "avito", 11 | "scope": "client_credentials", 12 | "time": {{now '' 'unix'}} 13 | } -------------------------------------------------------------------------------- /tests/fixtures/2.jsx: -------------------------------------------------------------------------------- 1 | import logoUrl from "@/assets/images/redash_icon_small.png"; 2 | import { useCurrentRoute } from "@/components/ApplicationArea/Router"; 3 | import HelpTrigger from "@/components/HelpTrigger"; 4 | import Link from "@/components/Link"; 5 | import PlainButton from "@/components/PlainButton"; 6 | import CreateDashboardDialog from "@/components/dashboards/CreateDashboardDialog"; 7 | import CreateWorkbookDialog from "@/components/workbooks/CreateWorkbookDialog"; 8 | import { Auth } from "@/services/auth"; 9 | import settingsMenu from "@/services/settingsMenu"; 10 | import AlertOutlinedIcon from "@ant-design/icons/AlertOutlined"; 11 | import WorkbookIcon from "@ant-design/icons/BookOutlined"; 12 | import CodeOutlinedIcon from "@ant-design/icons/CodeOutlined"; 13 | import DesktopOutlinedIcon from "@ant-design/icons/DesktopOutlined"; 14 | import FolderOutlined from "@ant-design/icons/FolderOutlined"; 15 | import LineChartOutlined from "@ant-design/icons/LineChartOutlined"; 16 | import PlusOutlinedIcon from "@ant-design/icons/PlusOutlined"; 17 | import QuestionCircleOutlinedIcon from "@ant-design/icons/QuestionCircleOutlined"; 18 | import SettingOutlinedIcon from "@ant-design/icons/SettingOutlined"; 19 | import Menu from "antd/lib/menu"; 20 | import { first, includes } from "lodash"; 21 | import React, { useMemo } from "react"; 22 | import "./DesktopNavbar.less"; 23 | import VersionInfo from "./VersionInfo"; 24 | 25 | function NavbarSection({ children, ...props }) { 26 | return ( 27 | 28 | {children} 29 | 30 | ); 31 | } 32 | 33 | function useNavbarActiveState() { 34 | const currentRoute = useCurrentRoute(); 35 | 36 | return useMemo( 37 | () => ({ 38 | workbooks: includes( 39 | [ 40 | "Workbooks.Favorites", 41 | "Workbooks.List", 42 | 
"Workbooks.My", 43 | "Workbooks.ViewOrEdit", 44 | "Workbooks.Presets", 45 | "Workbooks.Presets.View", 46 | ], 47 | currentRoute.id 48 | ), 49 | dashboards: includes( 50 | [ 51 | "Dashboards.Favorites", 52 | "Dashboards.LegacyViewOrEdit", 53 | "Dashboards.List", 54 | "Dashboards.My", 55 | "Dashboards.ViewOrEdit", 56 | "Dashboards.Presets", 57 | "Dashboards.Presets.View", 58 | ], 59 | currentRoute.id 60 | ), 61 | queries: includes( 62 | [ 63 | "Queries.List", 64 | "Queries.Favorites", 65 | "Queries.Archived", 66 | "Queries.My", 67 | "Queries.View", 68 | "Queries.New", 69 | "Queries.Edit", 70 | ], 71 | currentRoute.id 72 | ), 73 | charts: includes(["charts"], currentRoute.id), 74 | dataSources: includes(["DataSources.List"], currentRoute.id), 75 | alerts: includes(["Alerts.List", "Alerts.New", "Alerts.View", "Alerts.Edit"], currentRoute.id), 76 | projects: includes(["Project.View", "Projects.Explore", "Projects.Explore.Favorites"], currentRoute.id), 77 | }), 78 | [currentRoute.id] 79 | ); 80 | } 81 | 82 | export default function DesktopNavbar() { 83 | const firstSettingsTab = first(settingsMenu.getAvailableItems()); 84 | 85 | const activeState = useNavbarActiveState(); 86 | 87 | const canCreateQuery = Auth.hasPermission("create_query"); 88 | const canCreateDashboard = Auth.hasPermission("create_dashboard"); 89 | const canCreateAlert = Auth.hasPermission("list_alerts"); 90 | 91 | return ( 92 | 245 | ); 246 | } -------------------------------------------------------------------------------- /tests/fixtures/2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | curl -u 'login01:password01' -s "https://example.com/test" 4 | # should not be matched as a variable 5 | curl -u 'login01:$password_var' -s "https://example.com/test" 6 | curl -u "qauser:$password" $URL > $FILENAME -------------------------------------------------------------------------------- /tests/fixtures/2.toml: -------------------------------------------------------------------------------- 1 | [env_vars] 2 | ALLOW_ORIGINS = "*" 3 | SERVICE_OAUTH = "vhpn6mbsvhpn6mbsvhpn6mbsvhpn6mbs" 4 | S3_SECRET = "SFRcQOuJllDaDlnL7BhcwbjdYQDkjeM8PDDae0y6" 5 | S3_URL = "http://test.com" 6 | STT_KEY = "vhpn6mbsvhpn6mbsvhpn6mbsvhpn6mbsvhpn6mbsvhpn6mbs" 7 | STT_TIMEOUT = "100" 8 | STT_URL = "https://api.test.com" 9 | TTS_KEY = "vhpn6mbsvhpn6mbsvhpn6mbsvhpn6mbsvhpn6mbsvhpn6mbs" 10 | TTS_TIMEOUT = "50" 11 | TTS_URL = "http://test1234.com" 12 | 13 | KEYLOGGER_KEYS_KEYRING_SUBKEY = "1" 14 | 15 | # should not be detected 16 | SECRET_KEY_PATH = "/fdsa/fdsa/fdsa" -------------------------------------------------------------------------------- /tests/fixtures/2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | Empty 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /tests/fixtures/3.go: -------------------------------------------------------------------------------- 1 | const ( 2 | testKeyPEM = ` 3 | asdf 4 | df 5 | sad 6 | ` 7 | 8 | bla = "lol" 9 | ) 10 | 11 | func init() { 12 | testKey = parseRSA(testKeyPEM, "TESTSECRET1234") 13 | } 14 | 15 | claims := struct{ Msg string }{"TESTSECRET1234"} 16 | key := time.Now().Format("01.02.2006") -------------------------------------------------------------------------------- /tests/fixtures/3.js: -------------------------------------------------------------------------------- 1 | import { Button, Modal, Radio, Row } from 'antd'; 2 | import PropTypes from 
'prop-types'; 3 | import React, { useState } from 'react'; 4 | import { useSelector } from 'react-redux'; 5 | 6 | import { AppstoreOutlined, OrderedListOutlined } from '@ant-design/icons'; 7 | 8 | import { getRoot } from '../../store/selectors'; 9 | import SortableGrid from './components/sortable-grid'; 10 | import SortableList from './components/sortable-list'; 11 | 12 | import styles from './styles.css'; 13 | 14 | const reorder = (list, startIndex, endIndex) => { 15 | const result = Array.from(list); 16 | const [removed] = result.splice(startIndex, 1); 17 | 18 | result.splice(endIndex, 0, removed); 19 | 20 | return result; 21 | }; 22 | 23 | const SortModal = ({ stories, isVisible, onClose, onConfirm }) => { 24 | const [cloneStories, setCloneStories] = useState(stories); 25 | const [viewType, setViewType] = useState('grid'); 26 | const loading = useSelector(getRoot).orderLoading; 27 | const changeViewTypeHandler = (event) => setViewType(event.target.value); 28 | const changeSortHandler = (oldIndex, newIndex) => { 29 | setCloneStories(reorder(cloneStories, oldIndex, newIndex)); 30 | }; 31 | 32 | const confirmHandler = () => { 33 | const storiesIds = cloneStories.map(s => s.id); 34 | 35 | onConfirm(storiesIds); 36 | }; 37 | 38 | return ( 39 | 48 | Отмена 49 | , 50 | , 53 | ]} 54 | onCancel={onClose}> 55 | 56 | 57 | 58 | 59 | 60 | 61 |
62 | { 63 | viewType === 'table' 64 | ? 65 | : 66 | } 67 |
68 |
69 | ); 70 | }; 71 | 72 | SortModal.propTypes = { 73 | stories: PropTypes.array, 74 | isVisible: PropTypes.bool, 75 | onClose: PropTypes.func, 76 | onConfirm: PropTypes.func, 77 | }; 78 | 79 | export default SortModal; -------------------------------------------------------------------------------- /tests/fixtures/3.jsx: -------------------------------------------------------------------------------- 1 | {BRAND_TRANSFER_LOCALE.TRANSFER_TITLES_RIGHT} -------------------------------------------------------------------------------- /tests/fixtures/3.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # fmt: off 3 | 4 | from ldap3 import ( 5 | ALL, 6 | Connection, 7 | Server, 8 | ) 9 | 10 | s = Server('ldaps://ldap-main.miami.example.com:636', get_info=ALL, use_ssl=True) 11 | c = Connection(s, user='uid=openvpn,ou=services,dc=example,dc=us', password='TESTSECRET1234') 12 | 13 | c = Connection(s, pwd='2TESTSECRET1234') 14 | 15 | c.bind() 16 | 17 | if key == 'setuptools': 18 | print() 19 | -------------------------------------------------------------------------------- /tests/fixtures/4.go: -------------------------------------------------------------------------------- 1 | func TestNilConnGetChannel_Error(t *testing.T) { 2 | os.Setenv("RABBITMQ_URL", "amqp://fake_user:TESTSECRET1234@rabbitmq-esp01.miami.example.com:5672/esp") 3 | 4 | test2 := os.Getenv(`TEST_TEST`, "lol") 5 | secret := os.Getenv("S3_STORAGE_SECRET_KEY") 6 | b, err := newMQBroker(nil, &AutoAckDisableStrategy) 7 | 8 | if err == nil { 9 | t.FailNow() 10 | } 11 | _, err = b.getChannel(false) 12 | if err == nil { 13 | t.FailNow() 14 | } 15 | } -------------------------------------------------------------------------------- /tests/fixtures/4.js: -------------------------------------------------------------------------------- 1 | const result3 = reducer(result2, changeObjectPageDraftFilters({ 2 | key: 'id-3', 3 | value: '789' 4 | })); 5 | 6 | var PDFRenderingQueue = function () { 7 | 8 | _createClass(PDFRenderingQueue, [{ 9 | key: "setViewer", 10 | value: function setViewer(pdfViewer) { 11 | this.pdfViewer = pdfViewer; 12 | } 13 | }, { 14 | key: "setThumbnailViewer", 15 | value: function setThumbnailViewer(pdfThumbnailViewer) { 16 | this.pdfThumbnailViewer = pdfThumbnailViewer; 17 | } 18 | }, { 19 | key: "isHighestPriority", 20 | value: function isHighestPriority(view) { 21 | return this.highestPriorityPage === view.renderingId; 22 | } 23 | }, { 24 | key: "renderHighestPriority", 25 | value: function renderHighestPriority(currentlyVisiblePages) { 26 | if (this.idleTimeout) { 27 | clearTimeout(this.idleTimeout); 28 | this.idleTimeout = null; 29 | } 30 | if (this.pdfViewer.forceRendering(currentlyVisiblePages)) { 31 | return; 32 | } 33 | if (this.pdfThumbnailViewer && this.isThumbnailViewEnabled) { 34 | if (this.pdfThumbnailViewer.forceRendering()) { 35 | return; 36 | } 37 | } 38 | if (this.printing) { 39 | return; 40 | } 41 | if (this.onIdle) { 42 | this.idleTimeout = setTimeout(this.onIdle.bind(this), CLEANUP_TIMEOUT); 43 | } 44 | } 45 | }, { 46 | key: "getHighestPriority", 47 | value: function getHighestPriority(visible, views, scrolledDown) { 48 | var visibleViews = visible.views; 49 | var numVisible = visibleViews.length; 50 | if (numVisible === 0) { 51 | return false; 52 | } 53 | for (var i = 0; i < numVisible; ++i) { 54 | var view = visibleViews[i].view; 55 | if (!this.isViewFinished(view)) { 56 | return view; 57 | } 58 | } 59 | if (scrolledDown) { 60 | var nextPageIndex = 
visible.last.id; 61 | if (views[nextPageIndex] && !this.isViewFinished(views[nextPageIndex])) { 62 | return views[nextPageIndex]; 63 | } 64 | } else { 65 | var previousPageIndex = visible.first.id - 2; 66 | if (views[previousPageIndex] && !this.isViewFinished(views[previousPageIndex])) { 67 | return views[previousPageIndex]; 68 | } 69 | } 70 | return null; 71 | } 72 | }, { 73 | key: "isViewFinished", 74 | value: function isViewFinished(view) { 75 | return view.renderingState === RenderingStates.FINISHED; 76 | } 77 | }, { 78 | key: "renderView", 79 | value: function renderView(view) { 80 | var _this = this; 81 | 82 | switch (view.renderingState) { 83 | case RenderingStates.FINISHED: 84 | return false; 85 | case RenderingStates.PAUSED: 86 | this.highestPriorityPage = view.renderingId; 87 | view.resume(); 88 | break; 89 | case RenderingStates.RUNNING: 90 | this.highestPriorityPage = view.renderingId; 91 | break; 92 | case RenderingStates.INITIAL: 93 | this.highestPriorityPage = view.renderingId; 94 | var continueRendering = function continueRendering() { 95 | _this.renderHighestPriority(); 96 | }; 97 | view.draw().then(continueRendering, continueRendering); 98 | break; 99 | } 100 | return true; 101 | } 102 | }]); 103 | 104 | return PDFRenderingQueue; 105 | }(); 106 | -------------------------------------------------------------------------------- /tests/fixtures/4.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # fmt: off 3 | 4 | CFAuthData = {'username': 'fdfsdfdsf-cf', 'pass': 'TESTSECRET1234', 'host': 'https://cf.example.com'} 5 | 6 | CFAuthData['lol'] = 'valentin' 7 | 8 | result = {'line': 'fdsfdsafdsafdsagfds'} 9 | print('') 10 | 11 | 12 | class SlackApproveTypes(Enum): 13 | USER_MARKED_AS_FALSE = 'user_marked_false' 14 | USER_MARKED_AS_FALSE_BAD_RULE = 'user_marked_false_bad_rule' 15 | USER_MARKED_AS_FALSE_UNAPPLICABLE = 'user_marked_false_unapplicable' 16 | USER_MARKED_AS_FALSE_OTHER = 'user_marked_false_other' 17 | 18 | ADMIN_MARKED_AS_FALSE = 'adm_marked_false' 19 | ADMIN_APPROVED = 'adm_approved' 20 | -------------------------------------------------------------------------------- /tests/fixtures/5.go: -------------------------------------------------------------------------------- 1 | conn, err := amqp.Dial("amqp://guest:guest@localhost:5672/") 2 | aes_key := os.Getenv("AES_KEY") -------------------------------------------------------------------------------- /tests/fixtures/6.go: -------------------------------------------------------------------------------- 1 | func TestPostgresStringer(t *testing.T) { 2 | t.Run("Normal", func(t *testing.T) { 3 | testPostgresStringer( 4 | t, 5 | Postgres{ 6 | Host: "localhost", 7 | Port: 5432, 8 | User: "user", 9 | Password: "password", 10 | DBName: "main", 11 | }, 12 | "postgres://user:password@localhost:5432/main?sslmode=disable&binary_parameters=yes", 13 | ) 14 | }) 15 | } 16 | -------------------------------------------------------------------------------- /tests/fixtures/7.go: -------------------------------------------------------------------------------- 1 | var ( 2 | caKeyPEM = []byte(`TESTSECRET1234 3 | `) 4 | ) 5 | 6 | bsonFilters = append(bsonFilters, bson.M{ 7 | "start": bson.D{{Key: "$gte", Value: in.ActiveAt.Gte}}, 8 | }) 9 | 10 | // unix://:@?db= 11 | -------------------------------------------------------------------------------- /tests/fixtures/excluded_paths.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": 
"Path excluded", 4 | "pattern": "\\.git" 5 | }, 6 | { 7 | "name": "Path excluded", 8 | "pattern": ".*package-lock\\.json.*" 9 | }, 10 | { 11 | "name": "Path excluded", 12 | "pattern": ".*-requirements.txt" 13 | }, 14 | { 15 | "name": "Path excluded", 16 | "pattern": ".*Pipfile\\.lock$" 17 | }, 18 | { 19 | "name": "Path excluded", 20 | "pattern": ".*package.json.*" 21 | }, 22 | { 23 | "name": "Images", 24 | "pattern": ".*\\.(jpg|png|bmp|gif|tiff)$" 25 | }, 26 | { 27 | "name": "Executives", 28 | "pattern": ".*\\.(exe|dll)$" 29 | }, 30 | { 31 | "name": "Path excluded", 32 | "pattern": "vendor/" 33 | }, 34 | { 35 | "name": "Path excluded", 36 | "pattern": ".*Gopkg\\.lock.*" 37 | }, 38 | { 39 | "name": "Path excluded", 40 | "pattern": "venv/" 41 | }, 42 | { 43 | "name": "Path excluded", 44 | "pattern": "requirements.txt$" 45 | }, 46 | { 47 | "name": "Path excluded", 48 | "pattern": ".*Gopkg\\.lck$" 49 | }, 50 | { 51 | "name": "Path excluded", 52 | "pattern": ".*Podfile\\.lock$" 53 | }, 54 | { 55 | "name": "Path excluded", 56 | "pattern": "\\.gitignore" 57 | }, 58 | { 59 | "name": "Path excluded", 60 | "pattern": ".*xcodeproj.*" 61 | }, 62 | { 63 | "name": "Path excluded", 64 | "pattern": ".*__snapshots__.*" 65 | }, 66 | { 67 | "name": "Path excluded", 68 | "pattern": "internal/generated/" 69 | }, 70 | { 71 | "name": "Path excluded", 72 | "pattern": "npm-shrinkwrap.json" 73 | }, 74 | { 75 | "name": "Path excluded", 76 | "pattern": ".*composer.json.*" 77 | }, 78 | { 79 | "name": "Path excluded", 80 | "pattern": ".*brief" 81 | }, 82 | { 83 | "name": "Path excluded", 84 | "pattern": ".*Godeps\\.json$" 85 | }, 86 | { 87 | "name": "Path excluded", 88 | "pattern": ".*composer\\.lock.*" 89 | }, 90 | { 91 | "name": "Path excluded", 92 | "pattern": "src/Generated/" 93 | }, 94 | { 95 | "name": "Path excluded", 96 | "pattern": ".*yarn\\.lock$" 97 | }, 98 | { 99 | "name": "Path excluded", 100 | "pattern": ".*node_modules\\/.*" 101 | }, 102 | { 103 | "name": "Path excluded", 104 | "pattern": ".*symfony\\.lock$" 105 | }, 106 | { 107 | "name": "Path excluded", 108 | "pattern": "Gopkg.toml" 109 | }, 110 | { 111 | "name": "Path excluded", 112 | "pattern": "lib/generated/" 113 | }, 114 | { 115 | "name": "Path excluded", 116 | "pattern": ".*/vendor\\/.*" 117 | }, 118 | { 119 | "name": "Path excluded", 120 | "pattern": ".*pbxproj$" 121 | }, 122 | { 123 | "name": "Path excluded", 124 | "pattern": ".*go\\.sum$" 125 | }, 126 | { 127 | "name": "Postman collection files", 128 | "pattern": ".*postman_collection\\.json$" 129 | } 130 | ] -------------------------------------------------------------------------------- /tests/fixtures/extless/json: -------------------------------------------------------------------------------- 1 | { 2 | "queue": { 3 | "connection": "amqp://guest:guest@10.10.11.104:32769/" 4 | } 5 | } -------------------------------------------------------------------------------- /tests/fixtures/false_findings.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "False", 4 | "pattern": "^.*-.*i1Wbhr6G.*eg$" 5 | } 6 | ] -------------------------------------------------------------------------------- /tests/fixtures/hashed_secrets.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "Some password", 4 | "hash": "8c535f99d6d0fa55b64af0fae6e3b6829eda413b", 5 | "length": 12, 6 | "algorithm": "sha1" 7 | }, 8 | { 9 | "name": "Another rule", 10 | "hash": "fakjsdfiudsajfndsjkafka", 11 | "length": 5, 12 
| "algorithm": "sha1" 13 | } 14 | ] 15 | -------------------------------------------------------------------------------- /tests/fixtures/regex_checks.txt: -------------------------------------------------------------------------------- 1 | # S0 2 | xoxb-278549377329-i1Wbhr6Gd7nMMMvKgElv5Neg 3 | xoxb-675660624544-675670263056-vCSNM7pKAGgcBKOwvW2Kmi2K 4 | 5 | # S1 6 | -----BEGIN RSA PRIVATE KEY----- 7 | MIIBOgIBAAJBAKj34GkxFhD90vcNLYLInFEX6Ppy1tPf9Cnzj4p4WGeKLs1Pt8Qu 8 | KUpRKfFLfRYC9AIKjbJTWit+CqvjWYzvQwECAwEAAQJAIJLixBy2qpFoS4DSmoEm 9 | o3qGy0t6z09AIJtH+5OeRV1be+N4cDYJKffGzDa88vQENZiRm0GRq6a+HPGQMd2k 10 | TQIhAKMSvzIBnni7ot/OSie2TmJLY4SwTQAevXysE2RbFDYdAiEBCUEaRQnMnbp7 11 | 9mxDXDf6AU0cN/RPBjb9qSHDcWZHGzUCIG2Es59z8ugGrDY+pxLQnwfotadxd+Uy 12 | v/Ow5T0q5gIJAiEAyS4RaI9YG8EWx/2w0T67ZUVAw8eOMB6BIUg0Xcu+3okCIBOs 13 | /5OiPgoTdSy7bcF9IGpSE8ZgGKzgYQVZeN97YE00 14 | -----END RSA PRIVATE KEY----- 15 | 16 | # S2 17 | -----BEGIN OPENSSH PRIVATE KEY----- 18 | MIIBOgIBAAJBAKj34GkxFhD90vcNLYLInFEX6Ppy1tPf9Cnzj4p4WGeKLs1Pt8Qu 19 | KUpRKfFLfRYC9AIKjbJTWit+CqvjWYzvQwECAwEAAQJAIJLixBy2qpFoS4DSmoEm 20 | o3qGy0t6z09AIJtH+5OeRV1be+N4cDYJKffGzDa88vQENZiRm0GRq6a+HPGQMd2k 21 | TQIhAKMSvzIBnni7ot/OSie2TmJLY4SwTQAevXysE2RbFDYdAiEBCUEaRQnMnbp7 22 | 9mxDXDf6AU0cN/RPBjb9qSHDcWZHGzUCIG2Es59z8ugGrDY+pxLQnwfotadxd+Uy 23 | v/Ow5T0q5gIJAiEAyS4RaI9YG8EWx/2w0T67ZUVAw8eOMB6BIUg0Xcu+3okCIBOs 24 | /5OiPgoTdSy7bcF9IGpSE8ZgGKzgYQVZeN97YE00 25 | -----END OPENSSH PRIVATE KEY----- 26 | 27 | # S3 28 | -----BEGIN DSA PRIVATE KEY----- 29 | MIIBOgIBAAJBAKj34GkxFhD90vcNLYLInFEX6Ppy1tPf9Cnzj4p4WGeKLs1Pt8Qu 30 | KUpRKfFLfRYC9AIKjbJTWit+CqvjWYzvQwECAwEAAQJAIJLixBy2qpFoS4DSmoEm 31 | o3qGy0t6z09AIJtH+5OeRV1be+N4cDYJKffGzDa88vQENZiRm0GRq6a+HPGQMd2k 32 | TQIhAKMSvzIBnni7ot/OSie2TmJLY4SwTQAevXysE2RbFDYdAiEBCUEaRQnMnbp7 33 | 9mxDXDf6AU0cN/RPBjb9qSHDcWZHGzUCIG2Es59z8ugGrDY+pxLQnwfotadxd+Uy 34 | v/Ow5T0q5gIJAiEAyS4RaI9YG8EWx/2w0T67ZUVAw8eOMB6BIUg0Xcu+3okCIBOs 35 | /5OiPgoTdSy7bcF9IGpSE8ZgGKzgYQVZeN97YE00 36 | -----END DSA PRIVATE KEY----- 37 | 38 | # S4 39 | -----BEGIN EC PRIVATE KEY----- 40 | MIIBOgIBAAJBAKj34GkxFhD90vcNLYLInFEX6Ppy1tPf9Cnzj4p4WGeKLs1Pt8Qu 41 | KUpRKfFLfRYC9AIKjbJTWit+CqvjWYzvQwECAwEAAQJAIJLixBy2qpFoS4DSmoEm 42 | o3qGy0t6z09AIJtH+5OeRV1be+N4cDYJKffGzDa88vQENZiRm0GRq6a+HPGQMd2k 43 | TQIhAKMSvzIBnni7ot/OSie2TmJLY4SwTQAevXysE2RbFDYdAiEBCUEaRQnMnbp7 44 | 9mxDXDf6AU0cN/RPBjb9qSHDcWZHGzUCIG2Es59z8ugGrDY+pxLQnwfotadxd+Uy 45 | v/Ow5T0q5gIJAiEAyS4RaI9YG8EWx/2w0T67ZUVAw8eOMB6BIUg0Xcu+3okCIBOs 46 | /5OiPgoTdSy7bcF9IGpSE8ZgGKzgYQVZeN97YE00 47 | -----END EC PRIVATE KEY----- 48 | 49 | # S5 50 | -----BEGIN PGP PRIVATE KEY BLOCK----- 51 | MIIBOgIBAAJBAKj34GkxFhD90vcNLYLInFEX6Ppy1tPf9Cnzj4p4WGeKLs1Pt8Qu 52 | KUpRKfFLfRYC9AIKjbJTWit+CqvjWYzvQwECAwEAAQJAIJLixBy2qpFoS4DSmoEm 53 | o3qGy0t6z09AIJtH+5OeRV1be+N4cDYJKffGzDa88vQENZiRm0GRq6a+HPGQMd2k 54 | TQIhAKMSvzIBnni7ot/OSie2TmJLY4SwTQAevXysE2RbFDYdAiEBCUEaRQnMnbp7 55 | 9mxDXDf6AU0cN/RPBjb9qSHDcWZHGzUCIG2Es59z8ugGrDY+pxLQnwfotadxd+Uy 56 | v/Ow5T0q5gIJAiEAyS4RaI9YG8EWx/2w0T67ZUVAw8eOMB6BIUg0Xcu+3okCIBOs 57 | /5OiPgoTdSy7bcF9IGpSE8ZgGKzgYQVZeN97YE00 58 | -----END PGP PRIVATE KEY BLOCK----- 59 | 60 | # S19 61 | https://login:password@example.com 62 | ftp://login:password@example.com 63 | amqp://login:$password@example.com 64 | amqp://login:${password}@example.com 65 | amqp://login:%password%@example.com 66 | // redis://:@: 67 | -------------------------------------------------------------------------------- /tests/fixtures/regexes.json: 
-------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "S0", 4 | "name": "Slack Token", 5 | "confidence": 9, 6 | "pattern": "xox(?:a|b|p|o|s|r)-(?:\\d+-)+[a-z0-9]+" 7 | }, 8 | { 9 | "id": "S1", 10 | "name": "RSA private key", 11 | "confidence": 9, 12 | "pattern": "-----BEGIN RSA PRIVATE KEY-----[\\S\\s]{15,}?-----END RSA PRIVATE KEY-----" 13 | } 14 | ] -------------------------------------------------------------------------------- /tests/fixtures/service.postman_collection.json: -------------------------------------------------------------------------------- 1 | { 2 | 3 | } -------------------------------------------------------------------------------- /tests/scan_modes/test_cli_scan_mode.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepsecrets.config import Config, Output 4 | from deepsecrets.core.engines.regex import RegexEngine 5 | from deepsecrets.core.rulesets.false_findings import FalseFindingsBuilder 6 | from deepsecrets.core.rulesets.regex import RegexRulesetBuilder 7 | from deepsecrets.scan_modes.cli import CliScanMode 8 | 9 | FP_TO_BE_EXCLUDED = '/app/tests/fixtures/service.postman_collection.json' 10 | 11 | @pytest.fixture(scope='module') 12 | def config(): 13 | config = Config() 14 | config.set_workdir('tests/fixtures') 15 | config.engines.append(RegexEngine) 16 | config.add_ruleset(RegexRulesetBuilder, ['tests/fixtures/regexes.json']) 17 | config.add_ruleset(FalseFindingsBuilder, ['tests/fixtures/false_findings.json']) 18 | config.output = Output(type='json', path='tests/1.json') 19 | return config 20 | 21 | 22 | def test_cli_scan_mode(config: Config): 23 | mode = CliScanMode(config=config) 24 | assert FP_TO_BE_EXCLUDED in mode.filepaths 25 | 26 | config.set_global_exclusion_paths(['tests/fixtures/excluded_paths.json']) 27 | mode = CliScanMode(config=config) 28 | assert FP_TO_BE_EXCLUDED not in mode.filepaths 29 | 30 | findings = [] 31 | for file in mode.filepaths: 32 | findings.extend(mode._per_file_analyzer(mode.analyzer_bundle(), file)) 33 | 34 | assert len(findings) == 3 35 | 36 | # checking through the 'run' method 37 | # false findings checked at the end 38 | findings = [] 39 | findings = mode.run() 40 | 41 | assert len(findings) == 2 42 | --------------------------------------------------------------------------------