├── .github
│   ├── FUNDING.yml
│   └── workflows
│       ├── codeql.yml
│       ├── publish.yml
│       └── python_test.yml
├── .gitignore
├── LICENSE
├── README.md
├── libsast
│   ├── __init__.py
│   ├── __main__.py
│   ├── common.py
│   ├── core_matcher
│   │   ├── __init__.py
│   │   ├── choice_matcher.py
│   │   ├── helpers.py
│   │   ├── matchers.py
│   │   └── pattern_matcher.py
│   ├── core_sgrep
│   │   ├── __init__.py
│   │   ├── helpers.py
│   │   └── semantic_sgrep.py
│   ├── exceptions.py
│   ├── scanner.py
│   ├── standards.py
│   └── standards
│       ├── cwe.yaml
│       ├── owasp_masvs.yaml
│       ├── owasp_mobile_top10_2016.yaml
│       ├── owasp_mobile_top10_2024.yaml
│       └── owasp_web_top10_2017.yaml
├── poetry.lock
├── pyproject.toml
├── tests
│   ├── assets
│   │   ├── files
│   │   │   ├── .semgrepignore
│   │   │   ├── alternate.python
│   │   │   ├── choice_test.python
│   │   │   ├── choice_test2.python
│   │   │   ├── comments.java
│   │   │   ├── example_file.py
│   │   │   ├── handlebars_template.hbs
│   │   │   └── test_matcher.test
│   │   ├── invalid
│   │   │   ├── invalid_type.yaml
│   │   │   ├── invalid_yaml.yaml
│   │   │   ├── missing_pattern.yaml
│   │   │   └── missing_type.yaml
│   │   ├── multiple
│   │   │   ├── part1.yaml
│   │   │   └── part2.yaml
│   │   └── rules
│   │       ├── choice_matcher
│   │       │   └── choice.yaml
│   │       ├── pattern_matcher
│   │       │   └── patterns.yaml
│   │       └── semantic_grep
│   │           └── sgrep.yaml
│   ├── integration
│   │   └── test_cli.py
│   └── unit
│       ├── __init__.py
│       ├── setup_test.py
│       ├── test_api.py
│       ├── test_matchers.py
│       ├── test_options.py
│       ├── test_rules.py
│       ├── test_semgrep.py
│       └── test_standards.py
└── tox.ini
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: ajinabraham
2 | custom: ['https://paypal.me/ajinabraham']
3 |
--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
1 | name: "CodeQL"
2 |
3 | on:
4 | push:
5 | branches: [ "master" ]
6 | pull_request:
7 | branches: [ "master" ]
8 | schedule:
9 | - cron: "13 5 * * 1"
10 |
11 | jobs:
12 | analyze:
13 | name: Analyze
14 | runs-on: ubuntu-latest
15 | permissions:
16 | actions: read
17 | contents: read
18 | security-events: write
19 |
20 | strategy:
21 | fail-fast: false
22 | matrix:
23 | language: [ python ]
24 |
25 | steps:
26 | - name: Checkout
27 | uses: actions/checkout@v4.2.2
28 |
29 | - name: Initialize CodeQL
30 | uses: github/codeql-action/init@v2
31 | with:
32 | languages: ${{ matrix.language }}
33 | queries: +security-and-quality
34 |
35 | - name: Autobuild
36 | uses: github/codeql-action/autobuild@v2
37 |
38 | - name: Perform CodeQL Analysis
39 | uses: github/codeql-action/analyze@v2
40 | with:
41 | category: "/language:${{ matrix.language }}"
42 |
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: Upload Python Package
2 |
3 | on:
4 | release:
5 | types: [created]
6 |
7 | jobs:
8 | deploy:
9 |
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | - uses: actions/checkout@v4.2.2
14 | - name: Set up Python
15 | uses: actions/setup-python@v5.3.0
16 | with:
17 | python-version: '3.x'
18 | - name: Install dependencies
19 | run: |
20 | python -m pip install --upgrade pip
21 | pip install poetry
22 | - name: Build and publish
23 | env:
24 | PYPI_TOKEN: ${{ secrets.PYPI_PASSWORD }}
25 | run: |
26 | poetry config pypi-token.pypi ${PYPI_TOKEN}
27 | poetry build
28 | poetry publish
29 |
--------------------------------------------------------------------------------
/.github/workflows/python_test.yml:
--------------------------------------------------------------------------------
1 | name: Build
2 |
3 | on:
4 | push:
5 | branches: [ master ]
6 | pull_request:
7 | branches: [ master ]
8 |
9 | env:
10 | SEMGREP_R2C_INTERNAL_EXPLICIT_SEMGREPIGNORE: ./tests/assets/files/.semgrepignore
11 |
12 | jobs:
13 | build:
14 |
15 | runs-on: ubuntu-latest
16 | strategy:
17 | matrix:
18 | os: [ubuntu-latest, macos-latest]
19 | python-version: ['3.10', '3.11', '3.12']
20 |
21 | steps:
22 | - uses: actions/checkout@v4.2.2
23 | - name: Set up Python ${{ matrix.python-version }}
24 | uses: actions/setup-python@v5.3.0
25 | with:
26 | python-version: ${{ matrix.python-version }}
27 | - name: Install dependencies
28 | run: |
29 | python -m pip install --upgrade pip
30 | pip install tox poetry
31 | - name: Lint
32 | run: |
33 | tox -e lint
34 | - name: Install libsast
35 | run: |
36 | poetry install --no-interaction --no-ansi --with semgrep
37 | - name: Bandit Scan
38 | run: |
39 | poetry run bandit -ll libsast -r
40 | - name: Unit test
41 | run: |
42 | poetry run pytest -v --cache-clear tests
43 | - name: Python Package Test
44 | run: |
45 | tox -e build
46 | - name: Clean Up
47 | run: |
48 | tox -e clean
49 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 |
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 |
60 | # Scrapy stuff:
61 | .scrapy
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 |
72 | # pyenv
73 | .python-version
74 |
75 | # celery beat schedule file
76 | celerybeat-schedule
77 |
78 | # dotenv
79 | .env
80 |
81 | # virtualenv
82 | venv/
83 | ENV/
84 |
85 | # Spyder project settings
86 | .spyderproject
87 |
88 | # Rope project settings
89 | .ropeproject
90 | #Mac
91 | .DS_Store
92 | *.pyc
93 | *.pyo
94 |
95 | .vscode
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # libsast
2 |
3 | Generic SAST for Security Engineers. Powered by regex based pattern matcher and semantic aware [semgrep](https://github.com/returntocorp/semgrep).
4 |
5 | Made with ❤️ in India [](https://twitter.com/intent/tweet/?text=Generic%20SAST%20for%20Security%20Engineers.%20Powered%20by%20regex%20based%20pattern%20matcher%20and%20semantic%20aware%20semgrep%20by%20%40ajinabraham%20%40OpenSecurity_IN&url=https://github.com/ajinabraham/libsast)
6 |
7 | [](https://badge.fury.io/py/libsast)
8 | [](https://github.com/ajinabraham/libsast)
9 | [](https://www.gnu.org/licenses/lgpl-3.0.en.html)
10 | [](https://www.python.org/downloads/)
11 | [](https://github.com/ajinabraham/libsast/actions?query=workflow%3ABuild)
12 |
13 | ### Support libsast
14 |
15 | * **Donate via Paypal:** [](https://paypal.me/ajinabraham)
16 | * **Sponsor the Project:** [](https://github.com/sponsors/ajinabraham)
17 |
18 | ## Install
19 |
20 | ```bash
21 | pip install semgrep==1.86.0 #For semgrep support
22 | pip install libsast
23 | ```
24 |
25 | Pattern Matcher is cross-platform, but Semgrep supports only Mac and Linux.
26 |
27 | ## Command Line Options
28 |
29 | ```bash
30 | $ libsast
31 | usage: libsast [-h] [-o OUTPUT] [-p PATTERN_FILE] [-s SGREP_PATTERN_FILE]
32 | [--sgrep-file-extensions SGREP_FILE_EXTENSIONS [SGREP_FILE_EXTENSIONS ...]]
33 | [--file-extensions FILE_EXTENSIONS [FILE_EXTENSIONS ...]]
34 | [--ignore-filenames IGNORE_FILENAMES [IGNORE_FILENAMES ...]]
35 | [--ignore-extensions IGNORE_EXTENSIONS [IGNORE_EXTENSIONS ...]]
36 | [--ignore-paths IGNORE_PATHS [IGNORE_PATHS ...]]
37 | [--show-progress] [--cpu-core CPU_CORE] [-v]
38 | [path ...]
39 |
40 | positional arguments:
41 | path Path can be file(s) or directories
42 |
43 | options:
44 | -h, --help show this help message and exit
45 | -o OUTPUT, --output OUTPUT
46 | Output filename to save JSON report.
47 | -p PATTERN_FILE, --pattern-file PATTERN_FILE
48 | YAML pattern file, directory or url
49 | -s SGREP_PATTERN_FILE, --sgrep-pattern-file SGREP_PATTERN_FILE
50 | sgrep rules directory
51 | --sgrep-file-extensions SGREP_FILE_EXTENSIONS [SGREP_FILE_EXTENSIONS ...]
52 | File extensions that should be scanned with semantic
53 | grep
54 | --file-extensions FILE_EXTENSIONS [FILE_EXTENSIONS ...]
55 | File extensions that should be scanned with pattern
56 | matcher
57 | --ignore-filenames IGNORE_FILENAMES [IGNORE_FILENAMES ...]
58 | File name(s) to ignore
59 | --ignore-extensions IGNORE_EXTENSIONS [IGNORE_EXTENSIONS ...]
60 | File extension(s) to ignore in lower case
61 | --ignore-paths IGNORE_PATHS [IGNORE_PATHS ...]
62 | Path(s) to ignore
63 | --show-progress Show scan progress
64 | --cpu-core CPU_CORE No of CPU cores to use. Use all cores by default
65 | -v, --version Show libsast version
66 | ```
67 |
68 |
69 | ## Example Usage
70 |
71 | ```json
72 | $ libsast -s tests/assets/rules/semantic_grep/ -p tests/assets/rules/pattern_matcher/ tests/assets/files/
73 | {
74 | "pattern_matcher": {
75 | "test_regex": {
76 | "files": [
77 | {
78 | "file_path": "tests/assets/files/test_matcher.test",
79 | "match_lines": [
80 | 28,
81 | 28
82 | ],
83 | "match_position": [
84 | 1141,
85 | 1149
86 | ],
87 | "match_string": ".close()"
88 | }
89 | ],
90 | "metadata": {}
91 | },
92 | "test_regex_and": {
93 | "files": [
94 | {
95 | "file_path": "tests/assets/files/test_matcher.test",
96 | "match_lines": [
97 | 3,
98 | 3
99 | ],
100 | "match_position": [
101 | 52,
102 | 66
103 | ],
104 | "match_string": "webkit.WebView"
105 | },
106 | {
107 | "file_path": "tests/assets/files/test_matcher.test",
108 | "match_lines": [
109 | 7,
110 | 7
111 | ],
112 | "match_position": [
113 | 194,
114 | 254
115 | ],
116 | "match_string": ".loadUrl(\"file:/\" + Environment.getExternalStorageDirectory("
117 | }
118 | ],
119 | "metadata": {}
120 | },
121 | "test_regex_and_not": {
122 | "files": [
123 | {
124 | "file_path": "tests/assets/files/test_matcher.test",
125 | "match_lines": [
126 | 42,
127 | 42
128 | ],
129 | "match_position": [
130 | 1415,
131 | 1424
132 | ],
133 | "match_string": "WKWebView"
134 | },
135 | {
136 | "file_path": "tests/assets/files/test_matcher.test",
137 | "match_lines": [
138 | 40,
139 | 40
140 | ],
141 | "match_position": [
142 | 1363,
143 | 1372
144 | ],
145 | "match_string": "WKWebView"
146 | }
147 | ],
148 | "metadata": {}
149 | },
150 | "test_regex_and_or": {
151 | "files": [
152 | {
153 | "file_path": "tests/assets/files/test_matcher.test",
154 | "match_lines": [
155 | 50,
156 | 50
157 | ],
158 | "match_position": [
159 | 1551,
160 | 1571
161 | ],
162 | "match_string": "telephony.SmsManager"
163 | },
164 | {
165 | "file_path": "tests/assets/files/test_matcher.test",
166 | "match_lines": [
167 | 58,
168 | 58
169 | ],
170 | "match_position": [
171 | 1973,
172 | 1988
173 | ],
174 | "match_string": "sendTextMessage"
175 | }
176 | ],
177 | "metadata": {}
178 | },
179 | "test_regex_multiline_and_metadata": {
180 | "files": [
181 | {
182 | "file_path": "tests/assets/files/test_matcher.test",
183 | "match_lines": [
184 | 52,
185 | 52
186 | ],
187 | "match_position": [
188 | 1586,
189 | 1684
190 | ],
191 | "match_string": "public void onRequestPermissionsResult(int requestCode,String permissions[], int[] grantResults) {"
192 | },
193 | {
194 | "file_path": "tests/assets/files/test_matcher.test",
195 | "match_lines": [
196 | 10,
197 | 11
198 | ],
199 | "match_position": [
200 | 297,
201 | 368
202 | ],
203 | "match_string": "public static ForgeAccount add(Context context, ForgeAccount account) {"
204 | }
205 | ],
206 | "metadata": {
207 | "cwe": "CWE-1051 Initialization with Hard-Coded Network Resource Configuration Data",
208 | "description": "This is a rule to test regex",
209 | "foo": "bar",
210 | "masvs": "MSTG-STORAGE-3",
211 | "owasp-mobile": "M1: Improper Platform Usage",
212 | "owasp-web": "A10: Insufficient Logging & Monitoring",
213 | "severity": "info"
214 | }
215 | },
216 | "test_regex_or": {
217 | "files": [
218 | {
219 | "file_path": "tests/assets/files/test_matcher.test",
220 | "match_lines": [
221 | 26,
222 | 26
223 | ],
224 | "match_position": [
225 | 1040,
226 | 1067
227 | ],
228 | "match_string": "Context.MODE_WORLD_READABLE"
229 | }
230 | ],
231 | "metadata": {}
232 | }
233 | },
234 | "semantic_grep": {
235 | "errors": [
236 | {
237 | "code": 3,
238 | "level": "warn",
239 | "message": "Semgrep Core WARN - Lexical error in file tests/assets/files/test_matcher.test:40\n\tunrecognized symbols: !",
240 | "path": "tests/assets/files/test_matcher.test",
241 | "type": "Lexical error"
242 |       }
243 | ],
244 | "matches": {
245 | "boto-client-ip": {
246 | "files": [
247 | {
248 | "file_path": "tests/assets/files/example_file.py",
249 | "match_lines": [
250 | 4,
251 | 4
252 | ],
253 | "match_position": [
254 | 24,
255 | 31
256 | ],
257 | "match_string": "c = boto3.client(host='8.8.8.8')"
258 | }
259 | ],
260 | "metadata": {
261 | "cwe": "CWE-1050 Excessive Platform Resource Consumption within a Loop",
262 | "description": "boto client using IP address",
263 | "owasp-web": "A8: Insecure Deserialization",
264 | "severity": "ERROR"
265 | }
266 | }
267 | }
268 | }
269 | }
270 | ```
271 |
272 | ## Python API
273 |
274 | ```python
275 | >>> from libsast import Scanner
276 | >>> options = {'match_rules': '/Users/ajinabraham/Code/njsscan/njsscan/rules/pattern_matcher', 'sgrep_rules': '/Users/ajinabraham/Code/njsscan/njsscan/rules/semantic_grep', 'sgrep_extensions': {'', '.js'}, 'match_extensions': {'.hbs', '.sh', '.ejs', '.toml', '.mustache', '.tmpl', '.jade', '.json', '.ect', '.vue', '.yml', '.hdbs', '.tl', '.html', '.haml', '.dust', '.pug', '.tpl'}, 'ignore_filenames': {'bootstrap.min.js', '.DS_Store', 'bootstrap-tour.js', 'd3.min.js', 'tinymce.js', 'codemirror.js', 'tinymce.min.js', 'react-dom.production.min.js', 'react.js', 'jquery.min.js', 'react.production.min.js', 'codemirror-compressed.js', 'axios.min.js', 'angular.min.js', 'raphael-min.js', 'vue.min.js'}, 'ignore_extensions': {'.7z', '.exe', '.rar', '.zip', '.a', '.o', '.tz'}, 'ignore_paths': {'__MACOSX', 'jquery', 'fixtures', 'node_modules', 'bower_components', 'example', 'spec'}, 'show_progress': False}
277 | >>> paths = ['../njsscan/tests/assets/dot_njsscan/']
278 | >>> scanner = Scanner(options, paths)
279 | >>> scanner.scan()
280 | {'pattern_matcher': {'handlebar_mustache_template': {'files': [{'file_path': '../njsscan/tests/assets/dot_njsscan/ignore_ext.hbs', 'match_string': '{{{html}}}', 'match_position': (52, 62), 'match_lines': (1, 1)}], 'metadata': {'id': 'handlebar_mustache_template', 'description': 'The Handlebar.js/Mustache.js template has an unescaped variable. Untrusted user input passed to this variable results in Cross Site Scripting (XSS).', 'type': 'Regex', 'pattern': '{{{.+}}}|{{[ ]*&[\\w]+.*}}', 'severity': 'ERROR', 'input_case': 'exact', 'cwe': "CWE-79: Improper Neutralization of Input During Web Page Generation ('Cross-site Scripting')", 'owasp': 'A1: Injection'}}}, 'semantic_grep': {'matches': {'node_aes_ecb': {'files': [{'file_path': '../njsscan/tests/assets/dot_njsscan/lorem_scan.js', 'match_position': (16, 87), 'match_lines': (14, 14), 'match_string': "let decipher = crypto.createDecipheriv('aes-128-ecb', Buffer.from(ENCRYPTION_KEY), iv);"}], 'metadata': {'owasp': 'A9: Using Components with Known Vulnerabilities', 'cwe': 'CWE-327: Use of a Broken or Risky Cryptographic Algorithm', 'description': 'AES with ECB mode is deterministic in nature and not suitable for encrypting large amount of repetitive data.', 'severity': 'ERROR'}}, 'node_tls_reject': {'files': [{'file_path': '../njsscan/tests/assets/dot_njsscan/skip_dir/skip_me.js', 'match_position': (9, 58), 'match_lines': (9, 9), 'match_string': " process.env['NODE_TLS_REJECT_UNAUTHORIZED'] = '0';"}, {'file_path': '../njsscan/tests/assets/dot_njsscan/skip_dir/skip_me.js', 'match_position': (9, 55), 'match_lines': (18, 18), 'match_string': ' process.env.NODE_TLS_REJECT_UNAUTHORIZED = "0";'}], 'metadata': {'owasp': 'A6: Security Misconfiguration', 'cwe': 'CWE-295: Improper Certificate Validation', 'description': "Setting 'NODE_TLS_REJECT_UNAUTHORIZED' to 0 will allow node server to accept self signed certificates and is not a secure behaviour.", 'severity': 'ERROR'}}, 'node_curl_ssl_verify_disable': {'files': [{'file_path': '../njsscan/tests/assets/dot_njsscan/skip_dir/skip_me.js', 'match_position': (5, 11), 'match_lines': (45, 51), 'match_string': ' curl(url,\n\n {\n\n SSL_VERIFYPEER: 0\n\n },\n\n function (err) {\n\n response.end(this.body);\n\n })'}], 'metadata': {'owasp': 'A6: Security Misconfiguration', 'cwe': 'CWE-599: Missing Validation of OpenSSL Certificate', 'description': 'SSL Certificate verification for node-curl is disabled.', 'severity': 'ERROR'}}, 'regex_injection_dos': {'files': [{'file_path': '../njsscan/tests/assets/dot_njsscan/lorem_scan.js', 'match_position': (5, 37), 'match_lines': (25, 27), 'match_string': ' var key = req.param("key");\n\n // Regex created from user input\n\n var re = new RegExp("\\\\b" + key);'}], 'metadata': {'owasp': 'A1: Injection', 'cwe': 'CWE-400: Uncontrolled Resource Consumption', 'description': 'User controlled data in RegExp() can make the application vulnerable to layer 7 DoS.', 'severity': 'ERROR'}}, 'express_xss': {'files': [{'file_path': '../njsscan/tests/assets/dot_njsscan/skip.js', 'match_position': (9, 55), 'match_lines': (7, 10), 'match_string': ' var str = new Buffer(req.cookies.profile, \'base64\').toString();\n\n var obj = serialize.unserialize(str);\n\n if (obj.username) {\n\n res.send("Hello " + escape(obj.username));'}], 'metadata': {'owasp': 'A1: Injection', 'cwe': "CWE-79: Improper Neutralization of Input During Web Page Generation ('Cross-site Scripting')", 'description': 'Untrusted User Input in Response will result in Reflected Cross Site Scripting Vulnerability.', 
'severity': 'ERROR'}}, 'generic_path_traversal': {'files': [{'file_path': '../njsscan/tests/assets/dot_njsscan/lorem_scan.js', 'match_position': (5, 35), 'match_lines': (36, 37), 'match_string': " var filePath = path.join(__dirname, '/' + req.query.load);\n\n fileSystem.readFile(filePath); // ignore: generic_path_traversal"}, {'file_path': '../njsscan/tests/assets/dot_njsscan/lorem_scan.js', 'match_position': (5, 35), 'match_lines': (42, 43), 'match_string': " var filePath = path.join(__dirname, '/' + req.query.load);\n\n fileSystem.readFile(filePath); // detect this"}], 'metadata': {'owasp': 'A5: Broken Access Control', 'cwe': 'CWE-23: Relative Path Traversal', 'description': 'Untrusted user input in readFile()/readFileSync() can endup in Directory Traversal Attacks.', 'severity': 'ERROR'}}, 'express_open_redirect': {'files': [{'file_path': '../njsscan/tests/assets/dot_njsscan/lorem_scan.js', 'match_position': (5, 26), 'match_lines': (49, 51), 'match_string': ' var target = req.param("target");\n\n // BAD: sanitization doesn\'t apply here\n\n res.redirect(target); //ignore: express_open_redirect'}], 'metadata': {'owasp': 'A1: Injection', 'cwe': "CWE-601: URL Redirection to Untrusted Site ('Open Redirect')", 'description': 'Untrusted user input in redirect() can result in Open Redirect vulnerability.', 'severity': 'ERROR'}}, 'node_deserialize': {'files': [{'file_path': '../njsscan/tests/assets/dot_njsscan/skip.js', 'match_position': (19, 45), 'match_lines': (8, 8), 'match_string': ' var obj = serialize.unserialize(str);'}], 'metadata': {'owasp': 'A8: Insecure Deserialization', 'cwe': 'CWE-502: Deserialization of Untrusted Data', 'description': "User controlled data in 'unserialize()' or 'deserialize()' function can result in Object Injection or Remote Code Injection.", 'severity': 'ERROR'}}}, 'errors': [{'type': 'SourceParseError', 'code': 3, 'short_msg': 'parse error', 'long_msg': 'Could not parse .njsscan as javascript', 'level': 'warn', 'spans': [{'start': {'line': 2, 'col': 20}, 'end': {'line': 2, 'col': 21}, 'source_hash': 'c60298be568bfb1325d92cbb3c0bc1450a25b85bb2e4000bdc3267c05f1c8c73', 'file': '.njsscan', 'context_start': None, 'context_end': None}], 'help': 'If the code appears to be valid, this may be a semgrep bug.'}, {'type': 'SourceParseError', 'code': 3, 'short_msg': 'parse error', 'long_msg': 'Could not parse no_ext_scan as javascript', 'level': 'warn', 'spans': [{'start': {'line': 1, 'col': 3}, 'end': {'line': 1, 'col': 5}, 'source_hash': 'f002e2a715be216987dd1b134e7b9fa6eef28e3caa82dead0109c4cdc489e089', 'file': 'no_ext_scan', 'context_start': None, 'context_end': None}], 'help': 'If the code appears to be valid, this may be a semgrep bug.'}]}}
281 | ```
282 |
283 | ## Write your own Static Analysis tool
284 |
285 | With libsast, you can write your own static analysis tools. libsast provides two matching engines:
286 |
287 | 1. **Pattern Matcher**
288 | 2. **Semantic Grep**
289 |
290 | ### Pattern Matcher
291 |
292 | The Pattern Matcher is language agnostic and works on source files of any language.
293 |
294 | Use [Regex 101](https://regex101.com/r/nGbAay/1) to write simple Python Regex rule patterns.
295 |
296 | A sample rule looks like
297 |
298 | ```yaml
299 | - id: test_regex_or
300 | message: This is a rule to test regex_or
301 | input_case: exact
302 | pattern:
303 | - MODE_WORLD_READABLE|Context\.MODE_WORLD_READABLE
304 | - openFileOutput\(\s*".+"\s*,\s*1\s*\)
305 | severity: error
306 | type: RegexOr
307 | metadata:
308 | owasp-web: a1
309 | reference: http://foo.bar
310 | foo: Some extra metadata
311 | ```
312 | A rule consists of:
313 |
314 | * `id` : A unique id for the rule.
315 | * `message`: A description for the rule.
316 | * `input_case`: It can be `exact`, `upper` or `lower`. The input is converted to lower case or upper case, or left as-is, before it is matched against the regex.
317 | * `pattern`: One or more regex patterns; the expected structure depends on `type`.
318 | * `severity`: It can be `error`, `warning` or `info`.
319 | * `type`: Pattern Matcher supports `Regex`, `RegexAnd`, `RegexOr`, `RegexAndOr`, `RegexAndNot`.
320 | * `metadata` (optional): Define your own custom fields that you can use as metadata along with standard mappings.
321 |
322 | ```bash
323 | 1. Regex - if regex1 in input
324 | 2. RegexAnd - if regex1 in input and regex2 in input
325 | 3. RegexOr - if regex1 in input or regex2 in input
326 | 4. RegexAndOr - if regex1 in input and (regex2 in input or regex3 in input)
327 | 5. RegexAndNot - if regex1 in input and not regex2 in input
328 | ```
329 | Example: [Pattern Matcher Rule](https://github.com/ajinabraham/libsast/blob/master/tests/assets/rules/pattern_matcher/patterns.yaml)
330 |
331 | Test your pattern matcher rules
332 |
333 | `$ libsast -p tests/assets/rules/pattern_matcher/patterns.yaml tests/assets/files/`
334 |
335 | #### Inbuilt Standard Mapping Support
336 |
337 | Metadata fields also support [libsast standard mapping](https://github.com/ajinabraham/libsast/tree/master/libsast/standards).
338 |
339 | For example, the metadata field `owasp-web: a1` will get expanded at runtime as `owasp-web: 'A1: Injection'`.
340 |
341 | *Currently Supports*
342 |
343 | * [OWASP Web Top 10](https://github.com/ajinabraham/libsast/blob/master/libsast/standards/owasp_web_top10_2017.yaml)
344 | * [OWASP Mobile Top 10](https://github.com/ajinabraham/libsast/blob/master/libsast/standards/owasp_mobile_top10_2016.yaml)
345 | * [OWASP MASVS](https://github.com/ajinabraham/libsast/blob/master/libsast/standards/owasp_masvs.yaml)
346 | * [CWE](https://github.com/ajinabraham/libsast/blob/master/libsast/standards/cwe.yaml)
347 |
348 | ### Semantic Grep
349 |
350 | Semantic Grep uses [semgrep](https://github.com/returntocorp/semgrep), a fast and syntax-aware semantic code pattern search for many languages: like grep but for code.
351 |
352 | Currently it supports Python, Java, JavaScript, Go and C.
353 |
354 | Use [semgrep.dev](https://semgrep.dev/vAb) to write semantic grep rule patterns.
355 |
356 | A sample rule for Python code looks like
357 |
358 | ```yaml
359 | rules:
360 | - id: boto-client-ip
361 | patterns:
362 | - pattern-inside: boto3.client(host="...")
363 | - pattern-regex: '\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
364 | message: "boto client using IP address"
365 | languages: [python]
366 | severity: ERROR
367 | metadata:
368 | owasp-web: a2
369 | owasp-mobile: m7
370 | cwe: cwe-1048
371 | foo: Some extra metadata
372 | ```
373 |
374 | See semgrep documentation [here](https://semgrep.dev/docs/writing-rules/rule-syntax/).
375 |
376 | Example: [Semantic Grep Rule](https://github.com/ajinabraham/libsast/blob/master/tests/assets/rules/semantic_grep/sgrep.yaml)
377 |
378 | Test your semgrep rules
379 |
380 | `$ libsast -s tests/assets/rules/semantic_grep/sgrep.yaml tests/assets/files/`
381 |
382 | ## Real-world Implementations
383 |
384 | * [njsscan](https://github.com/ajinabraham/njsscan) SAST is built with libsast pattern matcher and semantic grep.
385 | * [nodejsscan](https://github.com/ajinabraham/nodejsscan) nodejsscan is a static security code scanner for Node.js applications.
386 | * [MobSF](https://mobsf.github.io/Mobile-Security-Framework-MobSF/) Static Code Analyzer for Android and iOS mobile applications.
387 | * [mobsfscan](https://github.com/MobSF/mobsfscan) mobsfscan is a static security code scanner for Mobile applications built for Android (Java, Kotlin) & iOS (Swift, Objective C).
388 |
--------------------------------------------------------------------------------
/libsast/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf_8 -*-
3 | from datetime import datetime
4 |
5 | from .core_matcher.pattern_matcher import PatternMatcher
6 | from .core_matcher.choice_matcher import ChoiceMatcher
7 | from .core_sgrep.semantic_sgrep import SemanticGrep
8 | from .scanner import Scanner
9 |
10 |
11 | year = str(datetime.now().year)
12 | __title__ = 'libsast'
13 | __authors__ = 'Ajin Abraham'
14 | __copyright__ = f'Copyright {year} Ajin Abraham, opensecurity.in'
15 | __version__ = '3.1.6'
16 | __version_info__ = tuple(int(i) for i in __version__.split('.'))
17 | __all__ = [
18 | 'Scanner',
19 | 'ChoiceMatcher',
20 | 'PatternMatcher',
21 | 'SemanticGrep',
22 | '__title__',
23 | '__authors__',
24 | '__copyright__',
25 | '__version__',
26 | '__version_info__',
27 | ]
28 |
--------------------------------------------------------------------------------
/libsast/__main__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf_8 -*-
3 | """libsast cli."""
4 | import argparse
5 | import json
6 | import sys
7 |
8 | from libsast import __version__
9 | from libsast.scanner import Scanner
10 |
11 |
12 | def output(out, scan_results):
13 | """Output."""
14 | if out:
15 | with open(out, 'w') as outfile:
16 | json.dump(scan_results,
17 | outfile,
18 | sort_keys=True,
19 | indent=2,
20 | separators=(',', ': '))
21 | else:
22 | if scan_results:
23 | print(json.dumps(scan_results,
24 | sort_keys=True,
25 | indent=2,
26 | separators=(',', ': ')))
27 | if scan_results:
28 | sgrep_out = scan_results.get('semantic_grep', {}).get('matches')
29 | matcher_out = scan_results.get('pattern_matcher')
30 | if sgrep_out or matcher_out:
31 | sys.exit(1)
32 | sys.exit(0)
33 |
34 |
35 | def main():
36 | """Main CLI."""
37 | parser = argparse.ArgumentParser()
38 | parser.add_argument('path',
39 | nargs='*',
40 | help=('Path can be file(s) or '
41 | 'directories'))
42 | parser.add_argument('-o', '--output',
43 | help='Output filename to save JSON report.',
44 | required=False)
45 | parser.add_argument('-p', '--pattern-file',
46 | help='YAML pattern file, directory or url',
47 | required=False)
48 | parser.add_argument('-s', '--sgrep-pattern-file',
49 | help='sgrep rules directory',
50 | required=False)
51 | parser.add_argument('--sgrep-file-extensions',
52 | nargs='+',
53 | help=('File extensions that should be scanned'
54 | ' with semantic grep'),
55 | required=False)
56 | parser.add_argument('--file-extensions',
57 | nargs='+',
58 | help=('File extensions that should be scanned'
59 | ' with pattern matcher'),
60 | required=False)
61 | parser.add_argument('--ignore-filenames',
62 | nargs='+',
63 | help='File name(s) to ignore',
64 | required=False)
65 | parser.add_argument('--ignore-extensions',
66 | nargs='+',
67 | help='File extension(s) to ignore in lower case',
68 | required=False)
69 | parser.add_argument('--ignore-paths',
70 | nargs='+',
71 | help='Path(s) to ignore',
72 | required=False)
73 | parser.add_argument('--show-progress',
74 | help='Show scan progress',
75 | required=False,
76 | action='store_true')
77 | parser.add_argument('--cpu-core',
78 | help='No of CPU cores to use. Use all cores by default',
79 | type=int,
80 | required=False)
81 | parser.add_argument('-mp', '--multiprocessing',
82 | help=('Multiprocessing strategy to use.'
83 | ' Options: default, thread, billiard'),
84 | default='default',
85 | type=str,
86 | required=False)
87 | parser.add_argument('-v', '--version',
88 | help='Show libsast version',
89 | required=False,
90 | action='store_true')
91 | args = parser.parse_args()
92 | if args.path and (args.pattern_file or args.sgrep_pattern_file):
93 | options = {
94 | 'sgrep_rules': args.sgrep_pattern_file,
95 | 'sgrep_extensions': args.sgrep_file_extensions,
96 | 'match_rules': args.pattern_file,
97 | 'match_extensions': args.file_extensions,
98 | 'ignore_filenames': args.ignore_filenames,
99 | 'ignore_extensions': args.ignore_extensions,
100 | 'ignore_paths': args.ignore_paths,
101 | 'show_progress': args.show_progress,
102 | 'cpu_core': args.cpu_core,
103 | 'multiprocessing': args.multiprocessing,
104 | }
105 | result = Scanner(options, args.path).scan()
106 | output(args.output, result)
107 | elif args.version:
108 | print(f'libsast v{__version__}')
109 | else:
110 | parser.print_help()
111 |
112 |
113 | if __name__ == '__main__':
114 | main()
115 |
--------------------------------------------------------------------------------
/libsast/common.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf_8 -*-
2 | """Common Helpers."""
3 | import os
4 | import sys
5 | from threading import Thread
6 |
7 | from libsast.exceptions import (
8 | YamlRuleLoadError,
9 | YamlRuleParseError,
10 | )
11 |
12 | import yaml
13 |
14 |
15 | class ProgressBar:
16 | def __init__(self, prefix, expected_time, size=60, output=sys.stderr):
17 | self.prefix = prefix
18 | self.expected_time = expected_time
19 | self.size = size
20 | self.output = output
21 |
22 | def progress_print(self, index):
23 | """Print progress bar."""
24 | prog_length = int(self.size * index / self.expected_time)
25 | prog = '█' * prog_length
26 | self.output.write(f'- {self.prefix} {prog} {index}\r')
27 | self.output.flush()
28 |
29 | def progress_loop(self, iterator):
30 | """Show progress for loop."""
31 | self.progress_print(0)
32 | for index, item in enumerate(iterator):
33 | yield item
34 | self.progress_print(index + 1)
35 | self.output.write('\n')
36 | self.output.flush()
37 |
38 | def progress_function(self, function, args=None, kwargs=None):
39 | """Show progress for function."""
40 | ret = [None]
41 | index = 0
42 | # Hack determined by size of rule files
43 | self.expected_time = self.expected_time * 350
44 |
45 | def myrunner(function, ret, *args, **kwargs):
46 | ret[0] = function(*args, **kwargs)
47 | self.progress_print(0)
48 | thread = Thread(
49 | target=myrunner,
50 | args=(function, ret) + tuple(args),
51 | kwargs=kwargs)
52 | thread.start()
53 | while thread.is_alive():
54 | thread.join(timeout=0.2)
55 | index += 1
56 | self.progress_print(index)
57 | self.output.write('\n')
58 | self.output.flush()
59 | return ret[0]
60 |
61 |
62 | def read_yaml(file_obj, text=False):
63 | try:
64 | if text:
65 | return yaml.safe_load(file_obj)
66 | return yaml.safe_load(file_obj.read_text('utf-8', 'ignore'))
67 | except yaml.YAMLError as exp:
68 | raise YamlRuleParseError(
69 | f'YAML Parse Error: {repr(exp)}')
70 | except Exception as gen:
71 | raise YamlRuleLoadError(
72 | f'Failed to load YAML file: {repr(gen)}')
73 |
74 |
75 | def get_worker_count():
76 | """Get worker count for pool."""
77 | libsast_workers = os.getenv('LIBSAST_WORKERS')
78 | if libsast_workers:
79 | try:
80 | return int(libsast_workers)
81 | except ValueError:
82 | return 1
83 | try:
84 | worker_count = os.cpu_count()
85 | except Exception:
86 | worker_count = 1
87 |
88 | # Adjust worker count for Windows
89 | if sys.platform == 'win32':
90 | worker_count = min(worker_count, 61)
91 | return worker_count
92 |
--------------------------------------------------------------------------------
/libsast/core_matcher/__init__.py:
--------------------------------------------------------------------------------
1 | """Core Matcher."""
2 |
--------------------------------------------------------------------------------
/libsast/core_matcher/choice_matcher.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf_8 -*-
2 | """Choice Matcher."""
3 | import re
4 | from pathlib import Path
5 | from concurrent.futures import (
6 | ProcessPoolExecutor,
7 | ThreadPoolExecutor,
8 | )
9 | from functools import lru_cache
10 |
11 | from libsast.core_matcher.helpers import (
12 | get_rules,
13 | is_file_valid,
14 | strip_comments,
15 | )
16 | from libsast import (
17 | common,
18 | exceptions,
19 | )
20 |
21 |
22 | class ChoiceMatcher:
23 | def __init__(self, options: dict) -> None:
24 | self.scan_rules = get_rules(options.get('choice_rules'))
25 | self.show_progress = options.get('show_progress')
26 | self.cpu = options.get('cpu_core')
27 | self.multiprocessing = options.get('multiprocessing')
28 | self.alternative_path = options.get('alternative_path')
29 | exts = options.get('choice_extensions')
30 | self.exts = [ext.lower() for ext in exts] if exts else []
31 | self.findings = {}
32 |
33 | def scan(self, paths: list) -> dict:
34 | """Scan file(s) or directory per rule."""
35 | if self.show_progress:
36 | pbar = common.ProgressBar('Choice Match', len(self.scan_rules))
37 | self.scan_rules = pbar.progress_loop(self.scan_rules)
38 |
39 | file_contents = self.read_file_contents(paths)
40 | return self.regex_scan(file_contents)
41 |
42 | def read_file_contents(self, paths: list) -> list:
43 | """Load file(s) content."""
44 | if not paths:
45 | return []
46 |
47 | choice_args = []
48 | for rule in self.scan_rules:
49 | scan_paths = paths
50 | if rule['type'] != 'code' and self.alternative_path:
51 | # Scan only alternative path
52 | scan_paths = [Path(self.alternative_path)]
53 | choice_args.append((scan_paths, rule))
54 | if not choice_args:
55 | return []
56 |
57 | # Use ThreadPoolExecutor for file reading
58 | with ThreadPoolExecutor() as io_executor:
59 | # Submit file reading tasks and wait for results
60 | futures = []
61 | for args_tuple in choice_args:
62 | future = io_executor.submit(
63 | self._read_file_contents, args_tuple)
64 | futures.append(future)
65 | return [future.result() for future in futures]
66 |
67 | def regex_scan(self, file_contents: list, rules=None) -> dict:
68 | """Process regex matches on the file contents."""
69 | if rules:
70 | self.scan_rules = get_rules(rules)
71 | if not (self.scan_rules and file_contents):
72 | return {}
73 | self.validate_rules()
74 |
75 | if self.multiprocessing == 'billiard':
76 | # Use billiard's pool for regex (support queues)
77 | from billiard import Pool
78 | with Pool(processes=self.cpu) as pool:
79 | # Run regex on file data
80 | results = pool.map(
81 | self.choice_matcher,
82 | file_contents)
83 | elif self.multiprocessing == 'thread':
84 | # Use a ThreadPool for regex check
85 | with ThreadPoolExecutor() as io_executor:
86 | results = list(io_executor.map(
87 | self.choice_matcher,
88 | file_contents))
89 | else:
90 | # Use ProcessPoolExecutor for regex processing
91 | with ProcessPoolExecutor(max_workers=self.cpu) as cpu_executor:
92 | results = list(cpu_executor.map(
93 | self.choice_matcher,
94 | file_contents))
95 |
96 | self.add_finding(results)
97 | return self.findings
98 |
99 | def validate_rules(self):
100 | """Validate Rules before scanning."""
101 | for rule in self.scan_rules:
102 | if not isinstance(rule, dict):
103 | raise exceptions.InvalidRuleFormatError(
104 | 'Choice Matcher Rule format is invalid.')
105 | required_keys = [
106 | 'id',
107 | 'type',
108 | 'choice_type',
109 | 'selection',
110 | 'choice']
111 | for key in required_keys:
112 | if not rule.get(key):
113 | raise exceptions.PatternKeyMissingError(
114 | f'The rule is missing the key "{key}"')
115 |
116 | def _read_file_contents(self, args_tuple):
117 | """Read file contents for the given paths and rule."""
118 | scan_paths, rule = args_tuple
119 | results = []
120 | for sfile in scan_paths:
121 | if not is_file_valid(sfile, self.exts, 5):
122 | continue
123 | try:
124 | data = self._format_content(
125 | sfile.read_text('utf-8', 'ignore'),
126 | sfile.suffix.lower())
127 | results.append((data, rule))
128 | except Exception as e:
129 | raise exceptions.RuleProcessingError(
130 | 'Error reading file: {}'.format(sfile)) from e
131 | return results
132 |
133 | def find_choices(self, data, rule):
134 | """Find Choices."""
135 | all_matches = set()
136 | for idx, choice in enumerate(rule['choice']):
137 | typ = rule['choice_type']
138 | if typ == 'and':
139 | # Return on first and choice
140 | if all(re.compile(and_regx).search(data) for and_regx in choice[0]):
141 | return (all_matches, [idx])
142 | elif re.compile(choice[0]).search(data):
143 | if typ == 'or':
144 | # Return on first or choice
145 | return (all_matches, [idx])
146 | elif typ == 'all':
147 | # Extract all choice(s) from all files
148 | all_matches.add(choice[1])
149 | return (all_matches, None) # None means no matches found.
150 |
151 | def choice_matcher(self, file_contents):
152 | """Process regex matches on the file contents."""
153 | results = []
154 | for data, rule in file_contents:
155 | match = self.find_choices(data, rule)
156 | results.append({
157 | 'rule': rule,
158 | 'matches': match[1] if isinstance(match, tuple) else set(),
159 | 'all_matches': match[0] if isinstance(match, tuple) else match,
160 | })
161 | return results
162 |
163 | @staticmethod
164 | @lru_cache(maxsize=128)
165 | def _format_content(data, file_suffix):
166 | return strip_comments(data, file_suffix)
167 |
168 | def add_finding(self, results):
169 | """Add Choice Findings and generate metadata."""
170 | for res_list in results:
171 | if not res_list:
172 | continue
173 | for match_dict in res_list:
174 | rule = match_dict['rule']
175 | all_matches = match_dict['all_matches']
176 | matches = match_dict['matches']
177 |
178 | # Determine selection string
179 | if all_matches:
180 | selection = rule['selection'].format(list(all_matches))
181 | elif matches:
182 | select = rule['choice'][min(matches)][1]
183 | selection = rule['selection'].format(select)
184 | else:
185 | selection = rule['selection'].format(rule.get('else', ''))
186 |
187 | # Create metadata dictionary
188 | meta_dict = {
189 | 'choice': selection,
190 | 'description': rule['message'],
191 | **{key: rule[key] for key in rule if key not in {
192 | 'choice',
193 | 'message',
194 | 'id',
195 | 'type',
196 | 'choice_type',
197 | 'selection',
198 | 'else'}}}
199 |
200 | self.findings[rule['id']] = meta_dict
201 |
--------------------------------------------------------------------------------
/libsast/core_matcher/helpers.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf_8 -*-
2 | """Helper Functions."""
3 | import re
4 | from pathlib import Path
5 |
6 | from libsast.exceptions import (
7 | InvalidRuleError,
8 | MissingRuleError,
9 | RuleDownloadError,
10 | )
11 | from libsast.common import read_yaml
12 | from libsast.standards import get_mapping
13 |
14 | import requests
15 |
16 | # Default Single Line
17 | DEF_SINGLE = re.compile(r'//.+', re.MULTILINE)
18 | DEF_MULTI = re.compile(r'/\*([\S|\s]+?)\*/', re.MULTILINE)
19 | XML_CMT = re.compile(r'<!--([\S|\s]+?)-->', re.MULTILINE)
20 |
21 |
22 | def download_rule(url):
23 | """Download Pattern File."""
24 | try:
25 | with requests.get(
26 | url,
27 | allow_redirects=True,
28 | timeout=10) as r:
29 | r.raise_for_status()
30 | return r.text
31 | except requests.exceptions.RequestException:
32 | raise RuleDownloadError(f'Failed to download from: {url}')
33 |
34 |
35 | def get_rules(rule_loc): # noqa: R701
36 | """Get pattern matcher rules."""
37 | if not rule_loc:
38 | raise MissingRuleError('Rule location is missing.')
39 | if rule_loc.startswith(('http://', 'https://')):
40 | pat = download_rule(rule_loc)
41 | if not pat:
42 | return
43 | rules = read_yaml(pat, True)
44 | if not rules:
45 | return
46 | return get_mapping(rules)
47 | rule = Path(rule_loc)
48 | if rule.is_file() and rule.exists():
49 | rules = read_yaml(rule)
50 | if not rules:
51 | return
52 | return get_mapping(rules)
53 | elif rule.is_dir() and rule.exists():
54 | patterns = []
55 | for yfile in rule.glob('**/*.yaml'):
56 | rules = read_yaml(yfile)
57 | if rules:
58 | rules = get_mapping(rules)
59 | patterns.extend(rules)
60 | return patterns
61 | else:
62 | raise InvalidRuleError('This path is invalid')
63 |
64 |
65 | def comment_replacer(matches, data):
66 | """Replace Comments from data."""
67 | to_replace = set()
68 | repl_regex = re.compile(r'\S', re.MULTILINE)
69 | for match in matches:
70 | if match.group():
71 | stripm = match.group().strip()
72 | if stripm == '//':
73 | # ignore comment starters
74 | continue
75 | if ':' + stripm in data:
76 | # possible URLs http://, do not strip
77 | continue
78 | if 'ignore:' in data:
79 | # preserve ignore tags
80 | continue
81 | to_replace.add(match.group())
82 | for itm in to_replace:
83 | dummy = repl_regex.sub(' ', itm)
84 | data = data.replace(itm, dummy)
85 | return data
86 |
87 |
88 | def strip_comments1(data):
89 | """Remove Comments.
90 |
91 | Replace multiline comments first and
92 | then replace single line comments.
93 | """
94 | single_line = DEF_SINGLE
95 | multi_line = DEF_MULTI
96 | mmatches = multi_line.finditer(data)
97 | data = comment_replacer(mmatches, data)
98 | smatches = single_line.finditer(data)
99 | data = comment_replacer(smatches, data)
100 | return data
101 |
102 |
103 | def strip_comments2(data):
104 | """Remove Comments 2.
105 |
106 | Replace comments for HTML/XML
107 | """
108 | multi_line = XML_CMT
109 | mmatches = multi_line.finditer(data)
110 | data = comment_replacer(mmatches, data)
111 | return data
112 |
113 |
114 | def get_match_lines(content, pos):
115 | """Get Match lines from position."""
116 | start_line = 0
117 | filepos = 0
118 | skip = False
119 | for idx, line in enumerate(content.split('\n'), 1):
120 | filepos += len(line) + 1
121 | if filepos >= pos[0] and filepos >= pos[1] and not skip:
122 | # Match is on the same line
123 | return (idx, idx)
124 | elif filepos >= pos[0] and not skip:
125 | # Multiline match, find start line
126 | skip = True
127 | start_line = idx
128 | if filepos >= pos[1] and skip:
129 |             # Multiline match, find end line
130 | return (start_line, idx)
131 |
132 |
133 | def is_file_valid(sfile, allowed_extensions=None, max_size_mb=5):
134 | """Check if the file is valid based on its extension and size.
135 |
136 | Args:
137 | sfile (Path): The file to check.
138 | allowed_extensions (set): A set of allowed file extensions.
139 | max_size_mb (int): The maximum file size in MB.
140 |
141 | Returns:
142 | bool: True if the file is valid, False otherwise.
143 | """
144 | # Get the file extension in lowercase
145 | ext = sfile.suffix.lower()
146 |
147 | # Check if the file extension is allowed
148 | if allowed_extensions and ext not in allowed_extensions:
149 | return False
150 |
151 | # Check if the file size exceeds the maximum limit (in bytes)
152 | max_size_bytes = max_size_mb * 1024 * 1024 # Convert MB to bytes
153 | if sfile.stat().st_size > max_size_bytes:
154 | return False
155 |
156 | return True
157 |
158 |
159 | def strip_comments(data, ext):
160 | """Format content by stripping comments based on file type."""
161 | if ext in ('.html', '.xml'):
162 | return strip_comments2(data)
163 | return strip_comments1(data)
164 |
--------------------------------------------------------------------------------
/libsast/core_matcher/matchers.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf_8 -*-
2 | """Optimized Algorithms for Pattern Matching with Caching."""
3 | import re
4 | from abc import ABC, abstractmethod
5 | from functools import lru_cache
6 |
7 | from libsast.core_matcher.helpers import get_match_lines
8 |
9 |
10 | def get_pos(match):
11 | """Adjust zero index in match span."""
12 | start, end = match.span()
13 | return (1 if start == 0 else start, end)
14 |
15 |
16 | # Cache compiled regex patterns
17 | @lru_cache(maxsize=256)
18 | def get_compiled_pattern(pattern):
19 | """Compile and cache regex patterns."""
20 | return re.compile(pattern)
21 |
22 |
23 | class MatchCommand:
24 | def __init__(self):
25 | self.patterns = {}
26 | # Dictionary to map pattern names to their corresponding classes
27 | self.available_patterns = {
28 | 'Regex': Regex,
29 | 'RegexAnd': RegexAnd,
30 | 'RegexOr': RegexOr,
31 | 'RegexAndNot': RegexAndNot,
32 | 'RegexAndOr': RegexAndOr,
33 | }
34 |
35 | def _find_match(self, pattern_name, content, rule):
36 | pattern_class = self.patterns.get(
37 | pattern_name) or self._get_pattern_class(pattern_name)
38 | self.patterns.setdefault(pattern_name, pattern_class)
39 |
40 | # Apply case transformation if specified in the rule
41 | case = rule.get('input_case')
42 | if case == 'lower':
43 | content = content.lower()
44 | elif case == 'upper':
45 | content = content.upper()
46 |
47 | # Perform search
48 | return pattern_class._perform_search(content, rule)
49 |
50 | def _get_pattern_class(self, pattern_name):
51 | """Get pattern class from the available patterns dictionary."""
52 | if pattern_name in self.available_patterns:
53 | return self.available_patterns[pattern_name]()
54 | raise ValueError(f"Pattern '{pattern_name}' is not recognized.")
55 |
56 |
57 | class MatchStrategy(ABC):
58 | @abstractmethod
59 | def _perform_search(self, content, rule):
60 | """Search for instances of a pattern match in content."""
61 |
62 |
63 | class Regex(MatchStrategy):
64 | def _perform_search(self, content, rule):
65 | pattern = get_compiled_pattern(rule['pattern'])
66 | return self._find_matches(content, pattern)
67 |
68 | @staticmethod
69 | def _find_matches(content, pattern):
70 | """Helper to find all matches in content and extract details."""
71 | matches = set()
72 | for match in pattern.finditer(content):
73 | if match.group():
74 | match_pos = get_pos(match)
75 | match_lines = get_match_lines(content, match_pos)
76 | matches.add((match.group(), match_pos, match_lines))
77 | return matches
78 |
79 |
80 | class RegexAnd(MatchStrategy):
81 | def _perform_search(self, content, rule):
82 | patterns = rule['pattern'] if isinstance(
83 | rule['pattern'], list) else [rule['pattern']]
84 | all_matches = set()
85 | for regex in patterns:
86 | pattern = get_compiled_pattern(regex)
87 | matches = Regex._find_matches(content, pattern)
88 | if not matches:
89 | return False
90 | all_matches.update(matches)
91 | return all_matches
92 |
93 |
94 | class RegexOr(MatchStrategy):
95 | def _perform_search(self, content, rule):
96 | patterns = rule['pattern'] if isinstance(
97 | rule['pattern'], list) else [rule['pattern']]
98 | matches = set()
99 | for regex in patterns:
100 | pattern = get_compiled_pattern(regex)
101 | matches.update(Regex._find_matches(content, pattern))
102 | return matches
103 |
104 |
105 | class RegexAndNot(MatchStrategy):
106 | def _perform_search(self, content, rule):
107 | present_pattern, not_pattern = rule['pattern']
108 | not_found = get_compiled_pattern(not_pattern).search(content) is None
109 | if not not_found:
110 | return False
111 | present_matches = Regex._find_matches(
112 | content, get_compiled_pattern(present_pattern))
113 | return present_matches if present_matches else False
114 |
115 |
116 | class RegexAndOr(MatchStrategy):
117 | def _perform_search(self, content, rule):
118 | and_pattern, or_patterns = rule['pattern']
119 | and_matches = Regex._find_matches(content, get_compiled_pattern(and_pattern))
120 | or_matches = set()
121 | for regex in or_patterns:
122 | or_pattern = get_compiled_pattern(regex)
123 | or_matches = Regex._find_matches(content, or_pattern)
124 | if or_matches:
125 | break
126 | if and_matches and or_matches:
127 | return and_matches.union(or_matches)
128 | return False
129 |
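A minimal usage sketch (not part of the module above; in normal use PatternMatcher drives these classes): a rule dict needs the 'type' and 'pattern' keys, and may set 'input_case' to fold the content before matching. The rule values below are purely illustrative.

    from libsast.core_matcher.matchers import MatchCommand

    rule = {
        'type': 'Regex',
        'pattern': r'password\s*=\s*.+',
        'input_case': 'lower',  # optional: 'lower' or 'upper'
    }
    content = 'PASSWORD = "s3cret"\nuser = "alice"\n'

    # Returns a set of (match_string, (start, end), match_lines) tuples;
    # match_lines is whatever helpers.get_match_lines() computes for the span.
    matches = MatchCommand()._find_match(rule['type'], content, rule)
    for match_string, position, lines in matches:
        print(match_string, position, lines)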
--------------------------------------------------------------------------------
/libsast/core_matcher/pattern_matcher.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf_8 -*-
2 | """Pattern Matcher."""
3 | from operator import itemgetter
4 | from concurrent.futures import (
5 | ProcessPoolExecutor,
6 | ThreadPoolExecutor,
7 | )
8 | from functools import lru_cache
9 |
10 | from libsast.core_matcher.helpers import (
11 | get_rules,
12 | is_file_valid,
13 | strip_comments,
14 | )
15 | from libsast.core_matcher import matchers
16 | from libsast import (
17 | common,
18 | exceptions,
19 | )
20 |
21 |
22 | class PatternMatcher:
23 | def __init__(self, options: dict) -> None:
24 | self.matcher = matchers.MatchCommand()
25 | self.scan_rules = get_rules(options.get('match_rules'))
26 | self.show_progress = options.get('show_progress')
27 | self.cpu = options.get('cpu_core')
28 | self.multiprocessing = options.get('multiprocessing')
29 | exts = options.get('match_extensions')
30 | self.exts = [ext.lower() for ext in exts] if exts else []
31 | self.findings = {}
32 |
33 | def scan(self, paths: list) -> dict:
34 | """Scan file(s) or directory."""
35 | if self.show_progress:
36 | pbar = common.ProgressBar('Pattern Match', len(paths))
37 | paths = pbar.progress_loop(paths)
38 |
39 | file_contents = self.read_file_contents(paths)
40 | return self.regex_scan(file_contents)
41 |
42 | def read_file_contents(self, paths: list) -> list:
43 | """Load file(s) content."""
44 | if not paths:
45 | return []
46 |
47 | # Filter files by extension and size, prepare list for processing
48 | files_to_scan = {
49 | sfile for sfile in paths
50 | if is_file_valid(sfile, self.exts, 5)
51 | }
52 | if not files_to_scan:
53 | return []
54 |
55 | # Use a ThreadPool for file reading
56 | with ThreadPoolExecutor() as io_executor:
57 |
58 | # Read all files
59 | file_contents = list(io_executor.map(
60 | self._read_file_content, files_to_scan))
61 | return file_contents
62 |
63 | def regex_scan(self, file_contents: list, rules=None) -> dict:
64 | """Scan file(s) content."""
65 | if rules:
66 | self.scan_rules = get_rules(rules)
67 | if not (self.scan_rules and file_contents):
68 | return {}
69 | self.validate_rules()
70 |
71 | if self.multiprocessing == 'billiard':
72 | # Use billiard's pool for CPU-bound regex (support queues)
73 | from billiard import Pool
74 | with Pool(processes=self.cpu) as cpu_executor:
75 | # Run regex on file data
76 | results = cpu_executor.map(
77 | self.pattern_matcher,
78 | file_contents,
79 | )
80 | elif self.multiprocessing == 'thread':
81 | # Use a ThreadPool for regex check
82 | with ThreadPoolExecutor() as io_executor:
83 | # Run regex on file data
84 | results = io_executor.map(
85 | self.pattern_matcher,
86 | file_contents,
87 | )
88 | else:
89 | # Use a ProcessPool for CPU-bound regex
90 | with ProcessPoolExecutor(max_workers=self.cpu) as cpu_executor:
91 | # Run regex on file data
92 | results = cpu_executor.map(
93 | self.pattern_matcher,
94 | file_contents,
95 | )
96 |
97 | # Compile findings
98 | self.add_finding(results)
99 | return self.findings
100 |
101 | def validate_rules(self):
102 | """Validate Rules before scanning."""
103 | available_matchers = {m for m in dir(matchers) if m.startswith('R')}
104 | required_keys = ['type', 'pattern']
105 |
106 | for rule in self.scan_rules:
107 | if not isinstance(rule, dict):
108 | raise exceptions.InvalidRuleFormatError(
109 | 'Pattern Matcher Rule format is invalid.')
110 |
111 | # Check for missing required keys
112 | missing_keys = [key for key in required_keys if key not in rule]
113 | if missing_keys:
114 | mkeys = ', '.join(missing_keys)
115 | raise exceptions.PatternKeyMissingError(
116 | f'The rule is missing the keys: {mkeys}')
117 |
118 | pattern_name = rule['type']
119 | if pattern_name not in available_matchers:
120 | supported = ', '.join(available_matchers)
121 | raise exceptions.MatcherNotFoundError(
122 | f'Matcher {pattern_name} is not supported.'
123 | f' Available matchers are {supported}.')
124 |
125 | @staticmethod
126 | def _read_file_content(file_path):
127 | """Read file content with encoding handling."""
128 | try:
129 | return file_path, file_path.read_text('utf-8', 'ignore')
130 | except Exception as e:
131 | print(f'Error reading {file_path}: {e}')
132 | return file_path, ''
133 |
134 | def pattern_matcher(self, file_data):
135 | """Static Analysis Pattern Matcher."""
136 | file_path, data = file_data
137 | results = []
138 | try:
139 | fmt_data = self._format_content(data, file_path.suffix.lower())
140 | for rule in self.scan_rules:
141 | matches = self.matcher._find_match(rule['type'], fmt_data, rule)
142 | if matches:
143 | results.append({
144 | 'file': file_path.as_posix(),
145 | 'rule': rule,
146 | 'matches': matches,
147 | })
148 | except Exception as e:
149 | msg = f'Error processing rule for {file_path}: {e}'
150 | raise exceptions.RuleProcessingError(msg)
151 | return results
152 |
153 | @staticmethod
154 | @lru_cache(maxsize=128)
155 | def _format_content(data, file_suffix):
156 | return strip_comments(data, file_suffix)
157 |
158 | def add_finding(self, results):
159 | """Add Code Analysis Findings."""
160 | for res_list in results:
161 | if not res_list:
162 | continue
163 | for match_dict in res_list:
164 | rule = match_dict['rule']
165 | rule_id = rule['id']
166 | files = self.findings.setdefault(
167 | rule_id, {'files': [], 'metadata': {}})['files']
168 |
169 | for match in match_dict['matches']:
170 | file_details = {
171 | 'file_path': match_dict['file'],
172 | 'match_string': match[0],
173 | 'match_position': match[1],
174 | 'match_lines': match[2],
175 | }
176 | files.append(file_details)
177 |
178 | if not self.findings[rule_id]['metadata']:
179 | meta = rule.get('metadata', {})
180 | # message & severity are mandatory
181 | meta['description'] = rule['message']
182 | meta['severity'] = rule['severity']
183 | self.findings[rule_id]['metadata'] = meta
184 |
185 | # Sort files by specified criteria
186 | self.findings[rule_id]['files'].sort(
187 | key=itemgetter('file_path', 'match_string', 'match_lines'))
188 |
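A minimal sketch of running PatternMatcher on its own (the Scanner module below normally wires this up). The rules path and source paths are placeholders; the 'thread' pool is chosen here only to keep the example self-contained.

    from pathlib import Path
    from libsast.core_matcher.pattern_matcher import PatternMatcher

    options = {
        'match_rules': 'rules/patterns.yaml',   # placeholder path to pattern rules
        'match_extensions': ['.py'],            # restrict scanning to these suffixes
        'show_progress': False,
        'cpu_core': 2,
        'multiprocessing': 'thread',            # 'default' (processes) or 'billiard' also work
    }
    findings = PatternMatcher(options).scan([Path('src/app.py')])

    # findings maps rule id -> {'files': [...], 'metadata': {...}}
    for rule_id, result in findings.items():
        print(rule_id, result['metadata'].get('severity'), len(result['files']))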
--------------------------------------------------------------------------------
/libsast/core_sgrep/__init__.py:
--------------------------------------------------------------------------------
1 | """Core Semantic Grep."""
2 |
--------------------------------------------------------------------------------
/libsast/core_sgrep/helpers.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf_8 -*-
2 | """Semantic Grep Helpers."""
3 | import json
4 | import platform
5 | import subprocess
6 |
7 |
8 | def invoke_semgrep(paths, scan_rules):
9 | if platform.system() == 'Windows':
10 | return None
11 | ps = [pt.as_posix() for pt in paths]
12 | command = [
13 | 'semgrep',
14 | '--metrics=off',
15 | '--no-rewrite-rule-ids',
16 | '--json',
17 | '-q',
18 | '--config',
19 | scan_rules,
20 | *ps,
21 | ]
22 | try:
23 | result = subprocess.run(command, capture_output=True, text=True, check=True)
24 | return json.loads(result.stdout)
25 | except subprocess.CalledProcessError as e:
26 | try:
27 | return json.loads(e.output)
28 | except json.JSONDecodeError:
29 | return {'errors': e.output}
30 |
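A small usage sketch, assuming the semgrep CLI is installed and on PATH (on Windows the helper simply returns None). The scanned path and rule file are placeholders.

    from pathlib import Path
    from libsast.core_sgrep.helpers import invoke_semgrep

    output = invoke_semgrep([Path('src/app.py')], 'rules/sgrep.yaml')
    if output:
        print(len(output.get('results', [])), 'semgrep findings')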
--------------------------------------------------------------------------------
/libsast/core_sgrep/semantic_sgrep.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf_8 -*-
2 | """Semantic Grep Core."""
3 | from libsast.core_sgrep.helpers import invoke_semgrep
4 | from libsast import (
5 | common,
6 | standards,
7 | )
8 |
9 |
10 | class SemanticGrep:
11 | def __init__(self, options: dict) -> None:
12 | self.scan_rules = options.get('sgrep_rules')
13 | self.show_progress = options.get('show_progress')
14 | exts = options.get('sgrep_extensions')
15 | if exts:
16 | self.exts = [ext.lower() for ext in exts]
17 | else:
18 | self.exts = []
19 | self.findings = {
20 | 'matches': {},
21 | 'errors': [],
22 | }
23 | self.standards = standards.get_standards()
24 |
25 | def scan(self, paths: list) -> dict:
26 | """Do sgrep scan."""
27 | if self.exts:
28 | filtered = []
29 | for sfile in paths:
30 | if sfile.suffix.lower() in self.exts:
31 | filtered.append(sfile)
32 | if filtered:
33 | paths = filtered
34 | if self.show_progress:
35 | pbar = common.ProgressBar('Semantic Grep', len(paths))
36 | sgrep_out = pbar.progress_function(
37 | invoke_semgrep,
38 | (paths, self.scan_rules))
39 | else:
40 | sgrep_out = invoke_semgrep(paths, self.scan_rules)
41 |         self.format_output(sgrep_out or {})
42 | return self.findings
43 |
44 | def format_output(self, results):
45 | """Format sgrep results."""
46 | errs = results.get('errors')
47 | if errs:
48 | self.findings['errors'] = errs
49 | if not results.get('results'):
50 | return
51 | smatches = self.findings['matches']
52 | for find in results['results']:
53 | file_details = {
54 | 'file_path': find['path'],
55 | 'match_position': (find['start']['col'], find['end']['col']),
56 | 'match_lines': (find['start']['line'], find['end']['line']),
57 | 'match_string': find['extra']['lines'],
58 | }
59 | rule_id = find['check_id']
60 | if rule_id in smatches:
61 | smatches[rule_id]['files'].append(file_details)
62 | else:
63 | metadata = find['extra']['metadata']
64 | metadata['description'] = find['extra']['message']
65 | metadata['severity'] = find['extra']['severity']
66 | smatches[rule_id] = {
67 | 'files': [file_details],
68 | 'metadata': metadata,
69 | }
70 | self.expand_mappings(smatches[rule_id])
71 |
72 | def expand_mappings(self, meta):
73 | """Expand libsast standard mappings."""
74 | meta_keys = meta['metadata'].keys()
75 | for mkey in meta_keys:
76 | if mkey not in self.standards.keys():
77 | continue
78 | to_expand = meta['metadata'][mkey]
79 | expanded = self.standards[mkey].get(to_expand)
80 | if expanded:
81 | meta['metadata'][mkey] = expanded
82 |
--------------------------------------------------------------------------------
/libsast/exceptions.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf_8 -*-
2 | """The libsast Exceptions."""
3 |
4 |
5 | class LibsastError(Exception):
6 | """Base class for all exceptions thrown by libsast."""
7 |
8 | def __init__(self, message=None):
9 | super().__init__(message)
10 |
11 |
12 | class InvalidPathError(LibsastError):
13 | """Invalid Path Supplied to libsast."""
14 |
15 | pass
16 |
17 |
18 | class YamlRuleParseError(LibsastError):
19 | """Failed to parse YAML rule."""
20 |
21 | pass
22 |
23 |
24 | class YamlRuleLoadError(LibsastError):
25 | """Failed to load YAML rule file."""
26 |
27 | pass
28 |
29 |
30 | class MissingRuleError(LibsastError):
31 | """Rule not provided."""
32 |
33 | pass
34 |
35 |
36 | class InvalidRuleError(LibsastError):
37 | """No rule directory, file or url specified."""
38 |
39 | pass
40 |
41 |
42 | class TypeKeyMissingError(LibsastError):
43 | """Pattern Matcher rule does not have the key 'type'."""
44 |
45 | pass
46 |
47 |
48 | class InvalidRuleFormatError(LibsastError):
49 | """Pattern Matcher rule file is invalid."""
50 |
51 | pass
52 |
53 |
54 | class PatternKeyMissingError(LibsastError):
55 | """Pattern Matcher rule does not have the key 'pattern'."""
56 |
57 | pass
58 |
59 |
60 | class RuleDownloadError(LibsastError):
61 | """Failed to download rule."""
62 |
63 | pass
64 |
65 |
66 | class RuleProcessingError(LibsastError):
67 | """Failed to download rule."""
68 |
69 | pass
70 |
71 |
72 | class MatcherNotFoundError(LibsastError):
73 | """Pattern Matcher not found."""
74 |
75 | pass
76 |
--------------------------------------------------------------------------------
/libsast/scanner.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf_8 -*-
2 | """The libsast Scanner module."""
3 | from pathlib import Path
4 | from fnmatch import fnmatch
5 |
6 | from libsast.common import get_worker_count
7 | from libsast.core_matcher.pattern_matcher import PatternMatcher
8 | from libsast.core_matcher.choice_matcher import ChoiceMatcher
9 | from libsast.core_sgrep.semantic_sgrep import SemanticGrep
10 | from libsast.exceptions import InvalidPathError
11 |
12 |
13 | class Scanner:
14 | def __init__(self, options: dict, paths: list) -> None:
15 | # Set options with default values where missing
16 | self.options = {
17 | 'sgrep_rules': None,
18 | 'sgrep_extensions': None,
19 | 'match_rules': None,
20 | 'match_extensions': None,
21 | 'choice_rules': None,
22 | 'choice_extensions': None,
23 | 'alternative_path': None,
24 | 'ignore_filenames': [],
25 | 'ignore_extensions': [],
26 | 'ignore_paths': [],
27 | 'show_progress': False,
28 | 'cpu_core': 1,
29 | 'multiprocessing': 'default',
30 | # Overwrite with options from invocation
31 | **(options or {}),
32 | }
33 |
34 | # Cache frequently used settings
35 | if options.get('ignore_extensions'):
36 | self.ignore_extensions = set(options.get('ignore_extensions'))
37 | else:
38 | self.ignore_extensions = set()
39 | if options.get('ignore_filenames'):
40 | self.ignore_filenames = set(options.get('ignore_filenames'))
41 | else:
42 | self.ignore_filenames = set()
43 | if options.get('ignore_paths'):
44 | self.ignore_paths = {
45 | Path(p).as_posix() for p in self.options['ignore_paths']}
46 | else:
47 | self.ignore_paths = set()
48 | if options.get('cpu_core') and isinstance(options['cpu_core'], int):
49 | self.options['cpu_core'] = options['cpu_core']
50 | else:
51 | self.options['cpu_core'] = get_worker_count()
52 | self.paths = paths
53 |
54 | def scan(self) -> dict:
55 | """Start Scan."""
56 | results = {}
57 | valid_paths = self.get_scan_files()
58 |
59 | if not valid_paths:
60 | return {}
61 |
62 | if self.options['match_rules']:
63 | results['pattern_matcher'] = PatternMatcher(self.options).scan(valid_paths)
64 | if self.options['choice_rules']:
65 | results['choice_matcher'] = ChoiceMatcher(self.options).scan(valid_paths)
66 | if self.options['sgrep_rules']:
67 | results['semantic_grep'] = SemanticGrep(self.options).scan(valid_paths)
68 |
69 | return results
70 |
71 | def get_scan_files(self):
72 | """Get files valid for scanning."""
73 | if not isinstance(self.paths, list):
74 | raise InvalidPathError('Path should be a list')
75 |
76 | all_files = set()
77 | for path in self.paths:
78 | pobj = Path(path)
79 | if pobj.is_dir():
80 | all_files.update({
81 | pfile
82 | for pfile in pobj.rglob('*')
83 | if self.validate_file(pfile)
84 | })
85 | elif pobj.is_file() and self.validate_file(pobj):
86 | all_files.add(pobj)
87 |
88 | return all_files
89 |
90 | def validate_file(self, path):
91 | """Check if we should scan the file."""
92 | # Early exit if the path is not a file or does not exist
93 | if not path.exists() or not path.is_file():
94 | return False
95 |
96 | # Validate against ignore conditions
97 | ignore_conditions = any([
98 | any(pp in path.as_posix() for pp in self.ignore_paths),
99 | any(fnmatch(path.name, pattern) for pattern in self.ignore_filenames),
100 | path.suffix.lower() in self.ignore_extensions,
101 | ])
102 |
103 | return not ignore_conditions
104 |
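A minimal end-to-end sketch: unset options fall back to the defaults above, and each engine runs only when its rule option is set. All paths are placeholders, and the semantic grep step additionally needs the semgrep CLI installed.

    from libsast.scanner import Scanner

    options = {
        'match_rules': 'rules/patterns.yaml',   # regex pattern matcher rules
        'choice_rules': 'rules/choice.yaml',    # choice matcher rules
        'sgrep_rules': 'rules/semgrep/',        # semgrep rules (skipped on Windows)
        'ignore_extensions': ['.pyc'],
        'ignore_paths': ['node_modules'],
    }
    results = Scanner(options, ['src/']).scan()

    # Possible top-level keys: 'pattern_matcher', 'choice_matcher', 'semantic_grep'
    print(list(results.keys()))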
--------------------------------------------------------------------------------
/libsast/standards.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf_8 -*-
2 | """Standards Matcher."""
3 | from pathlib import Path
4 |
5 | from libsast.common import (
6 | read_yaml,
7 | )
8 |
9 |
10 | def read_standards(rule_loc):
11 | """Get mappings."""
12 | if rule_loc.is_dir() and rule_loc.exists():
13 | patterns = {}
14 | for yfile in rule_loc.glob('**/*.yaml'):
15 | rule = read_yaml(yfile)
16 | if rule:
17 | patterns.update(rule)
18 | return patterns
19 | return None
20 |
21 |
22 | def get_standards():
23 | """Get inbuilt mappings."""
24 | stds_loc = Path(__file__).parents[0] / 'standards'
25 | return read_standards(stds_loc)
26 |
27 |
28 | def get_mapping(rules):
29 | """Process rule mappings."""
30 | std_map = get_standards()
31 | new_rules = []
32 | for rule in rules:
33 | if not rule.get('metadata'):
34 | new_rules.append(rule)
35 | continue
36 | meta_keys = rule['metadata'].keys()
37 | for mkey in meta_keys:
38 | if mkey not in std_map.keys():
39 | continue
40 | to_expand = rule['metadata'][mkey]
41 | expanded = std_map[mkey].get(to_expand)
42 | if expanded:
43 | rule['metadata'][mkey] = expanded
44 | new_rules.append(rule)
45 | return new_rules
46 |
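A short sketch of what get_mapping() does to rule metadata, using the bundled cwe mapping from the standards directory; the rule itself is illustrative.

    from libsast.standards import get_mapping

    rules = [{'id': 'example_rule', 'metadata': {'cwe': 'cwe-89'}}]
    expanded = get_mapping(rules)

    # The 'cwe' value is replaced with the full title from cwe.yaml, e.g.
    # "CWE-89: Improper Neutralization of Special Elements used in an SQL Command ('SQL Injection')"
    print(expanded[0]['metadata']['cwe'])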
--------------------------------------------------------------------------------
/libsast/standards/cwe.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | cwe:
3 | cwe-1004: "CWE-1004: Sensitive Cookie Without 'HttpOnly' Flag"
4 | cwe-1007: "CWE-1007: Insufficient Visual Distinction of Homoglyphs Presented to User"
5 | cwe-1021: "CWE-1021: Improper Restriction of Rendered UI Layers or Frames"
6 | cwe-1022: "CWE-1022: Use of Web Link to Untrusted Target with window.opener Access"
7 | cwe-1024: "CWE-1024: Comparison of Incompatible Types"
8 | cwe-1025: "CWE-1025: Comparison Using Wrong Factors"
9 | cwe-1037: "CWE-1037: Processor Optimization Removal or Modification of Security-critical Code"
10 | cwe-1041: "CWE-1041: Use of Redundant Code"
11 | cwe-1043: "CWE-1043: Data Element Aggregating an Excessively Large Number of Non-Primitive Elements"
12 | cwe-1044: "CWE-1044: Architecture with Number of Horizontal Layers Outside of Expected Range"
13 | cwe-1045: "CWE-1045: Parent Class with a Virtual Destructor and a Child Class without a Virtual Destructor"
14 | cwe-1046: "CWE-1046: Creation of Immutable Text Using String Concatenation"
15 | cwe-1047: "CWE-1047: Modules with Circular Dependencies"
16 | cwe-1048: "CWE-1048: Invokable Control Element with Large Number of Outward Calls"
17 | cwe-1049: "CWE-1049: Excessive Data Query Operations in a Large Data Table"
18 | cwe-1050: "CWE-1050: Excessive Platform Resource Consumption within a Loop"
19 | cwe-1051: "CWE-1051: Initialization with Hard-Coded Network Resource Configuration Data"
20 | cwe-1052: "CWE-1052: Excessive Use of Hard-Coded Literals in Initialization"
21 | cwe-1053: "CWE-1053: Missing Documentation for Design"
22 | cwe-1054: "CWE-1054: Invocation of a Control Element at an Unnecessarily Deep Horizontal Layer"
23 | cwe-1055: "CWE-1055: Multiple Inheritance from Concrete Classes"
24 | cwe-1056: "CWE-1056: Invokable Control Element with Variadic Parameters"
25 | cwe-1057: "CWE-1057: Data Access Operations Outside of Expected Data Manager Component"
26 | cwe-1058: "CWE-1058: Invokable Control Element in Multi-Thread Context with non-Final Static Storable or Member Element"
27 | cwe-1060: "CWE-1060: Excessive Number of Inefficient Server-Side Data Accesses"
28 | cwe-1062: "CWE-1062: Parent Class with References to Child Class"
29 | cwe-1063: "CWE-1063: Creation of Class Instance within a Static Code Block"
30 | cwe-1064: "CWE-1064: Invokable Control Element with Signature Containing an Excessive Number of Parameters"
31 | cwe-1065: "CWE-1065: Runtime Resource Management Control Element in a Component Built to Run on Application Servers"
32 | cwe-1066: "CWE-1066: Missing Serialization Control Element"
33 | cwe-1067: "CWE-1067: Excessive Execution of Sequential Searches of Data Resource"
34 | cwe-1068: "CWE-1068: Inconsistency Between Implementation and Documented Design"
35 | cwe-1069: "CWE-1069: Empty Exception Block"
36 | cwe-1070: "CWE-1070: Serializable Data Element Containing non-Serializable Item Elements"
37 | cwe-1071: "CWE-1071: Empty Code Block"
38 | cwe-1072: "CWE-1072: Data Resource Access without Use of Connection Pooling"
39 | cwe-1073: "CWE-1073: Non-SQL Invokable Control Element with Excessive Number of Data Resource Accesses"
40 | cwe-1074: "CWE-1074: Class with Excessively Deep Inheritance"
41 | cwe-1075: "CWE-1075: Unconditional Control Flow Transfer outside of Switch Block"
42 | cwe-1077: "CWE-1077: Floating Point Comparison with Incorrect Operator"
43 | cwe-1079: "CWE-1079: Parent Class without Virtual Destructor Method"
44 | cwe-1080: "CWE-1080: Source Code File with Excessive Number of Lines of Code"
45 | cwe-1082: "CWE-1082: Class Instance Self Destruction Control Element"
46 | cwe-1083: "CWE-1083: Data Access from Outside Expected Data Manager Component"
47 | cwe-1084: "CWE-1084: Invokable Control Element with Excessive File or Data Access Operations"
48 | cwe-1085: "CWE-1085: Invokable Control Element with Excessive Volume of Commented-out Code"
49 | cwe-1086: "CWE-1086: Class with Excessive Number of Child Classes"
50 | cwe-1087: "CWE-1087: Class with Virtual Method without a Virtual Destructor"
51 | cwe-1088: "CWE-1088: Synchronous Access of Remote Resource without Timeout"
52 | cwe-1089: "CWE-1089: Large Data Table with Excessive Number of Indices"
53 | cwe-1090: "CWE-1090: Method Containing Access of a Member Element from Another Class"
54 | cwe-1091: "CWE-1091: Use of Object without Invoking Destructor Method"
55 | cwe-1092: "CWE-1092: Use of Same Invokable Control Element in Multiple Architectural Layers"
56 | cwe-1094: "CWE-1094: Excessive Index Range Scan for a Data Resource"
57 | cwe-1095: "CWE-1095: Loop Condition Value Update within the Loop"
58 | cwe-1097: "CWE-1097: Persistent Storable Data Element without Associated Comparison Control Element"
59 | cwe-1098: "CWE-1098: Data Element containing Pointer Item without Proper Copy Control Element"
60 | cwe-1099: "CWE-1099: Inconsistent Naming Conventions for Identifiers"
61 | cwe-1100: "CWE-1100: Insufficient Isolation of System-Dependent Functions"
62 | cwe-1101: "CWE-1101: Reliance on Runtime Component in Generated Code"
63 | cwe-1102: "CWE-1102: Reliance on Machine-Dependent Data Representation"
64 | cwe-1103: "CWE-1103: Use of Platform-Dependent Third Party Components"
65 | cwe-1104: "CWE-1104: Use of Unmaintained Third Party Components"
66 | cwe-1105: "CWE-1105: Insufficient Encapsulation of Machine-Dependent Functionality"
67 | cwe-1106: "CWE-1106: Insufficient Use of Symbolic Constants"
68 | cwe-1107: "CWE-1107: Insufficient Isolation of Symbolic Constant Definitions"
69 | cwe-1108: "CWE-1108: Excessive Reliance on Global Variables"
70 | cwe-1109: "CWE-1109: Use of Same Variable for Multiple Purposes"
71 | cwe-1110: "CWE-1110: Incomplete Design Documentation"
72 | cwe-1111: "CWE-1111: Incomplete I/O Documentation"
73 | cwe-1112: "CWE-1112: Incomplete Documentation of Program Execution"
74 | cwe-1113: "CWE-1113: Inappropriate Comment Style"
75 | cwe-1114: "CWE-1114: Inappropriate Whitespace Style"
76 | cwe-1115: "CWE-1115: Source Code Element without Standard Prologue"
77 | cwe-1116: "CWE-1116: Inaccurate Comments"
78 | cwe-1117: "CWE-1117: Callable with Insufficient Behavioral Summary"
79 | cwe-1118: "CWE-1118: Insufficient Documentation of Error Handling Techniques"
80 | cwe-1119: "CWE-1119: Excessive Use of Unconditional Branching"
81 | cwe-112: "CWE-112: Missing XML Validation"
82 | cwe-1121: "CWE-1121: Excessive McCabe Cyclomatic Complexity"
83 | cwe-1122: "CWE-1122: Excessive Halstead Complexity"
84 | cwe-1123: "CWE-1123: Excessive Use of Self-Modifying Code"
85 | cwe-1124: "CWE-1124: Excessively Deep Nesting"
86 | cwe-1125: "CWE-1125: Excessive Attack Surface"
87 | cwe-1126: "CWE-1126: Declaration of Variable with Unnecessarily Wide Scope"
88 | cwe-1127: "CWE-1127: Compilation with Insufficient Warnings or Errors"
89 | cwe-113: "CWE-113: Improper Neutralization of CRLF Sequences in HTTP Headers ('HTTP Response Splitting')"
90 | cwe-115: "CWE-115: Misinterpretation of Input"
91 | cwe-117: "CWE-117: Improper Output Neutralization for Logs"
92 | cwe-1173: "CWE-1173: Improper Use of Validation Framework"
93 | cwe-1188: "CWE-1188: Insecure Default Initialization of Resource"
94 | cwe-119: "CWE-119: Improper Restriction of Operations within the Bounds of a Memory Buffer"
95 | cwe-120: "CWE-120: Buffer Copy without Checking Size of Input ('Classic Buffer Overflow')"
96 | cwe-121: "CWE-121: Stack-based Buffer Overflow"
97 | cwe-122: "CWE-122: Heap-based Buffer Overflow"
98 | cwe-1220: "CWE-1220: Insufficient Granularity of Access Control"
99 | cwe-123: "CWE-123: Write-what-where Condition"
100 | cwe-1230: "CWE-1230: Exposure of Sensitive Information Through Metadata"
101 | cwe-1235: "CWE-1235: Incorrect Use of Autoboxing and Unboxing for Performance Critical Operations"
102 | cwe-1236: "CWE-1236: Improper Neutralization of Formula Elements in a CSV File"
103 | cwe-124: "CWE-124: Buffer Underwrite ('Buffer Underflow')"
104 | cwe-1240: "CWE-1240: Use of a Risky Cryptographic Primitive"
105 | cwe-1241: "CWE-1241: Use of Predictable Algorithm in Random Number Generator"
106 | cwe-125: "CWE-125: Out-of-bounds Read"
107 | cwe-126: "CWE-126: Buffer Over-read"
108 | cwe-128: "CWE-128: Wrap-around Error"
109 | cwe-129: "CWE-129: Improper Validation of Array Index"
110 | cwe-130: "CWE-130: Improper Handling of Length Parameter Inconsistency"
111 | cwe-131: "CWE-131: Incorrect Calculation of Buffer Size"
112 | cwe-134: "CWE-134: Use of Externally-Controlled Format String"
113 | cwe-135: "CWE-135: Incorrect Calculation of Multi-Byte String Length"
114 | cwe-140: "CWE-140: Improper Neutralization of Delimiters"
115 | cwe-141: "CWE-141: Improper Neutralization of Parameter/Argument Delimiters"
116 | cwe-15: "CWE-15: External Control of System or Configuration Setting"
117 | cwe-16: "CWE-16: Configuration"
118 | cwe-166: "CWE-166: Improper Handling of Missing Special Element"
119 | cwe-167: "CWE-167: Improper Handling of Additional Special Element"
120 | cwe-168: "CWE-168: Improper Handling of Inconsistent Special Elements"
121 | cwe-170: "CWE-170: Improper Null Termination"
122 | cwe-178: "CWE-178: Improper Handling of Case Sensitivity"
123 | cwe-179: "CWE-179: Incorrect Behavior Order: Early Validation"
124 | cwe-182: "CWE-182: Collapse of Data into Unsafe Value"
125 | cwe-183: "CWE-183: Permissive List of Allowed Inputs"
126 | cwe-184: "CWE-184: Incomplete List of Disallowed Inputs"
127 | cwe-185: "CWE-185: Incorrect Regular Expression"
128 | cwe-186: "CWE-186: Overly Restrictive Regular Expression"
129 | cwe-188: "CWE-188: Reliance on Data/Memory Layout"
130 | cwe-19: "CWE-19: Data Handling"
131 | cwe-190: "CWE-190: Integer Overflow or Wraparound"
132 | cwe-191: "CWE-191: Integer Underflow (Wrap or Wraparound)"
133 | cwe-192: "CWE-192: Integer Coercion Error"
134 | cwe-193: "CWE-193: Off-by-one Error"
135 | cwe-194: "CWE-194: Unexpected Sign Extension"
136 | cwe-197: "CWE-197: Numeric Truncation Error"
137 | cwe-198: "CWE-198: Use of Incorrect Byte Ordering"
138 | cwe-199: "CWE-199: Information Management Errors"
139 | cwe-20: "CWE-20: Improper Input Validation"
140 | cwe-200: "CWE-200: Information Exposure"
141 | cwe-201: "CWE-201: Exposure of Sensitive Information Through Sent Data"
142 | cwe-204: "CWE-204: Observable Response Discrepancy"
143 | cwe-205: "CWE-205: Observable Behavioral Discrepancy"
144 | cwe-208: "CWE-208: Observable Timing Discrepancy"
145 | cwe-209: "CWE-209: Generation of Error Message Containing Sensitive Information"
146 | cwe-212: "CWE-212: Improper Removal of Sensitive Information Before Storage or Transfer"
147 | cwe-213: "CWE-213: Exposure of Sensitive Information Due to Incompatible Policies"
148 | cwe-214: "CWE-214: Invocation of Process Using Visible Sensitive Information"
149 | cwe-215: "CWE-215: Insertion of Sensitive Information Into Debugging Code"
150 | cwe-22: "CWE-22: Improper Limitation of a Pathname to a Restricted Directory ('Path Traversal')"
151 | cwe-222: "CWE-222: Truncation of Security-relevant Information"
152 | cwe-223: "CWE-223: Omission of Security-relevant Information"
153 | cwe-224: "CWE-224: Obscured Security-relevant Information by Alternate Name"
154 | cwe-226: "CWE-226: Sensitive Information Uncleared in Resource Before Release for Reuse"
155 | cwe-229: "CWE-229: Improper Handling of Values"
156 | cwe-23: "CWE-23: Relative Path Traversal"
157 | cwe-233: "CWE-233: Improper Handling of Parameters"
158 | cwe-237: "CWE-237: Improper Handling of Structural Elements"
159 | cwe-241: "CWE-241: Improper Handling of Unexpected Data Type"
160 | cwe-242: "CWE-242: Use of Inherently Dangerous Function"
161 | cwe-243: "CWE-243: Creation of chroot Jail Without Changing Working Directory"
162 | cwe-244: "CWE-244: Improper Clearing of Heap Memory Before Release ('Heap Inspection')"
163 | cwe-248: "CWE-248: Uncaught Exception"
164 | cwe-250: "CWE-250: Execution with Unnecessary Privileges"
165 | cwe-252: "CWE-252: Unchecked Return Value"
166 | cwe-253: "CWE-253: Incorrect Check of Function Return Value"
167 | cwe-254: "CWE-254: Security Features"
168 | cwe-255: "CWE-255: Credentials Management"
169 | cwe-256: "CWE-256: Unprotected Storage of Credentials"
170 | cwe-257: "CWE-257: Storing Passwords in a Recoverable Format"
171 | cwe-258: "CWE-258: Empty password in configuration file"
172 | cwe-259: "CWE-259: Use of Hard-coded Password"
173 | cwe-260: "CWE-260: Password in Configuration File"
174 | cwe-261: "CWE-261: Weak Encoding for Password"
175 | cwe-262: "CWE-262: Not Using Password Aging"
176 | cwe-263: "CWE-263: Password Aging with Long Expiration"
177 | cwe-264: "CWE-264: Permissions, Privileges, and Access Controls"
178 | cwe-265: "CWE-265: Privilege / Sandbox Issues"
179 | cwe-266: "CWE-266: Incorrect Privilege Assignment"
180 | cwe-267: "CWE-267: Privilege Defined With Unsafe Actions"
181 | cwe-268: "CWE-268: Privilege Chaining"
182 | cwe-269: "CWE-269: Improper Privilege Management"
183 | cwe-270: "CWE-270: Privilege Context Switching Error"
184 | cwe-271: "CWE-271: Privilege Dropping / Lowering Errors"
185 | cwe-272: "CWE-272: Least Privilege Violation"
186 | cwe-273: "CWE-273: Improper Check for Dropped Privileges"
187 | cwe-274: "CWE-274: Improper Handling of Insufficient Privileges"
188 | cwe-276: "CWE-276: Incorrect Default Permissions"
189 | cwe-277: "CWE-277: Insecure Inherited Permissions"
190 | cwe-278: "CWE-278: Insecure Preserved Inherited Permissions"
191 | cwe-279: "CWE-279: Incorrect Execution-Assigned Permissions"
192 | cwe-280: "CWE-280: Improper Handling of Insufficient Permissions or Privileges"
193 | cwe-281: "CWE-281: Improper Preservation of Permissions"
194 | cwe-283: "CWE-283: Unverified Ownership"
195 | cwe-284: "CWE-284: Improper Access Control"
196 | cwe-285: "CWE-285: Improper Authorization"
197 | cwe-287: "CWE-287: Improper Authentication"
198 | cwe-288: "CWE-288: Authentication Bypass Using an Alternate Path or Channel"
199 | cwe-290: "CWE-290: Authentication Bypass by Spoofing"
200 | cwe-294: "CWE-294: Authentication Bypass by Capture-replay"
201 | cwe-295: "CWE-295: Improper Certificate Validation"
202 | cwe-296: "CWE-296: Improper Following of a Certificate's Chain of Trust"
203 | cwe-297: "CWE-297: Improper Validation of Certificate with Host Mismatch"
204 | cwe-299: "CWE-299: Improper Check for Certificate Revocation"
205 | cwe-300: "CWE-300: Channel Accessible by Non-Endpoint ('Man-in-the-Middle')"
206 | cwe-302: "CWE-302: Authentication Bypass by Assumed-Immutable Data"
207 | cwe-303: "CWE-303: Incorrect Implementation of Authentication Algorithm"
208 | cwe-304: "CWE-304: Missing Critical Step in Authentication"
209 | cwe-305: "CWE-305: Authentication Bypass by Primary Weakness"
210 | cwe-306: "CWE-306: Missing Authentication for Critical Function"
211 | cwe-307: "CWE-307: Improper Restriction of Excessive Authentication Attempts"
212 | cwe-308: "CWE-308: Use of Single-factor Authentication"
213 | cwe-309: "CWE-309: Use of Password System for Primary Authentication"
214 | cwe-310: "CWE-310: Cryptographic Issues"
215 | cwe-311: "CWE-311: Missing Encryption of Sensitive Data"
216 | cwe-312: "CWE-312: Cleartext Storage of Sensitive Information"
217 | cwe-317: "CWE-317: Cleartext Storage of Sensitive Information in GUI"
218 | cwe-319: "CWE-319: Cleartext Transmission of Sensitive Information"
219 | cwe-320: "CWE-320: Key Management Errors"
220 | cwe-321: "CWE-321: Use of Hard-coded Cryptographic Key"
221 | cwe-322: "CWE-322: Key Exchange without Entity Authentication"
222 | cwe-323: "CWE-323: Reusing a Nonce, Key Pair in Encryption"
223 | cwe-324: "CWE-324: Use of a Key Past its Expiration Date"
224 | cwe-325: "CWE-325: Missing Required Cryptographic Step"
225 | cwe-326: "CWE-326: Inadequate Encryption Strength"
226 | cwe-327: "CWE-327: Use of a Broken or Risky Cryptographic Algorithm"
227 | cwe-328: "CWE-328: Reversible One-Way Hash"
228 | cwe-329: "CWE-329: Not Using a Random IV with CBC Mode"
229 | cwe-330: "CWE-330: Use of Insufficiently Random Values"
230 | cwe-331: "CWE-331: Insufficient Entropy"
231 | cwe-332: "CWE-332: Insufficient Entropy in PRNG"
232 | cwe-334: "CWE-334: Small Space of Random Values"
233 | cwe-335: "CWE-335: Incorrect Usage of Seeds in Pseudo-Random Number Generator (PRNG)"
234 | cwe-338: "CWE-338: Use of Cryptographically Weak Pseudo-Random Number Generator (PRNG)"
235 | cwe-341: "CWE-341: Predictable from Observable State"
236 | cwe-342: "CWE-342: Predictable Exact Value from Previous Values"
237 | cwe-343: "CWE-343: Predictable Value Range from Previous Values"
238 | cwe-345: "CWE-345: Insufficient Verification of Data Authenticity"
239 | cwe-346: "CWE-346: Origin Validation Error"
240 | cwe-347: "CWE-347: Improper Verification of Cryptographic Signature"
241 | cwe-348: "CWE-348: Use of Less Trusted Source"
242 | cwe-349: "CWE-349: Acceptance of Extraneous Untrusted Data With Trusted Data"
243 | cwe-350: "CWE-350: Reliance on Reverse DNS Resolution for a Security-Critical Action"
244 | cwe-351: "CWE-351: Insufficient Type Distinction"
245 | cwe-352: "CWE-352: Cross-Site Request Forgery (CSRF)"
246 | cwe-353: "CWE-353: Missing Support for Integrity Check"
247 | cwe-354: "CWE-354: Improper Validation of Integrity Check Value"
248 | cwe-356: "CWE-356: Product UI does not Warn User of Unsafe Actions"
249 | cwe-357: "CWE-357: Insufficient UI Warning of Dangerous Operations"
250 | cwe-358: "CWE-358: Improperly Implemented Security Check for Standard"
251 | cwe-359: "CWE-359: Exposure of Private Personal Information to an Unauthorized Actor"
252 | cwe-36: "CWE-36: Absolute Path Traversal"
253 | cwe-362: "CWE-362: Concurrent Execution using Shared Resource with Improper Synchronization ('Race Condition')"
254 | cwe-363: "CWE-363: Race Condition Enabling Link Following"
255 | cwe-364: "CWE-364: Signal Handler Race Condition"
256 | cwe-365: "CWE-365: Race Condition in Switch"
257 | cwe-366: "CWE-366: Race Condition within a Thread"
258 | cwe-367: "CWE-367: Time-of-check Time-of-use (TOCTOU) Race Condition"
259 | cwe-368: "CWE-368: Context Switching Race Condition"
260 | cwe-369: "CWE-369: Divide By Zero"
261 | cwe-371: "CWE-371: State Issues"
262 | cwe-372: "CWE-372: Incomplete Internal State Distinction"
263 | cwe-374: "CWE-374: Passing Mutable Objects to an Untrusted Method"
264 | cwe-375: "CWE-375: Returning a Mutable Object to an Untrusted Caller"
265 | cwe-377: "CWE-377: Insecure Temporary File"
266 | cwe-378: "CWE-378: Creation of Temporary File With Insecure Permissions"
267 | cwe-379: "CWE-379: Creation of Temporary File in Directory with Insecure Permissions"
268 | cwe-384: "CWE-384: Session Fixation"
269 | cwe-385: "CWE-385: Covert Timing Channel"
270 | cwe-386: "CWE-386: Symbolic Name not Mapping to Correct Object"
271 | cwe-388: "CWE-388: Error Handling"
272 | cwe-390: "CWE-390: Detection of Error Condition Without Action"
273 | cwe-391: "CWE-391: Unchecked Error Condition"
274 | cwe-392: "CWE-392: Missing Report of Error Condition"
275 | cwe-393: "CWE-393: Return of Wrong Status Code"
276 | cwe-394: "CWE-394: Unexpected Status Code or Return Value"
277 | cwe-395: "CWE-395: Use of NullPointerException Catch to Detect NULL Pointer Dereference"
278 | cwe-396: "CWE-396: Declaration of Catch for Generic Exception"
279 | cwe-397: "CWE-397: Declaration of Throws for Generic Exception"
280 |   cwe-399: "CWE-399: Resource Management Errors"
281 | cwe-403: "CWE-403: Exposure of File Descriptor to Unintended Control Sphere ('File Descriptor Leak')"
282 | cwe-404: "CWE-404: Improper Resource Shutdown or Release"
283 | cwe-406: "CWE-406: Insufficient Control of Network Message Volume (Network Amplification)"
284 | cwe-408: "CWE-408: Incorrect Behavior Order: Early Amplification"
285 | cwe-409: "CWE-409: Improper Handling of Highly Compressed Data (Data Amplification)"
286 | cwe-41: "CWE-41: Improper Resolution of Path Equivalence"
287 | cwe-400: "CWE-400: Uncontrolled Resource Consumption"
288 | cwe-410: "CWE-410: Insufficient Resource Pool"
289 | cwe-412: "CWE-412: Unrestricted Externally Accessible Lock"
290 | cwe-413: "CWE-413: Improper Resource Locking"
291 | cwe-414: "CWE-414: Missing Lock Check"
292 | cwe-415: "CWE-415: Double Free"
293 | cwe-416: "CWE-416: Use After Free"
294 | cwe-419: "CWE-419: Unprotected Primary Channel"
295 | cwe-420: "CWE-420: Unprotected Alternate Channel"
296 | cwe-421: "CWE-421: Race Condition During Access to Alternate Channel"
297 | cwe-425: "CWE-425: Direct Request ('Forced Browsing')"
298 | cwe-426: "CWE-426: Untrusted Search Path"
299 | cwe-427: "CWE-427: Uncontrolled Search Path Element"
300 | cwe-428: "CWE-428: Unquoted Search Path or Element"
301 | cwe-430: "CWE-430: Deployment of Wrong Handler"
302 | cwe-431: "CWE-431: Missing Handler"
303 | cwe-432: "CWE-432: Dangerous Signal Handler not Disabled During Sensitive Operations"
304 | cwe-433: "CWE-433: Unparsed Raw Web Content Delivery"
305 | cwe-434: "CWE-434: Unrestricted Upload of File with Dangerous Type"
306 | cwe-437: "CWE-437: Incomplete Model of Endpoint Features"
307 | cwe-439: "CWE-439: Behavioral Change in New Version or Environment"
308 | cwe-440: "CWE-440: Expected Behavior Violation"
309 | cwe-441: "CWE-441: Unintended Proxy or Intermediary ('Confused Deputy')"
310 | cwe-444: "CWE-444: Inconsistent Interpretation of HTTP Requests ('HTTP Request Smuggling')"
311 | cwe-447: "CWE-447: Unimplemented or Unsupported Feature in UI"
312 | cwe-448: "CWE-448: Obsolete Feature in UI"
313 | cwe-449: "CWE-449: The UI Performs the Wrong Action"
314 | cwe-450: "CWE-450: Multiple Interpretations of UI Input"
315 |   cwe-451: "CWE-451: User Interface (UI) Misrepresentation of Critical Information"
316 | cwe-454: "CWE-454: External Initialization of Trusted Variables or Data Stores"
317 | cwe-455: "CWE-455: Non-exit on Failed Initialization"
318 | cwe-457: "CWE-457: Use of Uninitialized Variable"
319 | cwe-459: "CWE-459: Incomplete Cleanup"
320 | cwe-46: "CWE-46: Path Equivalence: 'filename ' (Trailing Space)"
321 | cwe-460: "CWE-460: Improper Cleanup on Thrown Exception"
322 | cwe-462: "CWE-462: Duplicate Key in Associative List (Alist)"
323 | cwe-463: "CWE-463: Deletion of Data Structure Sentinel"
324 | cwe-464: "CWE-464: Addition of Data Structure Sentinel"
325 | cwe-466: "CWE-466: Return of Pointer Value Outside of Expected Range"
326 | cwe-467: "CWE-467: Use of sizeof() on a Pointer Type"
327 | cwe-468: "CWE-468: Incorrect Pointer Scaling"
328 | cwe-469: "CWE-469: Use of Pointer Subtraction to Determine Size"
329 | cwe-470: "CWE-470: Use of Externally-Controlled Input to Select Classes or Code ('Unsafe Reflection')"
330 | cwe-471: "CWE-471: Modification of Assumed-Immutable Data (MAID)"
331 | cwe-472: "CWE-472: External Control of Assumed-Immutable Web Parameter"
332 | cwe-474: "CWE-474: Use of Function with Inconsistent Implementations"
333 | cwe-475: "CWE-475: Undefined Behavior for Input to API"
334 | cwe-476: "CWE-476: NULL Pointer Dereference"
335 | cwe-477: "CWE-477: Use of Obsolete Function"
336 | cwe-478: "CWE-478: Missing Default Case in Switch Statement"
337 | cwe-479: "CWE-479: Signal Handler Use of a Non-reentrant Function"
338 | cwe-480: "CWE-480: Use of Incorrect Operator"
339 | cwe-483: "CWE-483: Incorrect Block Delimitation"
340 | cwe-484: "CWE-484: Omitted Break Statement in Switch"
341 | cwe-485: "CWE-485: Insufficient Encapsulation"
342 | cwe-487: "CWE-487: Reliance on Package-level Scope"
343 | cwe-488: "CWE-488: Exposure of Data Element to Wrong Session"
344 | cwe-489: "CWE-489: Active Debug Code"
345 | cwe-494: "CWE-494: Download of Code Without Integrity Check"
346 | cwe-497: "CWE-497: Exposure of Sensitive System Information to an Unauthorized Control Sphere"
347 | cwe-501: "CWE-501: Trust Boundary Violation"
348 | cwe-502: "CWE-502: Deserialization of Untrusted Data"
349 | cwe-506: "CWE-506: Embedded Malicious Code"
350 | cwe-515: "CWE-515: Covert Storage Channel"
351 | cwe-521: "CWE-521: Weak Password Requirements"
352 | cwe-522: "CWE-522: Insufficiently Protected Credentials"
353 | cwe-523: "CWE-523: Unprotected Transport of Credentials"
354 | cwe-524: "CWE-524: Use of Cache Containing Sensitive Information"
355 | cwe-525: "CWE-525: Use of Web Browser Cache Containing Sensitive Information"
356 | cwe-532: "CWE-532: Insertion of Sensitive Information into Log File"
357 |   cwe-538: "CWE-538: File and Directory Information Exposure"
358 | cwe-540: "CWE-540: Inclusion of Sensitive Information in Source Code"
359 | cwe-544: "CWE-544: Missing Standardized Error Handling Mechanism"
360 | cwe-546: "CWE-546: Suspicious Comment"
361 | cwe-547: "CWE-547: Use of Hard-coded, Security-relevant Constants"
362 | cwe-549: "CWE-549: Missing Password Field Masking"
363 | cwe-551: "CWE-551: Incorrect Behavior Order: Authorization Before Parsing and Canonicalization"
364 | cwe-561: "CWE-561: Dead Code"
365 | cwe-562: "CWE-562: Return of Stack Variable Address"
366 | cwe-563: "CWE-563: Assignment to Variable without Use"
367 | cwe-564: "CWE-564: SQL Injection: Hibernate"
368 | cwe-565: "CWE-565: Reliance on Cookies without Validation and Integrity Checking"
369 | cwe-566: "CWE-566: Authorization Bypass Through User-Controlled SQL Primary Key"
370 | cwe-567: "CWE-567: Unsynchronized Access to Shared Data in a Multithreaded Context"
371 | cwe-570: "CWE-570: Expression is Always False"
372 | cwe-571: "CWE-571: Expression is Always True"
373 | cwe-580: "CWE-580: clone() Method Without super.clone()"
374 | cwe-581: "CWE-581: Object Model Violation: Just One of Equals and Hashcode Defined"
375 | cwe-584: "CWE-584: Return Inside Finally Block"
376 | cwe-585: "CWE-585: Empty Synchronized Block"
377 | cwe-586: "CWE-586: Explicit Call to Finalize()"
378 | cwe-587: "CWE-587: Assignment of a Fixed Address to a Pointer"
379 | cwe-588: "CWE-588: Attempt to Access Child of a Non-structure Pointer"
380 | cwe-59: "CWE-59: Improper Link Resolution Before File Access ('Link Following')"
381 | cwe-592: "CWE-592: Authentication Bypass Issues"
382 | cwe-595: "CWE-595: Comparison of Object References Instead of Object Contents"
383 | cwe-597: "CWE-597: Use of Wrong Operator in String Comparison"
384 | cwe-598: "CWE-598: Information Exposure Through Query Strings in GET Request"
385 | cwe-600: "CWE-600: Uncaught Exception in Servlet"
386 | cwe-601: "CWE-601: URL Redirection to Untrusted Site ('Open Redirect')"
387 | cwe-602: "CWE-602: Client-Side Enforcement of Server-Side Security"
388 | cwe-603: "CWE-603: Use of Client-Side Authentication"
389 | cwe-605: "CWE-605: Multiple Binds to the Same Port"
390 | cwe-606: "CWE-606: Unchecked Input for Loop Condition"
391 | cwe-609: "CWE-609: Double-Checked Locking"
392 | cwe-61: "CWE-61: UNIX Symbolic Link (Symlink) Following"
393 | cwe-611: "CWE-611: Improper Restriction of XML External Entity Reference"
394 | cwe-612: "CWE-612: Improper Authorization of Index Containing Sensitive Information"
395 | cwe-613: "CWE-613: Insufficient Session Expiration"
396 | cwe-617: "CWE-617: Reachable Assertion"
397 | cwe-618: "CWE-618: Exposed Unsafe ActiveX Method"
398 | cwe-619: "CWE-619: Dangling Database Cursor ('Cursor Injection')"
399 | cwe-62: "CWE-62: UNIX Hard Link"
400 | cwe-620: "CWE-620: Unverified Password Change"
401 | cwe-621: "CWE-621: Variable Extraction Error"
402 | cwe-622: "CWE-622: Improper Validation of Function Hook Argument"
403 | cwe-624: "CWE-624: Executable Regular Expression Error"
404 | cwe-625: "CWE-625: Permissive Regular Expression"
405 | cwe-626: "CWE-626: Null Byte Interaction Error (Poison Null Byte)"
406 | cwe-627: "CWE-627: Dynamic Variable Evaluation"
407 | cwe-628: "CWE-628: Function Call with Incorrectly Specified Arguments"
408 | cwe-639: "CWE-639: Authorization Bypass Through User-Controlled Key"
409 | cwe-640: "CWE-640: Weak Password Recovery Mechanism for Forgotten Password"
410 | cwe-641: "CWE-641: Improper Restriction of Names for Files and Other Resources"
411 | cwe-642: "CWE-642: External Control of Critical State Data"
412 | cwe-643: "CWE-643: Improper Neutralization of Data within XPath Expressions ('XPath Injection')"
413 | cwe-644: "CWE-644: Improper Neutralization of HTTP Headers for Scripting Syntax"
414 | cwe-645: "CWE-645: Overly Restrictive Account Lockout Mechanism"
415 | cwe-648: "CWE-648: Incorrect Use of Privileged APIs"
416 | cwe-649: "CWE-649: Reliance on Obfuscation or Encryption of Security-Relevant Inputs without Integrity Checking"
417 | cwe-65: "CWE-65: Windows hard link"
418 | cwe-652: "CWE-652: Improper Neutralization of Data within XQuery Expressions ('XQuery Injection')"
419 | cwe-66: "CWE-66: Improper Handling of File Names that Identify Virtual Resources"
420 | cwe-661: "CWE-661: Weaknesses in Software Written in PHP"
421 | cwe-663: "CWE-663: Use of a Non-reentrant Function in a Concurrent Context"
422 | cwe-665: "CWE-665: Improper Initialization"
423 | cwe-676: "CWE-676: Use of Potentially Dangerous Function"
424 | cwe-681: "CWE-681: Incorrect Conversion between Numeric Types"
425 | cwe-692: "CWE-692: Incomplete Blacklist to Cross-Site Scripting"
426 | cwe-693: "CWE-693: Protection Mechanism Failure"
427 | cwe-694: "CWE-694: Use of Multiple Resources with Duplicate Identifier"
428 | cwe-695: "CWE-695: Use of Low-Level Functionality"
429 | cwe-696: "CWE-696: Incorrect Behavior Order"
430 | cwe-697: "CWE-697: Incorrect Comparison"
431 | cwe-698: "CWE-698: Execution After Redirect (EAR)"
432 | cwe-703: "CWE-703: Improper Check or Handling of Exceptional Conditions"
433 | cwe-704: "CWE-704: Incorrect Type Conversion or Cast (Type Conversion)"
434 | cwe-708: "CWE-708: Incorrect Ownership Assignment"
435 | cwe-73: "CWE-73: External Control of File Name or Path"
436 | cwe-732: "CWE-732: Incorrect Permission Assignment for Critical Resource"
437 | cwe-733: "CWE-733: Compiler Optimization Removal or Modification of Security-critical Code"
438 | cwe-74: "CWE-74: Improper Neutralization of Special Elements in Output Used by a Downstream Component ('Injection')"
439 | cwe-749: "CWE-749: Exposed Dangerous Method or Function"
440 | cwe-754: "CWE-754: Improper Check for Unusual or Exceptional Conditions"
441 | cwe-756: "CWE-756: Missing Custom Error Page"
442 | cwe-757: "CWE-757: Selection of Less-Secure Algorithm During Negotiation"
443 | cwe-759: "CWE-759: Use of a One-Way Hash without a Salt"
444 | cwe-76: "CWE-76: Improper Neutralization of Equivalent Special Elements"
445 | cwe-763: "CWE-763: Release of Invalid Pointer or Reference"
446 | cwe-764: "CWE-764: Multiple Locks of a Critical Resource"
447 | cwe-765: "CWE-765: Multiple Unlocks of a Critical Resource"
448 | cwe-766: "CWE-766: Critical Data Element Declared Public"
449 | cwe-767: "CWE-767: Access to Critical Private Variable via Public Method"
450 | cwe-77: "CWE-77: Improper Neutralization of Special Elements used in a Command ('Command Injection')"
451 | cwe-770: "CWE-770: Allocation of Resources Without Limits or Throttling"
452 | cwe-771: "CWE-771: Missing Reference to Active Allocated Resource"
453 | cwe-772: "CWE-772: Missing Release of Resource after Effective Lifetime"
454 | cwe-776: "CWE-776: Improper Restriction of Recursive Entity References in DTDs ('XML Entity Expansion')"
455 | cwe-777: "CWE-777: Regular Expression without Anchors"
456 | cwe-778: "CWE-778: Insufficient Logging"
457 | cwe-779: "CWE-779: Logging of Excessive Data"
458 | cwe-78: "CWE-78: Improper Neutralization of Special Elements used in an OS Command ('OS Command Injection')"
459 | cwe-780: "CWE-780: Use of RSA Algorithm without OAEP"
460 | cwe-782: "CWE-782: Exposed IOCTL with Insufficient Access Control"
461 | cwe-783: "CWE-783: Operator Precedence Logic Error"
462 | cwe-785: "CWE-785: Use of Path Manipulation Function without Maximum-sized Buffer"
463 | cwe-786: "CWE-786: Access of Memory Location Before Start of Buffer"
464 | cwe-787: "CWE-787: Out-of-bounds Write"
465 | cwe-788: "CWE-788: Access of Memory Location After End of Buffer"
466 | cwe-789: "CWE-789: Uncontrolled Memory Allocation"
467 | cwe-79: "CWE-79: Improper Neutralization of Input During Web Page Generation ('Cross-site Scripting')"
468 | cwe-791: "CWE-791: Incomplete Filtering of Special Elements"
469 | cwe-795: "CWE-795: Only Filtering Special Elements at a Specified Location"
470 | cwe-798: "CWE-798: Use of Hard-coded Credentials"
471 | cwe-799: "CWE-799: Improper Control of Interaction Frequency"
472 | cwe-80: "CWE-80: Improper Neutralization of Script-Related HTML Tags in a Web Page (Basic XSS)"
473 | cwe-804: "CWE-804: Guessable CAPTCHA"
474 | cwe-805: "CWE-805: Buffer Access with Incorrect Length Value"
475 | cwe-807: "CWE-807: Reliance on Untrusted Inputs in a Security Decision"
476 | cwe-820: "CWE-820: Missing Synchronization"
477 | cwe-821: "CWE-821: Incorrect Synchronization"
478 | cwe-822: "CWE-822: Untrusted Pointer Dereference"
479 | cwe-823: "CWE-823: Use of Out-of-range Pointer Offset"
480 | cwe-824: "CWE-824: Access of Uninitialized Pointer"
481 | cwe-825: "CWE-825: Expired Pointer Dereference"
482 | cwe-826: "CWE-826: Premature Release of Resource During Expected Lifetime"
483 | cwe-828: "CWE-828: Signal Handler with Functionality that is not Asynchronous-Safe"
484 | cwe-829: "CWE-829: Inclusion of Functionality from Untrusted Control Sphere"
485 | cwe-831: "CWE-831: Signal Handler Function Associated with Multiple Signals"
486 | cwe-832: "CWE-832: Unlock of a Resource that is not Locked"
487 | cwe-833: "CWE-833: Deadlock"
488 | cwe-835: "CWE-835: Loop with Unreachable Exit Condition ('Infinite Loop')"
489 | cwe-836: "CWE-836: Use of Password Hash Instead of Password for Authentication"
490 | cwe-837: "CWE-837: Improper Enforcement of a Single, Unique Action"
491 | cwe-838: "CWE-838: Inappropriate Encoding for Output Context"
492 | cwe-839: "CWE-839: Numeric Range Comparison Without Minimum Check"
493 | cwe-840: "CWE-840: Business Logic Errors"
494 | cwe-841: "CWE-841: Improper Enforcement of Behavioral Workflow"
495 | cwe-842: "CWE-842: Placement of User into Incorrect Group"
496 | cwe-843: "CWE-843: Access of Resource Using Incompatible Type ('Type Confusion')"
497 | cwe-862: "CWE-862: Missing Authorization"
498 | cwe-863: "CWE-863: Incorrect Authorization"
499 | cwe-87: "CWE-87: Improper Neutralization of Alternate XSS Syntax"
500 | cwe-88: "CWE-88: Improper Neutralization of Argument Delimiters in a Command ('Argument Injection')"
501 | cwe-89: "CWE-89: Improper Neutralization of Special Elements used in an SQL Command ('SQL Injection')"
502 | cwe-90: "CWE-90: Improper Neutralization of Special Elements used in an LDAP Query ('LDAP Injection')"
503 | cwe-908: "CWE-908: Use of Uninitialized Resource"
504 | cwe-909: "CWE-909: Missing Initialization of Resource"
505 | cwe-91: "CWE-91: XML Injection (aka Blind XPath Injection)"
506 | cwe-95: "CWE-95: Improper Neutralization of Directives in Dynamically Evaluated Code ('Eval Injection')"
507 | cwe-910: "CWE-910: Use of Expired File Descriptor"
508 | cwe-911: "CWE-911: Improper Update of Reference Count"
509 | cwe-912: "CWE-912: Hidden Functionality (Backdoor)"
510 | cwe-914: "CWE-914: Improper Control of Dynamically-Identified Variables"
511 | cwe-915: "CWE-915: Improperly Controlled Modification of Dynamically-Determined Object Attributes"
512 | cwe-916: "CWE-916: Use of Password Hash With Insufficient Computational Effort"
513 | cwe-917: "CWE-917: Improper Neutralization of Special Elements used in an Expression Language Statement ('Expression Language Injection')"
514 | cwe-918: "CWE-918: Server-Side Request Forgery (SSRF)"
515 | cwe-919: "CWE-919: Weaknesses in Mobile Applications"
516 | cwe-920: "CWE-920: Improper Restriction of Power Consumption"
517 | cwe-921: "CWE-921: Storage of Sensitive Data in a Mechanism without Access Control"
518 | cwe-922: "CWE-922: Insecure Storage of Sensitive Information"
519 | cwe-923: "CWE-923: Improper Restriction of Communication Channel to Intended Endpoints"
520 | cwe-924: "CWE-924: Improper Enforcement of Message Integrity During Transmission in a Communication Channel"
521 | cwe-93: "CWE-93: Improper Neutralization of CRLF Sequences ('CRLF Injection')"
522 | cwe-939: "CWE-939: Improper Authorization in Handler for Custom URL Scheme"
523 | cwe-94: "CWE-94: Improper Control of Generation of Code ('Code Injection')"
524 | cwe-940: "CWE-940: Improper Verification of Source of a Communication Channel"
525 | cwe-941: "CWE-941: Incorrectly Specified Destination in a Communication Channel"
526 | cwe-942: "CWE-942: Overly Permissive Cross-domain Whitelist"
527 | cwe-943: "CWE-943: Improper Neutralization of Special Elements in Data Query Logic"
528 | cwe-96: "CWE-96: Improper Neutralization of Directives in Statically Saved Code ('Static Code Injection')"
529 | cwe-98: "CWE-98: Improper Control of Filename for Include/Require Statement in PHP Program"
530 | cwe-99: "CWE-99: Improper Control of Resource Identifiers ('Resource Injection')"
531 |
--------------------------------------------------------------------------------
/libsast/standards/owasp_masvs.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | masvs:
3 | arch-1: MSTG-ARCH-1
4 | arch-2: MSTG-ARCH-2
5 | arch-3: MSTG-ARCH-3
6 | arch-4: MSTG-ARCH-4
7 | arch-5: MSTG-ARCH-5
8 | arch-6: MSTG-ARCH-6
9 | arch-7: MSTG-ARCH-7
10 | arch-8: MSTG-ARCH-8
11 | arch-9: MSTG-ARCH-9
12 | arch-10: MSTG-ARCH-10
13 | arch-11: MSTG-ARCH-11
14 | arch-12: MSTG-ARCH-12
15 | storage-1: MSTG-STORAGE-1
16 | storage-2: MSTG-STORAGE-2
17 | storage-3: MSTG-STORAGE-3
18 | storage-4: MSTG-STORAGE-4
19 | storage-5: MSTG-STORAGE-5
20 | storage-6: MSTG-STORAGE-6
21 | storage-7: MSTG-STORAGE-7
22 | storage-8: MSTG-STORAGE-8
23 | storage-9: MSTG-STORAGE-9
24 | storage-10: MSTG-STORAGE-10
25 | storage-11: MSTG-STORAGE-11
26 | storage-12: MSTG-STORAGE-12
27 | storage-13: MSTG-STORAGE-13
28 | storage-14: MSTG-STORAGE-14
29 | storage-15: MSTG-STORAGE-15
30 | crypto-1: MSTG-CRYPTO-1
31 | crypto-2: MSTG-CRYPTO-2
32 | crypto-3: MSTG-CRYPTO-3
33 | crypto-4: MSTG-CRYPTO-4
34 | crypto-5: MSTG-CRYPTO-5
35 | crypto-6: MSTG-CRYPTO-6
36 | auth-1: MSTG-AUTH-1
37 | auth-2: MSTG-AUTH-2
38 | auth-3: MSTG-AUTH-3
39 | auth-4: MSTG-AUTH-4
40 | auth-5: MSTG-AUTH-5
41 | auth-6: MSTG-AUTH-6
42 | auth-7: MSTG-AUTH-7
43 | auth-8: MSTG-AUTH-8
44 | auth-9: MSTG-AUTH-9
45 | auth-10: MSTG-AUTH-10
46 | auth-11: MSTG-AUTH-11
47 | auth-12: MSTG-AUTH-12
48 | network-1: MSTG-NETWORK-1
49 | network-2: MSTG-NETWORK-2
50 | network-3: MSTG-NETWORK-3
51 | network-4: MSTG-NETWORK-4
52 | network-5: MSTG-NETWORK-5
53 | network-6: MSTG-NETWORK-6
54 | platform-1: MSTG-PLATFORM-1
55 | platform-2: MSTG-PLATFORM-2
56 | platform-3: MSTG-PLATFORM-3
57 | platform-4: MSTG-PLATFORM-4
58 | platform-5: MSTG-PLATFORM-5
59 | platform-6: MSTG-PLATFORM-6
60 | platform-7: MSTG-PLATFORM-7
61 | platform-8: MSTG-PLATFORM-8
62 | platform-9: MSTG-PLATFORM-9
63 | platform-10: MSTG-PLATFORM-10
64 | platform-11: MSTG-PLATFORM-11
65 | code-1: MSTG-CODE-1
66 | code-2: MSTG-CODE-2
67 | code-3: MSTG-CODE-3
68 | code-4: MSTG-CODE-4
69 | code-5: MSTG-CODE-5
70 | code-6: MSTG-CODE-6
71 | code-7: MSTG-CODE-7
72 | code-8: MSTG-CODE-8
73 | code-9: MSTG-CODE-9
74 | resilience-1: MSTG-RESILIENCE-1
75 | resilience-2: MSTG-RESILIENCE-2
76 | resilience-3: MSTG-RESILIENCE-3
77 | resilience-4: MSTG-RESILIENCE-4
78 | resilience-5: MSTG-RESILIENCE-5
79 | resilience-6: MSTG-RESILIENCE-6
80 | resilience-7: MSTG-RESILIENCE-7
81 | resilience-8: MSTG-RESILIENCE-8
82 | resilience-9: MSTG-RESILIENCE-9
83 | resilience-10: MSTG-RESILIENCE-10
84 | resilience-11: MSTG-RESILIENCE-11
85 | resilience-12: MSTG-RESILIENCE-12
86 | resilience-13: MSTG-RESILIENCE-13
87 |
--------------------------------------------------------------------------------
/libsast/standards/owasp_mobile_top10_2016.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | owasp-mobile:
3 | m1: 'M1: Improper Platform Usage'
4 | m2: 'M2: Insecure Data Storage'
5 | m3: 'M3: Insecure Communication'
6 | m4: 'M4: Insecure Authentication'
7 | m5: 'M5: Insufficient Cryptography'
8 | m6: 'M6: Insecure Authorization'
9 | m7: 'M7: Client Code Quality'
10 | m8: 'M8: Code Tampering'
11 | m9: 'M9: Reverse Engineering'
12 | m10: 'M10: Extraneous Functionality'
13 |
--------------------------------------------------------------------------------
/libsast/standards/owasp_mobile_top10_2024.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | owasp-mobile-2024:
3 | m1: 'M1: Improper Credential Usage'
4 | m2: 'M2: Inadequate Supply Chain Security'
5 | m3: 'M3: Insecure Authentication/Authorization'
6 | m4: 'M4: Insufficient Input/Output Validation'
7 | m5: 'M5: Insecure Communication'
8 | m6: 'M6: Inadequate Privacy Controls'
9 | m7: 'M7: Insufficient Binary Protections'
10 | m8: 'M8: Security Misconfiguration'
11 | m9: 'M9: Insecure Data Storage'
12 | m10: 'M10: Insufficient Cryptography'
13 |
--------------------------------------------------------------------------------
/libsast/standards/owasp_web_top10_2017.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | owasp-web:
3 | a1: 'A1: Injection'
4 | a2: 'A2: Broken Authentication'
5 | a3: 'A3: Sensitive Data Exposure'
6 | a4: 'A4: XML External Entities (XXE)'
7 | a5: 'A5: Broken Access Control'
8 | a6: 'A6: Security Misconfiguration'
9 | a7: 'A7: Cross-Site Scripting (XSS)'
10 | a8: 'A8: Insecure Deserialization'
11 | a9: 'A9: Using Components with Known Vulnerabilities'
12 | a10: 'A10: Insufficient Logging & Monitoring'
13 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "libsast"
3 | version = "3.1.6"
4 | description = "A generic SAST library built on top of semgrep and regex"
5 | keywords = ["libsast", "SAST", "Python SAST", "SAST API", "Regex SAST", "Pattern Matcher"]
6 | authors = ["Ajin Abraham "]
7 | license = "LGPL-3.0-or-later"
8 | readme = "README.md"
9 | repository = "https://github.com/ajinabraham/libsast"
10 | documentation = "https://github.com/ajinabraham/libsast/blob/master/README.md"
11 | packages = [{ include = "libsast", format = ["sdist", "wheel"]}]
12 | classifiers = [
13 | "Development Status :: 4 - Beta",
14 | "Intended Audience :: Developers",
15 | "Topic :: Security",
16 | "Topic :: Software Development :: Quality Assurance",
17 | ]
18 |
19 | [tool.poetry.urls]
20 | "Bug Tracker" = "https://github.com/ajinabraham/libsast/issues"
21 |
22 | [tool.poetry.scripts]
23 | libsast = "libsast.__main__:main"
24 |
25 | [tool.poetry.dependencies]
26 | python = "^3.8"
27 | requests = "*"
28 | pyyaml = ">=6.0"
29 | billiard = "^4.2.1"
30 |
31 | [tool.poetry.group.semgrep]
32 | optional = true
33 |
34 | [tool.poetry.group.semgrep.dependencies]
35 | semgrep = {version = "1.86.0", markers = "sys_platform != 'win32'"}
36 |
37 | [tool.poetry.group.dev.dependencies]
38 | bandit = "*"
39 | pytest = "*"
40 |
41 | [build-system]
42 | requires = ["poetry-core"]
43 | build-backend = "poetry.core.masonry.api"
44 |
--------------------------------------------------------------------------------
/tests/assets/files/.semgrepignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ajinabraham/libsast/73f3fc47feef07b4f8c0bf8bcd96aa85643959c6/tests/assets/files/.semgrepignore
--------------------------------------------------------------------------------
/tests/assets/files/alternate.python:
--------------------------------------------------------------------------------
1 |
2 | socket(124)
3 |
4 | os.open(foo)
--------------------------------------------------------------------------------
/tests/assets/files/choice_test.python:
--------------------------------------------------------------------------------
1 | import urllib
2 | urllib.open('http://foo')
3 |
4 |
5 | import re
--------------------------------------------------------------------------------
/tests/assets/files/choice_test2.python:
--------------------------------------------------------------------------------
1 | import os
--------------------------------------------------------------------------------
/tests/assets/files/comments.java:
--------------------------------------------------------------------------------
1 | /*
2 | This
3 | is
4 | outputStream.close();
5 | comment
6 | */
7 | /**
8 | * The Calculator claoutputStream.close();
9 | */
10 | class Calculator {
11 | /** The outputStream.close(); of given numbers. */
12 | static int add(int a, int b) {
13 | return a + b;
14 | }
15 |
16 | /** The sub() method returns subtraction of given numbers. */
17 | static int sub(int a, int b) {
18 | return a - b;
19 | }
20 | }
21 |
22 | class CommentExample1 {
23 | static void main(String[] args) {
24 | int i = 10;// outputStream.close();
25 | System.out.println(i);
26 | }
27 | }
--------------------------------------------------------------------------------
/tests/assets/files/example_file.py:
--------------------------------------------------------------------------------
1 | """Boto3 test."""
2 | import boto3
3 |
4 | c = boto3.client(host='8.8.8.8')
5 |
--------------------------------------------------------------------------------
/tests/assets/files/handlebars_template.hbs:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/assets/files/test_matcher.test:
--------------------------------------------------------------------------------
1 | //RegexAnd
2 | import android.view.View;
3 | import android.webkit.WebView;
4 | import android.widget.Button;
5 | import android.widget.EditText;
6 | WebView engine = (WebView) (findViewById(R.id.webView1));
7 | engine.loadUrl("file:/" + Environment.getExternalStorageDirectory().getPath() + "testing.html");
8 |
9 |
10 | //RegexOr
11 | public static ForgeAccount add(Context context, ForgeAccount account) {
12 | // Initialise GSON
13 | Gson gson = new Gson();
14 | // Get the already saved Forge accounts
15 | HashMap accounts = FileManager.load(context);
16 | // Give the account a random UID
17 | account.setId(java.util.UUID.randomUUID());
18 | // Add the new account to the hash map
19 | accounts.put(account.getId(), account);
20 | // Convert the list of Forge Accounts to a JSON string
21 | String jsonString = gson.toJson(new ArrayList<>(accounts.values()));
22 | // Internal save
23 | FileOutputStream outputStream;
24 | try {
25 | // Save the JSON to the file
26 | outputStream = context.openFileOutput(context.getString(R.string.filename_forge_accounts), Context.MODE_WORLD_READABLE);
27 | outputStream.write(jsonString.getBytes());
28 | outputStream.close();
29 | return account;
30 | } catch (Exception e) {
31 | // If there is an error, log it
32 | Log.e(Forge.ERROR_LOG, e.getMessage());
33 | return null;
34 | }
35 | }
36 |
37 | //RegexAndNot
38 |
39 | import WebKit
40 | var webView: WKWebView!
41 | override func loadView() {
42 | webView = WKWebView()
43 | webView.navigationDelegate = self
44 | webview.javaScriptEnabled=true
45 | view = webView
46 | }
47 |
48 | //RegexAndOr
49 |
50 | import android.telephony.SmsManager;
51 | @Override
52 | public void onRequestPermissionsResult(int requestCode,String permissions[], int[] grantResults) {
53 | switch (requestCode) {
54 | case MY_PERMISSIONS_REQUEST_SEND_SMS: {
55 | if (grantResults.length > 0
56 | && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
57 | SmsManager smsManager = SmsManager.getDefault();
58 | smsManager.sendTextMessage(phoneNo, null, message, null, null);
59 | Toast.makeText(getApplicationContext(), "SMS sent.",
60 | Toast.LENGTH_LONG).show();
61 | } else {
62 | Toast.makeText(getApplicationContext(),
63 | "SMS faild, please try again.", Toast.LENGTH_LONG).show();
64 | return;
65 | }
66 | }
67 | }
68 |
69 | }
70 | }
71 |
72 | //RegexCase
73 |
74 | // Test case
75 | common MATCH1 me.
--------------------------------------------------------------------------------
/tests/assets/invalid/invalid_type.yaml:
--------------------------------------------------------------------------------
1 | - id: test_regex
2 | message: This is a rule to test regex
3 | input_case: exact
4 | pattern: \.close\(\)
5 | severity: info
6 | type: regex
--------------------------------------------------------------------------------
/tests/assets/invalid/invalid_yaml.yaml:
--------------------------------------------------------------------------------
1 | - id: test_regex
2 | message: This is a rule to test regex
3 | input_case: exact
4 | pattern: \.close\(\)
5 | severity: info
6 | type: regex
--------------------------------------------------------------------------------
/tests/assets/invalid/missing_pattern.yaml:
--------------------------------------------------------------------------------
1 | - id: test_regex
2 | message: This is a rule to test regex
3 | input_case: exact
4 | severity: info
5 | type: Regex
--------------------------------------------------------------------------------
/tests/assets/invalid/missing_type.yaml:
--------------------------------------------------------------------------------
1 | - id: test_regex
2 | message: This is a rule to test regex
3 | input_case: exact
4 | pattern: \.close\(\)
5 | severity: info
--------------------------------------------------------------------------------
/tests/assets/multiple/part1.yaml:
--------------------------------------------------------------------------------
1 | - id: test_regex
2 | message: This is a rule to test regex
3 | input_case: exact
4 | pattern: \.close\(\)
5 | severity: info
6 | type: Regex
7 | - id: test_regex_and
8 | message: This is a rule to test regex_and
9 | input_case: exact
10 | pattern:
11 | - \.loadUrl\(.*getExternalStorageDirectory\(
12 | - webkit\.WebView
13 | severity: error
14 | type: RegexAnd
15 | - id: test_regex_or
16 | message: This is a rule to test regex_or
17 | input_case: exact
18 | pattern:
19 | - MODE_WORLD_READABLE|Context\.MODE_WORLD_READABLE
20 | - openFileOutput\(\s*".+"\s*,\s*1\s*\)
21 | severity: error
22 | type: RegexOr
23 |
--------------------------------------------------------------------------------
/tests/assets/multiple/part2.yaml:
--------------------------------------------------------------------------------
1 | - id: test_regex_and_not
2 | message: This is a rule to test regex_and_not
3 | input_case: exact
4 | pattern:
5 | - WKWebView
6 | - \.javaScriptEnabled=false
7 | severity: warning
8 | type: RegexAndNot
9 | - id: test_regex_and_or
10 | message: This is a rule to test regex_and_or
11 | input_case: exact
12 | pattern:
13 | - telephony.SmsManager
14 | - - sendMultipartTextMessage
15 | - sendTextMessage
16 | - vnd.android-dir/mms-sms
17 | severity: warning
18 | type: RegexAndOr
--------------------------------------------------------------------------------
/tests/assets/rules/choice_matcher/choice.yaml:
--------------------------------------------------------------------------------
1 | - id: rule1
2 | type: code
3 | choice_type: or
4 | message: Rule Desc
5 | selection: 'The rule has placeholder {}'
6 | choice:
7 | - - import os
8 | - import os module
9 | - - import re
10 | - import re module
11 | meta: some extra
12 | - id: rule2
13 | type: code
14 | choice_type: and
15 | message: Rule Desc to show and match
16 | selection: 'The rule has placeholder {}'
17 | choice:
18 | - - - requests
19 | - \.get\(
20 | - request module used
21 | - - - urllib
22 | - \.open\(
23 | - urllib module used
24 | meta: some extra
25 | - id: rule3
26 | type: no-code
27 | choice_type: all
28 | message: Some desc for non code rule
29 | selection: 'The rule placeholder {}.'
30 | choice:
31 | - - os\.open\(
32 | - OS Open
33 | - - os\.exec\(
34 | - Exec
35 | - - socket\(
36 | - Socket
37 | else: no API called
38 | meta: some extra
39 |
--------------------------------------------------------------------------------
/tests/assets/rules/pattern_matcher/patterns.yaml:
--------------------------------------------------------------------------------
1 | - id: test_regex
2 | message: This is a rule to test regex
3 | input_case: exact
4 | pattern: \.close\(\)
5 | severity: info
6 | type: Regex
7 | - id: test_regex_and
8 | message: This is a rule to test regex_and
9 | input_case: exact
10 | pattern:
11 | - \.loadUrl\(.*getExternalStorageDirectory\(
12 | - webkit\.WebView
13 | severity: error
14 | type: RegexAnd
15 | - id: test_regex_or
16 | message: This is a rule to test regex_or
17 | input_case: exact
18 | pattern:
19 | - MODE_WORLD_READABLE|Context\.MODE_WORLD_READABLE
20 | - openFileOutput\(\s*".+"\s*,\s*1\s*\)
21 | severity: error
22 | type: RegexOr
23 | - id: test_regex_and_not
24 | message: This is a rule to test regex_and_not
25 | input_case: exact
26 | pattern:
27 | - WKWebView
28 | - \.javaScriptEnabled=false
29 | severity: warning
30 | type: RegexAndNot
31 | - id: test_regex_and_or
32 | message: This is a rule to test regex_and_or
33 | input_case: exact
34 | pattern:
35 | - telephony.SmsManager
36 | - - sendMultipartTextMessage
37 | - sendTextMessage
38 | - vnd.android-dir/mms-sms
39 | severity: warning
40 | type: RegexAndOr
41 | - id: test_regex_multiline_and_metadata
42 | message: This is a rule to test regex
43 | input_case: exact
44 | pattern: ((?:public.+)+)
45 | severity: info
46 | type: Regex
47 | metadata:
48 | owasp-mobile: m1
49 | owasp-mobile-2024: m1
50 | owasp-web: a10
51 | cwe: cwe-1051
52 | masvs: storage-3
53 | foo: bar
54 | - id: test_regex_case
55 | message: This is a rule to test regex case
56 | input_case: lower
57 | pattern: match\d{1}
58 | severity: error
59 | type: Regex
60 |
--------------------------------------------------------------------------------
/tests/assets/rules/semantic_grep/sgrep.yaml:
--------------------------------------------------------------------------------
1 | rules:
2 | - id: boto-client-ip
3 | patterns:
4 | - pattern-inside: boto3.client(host="...")
5 | - pattern-regex: '\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
6 | message: "boto client using IP address"
7 | languages:
8 | - python
9 | severity: ERROR
10 | metadata:
11 | owasp-web: a8
12 | cwe: cwe-1050
13 | - id: node_deserialize
14 | patterns:
15 | - pattern-inside: |
16 | require('node-serialize');
17 | ...
18 | - pattern: |
19 | $X.unserialize(...)
20 | message: >-
21 | User controlled data in 'unserialize()' or 'deserialize()' function can
22 | result in Object Injection or Remote Code Injection.
23 | languages:
24 | - javascript
25 | severity: ERROR
--------------------------------------------------------------------------------
/tests/integration/test_cli.py:
--------------------------------------------------------------------------------
1 | """Unit Tests - test cli."""
2 | import subprocess
3 |
4 |
5 | def test_cli_help():
6 | out = subprocess.check_output(['python', 'libsast'])
7 | assert out
8 |
9 |
10 | def test_cli_pattern_match():
11 | try:
12 | subprocess.check_output([
13 | 'python',
14 | 'libsast',
15 | '-p',
16 | 'tests/assets/rules/pattern_matcher/patterns.yaml',
17 | 'tests/assets/files/'],
18 | stderr=subprocess.STDOUT,
19 | )
20 | except subprocess.CalledProcessError as exc:
21 | assert exc.returncode == 1
22 | assert b'test_regex' in exc.output
23 |
--------------------------------------------------------------------------------
/tests/unit/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ajinabraham/libsast/73f3fc47feef07b4f8c0bf8bcd96aa85643959c6/tests/unit/__init__.py
--------------------------------------------------------------------------------
/tests/unit/setup_test.py:
--------------------------------------------------------------------------------
1 | """Test Setup."""
2 | from pathlib import Path
3 |
4 | import libsast
5 |
6 |
7 | def scanner(options):
8 | base_dir = Path(__file__).parents[1]
9 | files_dir = base_dir / 'assets' / 'files'
10 | paths = [files_dir.as_posix()]
11 | rules_dir = base_dir / 'assets' / 'rules' / 'pattern_matcher'
12 | sgrep_dir = base_dir / 'assets' / 'rules' / 'semantic_grep'
13 | choice_dir = base_dir / 'assets' / 'rules' / 'choice_matcher'
14 | options['match_rules'] = rules_dir.as_posix()
15 | options['sgrep_rules'] = sgrep_dir.as_posix()
16 | options['choice_rules'] = choice_dir.as_posix()
17 | options['choice_extensions'] = {'.python'}
18 | options['alternative_path'] = files_dir / 'alternate.python'
19 | return libsast.Scanner(options, paths)
20 |
--------------------------------------------------------------------------------
/tests/unit/test_api.py:
--------------------------------------------------------------------------------
1 | """Unit Tests - test API."""
2 | from pathlib import Path
3 |
4 | import libsast
5 |
6 |
7 | def get_config():
8 | base_dir = Path(__file__).parents[1]
9 | rules_dir = base_dir / 'assets' / 'rules' / 'pattern_matcher'
10 | files_dir = base_dir / 'assets' / 'files'
11 | options = {'match_rules': rules_dir.as_posix()}
12 | paths = [files_dir.as_posix()]
13 | return options, paths
14 |
15 |
16 | def test_no_rule():
17 | assert libsast.Scanner({}, []).scan() == {}
18 |
19 |
20 | def test_no_path():
21 | options, _ = get_config()
22 | assert libsast.Scanner(options, []).scan() == {}
23 |
24 |
25 | def test_pattern_matcher_dir():
26 | options, paths = get_config()
27 | result = libsast.Scanner(options, paths).scan()
28 | assert result['pattern_matcher']['test_regex_or']
29 |
30 |
31 | def test_pattern_matcher_file():
32 | options, paths = get_config()
33 | file_path = [paths[0] + '/test_matcher.test']
34 | result = libsast.Scanner(options, file_path).scan()
35 | assert result['pattern_matcher']['test_regex']
36 |
--------------------------------------------------------------------------------
/tests/unit/test_matchers.py:
--------------------------------------------------------------------------------
1 | """Unit Tests - test pattern matchers."""
2 | from .setup_test import scanner
3 |
4 |
5 | def test_pattern_matcher():
6 | result = scanner({}).scan()
7 | assert result['pattern_matcher']['test_regex']
8 | assert result['pattern_matcher']['test_regex_and']
9 | assert result['pattern_matcher']['test_regex_or']
10 | assert result['pattern_matcher']['test_regex_and_not']
11 | assert result['pattern_matcher']['test_regex_and_or']
12 | assert result['pattern_matcher']['test_regex_multiline_and_metadata']
13 | assert result['pattern_matcher']['test_regex_case']
14 |
15 |
16 | def test_choice_matcher():
17 | result = scanner({}).scan()
18 | assert result['choice_matcher']['rule1']
19 | assert result['choice_matcher']['rule2']
20 | assert result['choice_matcher']['rule3']
21 |
--------------------------------------------------------------------------------
/tests/unit/test_options.py:
--------------------------------------------------------------------------------
1 | """Unit Tests - test pattern matchers."""
2 | from .setup_test import scanner
3 |
4 | from pathlib import Path
5 |
6 |
7 | def test_scan_test():
8 | options = {'match_extensions': ['.test']}
9 | result = scanner(options).scan()
10 | assert result['pattern_matcher']['test_regex']
11 |
12 |
13 | def test_scan_ext():
14 | options = {'match_extensions': ['.test']}
15 | result = scanner(options).scan()
16 | assert result['pattern_matcher'] is not None
17 |
18 |
19 | def test_scan_ext_not_present():
20 | options = {'match_extensions': ['.html']}
21 | result = scanner(options).scan()
22 | assert result['pattern_matcher'] == {}
23 |
24 |
25 | def test_ignore_extensions():
26 | options = {'ignore_extensions': ['.test', '.python']}
27 | result = scanner(options).scan()
28 | assert result['pattern_matcher'] == {}
29 | assert result['choice_matcher'] != {}
30 |
31 |
32 | def test_ignore_filenames():
33 | options = {'ignore_filenames': ['test_matcher.test']}
34 | result = scanner(options).scan()
35 | assert result['pattern_matcher'] == {}
36 |
37 |
38 | def test_ignore_paths():
39 | base_dir = Path(__file__).parents[1]
40 | files_dir = base_dir / 'assets' / 'files'
41 | paths = [files_dir.as_posix()]
42 | options = {'ignore_paths': paths}
43 | result = scanner(options).scan()
44 | assert result == {}
45 |
--------------------------------------------------------------------------------
/tests/unit/test_rules.py:
--------------------------------------------------------------------------------
1 | """Unit Tests - test pattern matcher rule file loading."""
2 | from pathlib import Path
3 |
4 | import libsast
5 |
6 | import pytest
7 |
8 |
9 | def test_load_dir():
10 | base_dir = Path(__file__).parents[1]
11 | files_dir = base_dir / 'assets' / 'files'
12 | rules_dir = base_dir / 'assets' / 'rules' / 'pattern_matcher'
13 | options = {'match_rules': rules_dir.as_posix()}
14 | paths = [files_dir.as_posix()]
15 | res = libsast.Scanner(options, paths).scan()
16 | assert res['pattern_matcher']['test_regex_or']
17 |
18 |
19 | def test_load_multiple_rules():
20 | base_dir = Path(__file__).parents[1]
21 | files_dir = base_dir / 'assets' / 'files'
22 | rules_dir = base_dir / 'assets' / 'multiple'
23 | options = {'match_rules': rules_dir.as_posix()}
24 | paths = [files_dir.as_posix()]
25 | res = libsast.Scanner(options, paths).scan()['pattern_matcher']
26 | assert res['test_regex_or']
27 | assert res['test_regex']
28 | assert res['test_regex_and']
29 | assert res['test_regex_or']
30 | assert res['test_regex_and_not']
31 | assert res['test_regex_and_or']
32 |
33 |
34 | def test_load_file():
35 | base_dir = Path(__file__).parents[1]
36 | files_dir = base_dir / 'assets' / 'files'
37 | rule_file = base_dir / 'assets' / 'rules'
38 | rule_file = rule_file / 'pattern_matcher' / 'patterns.yaml'
39 | options = {'match_rules': rule_file.as_posix()}
40 | paths = [files_dir.as_posix()]
41 | res = libsast.Scanner(options, paths).scan()
42 | assert res['pattern_matcher']['test_regex_or']
43 |
44 |
45 | def test_load_url():
46 | rule_url = ('https://raw.githubusercontent.com/ajinabraham/'
47 | 'libsast/master/tests/assets/rules/'
48 | 'pattern_matcher/patterns.yaml')
49 | base_dir = Path(__file__).parents[1]
50 | files_dir = base_dir / 'assets' / 'files'
51 | options = {'match_rules': rule_url}
52 | paths = [files_dir.as_posix()]
53 | res = libsast.Scanner(options, paths).scan()
54 | assert res['pattern_matcher']['test_regex_or']
55 |
56 |
57 | def test_load_invalid_url():
58 | rule_url = ('https://raw.githubusercontent.com/ajinabraham/'
59 | 'libsast/master/tests/assets/rules')
60 | base_dir = Path(__file__).parents[1]
61 | files_dir = base_dir / 'assets' / 'files'
62 | options = {'match_rules': rule_url}
63 | paths = [files_dir.as_posix()]
64 | with pytest.raises(libsast.exceptions.RuleDownloadError):
65 | libsast.Scanner(options, paths).scan()
66 |
67 |
68 | def test_load_file_invalid_path():
69 | base_dir = Path(__file__).parents[1]
70 | files_dir = base_dir / 'assets' / 'files'
71 | rule_file = base_dir / 'assets' / 'rules' / 'patterns.yoo'
72 | options = {'match_rules': rule_file.as_posix()}
73 | paths = [files_dir.as_posix()]
74 | with pytest.raises(libsast.exceptions.InvalidRuleError):
75 | libsast.Scanner(options, paths).scan()
76 |
77 |
78 | def test_load_file_invalid_yaml():
79 | base_dir = Path(__file__).parents[1]
80 | files_dir = base_dir / 'assets' / 'files'
81 | rule_file = base_dir / 'assets' / 'invalid' / 'invalid_yaml.yaml'
82 | options = {'match_rules': rule_file.as_posix()}
83 | paths = [files_dir.as_posix()]
84 | with pytest.raises(libsast.exceptions.YamlRuleParseError):
85 | libsast.Scanner(options, paths).scan()
86 |
87 |
88 | def test_load_file_invalid_type():
89 | base_dir = Path(__file__).parents[1]
90 | files_dir = base_dir / 'assets' / 'files'
91 | rule_file = base_dir / 'assets' / 'invalid' / 'invalid_type.yaml'
92 | options = {'match_rules': rule_file.as_posix()}
93 | paths = [files_dir.as_posix()]
94 | with pytest.raises(libsast.exceptions.MatcherNotFoundError):
95 | libsast.Scanner(options, paths).scan()
96 |
97 |
98 | def test_load_file_missing_type():
99 | base_dir = Path(__file__).parents[1]
100 | files_dir = base_dir / 'assets' / 'files'
101 | rule_file = base_dir / 'assets' / 'invalid' / 'missing_type.yaml'
102 | options = {'match_rules': rule_file.as_posix()}
103 | paths = [files_dir.as_posix()]
104 | with pytest.raises(libsast.exceptions.PatternKeyMissingError):
105 | libsast.Scanner(options, paths).scan()
106 |
107 |
108 | def test_load_file_missing_pattern():
109 | base_dir = Path(__file__).parents[1]
110 | files_dir = base_dir / 'assets' / 'files'
111 | rule_file = base_dir / 'assets' / 'invalid' / 'missing_pattern.yaml'
112 | options = {'match_rules': rule_file.as_posix()}
113 | paths = [files_dir.as_posix()]
114 | with pytest.raises(libsast.exceptions.PatternKeyMissingError):
115 | libsast.Scanner(options, paths).scan()
116 |
117 |
118 | def test_ignore_comments():
119 | base_dir = Path(__file__).parents[1]
120 | files_dir = base_dir / 'assets' / 'files' / 'comments.java'
121 | rule_file = base_dir / 'assets' / 'rules'
122 | rule_file = rule_file / 'pattern_matcher' / 'patterns.yaml'
123 | options = {'match_rules': rule_file.as_posix()}
124 | paths = [files_dir.as_posix()]
125 | res = libsast.Scanner(options, paths).scan()
126 | assert res['pattern_matcher'] == {}
127 |
--------------------------------------------------------------------------------
/tests/unit/test_semgrep.py:
--------------------------------------------------------------------------------
1 | """Unit Tests - test semantic grep patterns."""
2 | from .setup_test import scanner
3 |
4 | from libsast import (
5 | standards,
6 | )
7 |
8 |
9 | def test_semgrep():
10 | stds = standards.get_standards()
11 | result = scanner({}).scan()
12 | match = result['semantic_grep']['matches']['boto-client-ip']
13 | assert match
14 | assert match['files'][0]['match_position']
15 | assert match['files'][0]['match_lines']
16 | assert match['files'][0]['file_path']
17 | assert match['metadata']
18 | assert match['metadata']['description']
19 | assert match['metadata']['severity']
20 | assert match['metadata']['cwe'] == stds['cwe']['cwe-1050']
21 | assert match['metadata']['owasp-web'] == stds['owasp-web']['a8']
22 |
--------------------------------------------------------------------------------
/tests/unit/test_standards.py:
--------------------------------------------------------------------------------
1 | """Unit Tests - test standards mapping."""
2 | from pathlib import Path
3 |
4 | from libsast import (
5 | Scanner,
6 | standards,
7 | )
8 |
9 |
10 | def test_standards_mapping():
11 | stds = standards.get_standards()
12 | base_dir = Path(__file__).parents[1]
13 | files_dir = base_dir / 'assets' / 'files'
14 | rules_dir = base_dir / 'assets' / 'rules' / 'pattern_matcher'
15 | options = {'match_rules': rules_dir.as_posix()}
16 | paths = [files_dir.as_posix()]
17 | res = Scanner(options, paths).scan()
18 | match = res['pattern_matcher']['test_regex_multiline_and_metadata']
19 | assert match
20 | assert match['metadata']
21 | assert match['metadata']['cwe'] == stds['cwe']['cwe-1051']
22 | assert match['metadata']['owasp-mobile'] == stds['owasp-mobile']['m1']
23 | assert match['metadata']['owasp-mobile-2024'] == stds['owasp-mobile-2024']['m1']
24 | assert match['metadata']['owasp-web'] == stds['owasp-web']['a10']
25 | assert match['metadata']['masvs'] == stds['masvs']['storage-3']
26 | assert match['metadata']['foo'] == 'bar'
27 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = py39, py310
3 | skipsdist = True
4 |
5 | [testenv]
6 | deps =
7 | poetry
8 | commands =
9 | poetry run pytest -v --cache-clear tests
10 | setenv =
11 | SEMGREP_R2C_INTERNAL_EXPLICIT_SEMGREPIGNORE = ./tests/assets/files/.semgrepignore
12 |
13 | [testenv:lint]
14 | skip_install = true
15 | deps =
16 | autopep8
17 | pydocstyle
18 | flake8
19 | flake8-broken-line
20 | flake8-bugbear
21 | flake8-builtins
22 | flake8-colors
23 | flake8-commas
24 | flake8-comprehensions
25 | flake8-docstrings
26 | flake8-eradicate
27 | flake8-import-order
28 | flake8-logging-format
29 | flake8-quotes
30 | flake8-self
31 | pep8-naming
32 | radon
33 | commands =
34 | autopep8 --in-place --recursive --exclude venv,*.pyc .
35 | flake8 {posargs}
36 |
37 | [testenv:bandit]
38 | skip_install = true
39 | deps =
40 | poetry
41 | commands =
42 | poetry run bandit -ll libsast -r
43 |
44 | [testenv:build]
45 | skip_install = true
46 | allowlist_externals =
47 | bash
48 | deps =
49 | poetry
50 | commands =
51 | poetry build
52 | bash -c 'python -m pip install dist/*.whl'
53 | libsast
54 |
55 | [testenv:clean]
56 | deps =
57 | skip_install = true
58 | allowlist_externals =
59 | bash
60 | find
61 | rm
62 | commands =
63 | find . -name "*.py[co]" -delete
64 | bash -c 'find . -name "__pycache__" -exec rm -fr \{\} \; -prune'
65 | bash -c 'find . -depth -name "*.egg-info" -exec rm -fr \{\} \;'
66 | rm -rf .pytest_cache
67 | rm -rf build
68 | rm -rf dist
69 |
70 | ; Reference for error codes:
71 | ; http://www.pydocstyle.org/en/latest/error_codes.html
72 | [flake8]
73 | enable-extensions = G
74 | exclude =
75 | .tox
76 | .git
77 | __pycache__
78 | build
79 | dist
80 | venv
81 | ignore =
82 | D101,
83 | # Missing docstring in public class
84 | D103,
85 | # Missing docstring in public function
86 | D104,
87 | # Missing docstring in public package
88 | D107,
89 | # Missing docstring in __init__
90 | D401,
91 | # Allow non imperative mood
92 | Q003,
93 | # Allow only ' for strings
94 | I100,
95 | # Use python sort imports
96 | SF01,
97 | # Allow Private member access
98 | radon_max_cc = 10
99 | max-line-length = 88
--------------------------------------------------------------------------------