├── .github
│   └── workflows
│       ├── pylint.yml
│       ├── python-app.yml
│       ├── run-yara-forge.yml
│       └── weekly-release.yml
├── .gitignore
├── .gitmodules
├── .vscode
│   └── settings.json
├── LICENSE
├── README.md
├── main
│   ├── __init__.py
│   ├── other_evals.py
│   ├── rule_collector.py
│   ├── rule_output.py
│   └── rule_processors.py
├── qa
│   ├── __init__.py
│   └── rule_qa.py
├── requirements.txt
├── tests
│   └── test_rule_collector.py
├── yara-forge-config.yml
├── yara-forge-custom-scoring.yml
└── yara-forge.py
/.github/workflows/pylint.yml:
--------------------------------------------------------------------------------
1 | # Runs pylint over every tracked Python file on each push, once per Python
2 | # version listed in the matrix below.
3 | name: Pylint
4 |
5 | on: [push]
6 |
7 | # Linting only needs read access to the repository contents.
8 | permissions:
9 |   contents: read
10 |
11 | jobs:
12 |   build:
13 |     runs-on: ubuntu-latest
14 |     strategy:
15 |       matrix:
16 |         # Quoted so that "3.10" is not parsed as the float 3.1.
17 |         python-version: ["3.8", "3.9", "3.10"]
18 |     steps:
19 |       - name: Check out repository with submodules
20 |         uses: actions/checkout@v4
21 |         with:
22 |           submodules: 'recursive'  # Fetches all submodules recursively
23 |       - name: Set up Python ${{ matrix.python-version }}
24 |         uses: actions/setup-python@v5
25 |         with:
26 |           python-version: ${{ matrix.python-version }}
27 |       - name: Install dependencies
28 |         run: |
29 |           python -m pip install --upgrade pip
30 |           pip install pylint
31 |           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
32 |       - name: Analysing the code with pylint
33 |         # The step fails when the overall pylint score drops below 9.0.
34 |         run: |
35 |           pylint --fail-under=9.0 $(git ls-files '*.py')
36 |
--------------------------------------------------------------------------------
/.github/workflows/python-app.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3 |
4 | name: Python application
5 |
6 | on:
7 |   push:
8 |     branches: [ "master" ]
9 |   pull_request:
10 |     branches: [ "master" ]
11 |
12 | permissions:
13 |   contents: read
14 |
15 | jobs:
16 |   build:
17 |
18 |     runs-on: ubuntu-latest
19 |
20 |     steps:
21 |       - name: Check out repository with submodules
22 |         uses: actions/checkout@v4
23 |         with:
24 |           submodules: 'recursive'  # Fetches all submodules recursively
25 |       - name: Set up Python 3.10
26 |         uses: actions/setup-python@v5
27 |         with:
28 |           python-version: "3.10"  # quoted so YAML does not read it as 3.1
29 |       - name: Install dependencies
30 |         run: |
31 |           python -m pip install --upgrade pip
32 |           pip install flake8 pytest
33 |           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
34 |       - name: Lint with flake8
35 |         run: |
36 |           # stop the build if there are Python syntax errors or undefined names
37 |           flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
38 |           # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
39 |           flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
40 |       - name: Test with pytest
41 |         run: |
42 |           python -m pytest tests
43 |
--------------------------------------------------------------------------------
/.github/workflows/run-yara-forge.yml:
--------------------------------------------------------------------------------
1 | # This workflow installs the system and Python dependencies and then runs YARA-Forge
2 | # For more information on Python workflows see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3 |
4 | name: Run YARA-Forge
5 |
6 | on:
7 |   push:
8 |     branches: [ "master" ]
9 |   pull_request:
10 |     branches: [ "master" ]
11 |
12 | permissions:
13 |   contents: read
14 |
15 | jobs:
16 |   build:
17 |
18 |     runs-on: ubuntu-latest
19 |
20 |     steps:
21 |       - name: Check out repository with submodules
22 |         uses: actions/checkout@v4
23 |         with:
24 |           submodules: 'recursive'
25 |
26 |       - name: Set up Python 3.10
27 |         uses: actions/setup-python@v5
28 |         with:
29 |           python-version: "3.10"
30 |
31 |       - name: Install system dependencies
32 |         run: |
33 |           sudo apt-get update
34 |           sudo apt-get install -y g++ python3-dev libre2-dev
35 |
36 |       # Fails the job early when the re2 header is not at the expected path.
37 |       - name: Confirm libre2-dev headers
38 |         run: |
39 |           dpkg -L libre2-dev | grep re2.h || echo "re2.h not found"
40 |           test -f /usr/include/re2/re2.h && echo "Header exists ✅" || (echo "Header missing ❌" && exit 1)
41 |
42 |       # qa/yaraQA is a git submodule that ships its own requirements file.
43 |       - name: Install Python dependencies
44 |         run: |
45 |           python -m pip install --upgrade pip
46 |           pip install -r requirements.txt
47 |           pip install -r qa/yaraQA/requirements.txt
48 |
49 |       - name: Run YARA-Forge
50 |         run: |
51 |           python yara-forge.py
52 |
--------------------------------------------------------------------------------
/.github/workflows/weekly-release.yml:
--------------------------------------------------------------------------------
1 | # Builds the YARA Forge rule packages once a week (or on manual dispatch) and
2 | # publishes them, together with the build logs, as a GitHub release.
3 | name: Weekly Release
4 |
5 | # Creating the release and uploading its assets requires write access.
6 | permissions:
7 |   contents: write
8 |
9 | on:
10 |   schedule:
11 |     - cron: '0 0 * * 0'  # Runs at 00:00 every Sunday
12 |   workflow_dispatch:
13 |
14 | jobs:
15 |   create-release:
16 |     runs-on: ubuntu-latest
17 |
18 |     steps:
19 |       - name: Check out repository with submodules
20 |         uses: actions/checkout@v4
21 |         with:
22 |           submodules: 'recursive'
23 |
24 |       - name: Set up Python 3.10
25 |         uses: actions/setup-python@v5
26 |         with:
27 |           python-version: "3.10"
28 |
29 |       - name: Install system dependencies
30 |         run: |
31 |           sudo apt-get update
32 |           sudo apt-get install -y g++ python3-dev libre2-dev
33 |
34 |       - name: Confirm libre2-dev headers
35 |         run: |
36 |           dpkg -L libre2-dev | grep re2.h || echo "re2.h not found"
37 |           test -f /usr/include/re2/re2.h && echo "Header exists ✅" || (echo "Header missing ❌" && exit 1)
38 |
39 |       - name: Install Python dependencies
40 |         run: |
41 |           python -m pip install --upgrade pip
42 |           pip install -r requirements.txt
43 |           pip install -r qa/yaraQA/requirements.txt
44 |
45 |       - name: Run YARA-Forge
46 |         run: |
47 |           python yara-forge.py
48 |
49 |       # The date (YYYYMMDD) is used as the release tag and in the release name.
50 |       - name: Get current date
51 |         run: echo "CURRENT_DATE=$(date +'%Y%m%d')" >> "$GITHUB_ENV"
52 |         shell: bash
53 |
54 |       # Zips every directory under packages/ and records one
55 |       # "<folder>_zip_path=<zip file>" line per package for the next step.
56 |       - name: Zip and upload packages
57 |         run: |
58 |           for folder in packages/*; do
59 |             if [ -d "$folder" ]; then
60 |               foldername=$(basename "$folder")
61 |               zipfile="yara-forge-rules-${foldername}.zip"
62 |               zip -r "$zipfile" "$folder"
63 |               echo "${foldername}_zip_path=${zipfile}" >> zip_paths.env
64 |             fi
65 |           done
66 |         id: zip_files
67 |         shell: bash
68 |
69 |       # Exposes the recorded zip paths (e.g. core_zip_path) to later steps.
70 |       - name: Set zip paths as env
71 |         run: cat zip_paths.env >> "$GITHUB_ENV"
72 |
73 |       - name: Create Release
74 |         id: create_release
75 |         uses: softprops/action-gh-release@v1
76 |         with:
77 |           tag_name: ${{ env.CURRENT_DATE }}
78 |           name: YARA Forge Rule Set Release ${{ env.CURRENT_DATE }}
79 |           body_path: build_stats.md
80 |           draft: false
81 |           prerelease: false
82 |         env:
83 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
84 |
85 |       # always() keeps each upload below running even if an earlier step
86 |       # failed; the upload_url check skips them when no release was created,
87 |       # instead of letting them fail on an empty upload URL.
88 |       - name: Upload build log file
89 |         if: always() && steps.create_release.outputs.upload_url != ''
90 |         uses: actions/upload-release-asset@v1
91 |         env:
92 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
93 |         with:
94 |           upload_url: ${{ steps.create_release.outputs.upload_url }}
95 |           asset_path: yara-forge.log
96 |           asset_name: yara-forge-log.txt
97 |           asset_content_type: text/plain
98 |
99 |       - name: Upload rule issues log file
100 |         if: always() && steps.create_release.outputs.upload_url != ''
101 |         uses: actions/upload-release-asset@v1
102 |         env:
103 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
104 |         with:
105 |           upload_url: ${{ steps.create_release.outputs.upload_url }}
106 |           asset_path: yara-forge-rule-issues.yml
107 |           asset_name: yara-forge-rule-issues.yml
108 |           asset_content_type: text/plain
109 |
110 |       - name: Upload core asset
111 |         if: always() && steps.create_release.outputs.upload_url != ''
112 |         uses: actions/upload-release-asset@v1
113 |         env:
114 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
115 |         with:
116 |           upload_url: ${{ steps.create_release.outputs.upload_url }}
117 |           asset_path: ${{ env.core_zip_path }}
118 |           asset_name: yara-forge-rules-core.zip
119 |           asset_content_type: application/zip
120 |
121 |       - name: Upload extended asset
122 |         if: always() && steps.create_release.outputs.upload_url != ''
123 |         uses: actions/upload-release-asset@v1
124 |         env:
125 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
126 |         with:
127 |           upload_url: ${{ steps.create_release.outputs.upload_url }}
128 |           asset_path: ${{ env.extended_zip_path }}
129 |           asset_name: yara-forge-rules-extended.zip
130 |           asset_content_type: application/zip
131 |
132 |       - name: Upload full asset
133 |         if: always() && steps.create_release.outputs.upload_url != ''
134 |         uses: actions/upload-release-asset@v1
135 |         env:
136 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
137 |         with:
138 |           upload_url: ${{ steps.create_release.outputs.upload_url }}
139 |           asset_path: ${{ env.full_zip_path }}
140 |           asset_name: yara-forge-rules-full.zip
141 |           asset_content_type: application/zip
142 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | packages/*
3 | *.pyc
4 | yara-forge.log
5 | repos/*
6 | yara-forge-rule-issues.yml
7 | build_stats.md
8 | yara-forge-config-testing.yml
9 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "qa/yaraQA"]
2 | path = qa/yaraQA
3 | url = https://github.com/Neo23x0/yaraQA.git
4 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "cSpell.words": [
3 | "dateparser"
4 | ]
5 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | <one line to give the program's name and a brief idea of what it does.>
635 | Copyright (C) <year> <name of author>
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | <program> Copyright (C) <year> <name of author>
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # yara-forge
2 |
3 | Automated YARA Rule Standardization and Quality Assurance Tool
4 |
5 | YARA Forge is a robust tool designed to streamline the process of sourcing, standardizing, and optimizing YARA rules. It automates the collection of rules from various online repositories, ensures they adhere to a unified standard, conducts thorough quality checks, and eliminates any broken or non-compliant rules.
6 |
7 | The tool generates curated rule packages, ready for integration into various security products, with an emphasis on performance and stability.
8 |
9 | Perfect for analysts and security teams seeking consistent, reliable, and effective YARA rules.
10 |
11 | This [web page](https://yarahq.github.io/) contains all information on the YARA Forge project.
12 |
13 | Note: the repositories used for YARA Forge have been carefully selected. If you want to add other sets that random people publish on the Internet, you're on your own.
14 |
--------------------------------------------------------------------------------
/main/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YARAHQ/yara-forge/641a61f4d694c8f6b378ca093165d62e42205688/main/__init__.py
--------------------------------------------------------------------------------
/main/other_evals.py:
--------------------------------------------------------------------------------
1 | """
2 | This file contains the code for the performance tests.
3 | """
4 | import re
5 | import logging
6 | import time
7 | import zipfile
8 |
class PerformanceTimer:
    """
    Regex performance tests.

    On construction the sample data is loaded from a ZIP archive and two
    baseline regexes (a "bad" and a "good" one) are timed against it. The
    mean of both durations is stored in ``self.threshold``.
    """

    # The test file was created with the following commands:
    # ReactOS v0.4.14 ISO folder > reactos
    # strings * >> react-os-strings.txt
    # strings -el * >> react-os-strings.txt
    # strings ./system32/ * >> react-os-strings.txt
    # strings -el ./system32/ * >> react-os-strings.txt
    sample_data_file = "./tests/data/react-os-strings.txt.zip"

    def __init__(self):
        # Load the sample data file, decompress the ZIP archive and load into memory.
        # If the archive holds several members, the last one read wins.
        with zipfile.ZipFile(self.sample_data_file, 'r') as zip_ref:
            for name in zip_ref.namelist():
                with zip_ref.open(name) as f:
                    self.test_string = f.read().decode("utf-8")
        # Run the baseline measurements
        self.bad_duration, self.good_duration = self.baseline_measurements()
        # Define the threshold for the regex strings
        self.threshold = (self.bad_duration + self.good_duration) / 2
        logging.debug("Regex Baseline Threshold: %f", self.threshold)

    def baseline_measurements(self):
        """
        Time the two baseline regexes against the sample data.

        Returns:
            tuple: ``(bad_duration, good_duration)`` as returned by
            ``test_regex_performance`` for each baseline regex.
        """
        # Log the start of the baseline measurements
        logging.debug("Starting the regex baseline measurements")

        # Test the performance of the baseline regex
        bad_duration = self.test_regex_performance(r"[\w\-.]{1,3}@[\w\-.]{1,3}")
        logging.debug("Bad regex duration: %f", bad_duration)
        good_duration = self.test_regex_performance(r"Who is John Galt\?")
        logging.debug("Good regex duration: %f", good_duration)

        return bad_duration, good_duration

    def test_regex_performance(self, regex, iterations=5):
        """
        Measure how long it takes to apply a regex to the sample data.

        Args:
            regex: the regular expression as a string; one enclosing pair of
                '/' characters is removed before compiling.
            iterations: how many times the regex is applied to the sample data.

        Returns:
            The total duration in seconds for all iterations, or 0 if the
            regex cannot be compiled.
        """
        # Remove a '/' at the beginning and end of the regex.
        # startswith/endswith (instead of indexing) keep an empty regex
        # from raising an IndexError.
        if regex.startswith('/') and regex.endswith('/'):
            regex = regex[1:-1]
        try:
            # Compile the regex first for better performance
            pattern = re.compile(regex)
        except re.error as e:
            logging.error("Regex error: %s", e)
            return 0

        # perf_counter is a monotonic, high-resolution clock, so the result
        # is not distorted by system clock adjustments.
        start_time = time.perf_counter()

        # Apply the regex to the test string for the given number of iterations
        for _ in range(iterations):
            pattern.findall(self.test_string)

        # Total duration of all iterations
        return time.perf_counter() - start_time
77 |
--------------------------------------------------------------------------------
/main/rule_collector.py:
--------------------------------------------------------------------------------
1 | """
2 | This module contains functions for retrieving YARA rules from online repositories.
3 | """
4 | import os
5 | import shutil
6 | import datetime
7 | import logging
8 | #from pprint import pprint
9 | import plyara
10 | from git import Repo
11 |
12 |
def process_yara_file(file_path, repo_folder, yara_rule_sets):
    """
    Parse a single YARA rule file and append the result to ``yara_rule_sets``.

    Args:
        file_path: path of the YARA rule file to read.
        repo_folder: folder of the repository; the ``file_path`` stored in
            the rule set is relative to this folder.
        yara_rule_sets: list that is extended in place with a dict holding
            the keys ``rules`` (the parsed rules) and ``file_path``.

    Files that cannot be read, decoded or parsed are logged and skipped so
    that one broken file does not abort the whole collection run.
    """
    # Debug output
    logging.debug("Found YARA rule file: %s", file_path)

    # Read the YARA file
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            yara_file_content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        logging.error("Skipping YARA rule file that could not be read: %s (%s)",
                      file_path, e)
        return

    # Parse the rules in the file
    try:
        # Get the rule file path in the repository
        relative_path = os.path.relpath(file_path, start=repo_folder)
        # Parse the YARA rules in the file
        yara_parser = plyara.Plyara()
        yara_rules = yara_parser.parse_string(yara_file_content)
        # Create a YARA rule set object
        yara_rule_set = {
            "rules": yara_rules,
            "file_path": relative_path,
        }
        # Debug output
        logging.debug("Found %d YARA rules in file: %s",
                      len(yara_rules), file_path)
        # Append to list of YARA rule sets
        yara_rule_sets.append(yara_rule_set)

    except Exception as e:  # pylint: disable=broad-except
        # Report the reason through logging (not print) so it ends up
        # wherever the rest of the log output goes.
        logging.error("Skipping YARA rule in the following "
                      "file because of a syntax error: %s (%s)", file_path, e)
43 |
def retrieve_yara_rule_sets(repo_staging_dir, yara_repos):
    """
    Retrieves YARA rules from online repositories.

    Args:
        repo_staging_dir: directory into which the repositories are cloned.
            An existing directory is removed first.
        yara_repos: list of repository dicts. The keys ``name``, ``url``,
            ``branch``, ``author`` and ``quality`` are read; ``path`` and
            ``recursive`` are optional. Each dict is extended in place with
            ``owner``, ``repo``, ``commit_hash``, ``license`` and
            ``license_url``.

    Returns:
        list: one dict per repository containing the repository information
        and the parsed rule sets (key ``rules_sets``).
    """

    # The list of YARA rule sets of all repositories
    yara_rule_repo_sets = []

    # Remove the existing repo directory and all its contents
    if os.path.exists(repo_staging_dir):
        shutil.rmtree(repo_staging_dir, ignore_errors=False)

    # Loop over the repositories
    for repo in yara_repos:

        # Output the repository information to the console in a single line
        logging.info("Retrieving YARA rules from repository: %s", repo['name'])

        # Extract the owner and the repository name from the URL
        repo_url_parts = repo['url'].split("/")
        repo['owner'] = repo_url_parts[3]
        repo_name = '/'.join(repo_url_parts[4:])
        # Only strip a trailing ".git"; splitting on the first dot would
        # truncate repository names that contain a dot themselves.
        if repo_name.endswith(".git"):
            repo_name = repo_name[:-len(".git")]
        repo['repo'] = repo_name

        repo_folder = os.path.join(repo_staging_dir, repo['owner'], repo['repo'])
        if not os.path.exists(repo_folder):
            # The repository has not been cloned yet, clone it
            repo['commit_hash'] = Repo.clone_from(
                repo['url'], repo_folder, branch=repo['branch']).head.commit.hexsha
        else:
            # Already cloned (e.g. by an earlier entry), get the latest commit hash
            repo['commit_hash'] = Repo(repo_folder).head.commit.hexsha

        # Find a LICENSE file and save it into the repository object
        repo['license'], repo['license_url'] = _find_repo_license(
            repo_folder, repo['url'], repo['commit_hash'])

        # Walk through the extracted folders and find all YARA files
        yara_rule_sets = []

        # Walk a sub folder if one is set in the config
        walk_folder = repo_folder
        if 'path' in repo:
            walk_folder = os.path.join(repo_folder, repo['path'])
        # Print the processed folder
        logging.debug("Processing folder: %s", walk_folder)

        # Check if the path should be walked recursively (default: yes)
        recursive = repo.get('recursive', True)

        if recursive:
            # Walk the folder recursively
            for root, _, files in os.walk(walk_folder):
                for file in files:
                    if file.endswith((".yar", ".yara")):
                        file_path = os.path.join(root, file)
                        process_yara_file(file_path, repo_folder, yara_rule_sets)
        else:
            # Only walk the top-level directory
            for file in os.listdir(walk_folder):
                file_path = os.path.join(walk_folder, file)
                if os.path.isfile(file_path) and file.endswith((".yar", ".yara")):
                    process_yara_file(file_path, repo_folder, yara_rule_sets)

        # Append the YARA rule repository
        yara_rule_repo = {
            "name": repo['name'],
            "url": repo['url'],
            "author": repo['author'],
            "owner": repo['owner'],
            "repo": repo['repo'],
            "branch": repo['branch'],
            "rules_sets": yara_rule_sets,
            "quality": repo['quality'],
            "license": repo['license'],
            "license_url": repo['license_url'],
            "commit_hash": repo['commit_hash'],
            "retrieval_date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "repo_path": repo_folder,
        }
        yara_rule_repo_sets.append(yara_rule_repo)

        # Output the number of YARA rules retrieved from the repository
        # (the rules in all rule sets, not the number of rule files)
        logging.info("Retrieved %d YARA rules from repository: %s",
                     sum(len(rule_set['rules']) for rule_set in yara_rule_sets),
                     repo['name'])

    # Return the YARA rule sets
    return yara_rule_repo_sets


def _find_repo_license(repo_folder, repo_url, commit_hash):
    """
    Return ``(license_text, license_url)`` for the first license file found.

    os.walk visits ``repo_folder`` itself before any sub folder, so a
    license in the repository root always wins; a license in a sub folder
    is only used when the root has none. Without any license file the
    placeholders ``("NO LICENSE SET", "N/A")`` are returned.
    """
    for root, _, files in os.walk(repo_folder):
        for file in files:
            if file in ("LICENSE", "LICENSE.txt", "LICENSE.md"):
                file_path = os.path.join(root, file)
                url_path = os.path.relpath(file_path, start=repo_folder)
                with open(file_path, "r", encoding="utf-8") as f:
                    license_text = f.read()
                return license_text, f'{repo_url}/blob/{commit_hash}/{url_path}'
    return "NO LICENSE SET", "N/A"
151 |
--------------------------------------------------------------------------------
/main/rule_output.py:
--------------------------------------------------------------------------------
1 | """
2 | This module contains functions for writing YARA rules into separate files.
3 | """
4 | import os
5 | import logging
6 | import datetime
7 | import re
8 | from pprint import pprint
9 | import dateparser
10 | from plyara.utils import rebuild_yara_rule
11 |
12 |
13 | def write_yara_packages(processed_yara_repos, program_version, yaraqa_commit, YARA_FORGE_CONFIG):
14 | """
15 | Writes YARA rules into separate files.
16 | """
17 |
18 | # List of files that were written
19 | package_files = []
20 |
21 | rule_package_statistics_sets = []
22 |
23 | # Loop over the rule packages
24 | for rule_package in YARA_FORGE_CONFIG['yara_rule_packages']:
25 |
26 | # Statistics for the rule package
27 | rule_package_statistics = {
28 | "total_rules": 0,
29 | "total_rules_skipped_age": 0,
30 | "total_rules_skipped_quality": 0,
31 | "total_rules_skipped_importance": 0,
32 | "total_rules_skipped_score": 0,
33 | "repo_statistics": [],
34 | "name": rule_package['name'],
35 | }
36 |
37 | # Create the directory for the rule package
38 | package_dir = os.path.join("packages", rule_package['name'])
39 | if not os.path.exists(package_dir):
40 | os.makedirs(package_dir)
41 | # Create the rule file name
42 | rule_file_name = f"yara-rules-{rule_package['name']}.yar"
43 | # Create the rule file path
44 | rule_file_path = os.path.join(package_dir, rule_file_name)
45 |
46 | # Write information about the rule package, the output file name
47 | # and the output file path to the console
48 | logging.info("------------------------------------------------------------------------")
49 | logging.info("Creating YARA rule package '%s': %s", rule_package['name'], rule_file_path)
50 | logging.info("Description: %s", rule_package['description'])
51 | logging.info("Minimum Quality: %d", rule_package['minimum_quality'])
52 | logging.info("Minimum Age: %d", rule_package['minimum_age'])
53 | logging.info("Output File: %s", rule_file_path)
54 |
55 | # List of strings composed of the rules from each repository
56 | output_rule_set_strings = []
57 |
58 | # Loop over the repositories
59 | for repo in processed_yara_repos:
60 | # Debug output
61 | logging.info("Writing YARA rules from repository: %s", repo['name'])
62 |
63 | # Repo rule set string
64 | repo_rules_strings = []
65 | already_added_priv_rules = []
66 |
67 | # Statistics for the rule package
68 | rule_repo_statistics = {
69 | "total_rules": 0,
70 | "total_rules_skipped_age": 0,
71 | "total_rules_skipped_quality": 0,
72 | "total_rules_skipped_importance": 0,
73 | "total_rules_skipped_score": 0,
74 | }
75 |
76 | # Loop over the rule sets in the repository and modify the rules
77 | for rule_sets in repo['rules_sets']:
78 | # Debug output
79 | logging.debug("Writing YARA rules from rule set: %s", rule_sets['file_path'])
80 | # List of required private rules
81 | required_private_rules = []
82 | # Loop over the rules in the rule set
83 | for rule in rule_sets['rules']:
84 |
85 | # Debug output
86 | #pprint(rule)
87 |
88 | # Perform some check based on the meta data of the rule
89 | skip_rule = False
90 | skip_rule_reason = None
91 | # Some values that will help with the decision whether to skip the rule
92 | importance = None
93 | # Loop over the metadata
94 | for metadata in rule['metadata']:
95 |
96 | # Age check ------------------------------------------------------
97 | # Check if the rule has a minimum age
98 | if "modified" in metadata:
99 | rule_date = dateparser.parse(metadata['modified'])
100 | if rule_date is not None: # Check the rule_date is a valid date
101 | # Check if the rule is old enough
102 | if (datetime.datetime.now() - rule_date).days < rule_package['minimum_age']:
103 | skip_rule = True
104 | skip_rule_reason = "age"
105 | # Check if the rule is younger than the maximum age
106 | if "date" in metadata:
107 | rule_date = dateparser.parse(metadata['date'])
108 | if rule_date is not None: # Check the rule_date is a valid date
109 | # Check if the rule is old enough
110 | if (datetime.datetime.now() - rule_date).days > rule_package['max_age']:
111 | skip_rule = True
112 | skip_rule_reason = "age"
113 |
114 | # Score check ----------------------------------------------------
115 | if "score" in metadata:
116 | # Check if the rule has the require score
117 | if metadata['score'] < rule_package['minimum_score']:
118 | skip_rule = True
119 | skip_rule_reason = "score"
120 |
121 | # Quality check --------------------------------------------------
122 | if "quality" in metadata:
123 | # Check if the rule has the require quality
124 | if metadata['quality'] < rule_package['minimum_quality']:
125 | skip_rule = True
126 | skip_rule_reason = "quality"
127 |
128 | # Importance check -----------------------------------------------
129 | if "importance" in metadata:
130 | importance = metadata['importance']
131 |
132 | # If importance is set, check the importance level defined for the repo and overwrite
133 | # the skip_rule variable if the importance of the rule is higher than the importance
134 | # defined for the rule package
135 | if importance is not None:
136 | if importance >= rule_package['force_include_importance_level']:
137 | skip_rule = False
138 | skip_rule_reason = None
139 | logging.debug("Forcing rule '%s' because of importance", rule['rule_name'])
140 | if importance < rule_package['force_exclude_importance_level']:
141 | skip_rule = True
142 | skip_rule_reason = "importance"
143 |
144 | # We skip private rules and add them only if other rules require them
145 | if 'scopes' in rule:
146 | if 'private' in rule['scopes']:
147 | skip_rule = True
148 |
149 | # Skip the rule if it doesn't match the minimum quality or age
150 | if skip_rule:
151 | logging.debug("Skipping rule '%s' because of %s", rule['rule_name'], skip_rule_reason)
152 | if skip_rule_reason == "age":
153 | rule_repo_statistics['total_rules_skipped_age'] += 1
154 | elif skip_rule_reason == "quality":
155 | rule_repo_statistics['total_rules_skipped_quality'] += 1
156 | elif skip_rule_reason == "importance":
157 | rule_repo_statistics['total_rules_skipped_importance'] += 1
158 | elif skip_rule_reason == "score":
159 | rule_repo_statistics['total_rules_skipped_score'] += 1
160 | continue
161 | else:
162 | # Collect all private rules used in the accepted rules
163 | if 'private_rules_used' in rule:
164 | for priv_rule in rule['private_rules_used']:
165 | if priv_rule not in required_private_rules:
166 | required_private_rules.append(priv_rule)
167 |
168 | # Write the rule into the output file
169 | repo_rules_strings.append(rebuild_yara_rule(rule))
170 | rule_repo_statistics['total_rules'] += 1
171 |
172 | # Now we prepare the private rules
173 | # Loop over the required private rules
174 | for priv_rule in required_private_rules:
175 | # Get the rule from the plyara object
176 | priv_rule_string = rebuild_yara_rule(priv_rule["rule"])
177 | # Append rule if it hasn't been added yet
178 | if priv_rule["rule"]["rule_name"] not in already_added_priv_rules:
179 | # Prepend the rule to the output string
180 | repo_rules_strings.insert(0, priv_rule_string)
181 | # Add the rule to the list of already added rules
182 | already_added_priv_rules.append(priv_rule["rule"]["rule_name"])
183 | rule_repo_statistics['total_rules'] += 1
184 |
185 | # Only write the rule set if there's at least one rule in the set
186 | if len(repo_rules_strings) > 0:
187 | # Prepend header to the output string
188 | repo_rule_set_header = YARA_FORGE_CONFIG['repo_header'].format(
189 | repo_name=repo['name'],
190 | repo_url=repo['url'],
191 | retrieval_date=datetime.datetime.now().strftime("%Y-%m-%d"),
192 | repo_commit=repo['commit_hash'],
193 | total_rules=rule_repo_statistics['total_rules'],
194 | total_rules_skipped_age=rule_repo_statistics['total_rules_skipped_age'],
195 | total_rules_skipped_quality=rule_repo_statistics['total_rules_skipped_quality'],
196 | total_rules_skipped_importance=rule_repo_statistics['total_rules_skipped_importance'],
197 | total_rules_skipped_score=rule_repo_statistics['total_rules_skipped_score'],
198 | repo_license=repo['license']
199 | )
200 | # Append the rule set string to the list of rule set strings
201 | output_rule_set_strings.append(repo_rule_set_header)
202 | output_rule_set_strings.extend(repo_rules_strings)
203 |
204 | # Write the rule set statistics including total and skipped rules to the console
205 | logging.info("Rule set: '%s' Total rules: %d, Skipped: %d (age), %d (quality), %d (importance), %d (score)",
206 | repo['name'],
207 | rule_repo_statistics['total_rules'],
208 | rule_repo_statistics['total_rules_skipped_age'],
209 | rule_repo_statistics['total_rules_skipped_quality'],
210 | rule_repo_statistics['total_rules_skipped_importance'],
211 | rule_repo_statistics['total_rules_skipped_score'])
212 |
213 | # Add the repo statistics to the rule package statistics
214 | rule_package_statistics['repo_statistics'].append({
215 | "name": repo['name'],
216 | "total_rules": rule_repo_statistics['total_rules'],
217 | "total_rules_skipped_age": rule_repo_statistics['total_rules_skipped_age'],
218 | "total_rules_skipped_quality": rule_repo_statistics['total_rules_skipped_quality'],
219 | "total_rules_skipped_importance": rule_repo_statistics['total_rules_skipped_importance'],
220 | "total_rules_skipped_score": rule_repo_statistics['total_rules_skipped_score'],
221 | })
222 |
223 | # Add the repo statistics counters to the rule package statistics
224 | rule_package_statistics['total_rules'] += rule_repo_statistics['total_rules']
225 | rule_package_statistics['total_rules_skipped_age'] += rule_repo_statistics['total_rules_skipped_age']
226 | rule_package_statistics['total_rules_skipped_quality'] += rule_repo_statistics['total_rules_skipped_quality']
227 | rule_package_statistics['total_rules_skipped_importance'] += rule_repo_statistics['total_rules_skipped_importance']
228 | rule_package_statistics['total_rules_skipped_score'] += rule_repo_statistics['total_rules_skipped_score']
229 |
230 | # Print the rule package statistics including total and skipped rules to the console
231 | logging.log(logging.INFO, "-------------------------------------------------------")
232 | logging.info("Rule package: '%s' Total rules: %d, Skipped: %d (age), %d (quality), %d (importance), %d (score)",
233 | rule_package['name'],
234 | rule_package_statistics['total_rules'],
235 | rule_package_statistics['total_rules_skipped_age'],
236 | rule_package_statistics['total_rules_skipped_quality'],
237 | rule_package_statistics['total_rules_skipped_importance'],
238 | rule_package_statistics['total_rules_skipped_score'])
239 |
240 | # Add the rule package statistics to the list of rule package statistics
241 | rule_package_statistics_sets.append(rule_package_statistics)
242 |
243 | # Only write the rule file if there's at least one rule in all sets in the package
244 | if rule_package_statistics['total_rules'] > 0:
245 | with open(rule_file_path, "w", encoding="utf-8") as f:
246 |
247 | # Compose the package header and add the statistics on total rules and skipped rules
248 | rule_set_header = YARA_FORGE_CONFIG['rule_set_header'].format(
249 | rule_package_name=rule_package['name'],
250 | rule_package_description=rule_package['description'],
251 | program_version=program_version,
252 | yaraqa_commit=yaraqa_commit,
253 | rule_package_minimum_quality=rule_package['minimum_quality'],
254 | rule_package_force_include_importance_level=rule_package['force_include_importance_level'],
255 | rule_package_force_exclude_importance_level=rule_package['force_exclude_importance_level'],
256 | rule_package_minimum_age=rule_package['minimum_age'],
257 | rule_package_minimum_score=rule_package['minimum_score'],
258 | retrieval_date=datetime.datetime.now().strftime("%Y-%m-%d"),
259 | total_rules=rule_package_statistics['total_rules'],
260 | total_rules_skipped_age=rule_package_statistics['total_rules_skipped_age'],
261 | total_rules_skipped_quality=rule_package_statistics['total_rules_skipped_quality'],
262 | total_rules_skipped_importance=rule_package_statistics['total_rules_skipped_importance'],
263 | total_rules_skipped_score=rule_package_statistics['total_rules_skipped_score'],
264 | )
265 |
266 | logging.log(logging.INFO, "You can find more information about skipped files " \
267 | "in the log file: yara-forge.log when you run it with --debug flag")
268 |
269 | # organize the imports to avoid `duplicate import` errors in yara-x.
270 | import_set = set()
271 | regex_import = re.compile('import ".*"\n')
272 | for r in range(len(output_rule_set_strings)):
273 | rule = output_rule_set_strings[r]
274 | imports = regex_import.findall(rule)
275 | if len(imports) > 0:
276 | import_set.update(imports)
277 | output_rule_set_strings[r] = regex_import.sub('', rule)
278 |
279 | # collect all the imports used by the rules at the top of the file
280 | if len(import_set) > 0:
281 | imports = '\n' + ''.join(import_set) + '\n\n'
282 | output_rule_set_strings.insert(0, imports)
283 |
284 | # Prepend the header to the output rule set strings
285 | output_rule_set_strings.insert(0, rule_set_header)
286 |
287 | # Write the output rule set strings to the file
288 | f.write("".join(output_rule_set_strings))
289 |
290 | else:
291 | # remove the output file if it exists
292 | if os.path.exists(rule_file_path):
293 | os.remove(rule_file_path)
294 |
295 | # Add the name of the repo and the file path to the output file to the list
296 | package_files.append({
297 | "name": rule_package['name'],
298 | "file_path": rule_file_path,
299 | })
300 |
301 | # Write the rule package statistics as a markdown table to the build_stats.md file
302 | write_build_stats(rule_package_statistics_sets)
303 |
304 | return package_files
305 |
306 |
def write_build_stats(rule_package_statistics_sets):
    """
    Write the build statistics as markdown tables to ``build_stats.md``.

    The file is created in the current working directory and overwrites any
    existing file. It contains one summary table with a row per rule package,
    followed by one section per rule package listing its repositories
    (sorted by name) with the same counters.

    :param rule_package_statistics_sets: list of dicts, each providing 'name',
        'total_rules', the four 'total_rules_skipped_*' counters and
        'repo_statistics' (a list of dicts with the same name/counter keys)
    """
    # Keys rendered in every table row, in column order
    columns = (
        'name',
        'total_rules',
        'total_rules_skipped_age',
        'total_rules_skipped_quality',
        'total_rules_skipped_importance',
        'total_rules_skipped_score',
    )
    # Everything after the first column is identical for both table types
    counter_headings = ("Total Rules | Skipped (Age) | Skipped (Quality) | "
                        "Skipped (Importance) | Skipped (Score) |\n")
    counter_rulers = ("----------- | ------------- | ----------------- | "
                      "-------------------- | --------------- |\n")

    def _table_row(stats):
        # One markdown table row built from the statistics dict
        return "| " + " | ".join(f"{stats[column]}" for column in columns) + " |\n"

    with open("build_stats.md", "w", encoding="utf-8") as stats_file:
        emit = stats_file.write

        # Document header
        emit("✨ This release contains the latest YARA rule sets from YARA Forge 🔨\n\n")
        emit("# Build Statistics\n\n")

        # Summary table: one row per rule package
        emit("## Rule Packages\n\n")
        emit("| Package | " + counter_headings)
        emit("| ------- | " + counter_rulers)
        for package_stats in rule_package_statistics_sets:
            emit(_table_row(package_stats))

        # Detail tables: one section per rule package, one row per repository
        emit("\n## Package Repo Statistics\n\n")
        for package_stats in rule_package_statistics_sets:
            emit(f"## {package_stats['name']}\n\n")
            emit("| Repo | " + counter_headings)
            emit("| ---- | " + counter_rulers)
            repos_by_name = sorted(package_stats['repo_statistics'],
                                   key=lambda repo_stats: repo_stats['name'])
            for repo_stats in repos_by_name:
                emit(_table_row(repo_stats))
            emit("\n")
346 |
--------------------------------------------------------------------------------
/main/rule_processors.py:
--------------------------------------------------------------------------------
1 | """
2 | This file contains functions that process the YARA rules.
3 | """
4 | import logging
5 | import re
6 | import uuid
7 | from pprint import pprint
8 | import yaml
9 | import dateparser
10 | from plyara.utils import generate_hash
11 | from git import Repo
12 |
13 | # Date Lookup Cache
14 | date_lookup_cache = {}
15 |
16 | # Private YARA rules
17 | private_rule_mapping = []
18 |
def process_yara_rules(yara_rule_repo_sets, YARA_FORGE_CONFIG):
    """
    Normalize all YARA rules of all repositories in place.

    For every rule this function:
      * skips non-private rules whose logic hash was already seen (in any repo),
      * renames rules whose name was already registered,
      * aligns the meta data (logic hash, UUID, reference, date, hashes,
        description, author, tags, score, importance, quality, source URL,
        license URL) and sorts it,
      * prefixes the rule name via align_yara_rule_name and marks private rules
        with a ``_PRIVATE`` suffix, rewriting references to them in conditions.

    :param yara_rule_repo_sets: list of repo dicts; the keys read here are
        'name', 'url', 'author', 'quality', 'repo_path', 'commit_hash',
        'license_url' and 'rules_sets' (list of dicts with 'file_path' and
        'rules', the parsed rule dicts)
    :param YARA_FORGE_CONFIG: configuration dict, passed on to the scoring and
        meta data sorting helpers
    :return: the same ``yara_rule_repo_sets`` object with every
        ``rules_sets[*]['rules']`` list replaced by the kept, normalized rules
    """

    # Maps logic hash -> rule name (as registered, before the repo prefix is added).
    # Shared across all repositories to detect duplicates.
    logic_hash_list = {}

    # Loop over the repositories
    for repo in yara_rule_repo_sets:

        # Rule set identifier
        rule_set_id = repo['name'].replace(" ", "_").replace("-", "_").upper()

        # Info output
        logging.info("Processing YARA rules from repository: %s", repo['name'])

        # Final rule names already kept for this repository (set for O(1) lookups)
        seen_rule_names = set()

        # Loop over the rule sets in the repository and modify the rules
        num_rules = 0
        for rules in repo['rules_sets']:
            # Debug output
            logging.debug("Processing YARA rules from rule set: %s", rules['file_path'])
            # Rules that we want to keep
            kept_rules = []
            # Loop over each of the rules and modify them
            for rule in rules['rules']:
                # Debug output
                logging.debug("Processing YARA rule: %s", rule['rule_name'])

                # Rule Meta Data Modifications ----------------------------------------------

                # Check if the rule is a private rule
                is_private_rule = 'private' in rule.get('scopes', [])

                # Add metadata to rules that don't have any
                if 'metadata' not in rule:
                    rule['metadata'] = []

                # Calculate the logic hash
                logic_hash = generate_hash(rule)

                # Duplicate Name Check
                # If the rule name already exists in the list, append a number to it
                if rule['rule_name'] in logic_hash_list.values():
                    # Get the number of times the rule name already exists in the list
                    num_rule_name = list(logic_hash_list.values()).count(rule['rule_name'])
                    # Append the number to the rule name
                    rule['rule_name'] = f"{rule['rule_name']}_{num_rule_name}"

                # Duplicate Content Check
                # Check if the rule is a duplicate (based on the logic hash).
                # Private rules are never skipped here.
                if logic_hash in logic_hash_list and not is_private_rule:
                    logging.info("Skipping rule '%s > %s' because it has the same logic hash as '%s'",
                                 repo['name'], rule['rule_name'], logic_hash_list[logic_hash])
                    continue
                # Register the logic hash
                logic_hash_list[logic_hash] = rule['rule_name']
                modify_meta_data_value(rule['metadata'], 'logic_hash', logic_hash)

                # Calculate a UUID for the rule hash
                rule_uuid = generate_uuid_from_hash(logic_hash)
                align_yara_rule_uuid(rule['metadata'], rule_uuid)

                # Modifying existing meta data values ---------------------------------------

                # Modify the rule references
                rule['metadata'] = align_yara_rule_reference(rule['metadata'], repo['url'])

                # Modify the rule date
                rule['metadata'] = align_yara_rule_date(rule['metadata'],
                                                        repo['repo_path'],
                                                        rules['file_path'])

                # Modify the rule hashes
                rule['metadata'] = align_yara_rule_hashes(rule['metadata'])

                # Modify the rule description
                rule['metadata'] = align_yara_rule_description(rule['metadata'], repo['name'])

                # Modify the rule author
                rule['metadata'] = align_yara_rule_author(rule['metadata'], repo['author'])

                # Add tags based on meta data values and condition elements
                rule = add_tags_to_rule(rule)

                # Add a score based on the rule quality and meta data keywords
                rule_score = evaluate_yara_rule_score(rule, YARA_FORGE_CONFIG)
                modify_meta_data_value(rule['metadata'], 'score', rule_score)

                # Increase the quality score based on certain rule characteristics
                #quality_increase = evaluate_quality_increase(rule)
                #rule['metadata'] = modify_yara_rule_quality(rule['metadata'], quality_increase )

                # Get a custom importance score if available.
                # Compare against None: an explicitly configured importance of 0 is a
                # valid value and must not be dropped by a truthiness check.
                custom_importance_score = retrieve_custom_importance_score(
                    repo['name'], rules['file_path'], rule['rule_name'])
                if custom_importance_score is not None:
                    modify_meta_data_value(rule['metadata'], 'importance', custom_importance_score)
                    logging.debug("Custom importance score for rule %s is %d",
                                  rule['rule_name'], custom_importance_score)

                # Adding additional meta data values ----------------------------------------
                # Add a quality value based on the original repo
                # a quality reduction is evaluated later in the process - this is just the base value
                # for that calculation
                # NOTE(review): this assignment runs after align_yara_rule_description, so the
                # quality penalty applied there for a missing description appears to be
                # overwritten by the base value - confirm whether that is intended.
                modify_meta_data_value(rule['metadata'], 'quality', repo['quality'])

                # Modify the rule name
                rule_name_old = rule['rule_name']
                rule_name_new = align_yara_rule_name(rule['rule_name'], rule_set_id)
                # If the rule is private, add the _PRIVATE suffix and register it so that
                # rules referencing it can be rewritten to the new name
                if is_private_rule:
                    rule_name_new = f"{rule_name_new}_PRIVATE"
                    # Add the rule to the private rule mapping
                    private_rule_mapping.append({
                        "repo": rule_set_id,
                        "old_name": rule_name_old,
                        "new_name": rule_name_new,
                        "rule": rule
                    })
                # Set the new rule name
                rule['rule_name'] = rule_name_new

                # Check if the rule uses private rules
                private_rules_used = check_rule_uses_private_rules(
                    rule_set_id, rule, private_rule_mapping)
                if private_rules_used:
                    # Change the condition terms of the rule to align them with
                    # the new private rule names
                    rule['condition_terms'] = adjust_identifier_names(
                        rule_set_id,
                        rule['condition_terms'],
                        private_rules_used)
                    # Add the private rules used to the rule
                    rule['private_rules_used'] = private_rules_used
                    logging.debug("Private rules used: %s", private_rules_used)

                # Add a rule source URL to the original file
                modify_meta_data_value(
                    rule['metadata'], 'source_url',
                    (
                        f'{repo["url"]}/blob/{repo["commit_hash"]}/{rules["file_path"]}'
                        f'#L{rule["start_line"]}-L{rule["stop_line"]}'
                    )
                )

                # Add license URL
                modify_meta_data_value(rule['metadata'], 'license_url', repo['license_url'])

                # Sort the meta data values
                rule['metadata'] = sort_meta_data_values(rule['metadata'], YARA_FORGE_CONFIG)

                # We keep the rule only if its final name has not been used in this repository yet
                if rule_name_new not in seen_rule_names:
                    seen_rule_names.add(rule_name_new)
                    kept_rules.append(rule)

            # Count the number of rules
            num_rules += len(kept_rules)
            # Now we replace the rules
            rules['rules'] = kept_rules

        # Info output about the number of rules in the repository
        logging.info("Normalized %d rules from repository: %s", num_rules, repo['name'])

    return yara_rule_repo_sets
189 |
190 |
def add_tags_to_rule(rule):
    """
    Add tags to a rule based on meta data values and condition elements.

    Tags are collected from:
      * the rule's existing 'tags',
      * meta data entries whose key is a known tag-like name (comma-separated
        strings are split, lists are taken element-wise),
      * CVE and MITRE ATT&CK technique IDs found in the description,
      * condition contents that indicate a file-only rule ("FILE").

    The upper-cased, de-duplicated tags are stored comma-joined in the 'tags'
    meta data value; a sanitized version (only ``[a-zA-Z0-9_]``) is stored in
    ``rule['tags']``.

    :param rule: parsed rule dict; reads 'metadata' (list of single-key dicts),
        'raw_condition' and optionally 'tags'
    :return: the same rule dict with updated 'metadata' and 'tags'
    """
    # List of tags to add
    tags_to_add = []
    # Meta data keys whose values are treated as tags
    tag_names = ['tag', 'tags', 'category', 'categories', 'type', 'types', 'family', 'families',
                 'malware', 'threat', 'threats', 'threat_type', 'actor', 'threat_actor', 'threat_actors',
                 'threat_types', 'threat_category', 'threat_categories', 'threat_family',
                 'threat_families', 'threat_group', 'threat_groups', 'scan_context',
                 'malware_type', 'mitre_attack', 'mitre_attack_technique', 'mitre_attack_techniques',
                 'attack_technique', 'attack_techniques', 'attack', 'attacks', 'attack_type']
    # Regular expressions to extract other tags from the description
    tag_regexes = [
        r'CVE-\d{4}-\d{4,7}', # CVE IDs
        r'T[0-9]{4}', # MITRE ATT&CK Technique IDs
    ]
    # Join the list of regexes with an OR operator and compile the regex
    tag_regex = re.compile(r'(?i)\b(%s)\b' % "|".join(tag_regexes))
    # List of values to ignore
    ignore_values = ['N/A', 'n/a', 'na', 'NA', 'unknown', 'Unknown', '', ' ']
    # Condition substrings that indicate a specific tag
    condition_contents = {
        "FILE": ['uint8(0)', 'uint16(0)', 'uint32(0)', 'uint16be(0)', 'uint32be(0)',
                 ' at 0 ', 'filesize'],
        # "MEMORY": [' or all of them']
    }
    # Condition endings that indicate a specific tag
    condition_ends = {
        "FILE": [' at 0'],
        # "MEMORY": [' or any of them', ' or all of them', ' or 1 of them'],
    }

    # Check if the rule already has 'tags'
    if 'tags' in rule:
        tags_to_add.extend(rule['tags'])

    # Tags from meta data values -------------------------------------------------
    for meta_data in rule['metadata']:
        for key, value in meta_data.items():
            if key.lower() not in tag_names:
                continue
            if isinstance(value, list):
                tags_to_add.extend(str(tag).strip() for tag in value)
            elif isinstance(value, str):
                # A comma-separated value holds several tags; a value without a
                # comma yields exactly one element
                tags_to_add.extend(tag.strip() for tag in value.split(","))
            # Non-string scalars (integers, booleans) are skipped: the substring
            # and upper() operations used for tags are only defined for strings

    # Remove tags that are in the ignore list
    tags_to_add = [tag for tag in tags_to_add if tag not in ignore_values]

    # Extractions from meta data ----------------------------------------------
    # Extract CVE / technique IDs from the description
    for meta_data in rule['metadata']:
        for key, value in meta_data.items():
            if key.lower() == "description" and isinstance(value, str):
                tags_to_add.extend(tag_regex.findall(value))

    # Condition tags ----------------------------------------------------------
    raw_condition = rule['raw_condition']
    # If one of the values is contained in the condition we add the mapped tag
    for tag, condition_terms in condition_contents.items():
        if any(term in raw_condition for term in condition_terms):
            tags_to_add.append(tag)
    # If the condition ends with one of the values we add the mapped tag
    for tag, condition_terms in condition_ends.items():
        if raw_condition.endswith(tuple(condition_terms)):
            tags_to_add.append(tag)

    # Clean up the tags ----------------------------------------------------------
    # Upper-case first, then de-duplicate (order preserving), so that tags that
    # differ only in case collapse into one
    tags_to_add = list(dict.fromkeys(str(tag).upper() for tag in tags_to_add))
    # We also modify the existing tags field in the meta data
    rule['metadata'] = modify_meta_data_value(rule['metadata'], 'tags', ", ".join(tags_to_add))
    # Remove symbols that are not allowed in tags (only alphanumeric characters and
    # underscores are allowed), replace every other character with an underscore.
    # Sanitizing can make two tags identical, so de-duplicate once more.
    tags_to_add = list(dict.fromkeys(
        re.sub(r'[^a-zA-Z0-9_]', '_', tag) for tag in tags_to_add))
    # And now we set the new tags field in the rule
    rule['tags'] = tags_to_add
    return rule
307 |
# Parsed custom scoring files, keyed by path. Filled on first use so that the
# YAML file is not re-read and re-parsed for every single rule.
_CUSTOM_SCORING_CACHE = {}


def _load_custom_scoring(path='yara-forge-custom-scoring.yml'):
    """
    Return the parsed content of the custom scoring YAML file (cached per path).
    """
    if path not in _CUSTOM_SCORING_CACHE:
        with open(path, 'r', encoding='utf-8') as f:
            # An empty file parses to None; normalize that to an empty mapping
            _CUSTOM_SCORING_CACHE[path] = yaml.safe_load(f) or {}
    return _CUSTOM_SCORING_CACHE[path]


def retrieve_custom_importance_score(repo_name, file_path, rule_name):
    """
    Retrieves a custom importance score for a rule.

    The entries under 'importance-scores' in yara-forge-custom-scoring.yml are
    checked in order. An entry matches when all of its known selectors match:
      * 'name' - the rule name starts with the value
      * 'file' - the file path ends with the value
      * 'repo' - the repo name equals the value
    Unknown selector fields are ignored.

    :param repo_name: name of the repository the rule comes from
    :param file_path: path of the file that contains the rule
    :param rule_name: name of the rule
    :return: the 'importance' value of the first matching entry, or None
    """
    custom_scoring = _load_custom_scoring()

    logging.debug("Checking custom importance score for rule %s in file %s in repo %s", rule_name, file_path, repo_name)

    # Loop over the rules in the YAML file
    for importance_score in custom_scoring.get('importance-scores') or []:
        # Marker that indicates if every element of the rule matched
        rule_elements_matched = False
        for rule_field, rule_value in importance_score['rule'].items():
            if rule_field == "name":
                matched = rule_name.startswith(rule_value)
                if matched:
                    logging.debug("Rule name %s starts with %s", rule_name, rule_value)
            elif rule_field == "file":
                matched = file_path.endswith(rule_value)
                if matched:
                    logging.debug("File path %s ends with %s", file_path, rule_value)
            elif rule_field == "repo":
                matched = repo_name == rule_value
                if matched:
                    logging.debug("Repo name %s matches %s", repo_name, rule_value)
            else:
                # Unknown selector: neither counts as a match nor as a mismatch
                continue
            rule_elements_matched = matched
            if not matched:
                # One mismatching selector disqualifies the whole entry
                break
        # If all elements of the rule matched, we return the importance score
        if rule_elements_matched:
            return importance_score['importance']
    return None
348 |
349 |
def sort_meta_data_values(rule_meta_data, YARA_FORGE_CONFIG):
    """
    Sort the meta data entries in place according to the configured key order.

    Entries whose (first) key is listed in ``YARA_FORGE_CONFIG['meta_data_order']``
    come first, in that order; all other entries follow and keep their
    relative order.

    :param rule_meta_data: list of meta data dicts
    :param YARA_FORGE_CONFIG: configuration dict providing 'meta_data_order'
    :return: the same (now sorted) list
    """
    preferred_order = YARA_FORGE_CONFIG['meta_data_order']
    # Rank shared by every key that is not part of the preferred order
    unlisted_rank = len(preferred_order)

    def _rank(entry):
        first_key = list(entry.keys())[0]
        if first_key in preferred_order:
            return preferred_order.index(first_key)
        return unlisted_rank

    rule_meta_data.sort(key=_rank)
    return rule_meta_data
361 |
def adjust_identifier_names(repo_name, condition_terms, private_rules_used):
    """
    Rewrite references to private rules in the condition terms (in place).

    Every condition term that equals the 'old_name' of a private rule from the
    given repo is replaced by that private rule's 'new_name'.

    :param repo_name: identifier of the repo the rule belongs to
    :param condition_terms: list of condition terms of the rule
    :param private_rules_used: list of mapping dicts with 'repo', 'old_name'
        and 'new_name'
    :return: the same, modified list of condition terms
    """
    for mapping in private_rules_used:
        for position in range(len(condition_terms)):
            # Not a reference to this private rule
            if condition_terms[position] != mapping['old_name']:
                continue
            # Same name, but the private rule belongs to another repo
            if mapping['repo'] != repo_name:
                continue
            condition_terms[position] = mapping['new_name']
    return condition_terms
375 |
376 |
def check_rule_uses_private_rules(repo_name, rule, ext_private_rule_mapping):
    """
    Collect the private rules of the given repo that the rule references.

    A private rule counts as used when its 'old_name' appears in the rule's
    'condition_terms' and its 'repo' equals ``repo_name``.

    :param repo_name: identifier of the repo the rule belongs to
    :param rule: rule dict providing 'condition_terms'
    :param ext_private_rule_mapping: list of private rule mapping dicts
    :return: list of the matching mapping dicts without duplicates
    """
    matches = []
    for candidate in ext_private_rule_mapping:
        is_referenced = candidate['old_name'] in rule['condition_terms']
        if not (is_referenced and candidate['repo'] == repo_name):
            continue
        # Equal mapping entries are only reported once
        if candidate in matches:
            continue
        matches.append(candidate)
    return matches
392 |
393 |
def align_yara_rule_description(rule_meta_data, repo_description):
    """
    Make sure the rule meta data contains exactly one aligned 'description' entry.

    The description is determined in this order:
      1. an entry whose key is a known description name, or whose string value
         starts with a typical description prefix ("Detects ") - if there are
         several, the last one wins and all of them are removed,
      2. otherwise the first entry whose key is a known threat name, turned
         into "Detects <Threat Name> (<original value>)" - that entry is removed,
      3. otherwise a placeholder text that mentions ``repo_description``; in
         this case the rule's quality score is lowered by 5.

    :param rule_meta_data: list of single-key meta data dicts (modified in place)
    :param repo_description: text appended to the placeholder description
    :return: the same meta data list with the 'description' entry appended
    """
    # List of possible description names
    description_names = ['description', 'desc', 'details', 'information', 'info',
                         'notes', 'abstract', 'explanation', 'rationale']
    description_values_prefixes = ('Detects ',)
    threat_names = ['threat_name', 'threat', 'malware', 'mal', 'malware_name', 'mal_name',
                    'threat_type', 'threat_category', 'threat_family', 'threat_group',]
    # Look for the description in the rule meta data
    description_found = False
    description_value = f"No description has been set in the source file - {repo_description}"

    # Pass 1: real descriptions. We iterate over a copy so that we can remove
    # elements from the original list.
    for meta_data in rule_meta_data.copy():
        for key, value in meta_data.items():
            has_description_key = key.lower() in description_names
            has_description_text = (isinstance(value, str)
                                    and value.startswith(description_values_prefixes))
            if has_description_key or has_description_text:
                description_found = True
                description_value = value
                # Remove the description from the original meta data
                rule_meta_data.remove(meta_data)
                break

    # Pass 2: only if no description exists at all, derive one from the first
    # threat name. Doing this after the first pass keeps threat entries intact
    # when a real description is present further down in the meta data.
    if not description_found:
        for meta_data in rule_meta_data.copy():
            for key, value in meta_data.items():
                if key.lower() in threat_names:
                    # Values are not guaranteed to be strings
                    threat = str(value)
                    # If the threat name contains a period or dash we replace it and
                    # put the original name in brackets
                    description_value = (
                        f"Detects {threat.replace('.', ' ').replace('-', ' ').title()} ({threat})")
                    description_found = True
                    # The threat entry is consumed by the generated description
                    rule_meta_data.remove(meta_data)
                    break
            if description_found:
                break

    # Lower the quality score if the description hasn't been set
    if not description_found:
        modify_yara_rule_quality(rule_meta_data, -5)
    # Set the new description
    rule_meta_data.append({'description': description_value})
    return rule_meta_data
441 |
442 |
def align_yara_rule_hashes(rule_meta_data):
    """
    Align all hash values in the meta data to entries with the key 'hash'.

    Every entry whose key (case-insensitive) is a known hash name and whose
    value is a string is removed and re-added at the end of the list as
    ``{'hash': <lower-cased value>}``. Identical hash values are only added
    once. Entries with a hash-like key but a non-string value are left
    untouched.

    :param rule_meta_data: list of single-key meta data dicts (modified in place)
    :return: the same meta data list
    """
    # List of possible hash names
    hash_names = ['hash', 'hashes', 'md5', 'sha1', 'sha256', 'sha512', 'sha-1',
                  'sha-256', 'sha-512', 'sha_256', 'sha_1', 'sha_512', 'md5sum',
                  'sha1sum', 'sha256sum', 'sha512sum', 'md5sums', 'sha1sums', 'sha256sums',
                  'sha512sums', 'reference_sample', 'sample', 'original_sample_sha1']
    # Collected hash values in order of appearance
    hashes_values = []
    # We iterate over a copy so that we can remove elements from the original
    for mdata in rule_meta_data.copy():
        for key, value in mdata.items():
            # Only string values can be normalized; anything else stays as it is
            if key.lower() in hash_names and isinstance(value, str):
                hashes_values.append(value.lower())
                # Remove the hash from the original meta data
                rule_meta_data.remove(mdata)
                break
    # Re-add the hashes under the aligned key, dropping duplicates but keeping order
    for value in dict.fromkeys(hashes_values):
        rule_meta_data.append({'hash': value})
    return rule_meta_data
471 |
472 |
def modify_yara_rule_quality(rule_meta_data, reduction_value):
    """
    Add ``reduction_value`` to the 'quality' meta data value of a YARA rule.

    Only the first entry that has a 'quality' key is changed; the entry is
    modified in place. Nothing happens if there is no such entry.

    :param rule_meta_data: list of meta data dicts
    :param reduction_value: number added to the quality (negative to reduce it)
    :return: a shallow copy of the list if a 'quality' entry was changed,
        otherwise the original list
    """
    # Shallow copy: the contained dicts are shared with the original list
    snapshot = list(rule_meta_data)
    for entry in snapshot:
        if "quality" in entry:
            entry["quality"] += reduction_value
            return snapshot
    return rule_meta_data
487 |
488 |
def modify_meta_data_value(rule_meta_data, key, value):
    """
    Set a meta data value: overwrite it if the key exists, otherwise add it.

    An existing entry is changed in place (only the first entry with that key);
    a missing key is appended to the list as a new ``{key: value}`` entry.

    :param rule_meta_data: list of meta data dicts
    :param key: meta data key to set
    :param value: new value
    :return: a shallow copy of the list if an existing entry was overwritten,
        otherwise the original list with the new entry appended
    """
    # Shallow copy: the contained dicts are shared with the original list
    snapshot = list(rule_meta_data)
    for entry in snapshot:
        if key in entry:
            entry[key] = value
            return snapshot
    # The key is not part of the meta data yet
    rule_meta_data.append({key: value})
    return rule_meta_data
505 |
506 |
507 | # Evaluate the YARA rule score
def evaluate_yara_rule_score(rule, YARA_FORGE_CONFIG):
    """
    Evaluate the YARA rule score

    The score is taken from the first source that provides one:
    - Predefined 'score' value from the rule's meta data (converted to int)
    - Meta data score based on keywords (if greater than 0)
    - Base score from the config ('rule_base_score')

    A predefined score that cannot be converted to an integer is ignored with
    a warning instead of aborting the whole run.

    :param rule: rule dict providing 'metadata' (list of dicts) and 'rule_name'
    :param YARA_FORGE_CONFIG: configuration dict providing 'rule_base_score'
    :return: the score of the rule
    """
    # Fallback if no other score can be determined
    rule_score = YARA_FORGE_CONFIG['rule_base_score']

    # Check if the rule already has a score
    for meta_data in rule['metadata']:
        for key, value in meta_data.items():
            if key != 'score':
                continue
            try:
                # If the rule already has a usable score, we use that
                return int(value)
            except (TypeError, ValueError):
                logging.warning("Rule '%s' has an invalid score value %r - ignoring it",
                                rule['rule_name'], value)

    # Score for the rule meta data
    meta_data_rule_score = evaluate_yara_rule_meta_data(rule)
    if meta_data_rule_score > 0:
        logging.debug("Rule '%s' has a meta data score of %d", rule['rule_name'],
                      meta_data_rule_score)
        return meta_data_rule_score

    return rule_score
538 |
539 |
540 | # deprecated function - caused too many negative side effects (crap rules being included because they had good meta data)
541 | # def evaluate_quality_increase(rule):
542 | # """
543 | # Evaluate the quality increase for a rule
544 | # """
545 | # # The pure existence of these meta data values increases the quality score
546 | # quality_increase = 0
547 | # # List of possible meta data keywords
548 | # meta_data_keywords = ['modified', 'last_modified', 'last_modified_at', 'last_modified_date',
549 | # 'last_change', 'last_change_date', 'last_update', 'last_update_date',
550 | # 'updated', 'updated_at', 'updated_date', 'updated_timestamp',
551 | # 'update', 'modification_date', 'modification', 'change', 'change_date']
552 | # # Check if one of the keywords appears in the meta data values
553 | # for meta_data in rule['metadata']:
554 | # for field, _ in meta_data.items():
555 | # if field in meta_data_keywords:
556 | # quality_increase = 20
557 | # return quality_increase
558 |
559 |
def evaluate_yara_rule_meta_data(rule):
    """
    Derive a score from keywords in the rule's meta data and rule name.

    Returns 65 for a "suspicious" keyword, 50 for a "hunting" keyword and 0 if
    no keyword is found. The meta data values are checked first, in order, and
    the rule name is only checked if no meta data value matched. Matching is a
    case-insensitive substring search, so 'test' also matches 'latest'.
    """
    # Keywords that mark a rule as suspicious
    suspicious_markers = ('suspicious', 'susp_')
    # Keywords that mark a rule as a hunting / experimental rule
    hunting_markers = ('hunting', 'experimental', 'test', 'testing', 'false positive',
                       'unstable', 'untested', 'unverified', 'unreliable',
                       'unconfirmed', 'hunt_')
    # Meta data fields that are never searched for keywords
    skipped_fields = ('reference',)

    def _mentions(text, markers):
        # True if at least one marker appears somewhere in the text
        return any(marker in text for marker in markers)

    # Look at the meta data values first
    for entry in rule['metadata']:
        for field, value in entry.items():
            # Excluded fields and non-string values cannot match
            if field in skipped_fields or not isinstance(value, str):
                continue
            lowered_value = value.lower()
            if _mentions(lowered_value, suspicious_markers):
                return 65
            if _mentions(lowered_value, hunting_markers):
                return 50

    # Fall back to the rule name
    lowered_name = rule['rule_name'].lower()
    if _mentions(lowered_name, suspicious_markers):
        return 65
    if _mentions(lowered_name, hunting_markers):
        return 50
    return 0
590 |
591 |
def align_yara_rule_author(rule_meta_data, repo_author):
    """
    Make sure the rule meta data ends with exactly one streamlined 'author' field.

    Every meta data entry whose key is one of the known author field names is
    removed from the list. The value of the last such entry is used for the
    new 'author' field. If there is no such entry, `repo_author` is used.
    The list is modified in place and returned.
    """
    # Field names that are treated as an author field
    author_fields = ('author', 'authors', 'writer', 'creator', 'created_by',
                     'copyright', 'made_by', 'contributor', 'contributed_by')
    # Used unless we find an author in the meta data
    author_value = repo_author
    # We iterate over a snapshot so that we can remove entries from the original
    for entry in list(rule_meta_data):
        for field, content in entry.items():
            if field not in author_fields:
                continue
            # Remember the author and drop the old field
            author_value = content
            rule_meta_data.remove(entry)
    # Add the streamlined author field at the end
    rule_meta_data.append({'author': author_value})
    return rule_meta_data
620 |
621 |
def align_yara_rule_uuid(rule_meta_data, uuid):
    """
    Make sure the rule meta data ends with an 'id' field that holds a UUID.

    Entries whose key is one of the known identifier field names are inspected:
    - if the value is accepted by is_valid_uuidv5, it becomes the new 'id'
      value and the entry is removed
    - otherwise, if the key is 'id', the value is stored as 'orig_id' and the
      entry is removed
    - otherwise the entry is left untouched

    If no valid UUID is found in the meta data, the given `uuid` is used.
    The list is modified in place and returned.
    """
    # Field names that are treated as an identifier field
    id_fields = ('uuid', 'id', 'rid', 'rule_id', 'rule_uuid', 'ruleid',
                 'ruleuuid', 'identifier', 'rule_identifier')
    # Used unless we find a valid UUID in the meta data
    final_id = uuid
    # We iterate over a snapshot so that we can remove entries from the original
    for entry in list(rule_meta_data):
        for field, content in entry.items():
            if field not in id_fields:
                continue
            # Integer identifiers are converted to strings before the check
            if isinstance(content, int):
                content = str(content)
            if is_valid_uuidv5(content):
                # A valid UUID replaces ours and the old field is dropped
                final_id = content
                rule_meta_data.remove(entry)
                continue
            logging.debug("The value '%s' is not a valid UUID. Using our UUID instead "
                          "and renaming the old ID to 'orig_id' if the field was 'id'.",
                          content)
            # Only the 'id' field has to make room for the new 'id' field
            if field == 'id':
                logging.debug("Renaming the old ID to 'orig_id'.")
                modify_meta_data_value(rule_meta_data, 'orig_id', content)
                rule_meta_data.remove(entry)

    # Add the UUID at the end of the meta data
    rule_meta_data.append({'id': final_id})
    return rule_meta_data
662 |
663 |
def is_valid_uuidv5(value):
    """
    Check if the value can be parsed as a UUID.

    Returns True if uuid.UUID accepts the value and False if it raises a
    ValueError. The UUID version is not inspected.
    """
    try:
        uuid.UUID(value)
    except ValueError:
        return False
    return True
673 |
def align_yara_rule_name(rule_name, rule_set_id):
    """
    Build the streamlined rule name.

    The rule name is split at underscores. Elements that are completely
    uppercase are kept as they are, all other elements are title cased. The
    rule set identifier is put in front and everything is joined with
    underscores again.
    """
    # The rule set identifier always comes first
    name_parts = [rule_set_id]
    name_parts.extend(
        part if part.isupper() else part.title()
        for part in rule_name.split("_")
    )
    return "_".join(name_parts)
693 |
694 |
def align_yara_rule_reference(rule_meta_data, rule_set_url):
    """
    Make sure the rule meta data ends with exactly one 'reference' field.

    An entry counts as a reference if its key is one of the known reference
    field names or if its value is a string that starts with 'http://' or
    'https://'. All such entries are removed and the value of the last one is
    used for the new 'reference' field. If there is no such entry,
    `rule_set_url` is used. The list is modified in place and returned.
    """
    # Field names that are treated as a reference field
    reference_fields = ('reference', 'references', 'ref', 'url', 'source', 'link',
                        'website', 'webpage', 'report')
    # Values with these prefixes are treated as a reference as well
    url_prefixes = ('http://', 'https://')
    # Used unless we find a reference in the meta data
    reference_value = rule_set_url
    # We iterate over a snapshot so that we can remove entries from the original
    for entry in list(rule_meta_data):
        for field, content in entry.items():
            looks_like_reference = field in reference_fields or (
                isinstance(content, str) and content.startswith(url_prefixes))
            if not looks_like_reference:
                continue
            # Remember the reference and drop the old field
            reference_value = content
            rule_meta_data.remove(entry)
    # Add the streamlined reference field at the end
    rule_meta_data.append({'reference': reference_value})
    return rule_meta_data
730 |
731 |
def align_yara_rule_date(rule_meta_data, repo_path, file_path):
    """
    Streamline the creation and modification dates in the rule meta data.

    Creation date ('date' field), first source that provides a value:
    - meta data fields with a known creation date name that can be parsed
      (fields with such a name that cannot be parsed are removed)
    - any other string value in the meta data that can be parsed as a date
    - the creation date from the git history

    Modification date ('modified' field):
    - the last meta data field with a known modification date name that can
      be parsed
    - the modification date from the git history

    All dates are written in the format YYYY-MM-DD. The git dates are looked
    up via get_rule_age_git and cached per file path in date_lookup_cache.
    The list is modified in place and returned.
    """
    # List of possible date names
    date_names = ['date', 'created', 'created_at', 'creation_date', 'creation_time',
                  'creation', 'timestamp', 'time', 'datetime']
    modified_names = ['modified', 'last_modified', 'last_modified_at', 'last_modified_date',
                      'last_change', 'last_change_date', 'last_update', 'last_update_date',
                      'updated', 'updated_at', 'updated_date', 'updated_timestamp']
    # Look for the date in the rule meta data
    date_found = False

    # GIT HISTORY -----------------------------------------------------------
    # We retrieve values from the git history that we can use in case we don't
    # find these values in the meta data

    # Check if the date is in the cache
    if file_path in date_lookup_cache:
        # Debug info
        logging.debug("Retrieved date info for file %s from cache.", file_path)
        (git_creation_date, git_modification_date) = date_lookup_cache[file_path]
    else:
        # Getting the last modification date of the rule file from the git log
        # (this is not completely reliable, but better than nothing)
        (git_creation_date, git_modification_date) = get_rule_age_git(repo_path, file_path)
        if git_creation_date:
            # Add the date to the cache
            date_lookup_cache[file_path] = (git_creation_date, git_modification_date)

    # CREATION DATE -----------------------------------------------------------
    # We create a copy so that we can delete elements from the original
    meta_data_copy = rule_meta_data.copy()
    # Now we loop over the copy
    for meta_data in meta_data_copy:
        for key, value in meta_data.items():
            # If the key is in the list of possible date names, then we found the date
            if key in date_names:
                # dateparser only accepts strings, so other value types (e.g. an
                # integer timestamp) are handled like a value that cannot be parsed
                date_created = dateparser.parse(value) if isinstance(value, str) else None
                if date_created:
                    date_found = True
                    # Remove the date from the original meta data
                    rule_meta_data.remove(meta_data)
                    rule_meta_data.append({'date': date_created.strftime("%Y-%m-%d")})
                # If the date cannot be parsed, we remove the field, because it could cause confusion
                else:
                    logging.debug("The date '%s' could not be parsed. Removing the field.", value)
                    rule_meta_data.remove(meta_data)

    # If the date is not found, try to get it from any of the meta data fields
    if not date_found:
        # Check if we find the date in a different value by looking for fields that contain a date
        for meta_data in meta_data_copy:
            for key, value in meta_data.items():
                # Only string values can contain a date
                if not isinstance(value, str):
                    continue
                # We parse the value only once and reuse the result
                date_created = dateparser.parse(value)
                if date_created:
                    date_found = True
                    # Remove the date from the original meta data
                    rule_meta_data.remove(meta_data)
                    rule_meta_data.append({'date': date_created.strftime("%Y-%m-%d")})

    # If the date was still not found, we use the date from the git log
    if not date_found:
        # Add the date to the rule meta data
        rule_meta_data.append({'date': git_creation_date.strftime("%Y-%m-%d")})

    # MODIFICATION DATE -----------------------------------------------------------
    # We create a copy so that we can delete elements from the original
    meta_data_copy = rule_meta_data.copy()
    # Now we check for a modification date. We only keep values that could be
    # parsed, so that a later field that cannot be parsed doesn't overwrite a
    # date that we already found
    modified_value = None
    for meta_data in meta_data_copy:
        for key, value in meta_data.items():
            # If the key is in the list of possible date names, then we found the date
            if key in modified_names:
                # dateparser only accepts strings
                parsed_modified = dateparser.parse(value) if isinstance(value, str) else None
                if parsed_modified:
                    modified_value = parsed_modified
                    # Remove the date from the original meta data
                    rule_meta_data.remove(meta_data)

    if modified_value is not None:
        # The modified date was found and removed, add the new streamlined date value
        rule_meta_data.append({'modified': modified_value.strftime("%Y-%m-%d")})
    else:
        # The modified date was not found, we use the date from the git log
        rule_meta_data.append({'modified': git_modification_date.strftime("%Y-%m-%d")})

    return rule_meta_data
824 |
825 |
def get_rule_age_git(repo_path, file_path):
    """
    Get commit dates for the rule file from the git log.

    Returns a tuple (creation_date, modification_date) with the committed
    datetimes. If no commit is found for the file, both values are the
    committed datetime of a commit of the repository itself.
    """

    # Initialize the repository object
    repo = Repo(repo_path)

    logging.debug("Repo path '%s'", repo_path)
    logging.debug("Retrieving date info for file '%s' from git log.", file_path)

    # Ask for the commits that touched the file (limited to a single commit)
    file_commits = list(repo.iter_commits(paths=file_path, max_count=1))
    if not file_commits:
        print(f"No commits found for the file {file_path}.")
        # NOTE(review): described as the creation date of the repository, but with
        # max_count=1 this is presumably the most recent commit - TODO confirm
        repo_creation_date = list(repo.iter_commits(max_count=1))[-1].committed_datetime
        return (repo_creation_date, repo_creation_date)

    # NOTE(review): the list holds at most one commit, so the last and the first
    # element are the same commit and both dates are identical
    creation_date = file_commits[-1].committed_datetime
    modification_date = file_commits[0].committed_datetime
    logging.debug("Retrieved date info for file %s from git log. "
                  " Creation date: %s, Last modification: %s",
                  file_path, creation_date, modification_date)
    # The datetime objects are returned as they are, formatting is up to the caller
    return (creation_date, modification_date)
855 |
856 |
def generate_uuid_from_hash(hash):
    """
    Generate a UUID from a hash.

    The hash is used as the name of a version 5 UUID in the DNS namespace.
    Returns a uuid.UUID object.
    """
    namespace = uuid.NAMESPACE_DNS
    return uuid.uuid5(namespace, hash)
862 |
--------------------------------------------------------------------------------
/qa/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YARAHQ/yara-forge/641a61f4d694c8f6b378ca093165d62e42205688/qa/__init__.py
--------------------------------------------------------------------------------
/qa/rule_qa.py:
--------------------------------------------------------------------------------
1 | """
2 | This module contains functions for evaluating the quality of YARA rules.
3 |
4 | It includes functions for checking syntax issues and efficiency issues in YARA rules,
5 | and for reducing the quality score of a rule based on the issues found.
6 | """
7 |
8 | import logging
9 | import datetime
10 | import yaml
11 | import yara
12 | from plyara.utils import rebuild_yara_rule
13 | from qa.yaraQA.main.core import YaraQA
14 | from pprint import pprint
15 |
16 |
def evaluate_rules_quality(processed_yara_repos, config):
    """
    Evaluates the quality of YARA rules.

    For every rule in every rule set of every repository:
    - collect critical, syntax, efficiency and performance issues
    - assign each issue the score from config['issue_levels'] for its level
    - add the sum of these scores and the custom quality reduction to the
      'quality' value in the rule's meta data
    - set a custom 'score' in the meta data if one is configured
    - drop the rule from its rule set if it has critical issues

    All issues are written to a separate file via write_issues_to_file.

    Returns a shallow copy of `processed_yara_repos`. The repository, rule set
    and rule dictionaries are shared with the input and modified in place.
    """

    # Create a yaraQA object
    yara_qa = YaraQA()

    # Rule issues list
    repo_issues = {}

    # Create a copy of the repos to work with
    processed_yara_repos_copy = processed_yara_repos.copy()

    # Loop over the repositories
    for repo_rule_sets in processed_yara_repos_copy:
        # Analyze the rule sets
        logging.info("Evaluating rules from repository: %s", repo_rule_sets['name'])
        # Issue statistics (per repository)
        issue_statistics = {
            "issues_syntax": 0,
            "issues_efficiency": 0,
            "issues_performance": 0,
            "issues_critical": 0,
        }

        # Loop over the rule sets in the repository
        for rule_set in repo_rule_sets['rules_sets']:
            # The file path is passed as an argument so that it actually shows up
            # in the message
            logging.debug("Evaluating rules from rule set: %s", rule_set['file_path'])

            rules_without_errors = []

            # Now we do stuff with each rule
            for rule in rule_set['rules']:

                # Skip the rule if it has critical issues
                skip_rule = False

                # Analyze the rule syntax
                # - Critical errors
                # - Compile issues
                issues_critical = check_issues_critical(rule)
                # Rule has critical issues
                if issues_critical:
                    # Adding the values to the statistics
                    issue_statistics['issues_critical'] += len(issues_critical)
                    logging.warning("Rule %s has critical issues and cannot be used: %s",
                                    rule['rule_name'], issues_critical)
                    skip_rule = True

                # Analyze the rule syntax
                # - Syntactical issues
                # - Compile issues
                issues_syntax = check_syntax_issues(rule)
                # Print the issues if debug is enabled
                logging.debug("Evaluated rule %s syntax issues: %s",
                              rule['rule_name'], issues_syntax)

                # Analyze the rule quality
                # Checks for
                # - Performance impact issues (based on experience)
                # - Resource usage issues (based on experience)
                # - Logic flaws (based on experience)
                issues_efficiency = yara_qa.analyze_rule(rule)
                # Print the issues if debug is enabled
                logging.debug("Evaluated rule %s efficiency issues: %s",
                              rule['rule_name'], issues_efficiency)

                # Analyze the rule performance
                # Checks for
                # - Performance issues with live tests
                issues_performance = yara_qa.analyze_live_rule_performance(rule)
                # Add the values to the statistics
                issue_statistics['issues_performance'] += len(issues_performance)

                # Reduce the rule's quality score based on the levels of
                # the issues found in the rules
                issues = issues_syntax + issues_efficiency + issues_performance + issues_critical
                # Adding the values to the statistics
                issue_statistics['issues_syntax'] += len(issues_syntax)
                issue_statistics['issues_efficiency'] += len(issues_efficiency)
                # Loop over the issues
                for issue in issues:
                    issue['score'] = config['issue_levels'][issue['level']]
                # Calculate the total score
                total_quality_score = sum(issue['score'] for issue in issues)

                # Apply a custom quality reduction if the rule has shown to be
                # prone to false positives
                custom_quality_reduction = retrieve_custom_quality_reduction(rule)
                total_quality_score += custom_quality_reduction

                # Apply a custom score if the rule has shown to be
                # prone to false positives
                custom_score = retrieve_custom_score(rule)
                if custom_score:
                    modify_meta_data_value(rule['metadata'], 'score', custom_score)

                # Debug output report the total score of a rule
                logging.debug("Rule %s total quality score: %d",
                              rule['rule_name'], total_quality_score)

                # Add the total score to the rule's quality score
                rule['metadata'] = modify_yara_rule_quality(rule['metadata'], total_quality_score)

                # Add all issues to the big list of issues
                if repo_rule_sets['name'] in repo_issues:
                    repo_issues[repo_rule_sets['name']].extend(issues)
                else:
                    repo_issues[repo_rule_sets['name']] = issues

                # Add the rule to the list of rules without errors
                if not skip_rule:
                    rules_without_errors.append(rule)

            # Replace the rules in the rule set with the rules without errors
            rule_set['rules'] = rules_without_errors

        # Print the issues statistics
        logging.info("Issues statistics: %d syntax issues, %d efficiency issues, " +
                     "%d performance issues, %d critical issues",
                     issue_statistics['issues_syntax'],
                     issue_statistics['issues_efficiency'],
                     issue_statistics['issues_performance'],
                     issue_statistics['issues_critical'])

    # Log the issues found in the rules to a separate file
    write_issues_to_file(repo_issues)

    # Return the processed repos
    return processed_yara_repos_copy
146 |
147 |
def write_issues_to_file(rule_issues):
    """
    Writes the issues found in the rules to the file yara-forge-rule-issues.yml.

    `rule_issues` maps a repository name to the list of issues found in that
    repository. The file starts with a comment header (explanation of the
    severity levels, timestamp, statistics) followed by the issues as YAML.
    """
    with open("yara-forge-rule-issues.yml", "w", encoding="utf-8") as f:
        # Comment header that explains what the file contains
        header_lines = [
            "# This file contains the issues found in the YARA rules during the QA checks\n",
            "# The issues are grouped by repository\n",
            "# Important: remember that the issues have different severity levels (1-4)\n",
            "# - 1: only cosmetic or minor issues\n",
            "# - 2: issues that have a minor impact on performance / resource usage\n",
            "# - 3: issues that have a major impact on performance / resource usage and show a lack of care\n",
            "# - 4: issues that are critical; mostly it's a broken rule or rules that use external variables (not available in every tool)\n",
            # Timestamp and some statistics
            f"# Timestamp: {datetime.datetime.now()}\n",
            f"# Total number of issues: {sum(len(v) for v in rule_issues.values())}\n",
            f"# Total number of repositories: {len(rule_issues)}\n",
        ]
        for header_line in header_lines:
            f.write(header_line)
        # The issues themselves, in the order in which they were collected
        yaml.dump(rule_issues, f, sort_keys=False, allow_unicode=True)
168 |
169 |
def retrieve_custom_quality_reduction(rule):
    """
    Retrieves a custom quality score reduction for a rule.

    The 'noisy-rules' entries in yara-forge-custom-scoring.yml are checked in
    order. An entry applies if its name is equal to the rule name, or if it
    has the type 'prefix' and the rule name starts with its name. The
    'quality' value of the first applying entry that has one is returned.
    Returns 0 if there is no such entry.
    """
    # Read the scores from the YAML file named yara-forge-custom-scoring.yml
    with open('yara-forge-custom-scoring.yml', 'r', encoding='utf-8') as f:
        custom_scoring = yaml.safe_load(f)

    for entry in custom_scoring['noisy-rules']:
        # Exact match of the rule name or prefix match for entries of type 'prefix'
        applies = entry['name'] == rule['rule_name'] or (
            entry.get('type') == 'prefix'
            and rule['rule_name'].startswith(entry['name']))
        if applies and 'quality' in entry:
            # Return the score reduction
            return entry['quality']
    return 0
192 |
193 |
def retrieve_custom_score(rule):
    """
    Retrieves a custom score for a rule.

    The 'noisy-rules' entries in yara-forge-custom-scoring.yml are checked in
    order. An entry applies if its name is equal to the rule name, or if it
    has the type 'prefix' and the rule name starts with its name. The 'score'
    value of the first applying entry that has one is returned.
    Returns None if there is no such entry.
    """
    # Read the scores from the YAML file named yara-forge-custom-scoring.yml
    with open('yara-forge-custom-scoring.yml', 'r', encoding='utf-8') as f:
        custom_scoring = yaml.safe_load(f)

    for entry in custom_scoring['noisy-rules']:
        # Exact match of the rule name or prefix match for entries of type 'prefix'
        applies = entry['name'] == rule['rule_name'] or (
            entry.get('type') == 'prefix'
            and rule['rule_name'].startswith(entry['name']))
        if applies and 'score' in entry:
            # Return the custom score
            return entry['score']
    return None
216 |
217 |
218 |
219 |
def check_syntax_issues(rule):
    """
    Checks for syntax issues in a YARA rule.

    The rule is rebuilt as text (with the private rules it uses in front of
    it) and compiled with warnings treated as errors. Returns a list that
    contains one issue of level 3 if the compilation raised an exception and
    an empty list otherwise.
    """
    # The private rules that the rule requires have to come first
    private_rules_source = "".join(
        rebuild_yara_rule(priv_rule["rule"]) + "\n"
        for priv_rule in rule.get('private_rules_used', [])
    )

    # Get the serialized rule from the plyara object
    yara_rule_string = private_rules_source + rebuild_yara_rule(rule)

    # Syntax issues list
    found_issues = []
    try:
        # Compile the rule and let warnings fail the compilation
        yara.compile(source=yara_rule_string, error_on_warning=True)
    except Exception as e:
        found_issues.append({
            "rule": rule['rule_name'],
            "id": "SI2",
            "issue": "The rule didn't compile without issues",
            # NOTE(review): this is a set with a single string, not a dictionary
            "element": {f"Error: {e}"},
            "level": 3,
            "type": "logic",
            "recommendation": "Check the warning message and fix the rule syntax",
        })
    return found_issues
254 |
255 |
def check_issues_critical(rule):
    """
    Checks for critical syntax issues in a YARA rule.

    The rule is rebuilt as text (with the private rules it uses in front of
    it) and compiled. Returns a list that contains one issue of level 4 if the
    compilation raised an exception and an empty list otherwise.
    """
    # The private rules that the rule requires have to come first
    private_rules_source = "".join(
        rebuild_yara_rule(priv_rule["rule"]) + "\n"
        for priv_rule in rule.get('private_rules_used', [])
    )

    # Get the serialized rule from the plyara object
    yara_rule_string = private_rules_source + rebuild_yara_rule(rule)

    # Critical issues list
    found_issues = []
    try:
        # Compile the rule, only errors fail the compilation
        yara.compile(source=yara_rule_string)
    except Exception as e:
        found_issues.append({
            "rule": rule['rule_name'],
            "id": "SI1",
            "issue": "The rule didn't compile without errors",
            # NOTE(review): this is a set with a single string, not a dictionary
            "element": {f"Error: {e}"},
            "level": 4,
            "type": "logic",
            "recommendation": "Fix the rule syntax and try again",
        })
        # Log the rule text to make the error easier to track down
        logging.debug("Rule %s has critical issues and cannot be used: %s",
                      rule['rule_name'], yara_rule_string)
    return found_issues
291 |
292 |
def modify_yara_rule_quality(rule_meta_data, reduction_value):
    """
    Modifies the quality score of a YARA rule.

    `reduction_value` is added to the 'quality' value of the first meta data
    entry that has one (in place) and a shallow copy of the meta data list is
    returned. If no entry has a 'quality' value, the list is returned
    unchanged.
    """
    for entry in rule_meta_data:
        # Only the first entry with a quality value is modified
        if "quality" in entry:
            entry["quality"] += reduction_value
            # The copy shares its dictionaries with the original list
            return rule_meta_data.copy()
    return rule_meta_data
307 |
308 |
def check_yara_packages(repo_files):
    """
    Checks the YARA packages for errors.

    Compiles the file of every package in `repo_files`. Returns False as soon
    as one of them doesn't compile and True if all of them compile (or if the
    list is empty).
    """
    def _compiles(package):
        # Compile the rule set and log the error if that fails
        try:
            yara.compile(filepath=package['file_path'])
        except Exception as e:
            logging.error("The rule set didn't compile without errors: %s", e)
            return False
        return True

    for package in repo_files:
        logging.info("Checking YARA package '%s' in file: %s",
                     package['name'], package['file_path'])
        # We stop at the first package with errors
        if not _compiles(package):
            return False
    return True
325 |
326 |
def get_yara_qa_commit_hash():
    """
    Returns the commit hash of the last commit of the YARA QA sub repository.

    The hash is read from the 'main' branch reference file of the sub module
    in the .git directory (relative to the current working directory).
    Returns "unknown" if the file cannot be read.
    """
    ref_file = ".git/modules/qa/yaraQA/refs/heads/main"
    try:
        with open(ref_file, "r", encoding="utf-8") as f:
            commit_hash = f.read().strip()
    except Exception as e:
        logging.warning("Couldn't get the commit hash of the YARA QA repository: %s", e)
        return "unknown"
    return commit_hash
338 |
339 |
def modify_meta_data_value(rule_meta_data, key, value):
    """
    Modify a value in the meta data, if it exists, otherwise add it

    The meta data is a list of dictionaries. The first dictionary that
    contains `key` is updated in place. If no dictionary contains `key`,
    `{key: value}` is appended to the list.

    Returns the meta data list in both cases.
    """
    for mdata in rule_meta_data:
        # If the key is in the meta data, then we modify it
        if key in mdata:
            mdata[key] = value
            # We return the list and not just the modified entry, so that the
            # return value has the same shape as in the "not found" case
            return rule_meta_data
    # If the key is not in the meta data, then we add it
    rule_meta_data.append({key: value})
    return rule_meta_data
356 |
357 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | plyara==2.2.1
2 | requests
3 | dateparser
4 | yara-python
5 | PyYAML
6 | gitpython
7 | git+https://github.com/facebook/pyre2.git@main#egg=fb-re2
8 |
--------------------------------------------------------------------------------
/tests/test_rule_collector.py:
--------------------------------------------------------------------------------
1 | """
2 | Test the rule collector.
3 | """
4 | import unittest
5 | from main.rule_collector import retrieve_yara_rule_sets
6 |
7 |
class TestRuleCollector(unittest.TestCase):
    """
    Tests for the rule collector.
    """
    def test_retrieve_yara_rule_sets(self):
        """
        Collect the rules of a single repository and check the result.
        """
        # A single repository definition as it would appear in the config
        # NOTE(review): the expected counts below presumably depend on the current
        # content of that repository - verify when the test fails
        test_repo = {
            'name': 'test',
            'author': 'test',
            'url': 'https://github.com/Neo23x0/YARA-Style-Guide',
            'branch': 'master',
            'quality': 90,
        }

        # Call the function
        collected = retrieve_yara_rule_sets('./repos', [test_repo])

        # One repository with six rule sets, the first of them with two rules
        self.assertEqual(len(collected), 1)
        first_repo = collected[0]
        self.assertEqual(first_repo['name'], 'test')
        self.assertEqual(len(first_repo['rules_sets']), 6)
        self.assertEqual(len(first_repo['rules_sets'][0]['rules']), 2)
28 |
29 |
30 | if __name__ == '__main__':
31 | unittest.main()
--------------------------------------------------------------------------------
/yara-forge-config.yml:
--------------------------------------------------------------------------------
1 | # Collection -------------------------------------------------------------------
2 | repo_staging_dir: "./repos"
3 | yara_repositories:
4 | - name: "ReversingLabs"
5 | url: "https://github.com/reversinglabs/reversinglabs-yara-rules/"
6 | author: "ReversingLabs"
7 | quality: 90
8 | branch: "develop"
9 | # NOTE: Elastic repository currently disabled due to licensing concerns.
10 | # The Elastic License v2 restricts redistribution via hosted services or products,
11 | # which conflicts with how YARA Forge packages and delivers rule sets.
12 | # To respect the terms of their license, this source is excluded for now.
13 | # Hopefully, this can be revisited if the licensing situation changes.
14 | # See: https://github.com/elastic/protections-artifacts/issues/4
15 | #- name: "Elastic"
16 | # url: "https://github.com/elastic/protections-artifacts/"
17 | # author: "Elastic"
18 | # quality: 75
19 | # branch: "main"
20 | - name: "R3c0nst"
21 | url: "https://github.com/fboldewin/YARA-rules/"
22 | author: "Frank Boldewin"
23 | quality: 90
24 | branch: "master"
25 | - name: "CAPE"
26 | url: "https://github.com/kevoreilly/CAPEv2"
27 | author: "Kevin O'Reilly"
28 | quality: 70
29 | branch: "master"
30 | - name: "BinaryAlert"
31 | url: "https://github.com/airbnb/binaryalert/"
32 | author: "Airbnb"
33 | quality: 80
34 | branch: "master"
35 | path: "rules/public"
36 | - name: "DeadBits"
37 | url: "https://github.com/deadbits/yara-rules/"
38 | author: "Adam Swanda"
39 | quality: 80
40 | branch: "master"
41 | path: "rules"
42 | - name: "DelivrTo"
43 | url: "https://github.com/delivr-to/detections"
44 | author: "Delivr.To"
45 | quality: 80
46 | branch: "main"
47 | path: "yara-rules"
48 | - name: "ESET"
49 | url: "https://github.com/eset/malware-ioc"
50 | author: "ESET TI"
51 | quality: 80
52 | branch: "master"
53 | - name: "FireEye-RT"
54 | url: "https://github.com/mandiant/red_team_tool_countermeasures/"
55 | author: "FireEye"
56 | quality: 75
57 | branch: "master"
58 | path: "rules"
59 | - name: "GCTI"
60 | url: "https://github.com/chronicle/GCTI"
61 | author: "gssincla@google.com"
62 | quality: 85
63 | branch: "main"
64 | path: "YARA"
65 | - name: "Malpedia"
66 | url: "https://github.com/malpedia/signator-rules/"
67 | author: "Felix Bilstein - yara-signator at cocacoding dot com"
68 | quality: 75
69 | branch: "main"
70 | path: "rules"
71 | - name: "Trellix ARC"
72 | url: "https://github.com/advanced-threat-research/Yara-Rules/"
73 | author: "Trellix ARC Team"
74 | quality: 70
75 | branch: "master"
76 | - name: "Arkbird SOLG"
77 | url: "https://github.com/StrangerealIntel/DailyIOC"
78 | author: "Arkbird_SOLG"
79 | quality: 75
80 | branch: "master"
81 | - name: "Telekom Security"
82 | url: "https://github.com/telekom-security/malware_analysis/"
83 | author: "Telekom Security"
84 | quality: 70
85 | branch: "main"
86 | - name: "Volexity"
87 | url: "https://github.com/volexity/threat-intel"
88 | author: "threatintel@volexity.com"
89 | quality: 80
90 | branch: "main"
91 | - name: "JPCERTCC"
92 | url: "https://github.com/JPCERTCC/MalConfScan/"
93 | author: "JPCERT/CC Incident Response Group"
94 | quality: 80
95 | branch: "master"
96 | path: "yara"
97 | - name: "SecuInfra"
98 | url: "https://github.com/SIFalcon/Detection"
99 | author: "SIFalcon"
100 | quality: 70
101 | branch: "main"
102 | path: "Yara"
103 | - name: "RussianPanda"
104 | url: "https://github.com/RussianPanda95/Yara-Rules"
105 | author: "RussianPanda"
106 | quality: 85
107 | branch: "main"
108 |
109 | # Open Source YARA Rules collection by Michael Worth
110 | - name: "CadoSecurity"
111 | url: "https://github.com/mikesxrs/Open-Source-YARA-rules"
112 | author: "CadoSecurity"
113 | quality: 80
114 | branch: "master"
115 | path: "Cado%20Security"
116 | - name: "Check Point"
117 | url: "https://github.com/mikesxrs/Open-Source-YARA-rules"
118 | author: "CheckPoint Research"
119 | quality: 85
120 | branch: "master"
121 | path: "Checkpoint"
122 | - name: "BlackBerry"
123 | url: "https://github.com/mikesxrs/Open-Source-YARA-rules"
124 | author: "BlackBerry Threat Research Team"
125 | quality: 85
126 | branch: "master"
127 | path: "BlackBerry"
128 | - name: "Cluster25"
129 | url: "https://github.com/mikesxrs/Open-Source-YARA-rules"
130 | author: "Cluster25"
131 | quality: 75
132 | branch: "master"
133 | path: "Cluster%2025"
134 | - name: "Dragon Threat Labs"
135 | url: "https://github.com/mikesxrs/Open-Source-YARA-rules"
136 | author: "@dragonthreatlab"
137 | quality: 80
138 | branch: "master"
139 | path: "Dragonthreatlabs"
140 | - name: "Microsoft"
141 | url: "https://github.com/mikesxrs/Open-Source-YARA-rules"
142 | author: "Microsoft"
143 | quality: 80
144 | branch: "master"
145 | path: "Microsoft"
146 | - name: "NCSC"
147 | url: "https://github.com/mikesxrs/Open-Source-YARA-rules"
148 | author: "NCSC"
149 | quality: 80
150 | branch: "master"
151 | path: "NCSC"
152 |
153 | # Repos added 23.12.2023
154 | - name: "Dr4k0nia"
155 | url: "https://github.com/dr4k0nia/yara-rules"
156 | author: "Dr4k0nia"
157 | quality: 85
158 | branch: "main"
159 | - name: "EmbeeResearch"
160 | url: "https://github.com/embee-research/Yara-detection-rules/"
161 | author: "Matthew Brennan"
162 | quality: 75
163 | branch: "main"
164 | - name: "AvastTI"
165 | url: "https://github.com/avast/ioc"
166 | author: "Avast Threat Intel Team"
167 | quality: 90
168 | branch: "master"
169 | - name: "SBousseaden"
170 | url: "https://github.com/sbousseaden/YaraHunts/"
171 | author: "SBousseaden"
172 | quality: 75
173 | branch: "master"
174 | - name: "Elceef"
175 | url: "https://github.com/elceef/yara-rulz"
176 | author: "marcin@ulikowski.pl"
177 | quality: 75
178 | branch: "main"
179 |
180 | # Repos added 24.12.2023
181 | - name: "GodModeRules"
182 | url: "https://github.com/Neo23x0/god-mode-rules/"
183 | author: "Florian Roth"
184 | quality: 85
185 | branch: "master"
186 |
187 | # Repos added 05.01.2024
188 | - name: "Cod3nym"
189 | url: "https://github.com/cod3nym/detection-rules/"
190 | author: "Jonathan Peters"
191 | quality: 80
192 | branch: "main"
193 |
194 | # Repos added 11.01.2024
195 | - name: "craiu"
196 | url: "https://github.com/craiu/yararules"
197 | author: "Costin G. Raiu, Art of Noh, craiu@noh.ro"
198 | quality: 85
199 | branch: "master"
200 |
201 | # Repos added 25.01.2024
202 | - name: "DitekSHen"
203 | url: "https://github.com/ditekshen/detection"
204 | author: "ditekshen"
205 | quality: 75
206 | branch: "master"
207 | path: "yara"
208 |
209 | # Repos added 17.04.2024
210 | - name: "WithSecureLabs"
211 | url: "https://github.com/WithSecureLabs/iocs"
212 | author: "WithSecureLabs"
213 | quality: 75
214 | branch: "master"
215 |
216 | # Repos added 21.04.2024
217 | - name: "HarfangLab"
218 | url: "https://github.com/HarfangLab/iocs"
219 | author: "HarfangLab"
220 | quality: 80
221 | branch: "main"
222 |
223 | # Repos added 06.08.2024
224 | - name: "LOLDrivers"
225 | url: "https://github.com/magicsword-io/LOLDrivers/"
226 | author: "LOLDrivers"
227 | quality: 80
228 | branch: "main"
229 | recursive: false
230 | path: 'detections/yara'
231 |
232 | # Repos added 22.12.2024
233 | - name: "SEKOIA"
234 | url: "https://github.com/SEKOIA-IO/Community"
235 | author: "Sekoia.io"
236 | quality: 80
237 | branch: "main"
238 | recursive: false
239 | path: 'yara_rules'
240 |
241 | # My own YARA rule collection used for our free scanners
242 | - name: "Signature Base"
243 | url: "https://github.com/Neo23x0/signature-base"
244 | author: "Florian Roth"
245 | quality: 85
246 | branch: "master"
247 | path: "yara"
248 |
249 | # Rule Processing --------------------------------------------------------------
250 | rule_base_score: 75
251 |
252 | # Quality Checks ---------------------------------------------------------------
253 | # Explanations for the different issue levels used in the rule quality analysis
254 | # Level 1 - cosmetic issues with the rule
255 | # Level 2 - minor issues with the rule
256 | # Level 3 - major issues with the rule
257 | # Level 4 - critical issues with the rule
258 |
259 | # Levels and quality score reduction
260 | issue_levels:
261 | 1: -2
262 | 2: -25
263 | 3: -70
264 | 4: -1000
265 |
266 | # Default quality of rules
267 | # 0 - no quality
268 | # 100 - high quality
269 | #default_quality: 75 # not used anymore
270 |
271 | # Fixed order in the meta data values
272 | meta_data_order:
273 | - description
274 | - author
275 | - id
276 | - date
277 | - modified
278 | - old_rule_name
279 | - reference
280 | - source_url
281 | - license_url
282 | - hash
283 | - logic_hash
284 | - score
285 | - quality
286 | - tags
287 |
288 | # Rule Package Output ----------------------------------------------------------
289 | yara_rule_packages:
290 | - name: "core"
291 | description: "Default YARA Rule Package - Core"
292 | minimum_quality: 70
293 | force_include_importance_level: 80
294 | force_exclude_importance_level: 50
295 | minimum_age: 1
296 | minimum_score: 65
297 | max_age: 2500 # ~ 7 years
298 | - name: "extended"
299 | description: "Default YARA Rule Package - Extended"
300 | minimum_quality: 50
301 | force_include_importance_level: 70
302 | force_exclude_importance_level: 20
303 | minimum_age: 1
304 | minimum_score: 60
305 | max_age: 5000 # ~ 14 years
306 | - name: "full"
307 | description: "Default YARA Rule Package - Full"
308 | minimum_quality: 20
309 | force_include_importance_level: 50
310 | force_exclude_importance_level: 0
311 | minimum_age: 0
312 | minimum_score: 40
313 | max_age: 10000 # ~ 27 years
314 |
315 | rule_set_header: |
316 | /*
317 | * YARA-Forge YARA Rule Package
318 | * https://github.com/YARAHQ/yara-forge
319 | *
320 | * Rule Package Information
321 | * Name: {rule_package_name}
322 | * Description: {rule_package_description}
323 | * YARA-Forge Version: {program_version}
324 | * YARA-QA Commit: {yaraqa_commit}
325 | * Minimum Quality: {rule_package_minimum_quality}
326 | * Force Include Importance Level: {rule_package_force_include_importance_level}
327 | * Force Exclude Importance Level: {rule_package_force_exclude_importance_level}
328 | * Minimum Age (in days): {rule_package_minimum_age}
329 | * Minimum Score: {rule_package_minimum_score}
330 | * Creation Date: {retrieval_date}
331 | * Number of Rules: {total_rules}
332 | * Skipped: {total_rules_skipped_age} (age), {total_rules_skipped_quality} (quality), {total_rules_skipped_score} (score), {total_rules_skipped_importance} (importance)
333 | */
334 |
335 | repo_header: |
336 | /*
337 | * YARA Rule Set
338 | * Repository Name: {repo_name}
339 | * Repository: {repo_url}
340 | * Retrieval Date: {retrieval_date}
341 | * Git Commit: {repo_commit}
342 | * Number of Rules: {total_rules}
343 | * Skipped: {total_rules_skipped_age} (age), {total_rules_skipped_quality} (quality), {total_rules_skipped_score} (score), {total_rules_skipped_importance} (importance)
344 | *
345 | *
346 | * LICENSE
347 | *
348 | * {repo_license}
349 | */
350 |
--------------------------------------------------------------------------------
/yara-forge-custom-scoring.yml:
--------------------------------------------------------------------------------
1 | # Description: Custom scoring configuration file for YARA rules
2 |
3 | # Importance score configuration
4 | # repo - the name of the repository used in the configuration - checked as equals
5 | # file - the name of the file used in the configuration - checked as endswith
6 | # name - the name of the rule used in the configuration - checked as startswith
7 | # importance - the importance score of the rule
8 | importance-scores:
9 | - rule:
10 | repo: "ReversingLabs"
11 | importance: 25
12 | - rule:
13 | repo: "Signature Base"
14 | file: "gen_webshells.yar"
15 | importance: 70
16 | - rule:
17 | repo: "YARA Style Guide"
18 | file: "gen_webshells.yar"
19 | importance: 100
20 | - rule:
21 | repo: "GodModeRules"
22 | importance: 60
23 | - rule:
24 | repo: "DitekSHen"
25 | file: "indicator_packed.yar"
26 | importance: 20
27 | - rule:
28 | repo: "DitekSHen"
29 | file: "indicator_suspicious.yar"
30 | importance: 20
31 | - rule:
32 | repo: "DitekSHen"
33 | file: "indicator_knownbad_certs.yar"
34 | importance: 20
35 |
36 | # FALSE POSITIVES
37 | # Rules that are prone to false positives
38 | # The level of the value indicates the amount of false positives caused by the rule
39 | noisy-rules:
40 | # CAPE
41 | - name: "CAPE_Cobaltstrikebeacon"
42 | quality: -90
43 | - name: "CAPE_Emotet"
44 | quality: -100
45 | - name: "CAPE_Pafish"
46 | quality: -60
47 | - name: "CAPE_Bumblebeeloader"
48 | quality: -80
49 | - name: "CAPE_Bumblebeeshellcode"
50 | quality: -80
51 | - name: "CAPE_Heavensgate"
52 | quality: -80
53 | - name: "CAPE_Origin"
54 | quality: -80
55 | - name: "CAPE_Qakbot"
56 | quality: -50
57 | - name: "CAPE_Agentteslaxor"
58 | quality: -50
59 | - name: "CAPE_UPX"
60 | quality: -40
61 | score: 30
62 | - name: "CAPE_NSIS"
63 | quality: -40
64 | score: 30
65 | - name: "CAPE_Syscall"
66 | quality: -40
67 | score: 30
68 | - name: "CAPE_Sparkrat"
69 | quality: -80
70 |
71 | # Elastic
72 | - name: "ELASTIC_Multi_EICAR_Ac8F42D6"
73 | quality: -50
74 | - name: "ELASTIC_Linux_Trojan_Torii_Fa253F2A"
75 | quality: -80
76 | - name: "ELASTIC_Windows_Hacktool_Cheatengine_Fedac96D"
77 | quality: -40
78 | - name: "ELASTIC_Linux_Worm_Generic_98Efcd38"
79 | quality: -50
80 | score: 60
81 | - name: "ELASTIC_Linux_Trojan_Torii_Fa253F2A" # NOTE(review): also listed earlier in this section with quality -80 - keep only one entry
82 | quality: -50
83 | - name: "ELASTIC_Linux_Trojan_Iroffer_53692410"
84 | quality: -50
85 | score: 60
86 | - name: "ELASTIC_Linux_Trojan_Iroffer_013E07De"
87 | quality: -50
88 | score: 60
89 | - name: "ELASTIC_Windows_Trojan_Bruteratel_Ade6C9D5"
90 | quality: -30
91 | score: 60
92 | - name: "ELASTIC_Linux_Hacktool_Earthworm_E3Da43E2"
93 | quality: -30
94 | score: 60
95 | - name: "ELASTIC_Linux_Exploit_Lotoor_F8E9F93C"
96 | quality: -80
97 | - name: "ELASTIC_Linux_Trojan_Generic_D3Fe3Fae"
98 | quality: -30
99 | score: 60
100 | - name: "ELASTIC_Windows_Vulndriver_Iqvw_B8B45E6B"
101 | quality: -20
102 | score: 60
103 | - name: "ELASTIC_Windows_Vulndriver_Amifldrv_E387D5Ad"
104 | quality: -20
105 | score: 60
106 | - name: "ELASTIC_Linux_Hacktool_Earthworm_82D5C4Cf"
107 | quality: -30
108 | score: 60
109 | - name: "ELASTIC_Linux_Hacktool_Flooder_678C1145"
110 | quality: -30
111 | score: 60
112 | - name: "ELASTIC_Linux_Trojan_Chinaz_A2140Ca1"
113 | quality: -30
114 | score: 60
115 | - name: "ELASTIC_Linux_Trojan_Gafgyt_94A44Aa5"
116 | quality: -30
117 | score: 60
118 | - name: "ELASTIC_Linux_Trojan_Tsunami_47F93Be2"
119 | quality: -70
120 | score: 60
121 | - name: "ELASTIC_Linux_Exploit_Dirtycow_8555F149"
122 | quality: -80
123 |
124 | # FireEye
125 | - name: "FIREEYE_RT_Hunting_Dotnettojscript_Functions"
126 | quality: -80
127 | - name: "FIREEYE_RT_Hunting_LNK_Win_Genericlauncher"
128 | score: 60
129 | - name: "FIREEYE_RT_APT_Backdoor_Win_Dshell_2"
130 | quality: -30
131 | score: 60
132 |
133 | # Trellix / McAfee
134 | - name: "MCAFEE_ATR_Vbs_Mykins_Botnet"
135 | quality: -30
136 | score: 60
137 | - name: "TRELLIX_ARC_Vbs_Mykins_Botnet"
138 | quality: -30
139 | score: 60
140 |
141 | # Telekom Security
142 | - name: "TELEKOM_SECURITY_Allow_Rdp_Session_Without_Password"
143 | quality: -60
144 | - name: "TELEKOM_SECURITY_Cn_Utf8_Windows_Terminal"
145 | quality: -50
146 | - name: "TELEKOM_SECURITY_Potential_Termserv_Dll_Replacement"
147 | quality: -70
148 | - name: "TELEKOM_SECURITY_Rdp_Change_Port_Number"
149 | quality: -60
150 | - name: "TELEKOM_SECURITY_Rdp_Enable_Multiple_Sessions"
151 | quality: -60
152 | score: 60
153 | - name: "TELEKOM_SECURITY_Cn_Utf8_Windows_Terminal" # NOTE(review): also listed earlier in this section with quality -50 and no score - keep only one entry
154 | quality: -90
155 | score: 40
156 |
157 | # ESET
158 | - name: "ESET_Skip20_Sqllang_Hook"
159 | quality: -80
160 | - name: "ESET_Turla_Outlook_Pdf"
161 | quality: -60
162 | score: 60
163 |
164 | # Arkbird SOLG
165 | - name: "ARKBIRD_SOLG_APT_Lazarus_Loader_Dec_2020_1"
166 | quality: -70
167 | - name: "ARKBIRD_SOLG_APT_Dustsquad_PE_Nov19_1"
168 | quality: -70
169 | - name: "ARKBIRD_SOLG_APT_Dustsquad_PE_Nov19_2"
170 | quality: -70
171 | - name: "ARKBIRD_SOLG_SR_APT_Dustsquad_PE_Nov19"
172 | quality: -70
173 | - name: "ARKBIRD_SOLG_APT_APT27_Hyperbro_Apr_2021_1"
174 | quality: -40
175 | - name: "ARKBIRD_SOLG_Loader_Buer_Nov_2020_1" # strings with too many matches in large files
176 | quality: -20
177 | - name: "ARKBIRD_SOLG_Ins_NSIS_Buer_Nov_2020_1"
178 | quality: -60
179 | - name: "ARKBIRD_SOLG_Ins_NSIS_Buer_Nov_2020_2"
180 | quality: -60
181 | - name: "ARKBIRD_SOLG_APT_APT28_Zekapab_Mar_2021_1"
182 | quality: -40
183 | score: 60
184 | - name: "ARKBIRD_SOLG_RAN_ELF_Hive_Oct_2021_1"
185 | quality: -40
186 | score: 60
187 |
188 | # Malpedia
189 | - name: "MALPEDIA_Win_Xtunnel_Auto"
190 | quality: -60
191 | - name: "MALPEDIA_Win_Telebot_Auto"
192 | quality: -90
193 | - name: "MALPEDIA_Win_Flawedammyy_Auto"
194 | quality: -40
195 | - name: "MALPEDIA_Win_Hookinjex_Auto"
196 | quality: -50
197 | - name: "MALPEDIA_Win_R980_Auto"
198 | quality: -30
199 | - name: "MALPEDIA_Win_Velso_Auto"
200 | quality: -60
201 | - name: "MALPEDIA_Win_Rdat_Auto"
202 | quality: -30
203 | score: 60
204 | - name: "MALPEDIA_Win_Qtbot_Auto"
205 | quality: -50
206 | score: 60
207 | - name: "MALPEDIA_Win_Strongpity_Auto"
208 | quality: -30
209 | score: 60
210 | - name: "MALPEDIA_Win_Synccrypt_Auto"
211 | quality: -30
212 | - name: "MALPEDIA_Win_Lorenz_Auto"
213 | quality: -30
214 | score: 60
215 | - name: "MALPEDIA_Win_Gauss_Auto"
216 | quality: -60
217 | score: 60
218 | - name: "MALPEDIA_Win_Kleptoparasite_Stealer_Auto"
219 | quality: -40
220 | score: 60
221 | - name: "MALPEDIA_Elf_Gobrat_Auto"
222 | quality: -40
223 | score: 60
224 | - name: "MALPEDIA_Win_Neutrino_Auto"
225 | quality: -30
226 | score: 60
227 | - name: "MALPEDIA_Win_Alina_Pos_Auto"
228 | quality: -60
229 | score: 60
230 | - name: "MALPEDIA_Elf_Blackcat_Auto"
231 | quality: -30
232 | score: 60
233 | - name: "MALPEDIA_Win_Valley_Rat_Auto"
234 | quality: -30
235 | score: 60
236 | - name: "MALPEDIA_Win_Epsilon_Red_Auto"
237 | quality: -60
238 | score: 60
239 | - name: "MALPEDIA_Win_Hookinjex_Auto" # NOTE(review): also listed earlier in this section (same quality, no score) - merge into one entry
240 | quality: -50
241 | score: 60
242 | - name: "MALPEDIA_Win_Rektloader_Auto"
243 | quality: -70
244 | score: 60
245 | - name: "MALPEDIA_Win_Xfilesstealer_Auto"
246 | quality: -60
247 | score: 60
248 | - name: "MALPEDIA_Win_Magic_Rat_Auto"
249 | quality: -30
250 | score: 60
251 | - name: "MALPEDIA_Win_Infy_Auto"
252 | quality: -30
253 | score: 60
254 | - name: "MALPEDIA_Win_Goldbackdoor_Auto"
255 | quality: -60
256 | score: 60
257 | - name: "MALPEDIA_Win_Blister_Auto"
258 | quality: -50
259 | score: 60
260 | - name: "MALPEDIA_Win_Aresloader_Auto"
261 | quality: -50
262 | score: 60
263 | - name: "MALPEDIA_Win_Confucius_Auto"
264 | quality: -60
265 | score: 60
266 | - name: "MALPEDIA_Win_Blacksoul_Auto"
267 | quality: -60
268 | score: 60
269 | - name: "MALPEDIA_Win_Winmm_Auto"
270 | quality: -40
271 | score: 60
272 | - name: "MALPEDIA_Win_Blacksuit_Auto"
273 | quality: -60
274 | score: 60
275 | - name: "MALPEDIA_Win_Subzero_Auto"
276 | quality: -60
277 | score: 60
278 | - name: "MALPEDIA_Win_Xorist_Auto"
279 | quality: -60
280 | score: 60
281 | - name: "MALPEDIA_Win_Unidentified_090_Auto"
282 | quality: -60
283 | score: 60
284 | - name: "MALPEDIA_Win_Maze_Auto" # $sequence_8 = { 41 41 41 41 41 41 41 }
285 | quality: -100
286 | score: 60
287 |
288 | # Signature Base
289 | - name: "SIGNATURE_BASE_Cobaltstrike_C2_Host_Indicator"
290 | quality: -20
291 | score: 60
292 | - name: "SIGNATURE_BASE_APT_Lazarus_Dropper_Jun18_1" # strings with too many matches in large files
293 | quality: -20
294 | score: 60
295 | - name: "SIGNATURE_BASE_M_APT_VIRTUALPITA_1"
296 | quality: -40
297 | score: 60
298 | - name: "SIGNATURE_BASE_Hdconfig"
299 | quality: -30
300 | score: 60
301 |
302 | # DeadBits
303 | - name: "DEADBITS_APT32_Kerrdown"
304 | quality: -100
305 | - name: "DEADBITS_Glupteba"
306 | quality: -70
307 |
308 | # BinaryAlert
309 | - name: "BINARYALERT_Ransomware_Windows_Wannacry"
310 | quality: -30
311 | - name: "BINARYALERT_Eicar_Substring_Test"
312 | quality: -40
313 | score: 50
314 |
315 | # JPCERT
316 | - name: "JPCERTCC_Plugx"
317 | quality: -70
318 | score: 60
319 | - name: "JPCERTCC_Ursnif"
320 | quality: -70
321 | score: 60
322 | - name: "JPCERTCC_Ursnif_1"
323 | quality: -20
324 | score: 60
325 | - name: "JPCERTCC_Cobaltstrike"
326 | quality: -70
327 | score: 60
328 | - name: "JPCERTCC_Datper"
329 | quality: -50
330 | score: 60
331 |
332 | # BinaryAlert (continued)
333 | - name: "BINARYALERT_Hacktool_Windows_Mimikatz_Modules"
334 | quality: -70
335 | score: 60
336 |
337 | # Delivr.to
338 | - name: "DELIVRTO_SUSP_SVG_Onload_Onerror_Jul23"
339 | quality: -100
340 | - name: "DELIVRTO_SUSP_HTML_B64_WASM_Blob"
341 | quality: -80
342 |
343 | # SecuInfra
344 | - name: "SECUINFRA_OBFUS_Powershell_Common_Replace"
345 | quality: -100
346 | - name: "SECUINFRA_SUSP_Websites"
347 | quality: -80
348 | - name: "SECUINFRA_SUSP_EXE_In_ISO"
349 | quality: -80
350 | - name: "SECUINFRA_SUSP_VBS_In_ISO"
351 | quality: -60
352 | - name: "SECUINFRA_APT_Bitter_Zxxz_Downloader"
353 | quality: -50
354 | - name: "SECUINFRA_SUSP_Reverse_DOS_Header"
355 | quality: -80
356 | - name: "SECUINFRA_SUSP_LNK_Powershell"
357 | quality: -80
358 | score: 40
359 | - name: "SECUINFRA_SUSP_LNK_CMD"
360 | quality: -80
361 | score: 40
362 | - name: "SECUINFRA_SUSP_Ngrok_URL"
363 | quality: -80
364 | score: 40
365 | - name: "SECUINFRA_SUSP_Powershell_Base64_Decode"
366 | quality: -20
367 | score: 60
368 | - name: "SECUINFRA_SUSP_Scheduled_Tasks_Create_From_Susp_Dir"
369 | quality: -20
370 | score: 60
371 | - name: "SECUINFRA_SUSP_VBS_Wscript_Shell"
372 | quality: -60
373 | score: 45
374 | - name: "SECUINFRA_SUS_Unsigned_APPX_MSIX_Installer_Feb23"
375 | quality: -40
376 | score: 50
377 |
378 | # BlackBerry
379 | - name: "BLACKBERRY_Mal_Infostealer_Win32_Jupyter_Infostealer_Module"
380 | quality: -80
381 |
382 | # GCTI
383 | - name: "GCTI_Sliver_Implant_32Bit"
384 | quality: -50
385 | score: 60
386 |
387 | # EmbeeResearch
388 | - name: "EMBEERESEARCH_Win_Havoc_Ntdll_Hashes_Oct_2022"
389 | quality: -80
390 | score: 40
391 | - name: "EMBEERESEARCH_Win_Redline_Wextract_Hunting_Oct_2023"
392 | quality: -60
393 | score: 60
394 | - name: "EMBEERESEARCH_Win_Amadey_Bytecodes_Oct_2023"
395 | quality: -60
396 | score: 60
397 | - name: "EMBEERESEARCH_Win_Bruteratel_Syscall_Hashes_Oct_2022"
398 | quality: -50
399 | score: 60
400 | - name: "EMBEERESEARCH_Win_Asyncrat_Unobfuscated" # Rule requires new YARA feature 4.3+
401 | quality: -100
402 |
403 | # SBousseaden
404 | - name: "SBOUSSEADEN_Truncated_Win10_X64_Nativesyscall"
405 | quality: -90
406 | score: 40
407 | - name: "SBOUSSEADEN_Hunt_Skyproj_Backdoor"
408 | quality: -70
409 | score: 40
410 | - name: "SBOUSSEADEN_Hunt_Multi_EDR_Discovery"
411 | quality: -70
412 | score: 40
413 | - name: "SBOUSSEADEN_Hunt_Lsass_Ntds_Ext"
414 | quality: -70
415 | score: 40
416 | - name: "SBOUSSEADEN_Hunt_Credaccess_Iis_Xor"
417 | quality: -30
418 | score: 60
419 | - name: "SBOUSSEADEN_Mem_Webcreds_Regexp_Xor"
420 | quality: -30
421 | score: 60
422 |
423 | # Dr4k0nia
424 | - name: "DR4K0NIA_Msil_Suspicious_Use_Of_Strreverse"
425 | quality: -30
426 | score: 60
427 |
428 | # AvastTI
429 | - name: "AVASTTI_Manjusaka_Payload_Mz"
430 | quality: -40
431 | score: 60
432 |
433 | # Cod3nym
434 | - name: "COD3NYM_SUSP_OBF_NET_Confuserex_Name_Pattern_Jan24"
435 | quality: 0
436 | score: 50
437 | - name: "COD3NYM_SUSP_OBF_NET_Eazfuscator_String_Encryption_Jan24"
438 | quality: 0
439 | score: 50
440 |
441 | # RussianPanda
442 | - name: "RUSSIANPANDA_Check_Installed_Software"
443 | quality: -50
444 | score: 45
445 |
446 | # ditekShen
447 | - name: "DITEKSHEN_INDICATOR_SUSPICIOUS_"
448 | quality: -30
449 | score: 40
450 | type: "prefix"
451 | - name: "DITEKSHEN_INDICATOR_SUSPICIOUS_Finger_Download_Pattern"
452 | quality: -30
453 | score: 50
454 | - name: "DITEKSHEN_INDICATOR_OLE_EXPLOIT_CVE_2017_11882_1"
455 | quality: -100
456 | score: 30
457 | - name: "DITEKSHEN_INDICATOR_RTF_Remotetemplate"
458 | quality: -40
459 | score: 60
460 | - name: "DITEKSHEN_INDICATOR_PDF_Ipdropper"
461 | quality: -40
462 | score: 60
463 | - name: "DITEKSHEN_INDICATOR_KB_CERT_62E745E92165213C971F5C490Aea12A5"
464 | quality: -100
465 | score: 40
466 | - name: "DITEKSHEN_INDICATOR_KB_CERT_43Bb437D609866286Dd839E1D00309F5"
467 | quality: -100
468 | score: 40
469 | - name: "DITEKSHEN_INDICATOR_KB_CERT_23389161E45A218Bd24E6E859Ae11153"
470 | quality: -100
471 | score: 40
472 | - name: "DITEKSHEN_INDICATOR_KB_CERT_26279F0F2F11970Dccf63Eba88F2D4C4"
473 | quality: -100
474 | score: 40
475 | - name: "DITEKSHEN_INDICATOR_KB_CERT_0D07705Fa0E0C4827Cc287Cfcdec20C4"
476 | quality: -100
477 | score: 40
478 | - name: "DITEKSHEN_INDICATOR_KB_CERT_0F9D91C6Aba86F4E54Cbb9Ef57E68346"
479 | quality: -100
480 | score: 40
481 | - name: "DITEKSHEN_INDICATOR_KB_CERT_07F9D80B85Ceff7Ee3F58Dc594Fe66B6"
482 | quality: -100
483 | score: 40
484 | - name: "DITEKSHEN_INDICATOR_KB_CERT_01803Bc7537A1818C4Ab135469963C10"
485 | quality: -70
486 | score: 40
487 | - name: "DITEKSHEN_INDICATOR_SUSPICIOUS_EXE_Sqlquery_Confidentialdatastore"
488 | quality: -40
489 | score: 60
490 | - name: "DITEKSHEN_INDICATOR_KB_CERT_1F3216F428F850Be2C66Caa056F6D821"
491 | quality: -70
492 | score: 40
493 | - name: "DITEKSHEN_INDICATOR_KB_CERT_7C1118Cbbadc95Da3752C46E47A27438"
494 | quality: -70
495 | score: 40
496 |   # The name below is a prefix, not a complete rule name, so it is marked type "prefix" like the other prefix entries.
497 |   # NOTE(review): the matching code is not visible here - confirm how it ranks this against the specific KB_CERT entries.
498 |   - name: "DITEKSHEN_INDICATOR_KB_CERT_"
499 |     quality: -20
500 |     score: 50
501 |     type: "prefix"
502 |   - name: "DITEKSHEN_INDICATOR_SUSPICIOUS_EXE_Regkeycomb_Disablewindefender"
503 |     quality: -90
504 |     score: 50
505 |   - name: "DITEKSHEN_INDICATOR_SUSPICIOUS_EXE_Nonewindowsua"
506 |     quality: -90
507 |     score: 50
508 |   - name: "DITEKSHEN_INDICATOR_SUSPICIOUS_EXE_Reversed"
509 |     quality: -90
510 |     score: 50
511 |   - name: "DITEKSHEN_MALWARE_Win_Dlagent02"
512 |     quality: -40
513 |     score: 60
514 |   - name: "DITEKSHEN_INDICATOR_TOOL_WEDGECUT"
515 |     quality: -80
516 |     score: 50
517 |   - name: "DITEKSHEN_MALWARE_Win_Asyncrat"
518 |     quality: -40
519 |     score: 60
520 |   - name: "DITEKSHEN_MALWARE_Osx_Lamepyre"
521 |     quality: -40
522 |     score: 60
523 |   - name: "DITEKSHEN_MALWARE_Win_Strelastealer"
524 |     quality: -40
525 |     score: 60
526 | - name: "DITEKSHEN_MALWARE_Win_Avemaria"
527 | quality: -60
528 | score: 60
529 | - name: "DITEKSHEN_MALWARE_Win_Fabookie_02"
530 | quality: -70
531 | score: 60
532 | - name: "DITEKSHEN_INDICATOR_EXE_Packed_Dotfuscator"
533 | quality: -80
534 | score: 50
535 | - name: "DITEKSHEN_INDICATOR_KB_CERT_0C5396Dcb2949C70Fac48Ab08A07338E"
536 | quality: -90
537 | score: 40
538 | - name: "DITEKSHEN_INDICATOR_SUSPICIOUS_EXE_Rawgithub_URL"
539 | quality: -90
540 | score: 40
541 | - name: "DITEKSHEN_INDICATOR_KB_CERT_20A20Dfce424E6Bbcc162A5Fcc0972Ee"
542 | quality: -90
543 | score: 40
544 | - name: "DITEKSHEN_INDICATOR_KB_CERT_0B1F8Cd59E64746Beae153Ecca21066B"
545 | quality: -90
546 | score: 40
547 | - name: "DITEKSHEN_INDICATOR_EXE_Packed_"
548 | quality: -70
549 | score: 40
550 | type: "prefix"
551 | - name: "DITEKSHEN_INDICATOR_TOOL_EXP_Serioussam02"
552 | quality: -100
553 | score: 40
554 | - name: "DITEKSHEN_INDICATOR_EXE_Dotnet_Encrypted"
555 | quality: -70
556 | score: 50
557 | - name: "DITEKSHEN_INDICATOR_KB_CERT_04F131322Cc31D92C849Fca351D2F141"
558 | quality: -90
559 | score: 40
560 | - name: "DITEKSHEN_INDICATOR_KB_CERT_3991D810Fb336E5A7D8C2822"
561 | quality: -90
562 | score: 40
563 | - name: "DITEKSHEN_INDICATOR_SUSPICIOUS_Finger_Download_Pattern" # NOTE(review): also listed earlier in this section with quality -30 / score 50 - keep only one entry
564 | quality: -70
565 | score: 40
566 | - name: "DITEKSHEN_INDICATOR_SUSPICIOUS_PWSH_Passwordcredential_Retrievepassword"
567 | quality: -70
568 | score: 40
569 | - name: "DITEKSHEN_INDICATOR_KB_CERT_028Aa6E7B516C0D155F15D6290A430E3"
570 | quality: -90
571 | score: 40
572 | - name: "DITEKSHEN_INDICATOR_TOOL_EXP_Apachestrusts"
573 | quality: -90
574 | score: 40
575 |
576 | # WithSecureLabs - NOTE(review): unlike every other entry in this list, these names carry no upper-case source prefix; confirm they match the final rule names
577 | - name: "ducktail_artifacts"
578 | quality: -50
579 | score: 60
580 | - name: "ducktail_dotnet_core_infostealer"
581 | quality: -20
582 | - name: "ducktail_exceldna_packed"
583 | quality: -80
584 |
--------------------------------------------------------------------------------
/yara-forge.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | #
5 | # YARA Forge
6 | # A YARA Rule Concentrator
7 | # Florian Roth
8 | # August 2024
9 |
10 | __version__ = '0.9.0'
11 |
12 | import argparse
13 | #import pprint
14 | import logging
15 | import sys
16 | import yaml
17 |
18 | from main.rule_collector import retrieve_yara_rule_sets
19 | from main.rule_processors import process_yara_rules
20 | from main.rule_output import write_yara_packages
21 | from qa.rule_qa import evaluate_rules_quality, check_yara_packages, get_yara_qa_commit_hash
22 |
23 |
24 |
25 | # Write a section header with dividers
26 | def write_section_header(title, divider_with=72):
27 | print("\n" + "=" * divider_with)
28 | print(title.center(divider_with).upper())
29 | print("=" * divider_with + "\n")
30 |
31 |
32 | if __name__ == "__main__":
33 |
34 | print(r' __ _____ ____ ___ ______ ')
35 | print(r' \ \/ / | / __ \/ | / ____/___ _________ ____ ')
36 | print(r' \ / /| | / /_/ / /| | / /_ / __ \/ ___/ __ `/ _ \ ')
37 | print(r' / / ___ |/ _, _/ ___ | / __/ / /_/ / / / /_/ / __/ ')
38 | print(r' /_/_/ |_/_/ |_/_/ |_| /_/ \____/_/ \__, /\___/ ')
39 | print(r' /____/ ')
40 | print(r' YARA Forge ')
41 | print(r' Bringing Order to Chaos ')
42 | print(r' ')
43 | print(r' Version %s ' % __version__)
44 | print(r' Florian Roth, January 2024 ')
45 |
46 | parser = argparse.ArgumentParser()
47 | parser.add_argument("--debug", help="enable debug output", action="store_true")
48 | parser.add_argument("-c", "--config", help="specify a different config file", default="yara-forge-config.yml")
49 | args = parser.parse_args()
50 |
51 | # Create a new logger to log into the command line and a log file name yara-forge.log
52 | # (only set the level to debug if the debug argument is set)
53 | logger = logging.getLogger()
54 | logger.setLevel(logging.DEBUG if args.debug else logging.INFO)
55 | # Set the level of the plyara logger to warning
56 | logging.getLogger('plyara').setLevel(logging.WARNING)
57 | logging.getLogger('tzlocal').setLevel(logging.CRITICAL)
58 | # Create a handler for the command line
59 | ch = logging.StreamHandler()
60 | ch.setLevel(logging.DEBUG if args.debug else logging.INFO)
61 | # Create a handler for the log file
62 | fh = logging.FileHandler("yara-forge.log")
63 | fh.setLevel(logging.DEBUG)
64 | # Create a formatter for the log messages that go to the log file
65 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
66 | # Create a formatter for the log messages that go to the command line
67 | formatter_cmd = logging.Formatter('%(message)s')
68 | # Add the formatter to the handlers
69 | ch.setFormatter(formatter_cmd)
70 | fh.setFormatter(formatter)
71 | # Add the handlers to the logger
72 | logger.addHandler(ch)
73 | logger.addHandler(fh)
74 |
75 | # Read configuration file
76 | with open(args.config, 'r') as f:
77 | YARA_FORGE_CONFIG = yaml.safe_load(f)
78 |
79 | # Retrieve the YARA rule sets
80 | write_section_header("Retrieving YARA rule sets")
81 | yara_rule_repo_sets = retrieve_yara_rule_sets(
82 | YARA_FORGE_CONFIG['repo_staging_dir'],
83 | YARA_FORGE_CONFIG['yara_repositories'])
84 | #pprint.pprint(yara_rule_repo_sets)
85 |
86 | # Process the YARA rules
87 | write_section_header("Processing YARA rules")
88 | processed_yara_repos = process_yara_rules(yara_rule_repo_sets, YARA_FORGE_CONFIG)
89 |
90 | # Evaluate the quality of the rules
91 | write_section_header("Evaluating YARA rules")
92 | evaluated_yara_repos = evaluate_rules_quality(processed_yara_repos, YARA_FORGE_CONFIG)
93 |
94 | # Write the YARA packages
95 | write_section_header("Writing YARA packages")
96 | repo_files = write_yara_packages(evaluated_yara_repos, program_version=__version__, yaraqa_commit=get_yara_qa_commit_hash(), YARA_FORGE_CONFIG=YARA_FORGE_CONFIG)
97 |
98 | # We quality check the output files and look for errors
99 | write_section_header("Quality checking YARA packages")
100 | test_successful = check_yara_packages(repo_files)
101 | if test_successful:
102 | logging.log(logging.INFO, "Quality check finished successfully")
103 | sys.exit(0)
104 | else:
105 | logging.log(logging.ERROR, "Quality check failed")
106 | sys.exit(1)
107 |
--------------------------------------------------------------------------------