├── .gitignore
├── LICENSE
├── README.md
├── bgpranking
│   ├── __init__.py
│   ├── bgpranking.py
│   ├── default
│   │   ├── __init__.py
│   │   ├── abstractmanager.py
│   │   ├── exceptions.py
│   │   └── helpers.py
│   ├── helpers.py
│   ├── parsers
│   │   ├── __init__.py
│   │   ├── abusech.py
│   │   ├── abusech_feodo.py
│   │   ├── abusech_threatfox.py
│   │   ├── dshield.py
│   │   ├── malc0de.py
│   │   ├── nothink.py
│   │   └── shadowserver.py
│   └── statsripe.py
├── bin
│   ├── __init__.py
│   ├── archiver.py
│   ├── asn_descriptions.py
│   ├── dbinsert.py
│   ├── fetcher.py
│   ├── manual_ranking.py
│   ├── parser.py
│   ├── ranking.py
│   ├── run_backend.py
│   ├── sanitizer.py
│   ├── shutdown.py
│   ├── ssfetcher.py
│   ├── start.py
│   ├── start_website.py
│   ├── stop.py
│   └── update.py
├── cache
│   ├── cache.conf
│   ├── run_redis.sh
│   └── shutdown_redis.sh
├── config
│   ├── generic.json.sample
│   ├── modules
│   │   ├── Alienvault.json
│   │   ├── BlocklistDeApache.json
│   │   ├── BlocklistDeBots.json
│   │   ├── BlocklistDeFTP.json
│   │   ├── BlocklistDeIMAP.json
│   │   ├── BlocklistDeMail.json
│   │   ├── BlocklistDeSIP.json
│   │   ├── BlocklistDeSSH.json
│   │   ├── BlocklistDeStrong.json
│   │   ├── CIArmy.json
│   │   ├── CleanMXMalwares.json
│   │   ├── CleanMXPhishing.json
│   │   ├── CleanMXPortals.json
│   │   ├── CoinBlockerLists.json
│   │   ├── DshieldDaily.json
│   │   ├── DshieldTopIPs.json
│   │   ├── EmergingThreatsCompromized.json
│   │   ├── FeodotrackerIPBlockList.json
│   │   ├── Malc0de.json
│   │   ├── MalwareDomainListIP.json
│   │   ├── SSLBlacklist.json
│   │   ├── ThreatFoxIOC.json
│   │   ├── greensnow.json
│   │   ├── jq_all_the_things.sh
│   │   ├── module.schema
│   │   ├── pop3gropers.json
│   │   ├── shadowserver_only.sh
│   │   └── validate_all.sh
│   └── shadowserver.json.sample
├── poetry.lock
├── pyproject.toml
├── ranking
│   ├── kvrocks.conf
│   └── run_kvrocks.sh
├── setup.py
├── storage
│   ├── kvrocks.conf
│   └── run_kvrocks.sh
├── temp
│   ├── intake.conf
│   ├── prepare.conf
│   ├── run_redis.sh
│   └── shutdown_redis.sh
├── tools
│   ├── 3rdparty.py
│   ├── clear_prepare_db.py
│   ├── migrate.py
│   ├── monitoring.py
│   └── validate_config_files.py
└── website
    ├── __init__.py
    ├── readme.md
    └── web
        ├── __init__.py
        ├── genericapi.py
        ├── helpers.py
        ├── proxied.py
        ├── static
        │   ├── forkme_right_darkblue_121621.png
        │   ├── linegraph.css
        │   ├── linegraph.js
        │   ├── linegraph_country.css
        │   └── linegraph_country.js
        └── templates
            ├── asn.html
            ├── country.html
            ├── country_asn_map.html
            ├── index.html
            ├── ipasn.html
            ├── main.html
            └── top_forms.html
/.gitignore:
--------------------------------------------------------------------------------
1 | # Local exclude
2 | scraped/
3 | *.swp
4 | lookyloo/ete3_webserver/webapi.py
5 |
6 | # Byte-compiled / optimized / DLL files
7 | __pycache__/
8 | *.py[cod]
9 | *$py.class
10 |
11 | # C extensions
12 | *.so
13 |
14 | # Distribution / packaging
15 | .Python
16 | env/
17 | build/
18 | develop-eggs/
19 | dist/
20 | downloads/
21 | eggs/
22 | .eggs/
23 | lib/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | wheels/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .coverage
47 | .coverage.*
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 | *.cover
52 | .hypothesis/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 |
62 | # Flask stuff:
63 | instance/
64 | .webassets-cache
65 |
66 | # Scrapy stuff:
67 | .scrapy
68 |
69 | # Sphinx documentation
70 | docs/_build/
71 |
72 | # PyBuilder
73 | target/
74 |
75 | # Jupyter Notebook
76 | .ipynb_checkpoints
77 |
78 | # pyenv
79 | .python-version
80 |
81 | # celery beat schedule file
82 | celerybeat-schedule
83 |
84 | # SageMath parsed files
85 | *.sage.py
86 |
87 | # dotenv
88 | .env
89 |
90 | # virtualenv
91 | .venv
92 | venv/
93 | ENV/
94 |
95 | # Spyder project settings
96 | .spyderproject
97 | .spyproject
98 |
99 | # Rope project settings
100 | .ropeproject
101 |
102 | # mkdocs documentation
103 | /site
104 |
105 | # mypy
106 | .mypy_cache/
107 |
108 |
109 | # web
110 | secret_key
111 |
112 | cache.pid
113 | *.rdb
114 |
115 | # Local config files
116 | config/*.json
117 | config/*.json.bkp
118 |
119 | rawdata
120 |
121 | storage/db/
122 | storage/kvrocks*
123 | ranking/db/
124 | ranking/kvrocks*
125 | website/web/static/d3.*.js
126 | website/web/static/bootstrap-select.min.*
127 |
128 | *.pid
129 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU AFFERO GENERAL PUBLIC LICENSE
2 | Version 3, 19 November 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU Affero General Public License is a free, copyleft license for
11 | software and other kinds of works, specifically designed to ensure
12 | cooperation with the community in the case of network server software.
13 |
14 | The licenses for most software and other practical works are designed
15 | to take away your freedom to share and change the works. By contrast,
16 | our General Public Licenses are intended to guarantee your freedom to
17 | share and change all versions of a program--to make sure it remains free
18 | software for all its users.
19 |
20 | When we speak of free software, we are referring to freedom, not
21 | price. Our General Public Licenses are designed to make sure that you
22 | have the freedom to distribute copies of free software (and charge for
23 | them if you wish), that you receive source code or can get it if you
24 | want it, that you can change the software or use pieces of it in new
25 | free programs, and that you know you can do these things.
26 |
27 | Developers that use our General Public Licenses protect your rights
28 | with two steps: (1) assert copyright on the software, and (2) offer
29 | you this License which gives you legal permission to copy, distribute
30 | and/or modify the software.
31 |
32 | A secondary benefit of defending all users' freedom is that
33 | improvements made in alternate versions of the program, if they
34 | receive widespread use, become available for other developers to
35 | incorporate. Many developers of free software are heartened and
36 | encouraged by the resulting cooperation. However, in the case of
37 | software used on network servers, this result may fail to come about.
38 | The GNU General Public License permits making a modified version and
39 | letting the public access it on a server without ever releasing its
40 | source code to the public.
41 |
42 | The GNU Affero General Public License is designed specifically to
43 | ensure that, in such cases, the modified source code becomes available
44 | to the community. It requires the operator of a network server to
45 | provide the source code of the modified version running there to the
46 | users of that server. Therefore, public use of a modified version, on
47 | a publicly accessible server, gives the public access to the source
48 | code of the modified version.
49 |
50 | An older license, called the Affero General Public License and
51 | published by Affero, was designed to accomplish similar goals. This is
52 | a different license, not a version of the Affero GPL, but Affero has
53 | released a new version of the Affero GPL which permits relicensing under
54 | this license.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | TERMS AND CONDITIONS
60 |
61 | 0. Definitions.
62 |
63 | "This License" refers to version 3 of the GNU Affero General Public License.
64 |
65 | "Copyright" also means copyright-like laws that apply to other kinds of
66 | works, such as semiconductor masks.
67 |
68 | "The Program" refers to any copyrightable work licensed under this
69 | License. Each licensee is addressed as "you". "Licensees" and
70 | "recipients" may be individuals or organizations.
71 |
72 | To "modify" a work means to copy from or adapt all or part of the work
73 | in a fashion requiring copyright permission, other than the making of an
74 | exact copy. The resulting work is called a "modified version" of the
75 | earlier work or a work "based on" the earlier work.
76 |
77 | A "covered work" means either the unmodified Program or a work based
78 | on the Program.
79 |
80 | To "propagate" a work means to do anything with it that, without
81 | permission, would make you directly or secondarily liable for
82 | infringement under applicable copyright law, except executing it on a
83 | computer or modifying a private copy. Propagation includes copying,
84 | distribution (with or without modification), making available to the
85 | public, and in some countries other activities as well.
86 |
87 | To "convey" a work means any kind of propagation that enables other
88 | parties to make or receive copies. Mere interaction with a user through
89 | a computer network, with no transfer of a copy, is not conveying.
90 |
91 | An interactive user interface displays "Appropriate Legal Notices"
92 | to the extent that it includes a convenient and prominently visible
93 | feature that (1) displays an appropriate copyright notice, and (2)
94 | tells the user that there is no warranty for the work (except to the
95 | extent that warranties are provided), that licensees may convey the
96 | work under this License, and how to view a copy of this License. If
97 | the interface presents a list of user commands or options, such as a
98 | menu, a prominent item in the list meets this criterion.
99 |
100 | 1. Source Code.
101 |
102 | The "source code" for a work means the preferred form of the work
103 | for making modifications to it. "Object code" means any non-source
104 | form of a work.
105 |
106 | A "Standard Interface" means an interface that either is an official
107 | standard defined by a recognized standards body, or, in the case of
108 | interfaces specified for a particular programming language, one that
109 | is widely used among developers working in that language.
110 |
111 | The "System Libraries" of an executable work include anything, other
112 | than the work as a whole, that (a) is included in the normal form of
113 | packaging a Major Component, but which is not part of that Major
114 | Component, and (b) serves only to enable use of the work with that
115 | Major Component, or to implement a Standard Interface for which an
116 | implementation is available to the public in source code form. A
117 | "Major Component", in this context, means a major essential component
118 | (kernel, window system, and so on) of the specific operating system
119 | (if any) on which the executable work runs, or a compiler used to
120 | produce the work, or an object code interpreter used to run it.
121 |
122 | The "Corresponding Source" for a work in object code form means all
123 | the source code needed to generate, install, and (for an executable
124 | work) run the object code and to modify the work, including scripts to
125 | control those activities. However, it does not include the work's
126 | System Libraries, or general-purpose tools or generally available free
127 | programs which are used unmodified in performing those activities but
128 | which are not part of the work. For example, Corresponding Source
129 | includes interface definition files associated with source files for
130 | the work, and the source code for shared libraries and dynamically
131 | linked subprograms that the work is specifically designed to require,
132 | such as by intimate data communication or control flow between those
133 | subprograms and other parts of the work.
134 |
135 | The Corresponding Source need not include anything that users
136 | can regenerate automatically from other parts of the Corresponding
137 | Source.
138 |
139 | The Corresponding Source for a work in source code form is that
140 | same work.
141 |
142 | 2. Basic Permissions.
143 |
144 | All rights granted under this License are granted for the term of
145 | copyright on the Program, and are irrevocable provided the stated
146 | conditions are met. This License explicitly affirms your unlimited
147 | permission to run the unmodified Program. The output from running a
148 | covered work is covered by this License only if the output, given its
149 | content, constitutes a covered work. This License acknowledges your
150 | rights of fair use or other equivalent, as provided by copyright law.
151 |
152 | You may make, run and propagate covered works that you do not
153 | convey, without conditions so long as your license otherwise remains
154 | in force. You may convey covered works to others for the sole purpose
155 | of having them make modifications exclusively for you, or provide you
156 | with facilities for running those works, provided that you comply with
157 | the terms of this License in conveying all material for which you do
158 | not control copyright. Those thus making or running the covered works
159 | for you must do so exclusively on your behalf, under your direction
160 | and control, on terms that prohibit them from making any copies of
161 | your copyrighted material outside their relationship with you.
162 |
163 | Conveying under any other circumstances is permitted solely under
164 | the conditions stated below. Sublicensing is not allowed; section 10
165 | makes it unnecessary.
166 |
167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
168 |
169 | No covered work shall be deemed part of an effective technological
170 | measure under any applicable law fulfilling obligations under article
171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
172 | similar laws prohibiting or restricting circumvention of such
173 | measures.
174 |
175 | When you convey a covered work, you waive any legal power to forbid
176 | circumvention of technological measures to the extent such circumvention
177 | is effected by exercising rights under this License with respect to
178 | the covered work, and you disclaim any intention to limit operation or
179 | modification of the work as a means of enforcing, against the work's
180 | users, your or third parties' legal rights to forbid circumvention of
181 | technological measures.
182 |
183 | 4. Conveying Verbatim Copies.
184 |
185 | You may convey verbatim copies of the Program's source code as you
186 | receive it, in any medium, provided that you conspicuously and
187 | appropriately publish on each copy an appropriate copyright notice;
188 | keep intact all notices stating that this License and any
189 | non-permissive terms added in accord with section 7 apply to the code;
190 | keep intact all notices of the absence of any warranty; and give all
191 | recipients a copy of this License along with the Program.
192 |
193 | You may charge any price or no price for each copy that you convey,
194 | and you may offer support or warranty protection for a fee.
195 |
196 | 5. Conveying Modified Source Versions.
197 |
198 | You may convey a work based on the Program, or the modifications to
199 | produce it from the Program, in the form of source code under the
200 | terms of section 4, provided that you also meet all of these conditions:
201 |
202 | a) The work must carry prominent notices stating that you modified
203 | it, and giving a relevant date.
204 |
205 | b) The work must carry prominent notices stating that it is
206 | released under this License and any conditions added under section
207 | 7. This requirement modifies the requirement in section 4 to
208 | "keep intact all notices".
209 |
210 | c) You must license the entire work, as a whole, under this
211 | License to anyone who comes into possession of a copy. This
212 | License will therefore apply, along with any applicable section 7
213 | additional terms, to the whole of the work, and all its parts,
214 | regardless of how they are packaged. This License gives no
215 | permission to license the work in any other way, but it does not
216 | invalidate such permission if you have separately received it.
217 |
218 | d) If the work has interactive user interfaces, each must display
219 | Appropriate Legal Notices; however, if the Program has interactive
220 | interfaces that do not display Appropriate Legal Notices, your
221 | work need not make them do so.
222 |
223 | A compilation of a covered work with other separate and independent
224 | works, which are not by their nature extensions of the covered work,
225 | and which are not combined with it such as to form a larger program,
226 | in or on a volume of a storage or distribution medium, is called an
227 | "aggregate" if the compilation and its resulting copyright are not
228 | used to limit the access or legal rights of the compilation's users
229 | beyond what the individual works permit. Inclusion of a covered work
230 | in an aggregate does not cause this License to apply to the other
231 | parts of the aggregate.
232 |
233 | 6. Conveying Non-Source Forms.
234 |
235 | You may convey a covered work in object code form under the terms
236 | of sections 4 and 5, provided that you also convey the
237 | machine-readable Corresponding Source under the terms of this License,
238 | in one of these ways:
239 |
240 | a) Convey the object code in, or embodied in, a physical product
241 | (including a physical distribution medium), accompanied by the
242 | Corresponding Source fixed on a durable physical medium
243 | customarily used for software interchange.
244 |
245 | b) Convey the object code in, or embodied in, a physical product
246 | (including a physical distribution medium), accompanied by a
247 | written offer, valid for at least three years and valid for as
248 | long as you offer spare parts or customer support for that product
249 | model, to give anyone who possesses the object code either (1) a
250 | copy of the Corresponding Source for all the software in the
251 | product that is covered by this License, on a durable physical
252 | medium customarily used for software interchange, for a price no
253 | more than your reasonable cost of physically performing this
254 | conveying of source, or (2) access to copy the
255 | Corresponding Source from a network server at no charge.
256 |
257 | c) Convey individual copies of the object code with a copy of the
258 | written offer to provide the Corresponding Source. This
259 | alternative is allowed only occasionally and noncommercially, and
260 | only if you received the object code with such an offer, in accord
261 | with subsection 6b.
262 |
263 | d) Convey the object code by offering access from a designated
264 | place (gratis or for a charge), and offer equivalent access to the
265 | Corresponding Source in the same way through the same place at no
266 | further charge. You need not require recipients to copy the
267 | Corresponding Source along with the object code. If the place to
268 | copy the object code is a network server, the Corresponding Source
269 | may be on a different server (operated by you or a third party)
270 | that supports equivalent copying facilities, provided you maintain
271 | clear directions next to the object code saying where to find the
272 | Corresponding Source. Regardless of what server hosts the
273 | Corresponding Source, you remain obligated to ensure that it is
274 | available for as long as needed to satisfy these requirements.
275 |
276 | e) Convey the object code using peer-to-peer transmission, provided
277 | you inform other peers where the object code and Corresponding
278 | Source of the work are being offered to the general public at no
279 | charge under subsection 6d.
280 |
281 | A separable portion of the object code, whose source code is excluded
282 | from the Corresponding Source as a System Library, need not be
283 | included in conveying the object code work.
284 |
285 | A "User Product" is either (1) a "consumer product", which means any
286 | tangible personal property which is normally used for personal, family,
287 | or household purposes, or (2) anything designed or sold for incorporation
288 | into a dwelling. In determining whether a product is a consumer product,
289 | doubtful cases shall be resolved in favor of coverage. For a particular
290 | product received by a particular user, "normally used" refers to a
291 | typical or common use of that class of product, regardless of the status
292 | of the particular user or of the way in which the particular user
293 | actually uses, or expects or is expected to use, the product. A product
294 | is a consumer product regardless of whether the product has substantial
295 | commercial, industrial or non-consumer uses, unless such uses represent
296 | the only significant mode of use of the product.
297 |
298 | "Installation Information" for a User Product means any methods,
299 | procedures, authorization keys, or other information required to install
300 | and execute modified versions of a covered work in that User Product from
301 | a modified version of its Corresponding Source. The information must
302 | suffice to ensure that the continued functioning of the modified object
303 | code is in no case prevented or interfered with solely because
304 | modification has been made.
305 |
306 | If you convey an object code work under this section in, or with, or
307 | specifically for use in, a User Product, and the conveying occurs as
308 | part of a transaction in which the right of possession and use of the
309 | User Product is transferred to the recipient in perpetuity or for a
310 | fixed term (regardless of how the transaction is characterized), the
311 | Corresponding Source conveyed under this section must be accompanied
312 | by the Installation Information. But this requirement does not apply
313 | if neither you nor any third party retains the ability to install
314 | modified object code on the User Product (for example, the work has
315 | been installed in ROM).
316 |
317 | The requirement to provide Installation Information does not include a
318 | requirement to continue to provide support service, warranty, or updates
319 | for a work that has been modified or installed by the recipient, or for
320 | the User Product in which it has been modified or installed. Access to a
321 | network may be denied when the modification itself materially and
322 | adversely affects the operation of the network or violates the rules and
323 | protocols for communication across the network.
324 |
325 | Corresponding Source conveyed, and Installation Information provided,
326 | in accord with this section must be in a format that is publicly
327 | documented (and with an implementation available to the public in
328 | source code form), and must require no special password or key for
329 | unpacking, reading or copying.
330 |
331 | 7. Additional Terms.
332 |
333 | "Additional permissions" are terms that supplement the terms of this
334 | License by making exceptions from one or more of its conditions.
335 | Additional permissions that are applicable to the entire Program shall
336 | be treated as though they were included in this License, to the extent
337 | that they are valid under applicable law. If additional permissions
338 | apply only to part of the Program, that part may be used separately
339 | under those permissions, but the entire Program remains governed by
340 | this License without regard to the additional permissions.
341 |
342 | When you convey a copy of a covered work, you may at your option
343 | remove any additional permissions from that copy, or from any part of
344 | it. (Additional permissions may be written to require their own
345 | removal in certain cases when you modify the work.) You may place
346 | additional permissions on material, added by you to a covered work,
347 | for which you have or can give appropriate copyright permission.
348 |
349 | Notwithstanding any other provision of this License, for material you
350 | add to a covered work, you may (if authorized by the copyright holders of
351 | that material) supplement the terms of this License with terms:
352 |
353 | a) Disclaiming warranty or limiting liability differently from the
354 | terms of sections 15 and 16 of this License; or
355 |
356 | b) Requiring preservation of specified reasonable legal notices or
357 | author attributions in that material or in the Appropriate Legal
358 | Notices displayed by works containing it; or
359 |
360 | c) Prohibiting misrepresentation of the origin of that material, or
361 | requiring that modified versions of such material be marked in
362 | reasonable ways as different from the original version; or
363 |
364 | d) Limiting the use for publicity purposes of names of licensors or
365 | authors of the material; or
366 |
367 | e) Declining to grant rights under trademark law for use of some
368 | trade names, trademarks, or service marks; or
369 |
370 | f) Requiring indemnification of licensors and authors of that
371 | material by anyone who conveys the material (or modified versions of
372 | it) with contractual assumptions of liability to the recipient, for
373 | any liability that these contractual assumptions directly impose on
374 | those licensors and authors.
375 |
376 | All other non-permissive additional terms are considered "further
377 | restrictions" within the meaning of section 10. If the Program as you
378 | received it, or any part of it, contains a notice stating that it is
379 | governed by this License along with a term that is a further
380 | restriction, you may remove that term. If a license document contains
381 | a further restriction but permits relicensing or conveying under this
382 | License, you may add to a covered work material governed by the terms
383 | of that license document, provided that the further restriction does
384 | not survive such relicensing or conveying.
385 |
386 | If you add terms to a covered work in accord with this section, you
387 | must place, in the relevant source files, a statement of the
388 | additional terms that apply to those files, or a notice indicating
389 | where to find the applicable terms.
390 |
391 | Additional terms, permissive or non-permissive, may be stated in the
392 | form of a separately written license, or stated as exceptions;
393 | the above requirements apply either way.
394 |
395 | 8. Termination.
396 |
397 | You may not propagate or modify a covered work except as expressly
398 | provided under this License. Any attempt otherwise to propagate or
399 | modify it is void, and will automatically terminate your rights under
400 | this License (including any patent licenses granted under the third
401 | paragraph of section 11).
402 |
403 | However, if you cease all violation of this License, then your
404 | license from a particular copyright holder is reinstated (a)
405 | provisionally, unless and until the copyright holder explicitly and
406 | finally terminates your license, and (b) permanently, if the copyright
407 | holder fails to notify you of the violation by some reasonable means
408 | prior to 60 days after the cessation.
409 |
410 | Moreover, your license from a particular copyright holder is
411 | reinstated permanently if the copyright holder notifies you of the
412 | violation by some reasonable means, this is the first time you have
413 | received notice of violation of this License (for any work) from that
414 | copyright holder, and you cure the violation prior to 30 days after
415 | your receipt of the notice.
416 |
417 | Termination of your rights under this section does not terminate the
418 | licenses of parties who have received copies or rights from you under
419 | this License. If your rights have been terminated and not permanently
420 | reinstated, you do not qualify to receive new licenses for the same
421 | material under section 10.
422 |
423 | 9. Acceptance Not Required for Having Copies.
424 |
425 | You are not required to accept this License in order to receive or
426 | run a copy of the Program. Ancillary propagation of a covered work
427 | occurring solely as a consequence of using peer-to-peer transmission
428 | to receive a copy likewise does not require acceptance. However,
429 | nothing other than this License grants you permission to propagate or
430 | modify any covered work. These actions infringe copyright if you do
431 | not accept this License. Therefore, by modifying or propagating a
432 | covered work, you indicate your acceptance of this License to do so.
433 |
434 | 10. Automatic Licensing of Downstream Recipients.
435 |
436 | Each time you convey a covered work, the recipient automatically
437 | receives a license from the original licensors, to run, modify and
438 | propagate that work, subject to this License. You are not responsible
439 | for enforcing compliance by third parties with this License.
440 |
441 | An "entity transaction" is a transaction transferring control of an
442 | organization, or substantially all assets of one, or subdividing an
443 | organization, or merging organizations. If propagation of a covered
444 | work results from an entity transaction, each party to that
445 | transaction who receives a copy of the work also receives whatever
446 | licenses to the work the party's predecessor in interest had or could
447 | give under the previous paragraph, plus a right to possession of the
448 | Corresponding Source of the work from the predecessor in interest, if
449 | the predecessor has it or can get it with reasonable efforts.
450 |
451 | You may not impose any further restrictions on the exercise of the
452 | rights granted or affirmed under this License. For example, you may
453 | not impose a license fee, royalty, or other charge for exercise of
454 | rights granted under this License, and you may not initiate litigation
455 | (including a cross-claim or counterclaim in a lawsuit) alleging that
456 | any patent claim is infringed by making, using, selling, offering for
457 | sale, or importing the Program or any portion of it.
458 |
459 | 11. Patents.
460 |
461 | A "contributor" is a copyright holder who authorizes use under this
462 | License of the Program or a work on which the Program is based. The
463 | work thus licensed is called the contributor's "contributor version".
464 |
465 | A contributor's "essential patent claims" are all patent claims
466 | owned or controlled by the contributor, whether already acquired or
467 | hereafter acquired, that would be infringed by some manner, permitted
468 | by this License, of making, using, or selling its contributor version,
469 | but do not include claims that would be infringed only as a
470 | consequence of further modification of the contributor version. For
471 | purposes of this definition, "control" includes the right to grant
472 | patent sublicenses in a manner consistent with the requirements of
473 | this License.
474 |
475 | Each contributor grants you a non-exclusive, worldwide, royalty-free
476 | patent license under the contributor's essential patent claims, to
477 | make, use, sell, offer for sale, import and otherwise run, modify and
478 | propagate the contents of its contributor version.
479 |
480 | In the following three paragraphs, a "patent license" is any express
481 | agreement or commitment, however denominated, not to enforce a patent
482 | (such as an express permission to practice a patent or covenant not to
483 | sue for patent infringement). To "grant" such a patent license to a
484 | party means to make such an agreement or commitment not to enforce a
485 | patent against the party.
486 |
487 | If you convey a covered work, knowingly relying on a patent license,
488 | and the Corresponding Source of the work is not available for anyone
489 | to copy, free of charge and under the terms of this License, through a
490 | publicly available network server or other readily accessible means,
491 | then you must either (1) cause the Corresponding Source to be so
492 | available, or (2) arrange to deprive yourself of the benefit of the
493 | patent license for this particular work, or (3) arrange, in a manner
494 | consistent with the requirements of this License, to extend the patent
495 | license to downstream recipients. "Knowingly relying" means you have
496 | actual knowledge that, but for the patent license, your conveying the
497 | covered work in a country, or your recipient's use of the covered work
498 | in a country, would infringe one or more identifiable patents in that
499 | country that you have reason to believe are valid.
500 |
501 | If, pursuant to or in connection with a single transaction or
502 | arrangement, you convey, or propagate by procuring conveyance of, a
503 | covered work, and grant a patent license to some of the parties
504 | receiving the covered work authorizing them to use, propagate, modify
505 | or convey a specific copy of the covered work, then the patent license
506 | you grant is automatically extended to all recipients of the covered
507 | work and works based on it.
508 |
509 | A patent license is "discriminatory" if it does not include within
510 | the scope of its coverage, prohibits the exercise of, or is
511 | conditioned on the non-exercise of one or more of the rights that are
512 | specifically granted under this License. You may not convey a covered
513 | work if you are a party to an arrangement with a third party that is
514 | in the business of distributing software, under which you make payment
515 | to the third party based on the extent of your activity of conveying
516 | the work, and under which the third party grants, to any of the
517 | parties who would receive the covered work from you, a discriminatory
518 | patent license (a) in connection with copies of the covered work
519 | conveyed by you (or copies made from those copies), or (b) primarily
520 | for and in connection with specific products or compilations that
521 | contain the covered work, unless you entered into that arrangement,
522 | or that patent license was granted, prior to 28 March 2007.
523 |
524 | Nothing in this License shall be construed as excluding or limiting
525 | any implied license or other defenses to infringement that may
526 | otherwise be available to you under applicable patent law.
527 |
528 | 12. No Surrender of Others' Freedom.
529 |
530 | If conditions are imposed on you (whether by court order, agreement or
531 | otherwise) that contradict the conditions of this License, they do not
532 | excuse you from the conditions of this License. If you cannot convey a
533 | covered work so as to satisfy simultaneously your obligations under this
534 | License and any other pertinent obligations, then as a consequence you may
535 | not convey it at all. For example, if you agree to terms that obligate you
536 | to collect a royalty for further conveying from those to whom you convey
537 | the Program, the only way you could satisfy both those terms and this
538 | License would be to refrain entirely from conveying the Program.
539 |
540 | 13. Remote Network Interaction; Use with the GNU General Public License.
541 |
542 | Notwithstanding any other provision of this License, if you modify the
543 | Program, your modified version must prominently offer all users
544 | interacting with it remotely through a computer network (if your version
545 | supports such interaction) an opportunity to receive the Corresponding
546 | Source of your version by providing access to the Corresponding Source
547 | from a network server at no charge, through some standard or customary
548 | means of facilitating copying of software. This Corresponding Source
549 | shall include the Corresponding Source for any work covered by version 3
550 | of the GNU General Public License that is incorporated pursuant to the
551 | following paragraph.
552 |
553 | Notwithstanding any other provision of this License, you have
554 | permission to link or combine any covered work with a work licensed
555 | under version 3 of the GNU General Public License into a single
556 | combined work, and to convey the resulting work. The terms of this
557 | License will continue to apply to the part which is the covered work,
558 | but the work with which it is combined will remain governed by version
559 | 3 of the GNU General Public License.
560 |
561 | 14. Revised Versions of this License.
562 |
563 | The Free Software Foundation may publish revised and/or new versions of
564 | the GNU Affero General Public License from time to time. Such new versions
565 | will be similar in spirit to the present version, but may differ in detail to
566 | address new problems or concerns.
567 |
568 | Each version is given a distinguishing version number. If the
569 | Program specifies that a certain numbered version of the GNU Affero General
570 | Public License "or any later version" applies to it, you have the
571 | option of following the terms and conditions either of that numbered
572 | version or of any later version published by the Free Software
573 | Foundation. If the Program does not specify a version number of the
574 | GNU Affero General Public License, you may choose any version ever published
575 | by the Free Software Foundation.
576 |
577 | If the Program specifies that a proxy can decide which future
578 | versions of the GNU Affero General Public License can be used, that proxy's
579 | public statement of acceptance of a version permanently authorizes you
580 | to choose that version for the Program.
581 |
582 | Later license versions may give you additional or different
583 | permissions. However, no additional obligations are imposed on any
584 | author or copyright holder as a result of your choosing to follow a
585 | later version.
586 |
587 | 15. Disclaimer of Warranty.
588 |
589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
597 |
598 | 16. Limitation of Liability.
599 |
600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
608 | SUCH DAMAGES.
609 |
610 | 17. Interpretation of Sections 15 and 16.
611 |
612 | If the disclaimer of warranty and limitation of liability provided
613 | above cannot be given local legal effect according to their terms,
614 | reviewing courts shall apply local law that most closely approximates
615 | an absolute waiver of all civil liability in connection with the
616 | Program, unless a warranty or assumption of liability accompanies a
617 | copy of the Program in return for a fee.
618 |
619 | END OF TERMS AND CONDITIONS
620 |
621 | How to Apply These Terms to Your New Programs
622 |
623 | If you develop a new program, and you want it to be of the greatest
624 | possible use to the public, the best way to achieve this is to make it
625 | free software which everyone can redistribute and change under these terms.
626 |
627 | To do so, attach the following notices to the program. It is safest
628 | to attach them to the start of each source file to most effectively
629 | state the exclusion of warranty; and each file should have at least
630 | the "copyright" line and a pointer to where the full notice is found.
631 |
632 | <one line to give the program's name and a brief idea of what it does.>
633 | Copyright (C) <year>  <name of author>
634 |
635 | This program is free software: you can redistribute it and/or modify
636 | it under the terms of the GNU Affero General Public License as published
637 | by the Free Software Foundation, either version 3 of the License, or
638 | (at your option) any later version.
639 |
640 | This program is distributed in the hope that it will be useful,
641 | but WITHOUT ANY WARRANTY; without even the implied warranty of
642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
643 | GNU Affero General Public License for more details.
644 |
645 | You should have received a copy of the GNU Affero General Public License
646 | along with this program. If not, see <https://www.gnu.org/licenses/>.
647 |
648 | Also add information on how to contact you by electronic and paper mail.
649 |
650 | If your software can interact with users remotely through a computer
651 | network, you should also make sure that it provides a way for users to
652 | get its source. For example, if your program is a web application, its
653 | interface could display a "Source" link that leads users to an archive
654 | of the code. There are many ways you could offer source, and different
655 | solutions will be better for different programs; see section 13 for the
656 | specific requirements.
657 |
658 | You should also get your employer (if you work as a programmer) or school,
659 | if any, to sign a "copyright disclaimer" for the program, if necessary.
660 | For more information on this, and how to apply and follow the GNU AGPL, see
661 | <https://www.gnu.org/licenses/>.
662 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # BGP Ranking
2 |
3 | For an Internet Service Provider (ISP), AS numbers are a logical representation of
4 | the other ISPs peering or communicating with its autonomous system. The customers of an
5 | ISP use its capacity to reach Internet services hosted in other ASes. Some of that
6 | traffic can be malicious (e.g. due to malware activity on end-user equipment)
7 | and hosted at specific AS locations.
8 |
9 | In order to provide an improved security view on those AS numbers, a trust ranking
10 | scheme is implemented based on existing datasets of compromised systems and
11 | malware command-and-control IPs. BGP Ranking provides a way to collect
12 | such malicious activities, aggregate the information per ASN, and provide a ranking
13 | model ordering the ASNs from the most malicious to the least malicious.
14 |
15 | The official website of the project is: [https://github.com/D4-project/bgp-ranking/](https://github.com/D4-project/bgp-ranking/)
16 |
17 | There is a public BGP Ranking at [http://bgpranking.circl.lu/](http://bgpranking.circl.lu/)
18 |
19 | BGP Ranking is free software licensed under the GNU Affero General Public License.
20 |
21 | BGP Ranking is software that ranks AS numbers based on the malicious activity observed within them.
22 |
23 | # Python client
24 |
25 | ```bash
26 | $ pip install git+https://github.com/D4-project/BGP-Ranking.git/#egg=pybgpranking\&subdirectory=client
27 | $ bgpranking --help
28 | usage: bgpranking [-h] [--url URL] (--asn ASN | --ip IP)
29 |
30 | Run a query against BGP Ranking
31 |
32 | optional arguments:
33 | -h, --help show this help message and exit
34 | --url URL URL of the instance.
35 | --asn ASN ASN to lookup
36 | --ip IP IP to lookup
37 | ```
38 |
39 | ## History
40 |
41 | - The first version of BGP Ranking was done in 2010 by [Raphael Vinot](https://github.com/Rafiot) with the support of [Alexandre Dulaunoy](https://github.com/adulau/).
42 | CIRCL supported the project from the early beginning and setup an online version to share information about the malicious ranking of ISPs.
43 |
44 | - In late 2018, within the scope of the D4 Project (a CIRCL project co-funded by INEA under the CEF Telecom program), BGP Ranking was completely rewritten in Python 3.6+ with an ARDB back-end.
45 |
46 | - In January 2022, BGP Ranking version 2.0 was released including a new backend on [kvrocks](https://github.com/KvrocksLabs/kvrocks) and many improvements.
47 |
48 | # Online service
49 |
50 | BGP Ranking service is available online [http://bgpranking.circl.lu/](http://bgpranking.circl.lu/).
51 |
52 | A Python library and client software are [available](https://github.com/D4-project/BGP-Ranking/tree/master/client), using the default API exposed by bgpranking.circl.lu.
53 |
54 | # CURL Example
55 |
56 | ## Get the ASN from an IP or a prefix
57 | ```bash
58 | curl https://bgpranking-ng.circl.lu/ipasn_history/?ip=143.255.153.0/24
59 | ```
60 |
61 | ## Response
62 |
63 | ```json
64 | {
65 | "meta": {
66 | "address_family": "v4",
67 | "ip": "143.255.153.0/24",
68 | "source": "caida"
69 | },
70 | "response": {
71 | "2019-05-19T12:00:00": {
72 | "asn": "264643",
73 | "prefix": "143.255.153.0/24"
74 | }
75 | }
76 | }
77 | ```
78 |
79 | ## Get the ranking of the AS
80 | ```
81 | curl -X POST -d '{"asn": "5577", "date": "2019-05-19"}' https://bgpranking-ng.circl.lu/json/asn
82 | ```
83 |
84 | Note: `date` isn't required.
85 |
86 | ### Response
87 |
88 | ```json
89 | {
90 | "meta": {
91 | "asn": "5577"
92 | },
93 | "response": {
94 | "asn_description": "ROOT, LU",
95 | "ranking": {
96 | "rank": 0.0004720052083333333,
97 | "position": 7084,
98 | "total_known_asns": 15375
99 | }
100 | }
101 | }
102 | ```
103 |
104 | ## Get historical information for an ASN
105 |
106 | ```
107 | curl -X POST -d '{"asn": "5577", "period": 5}' https://bgpranking-ng.circl.lu/json/asn_history
108 | ```
109 |
110 | ### Response
111 |
112 | ```json
113 | {
114 | "meta": {
115 | "asn": "5577",
116 | "period": 5
117 | },
118 | "response": {
119 | "asn_history": [
120 | [
121 | "2019-11-10",
122 | 0.00036458333333333335
123 | ],
124 | [
125 | "2019-11-11",
126 | 0.00036168981481481485
127 | ],
128 | [
129 | "2019-11-12",
130 | 0.0003761574074074074
131 | ],
132 | [
133 | "2019-11-13",
134 | 0.0003530092592592593
135 | ],
136 | [
137 | "2019-11-14",
138 | 0.0003559027777777778
139 | ]
140 | ]
141 | }
142 | }
143 | ```
144 |
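The same queries can be issued from Python. The sketch below is not the official client (see the `client/` directory for that); it only mirrors the curl calls above using the `requests` package, which is assumed to be installed:

```python
#!/usr/bin/env python3
# Minimal sketch mirroring the curl examples above. Assumes the `requests`
# package is installed; the endpoints and response layouts are the ones
# documented in this README.
import json

import requests

base_url = 'https://bgpranking-ng.circl.lu'

# ASN lookup for an IP or a prefix
ipasn = requests.get(f'{base_url}/ipasn_history/?ip=143.255.153.0/24').json()
print(ipasn['response'])

# Ranking of an AS on a given day ("date" is optional)
rank = requests.post(f'{base_url}/json/asn',
                     data=json.dumps({'asn': '5577', 'date': '2019-05-19'})).json()
print(rank['response']['ranking'])

# Historical ranking over the last 5 days
history = requests.post(f'{base_url}/json/asn_history',
                        data=json.dumps({'asn': '5577', 'period': 5})).json()
for day, daily_rank in history['response']['asn_history']:
    print(day, daily_rank)
```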
145 |
146 | # Server Installation (if you want to run your own)
147 |
148 | **IMPORTANT**: Use [poetry](https://github.com/python-poetry/poetry#installation)
149 |
150 | **NOTE**: Yes, it requires python3.6+. No, it will never support anything older.
151 |
152 | ## Install redis
153 |
154 | ```bash
155 | git clone https://github.com/antirez/redis.git
156 | cd redis
157 | git checkout 5.0
158 | make
159 | make test
160 | cd ..
161 | ```
162 | **Note**: If it fails, have a look at [the documentation](https://github.com/redis/redis#building-redis).
163 |
164 | ## Install kvrocks
165 |
166 | ```bash
167 | git clone https://github.com/KvrocksLabs/kvrocks.git
168 | cd kvrocks
169 | git checkout 2.5
170 | ./x.py build
171 | cd ..
172 | ```
173 | **Note**: If it fails, have a look at [the documentation](https://github.com/apache/kvrocks#build-and-run-kvrocks).
174 |
175 | ## Install & run BGP Ranking
176 |
177 | ```bash
178 | git clone https://github.com/D4-project/BGP-Ranking.git
179 | cd BGP-Ranking
180 | poetry install
181 | echo BGPRANKING_HOME="'`pwd`'" > .env
182 | poetry shell
183 | # Starts all the backend
184 | start
185 | ```
186 |
187 | ## Shutdown BGP Ranking
188 |
189 | ```bash
190 | stop
191 | ```
192 |
193 | # Directory structure
194 |
195 | *Config files*: `bgpranking / config / *.json`
196 |
197 | *Per-module parsers*: `bgpranking / parsers`
198 |
199 | *Libraries*: `bgpranking / libs`
200 |
201 | # Raw dataset directory structure
202 |
203 | ## Files to import
204 |
205 | *Note*: The default location of `<storage directory>` is the root directory of the repo.
206 |
207 | `<storage directory> / <vendor> / <listname>`
208 |
209 | ## Last modified date (if possible) and lock file
210 |
211 | `<storage directory> / <vendor> / <listname> / meta`
212 |
213 | ## Imported files less than 2 months old
214 |
215 | `<storage directory> / <vendor> / <listname> / archive`
216 |
217 | ## Imported files more than 2 months old
218 |
219 | `<storage directory> / <vendor> / <listname> / archive / deep`
220 |
221 | # Databases
222 |
223 | ## Intake (redis, port 6579)
224 |
225 | *Usage*: All the modules push their entries into this database.
226 |
227 | Creates the following hashes:
228 |
229 | ```python
230 | UUID = {'ip': <ip>, 'source': <source name>, 'datetime': <datetime>}
231 | ```
232 |
233 | Creates a set `intake` for further processing containing all the UUIDs.
234 |
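For illustration only (the real feed modules live in `bgpranking/parsers` and `bin/`), pushing a single entry into the intake database could look like the sketch below, assuming a Redis instance reachable on port 6579 as documented above:

```python
# Illustrative sketch, not the actual module code: push one entry into the
# intake database (Redis on port 6579, as documented above).
from datetime import datetime, timezone
from uuid import uuid4

from redis import Redis

intake = Redis(host='127.0.0.1', port=6579, decode_responses=True)

uuid = str(uuid4())
intake.hset(uuid, mapping={'ip': '198.51.100.7',      # example IP
                           'source': 'example_feed',  # hypothetical source name
                           'datetime': datetime.now(timezone.utc).isoformat()})
intake.sadd('intake', uuid)
```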
235 |
236 | ## Pre-Insert (redis, port 6580)
237 |
238 |
239 | *Usage*: Make sure the IPs are global and validate the input coming from the intake module.
240 |
241 | Pop UUIDs from `intake` and get the hashes stored under those keys.
242 |
243 | Creates the following hashes:
244 |
245 | ```python
246 | UUID = {'ip': <ip>, 'source': <source name>, 'datetime': <datetime>, 'date': <date>}
247 | ```
248 |
249 | Creates a set `to_insert` for further processing containing all the UUIDs.
250 |
251 | Creates a set `for_ris_lookup` to look up in the RIS database. It contains all the IPs.
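
A rough sketch of that sanitization step is shown below; the actual implementation is `bin/sanitizer.py`, and the port numbers are the ones documented in this section:

```python
# Illustrative sketch of the sanitizer loop described above (the real code is
# bin/sanitizer.py). Assumes the intake DB on port 6579 and the pre-insert DB
# on port 6580.
import ipaddress

from dateutil.parser import parse
from redis import Redis

intake = Redis(host='127.0.0.1', port=6579, decode_responses=True)
prepare = Redis(host='127.0.0.1', port=6580, decode_responses=True)

while True:
    uuid = intake.spop('intake')
    if not uuid:
        break
    entry = intake.hgetall(uuid)
    intake.delete(uuid)
    ip = ipaddress.ip_address(entry['ip'])
    if not ip.is_global:  # drop private, reserved and otherwise non-routable IPs
        continue
    entry['date'] = parse(entry['datetime']).date().isoformat()
    prepare.hset(uuid, mapping=entry)
    prepare.sadd('to_insert', uuid)
    prepare.sadd('for_ris_lookup', str(ip))
```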
252 |
253 | ## Routing Information Service cache (redis, port 6581)
254 |
255 | *Usage*: Look up IPs against RIPE's RIS database.
256 |
257 | Pop IPs from `for_ris_lookup`.
258 |
259 | Creates the following hashes:
260 |
261 | ```python
262 | IP = {'asn': <asn>, 'prefix': <prefix>, 'description': <description>}
263 | ```
264 |
265 | ## Ranking Information cache (redis, port 6582)
266 |
267 | *Usage*: Stores the current list of ASNs known at RIPE, and the prefixes originated by them.
268 |
269 | Creates the following sets:
270 |
271 | ```python
272 | asns = set([<asn>, ...])
273 | <asn>|v4 = set([<v4 prefix>, ...])
274 | <asn>|v6 = set([<v6 prefix>, ...])
275 | ```
276 |
277 | And the following keys:
278 |
279 | ```python
280 | <asn>|v4|ipcount = <number of IPv4 addresses originated by the ASN>
281 | <asn>|v6|ipcount = <number of IPv6 addresses originated by the ASN>
282 | ```
283 |
284 | ## Long term storage (kvrocks, port 5188)
285 |
286 | *Usage*: Stores the IPs with the metadata required for ranking.
287 |
288 | Pop UUIDs from `to_insert` and get the hashes stored under those keys.
289 |
290 | Use the IP from each hash to get the RIS information.
291 |
292 | Creates the following sets:
293 |
294 | ```python
295 | # All the sources, by day
296 | <YYYY-MM-DD>|sources = set([<source>, ...])
297 | # All the ASNs by source, by day
298 | <YYYY-MM-DD>|<source> -> set([<asn>, ...])
299 | # All the prefixes, by ASN, by source, by day
300 | <YYYY-MM-DD>|<source>|<asn> -> set([<prefix>, ...])
301 | # All the tuples (ip, datetime), by prefixes, by ASN, by source, by day
302 | <YYYY-MM-DD>|<source>|<asn>|<prefix> -> set([<ip>|<datetime>, ...])
303 | ```
304 |
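For illustration, walking that key hierarchy for a single day could look like the sketch below (kvrocks speaks the Redis protocol, and port 5188 is the one documented above):

```python
# Illustrative sketch: walk the long term storage hierarchy for one day.
# Assumes kvrocks is reachable on port 5188, as documented above.
from redis import Redis

storage = Redis(host='127.0.0.1', port=5188, decode_responses=True)

day = '2019-05-19'  # example date
for source in storage.smembers(f'{day}|sources'):
    for asn in storage.smembers(f'{day}|{source}'):
        for prefix in storage.smembers(f'{day}|{source}|{asn}'):
            for entry in storage.smembers(f'{day}|{source}|{asn}|{prefix}'):
                ip, seen_at = entry.split('|', 1)
                print(source, asn, prefix, ip, seen_at)
```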
--------------------------------------------------------------------------------
/bgpranking/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/D4-project/BGP-Ranking/5f4e051f940b1c79ec5c6b432db1c7b2bdcbc781/bgpranking/__init__.py
--------------------------------------------------------------------------------
/bgpranking/bgpranking.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import logging
5 | import re
6 |
7 | from redis import ConnectionPool, Redis
8 | from redis.connection import UnixDomainSocketConnection
9 |
10 | from .default import get_config, get_socket_path
11 |
12 | from typing import TypeVar, Union, Optional, Dict, Any, List, Tuple
13 | import datetime
14 | from datetime import timedelta
15 | from dateutil.parser import parse
16 | from collections import defaultdict
17 |
18 | import json
19 |
20 | from .default import InvalidDateFormat
21 | from .helpers import get_modules
22 | from .statsripe import StatsRIPE
23 |
24 | Dates = TypeVar('Dates', datetime.datetime, datetime.date, str)
25 |
26 |
27 | class BGPRanking():
28 |
29 | def __init__(self) -> None:
30 | self.logger = logging.getLogger(f'{self.__class__.__name__}')
31 | self.logger.setLevel(get_config('generic', 'loglevel'))
32 |
33 | self.cache_pool: ConnectionPool = ConnectionPool(connection_class=UnixDomainSocketConnection,
34 | path=get_socket_path('cache'), decode_responses=True)
35 |
36 | self.storage = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port'), decode_responses=True)
37 | self.asn_meta = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port'), decode_responses=True)
38 | self.ranking = Redis(get_config('generic', 'ranking_db_hostname'), get_config('generic', 'ranking_db_port'))
39 |
40 | @property
41 | def cache(self):
42 | return Redis(connection_pool=self.cache_pool, db=1)
43 |
44 | def check_redis_up(self) -> bool:
45 | return self.cache.ping()
46 |
47 | def __normalize_date(self, date: Optional[Dates]) -> str:
48 | if not date:
49 | return datetime.date.today().isoformat()
50 | if isinstance(date, datetime.datetime):
51 | return date.date().isoformat()
52 | elif isinstance(date, datetime.date):
53 | return date.isoformat()
54 | elif isinstance(date, str):
55 | try:
56 | return parse(date).date().isoformat()
57 | except ValueError:
58 | raise InvalidDateFormat('Unable to parse the date. Should be YYYY-MM-DD.')
59 |
60 | def _ranking_cache_wrapper(self, key):
61 | if not self.cache.exists(key):
62 | if self.ranking.exists(key):
63 | try:
64 | content: List[Tuple[bytes, float]] = self.ranking.zrangebyscore(key, '-Inf', '+Inf', withscores=True)
65 | # Cache for 10 hours
66 | self.cache.zadd(key, {value: rank for value, rank in content})
67 | self.cache.expire(key, 36000)
68 | except Exception as e:
69 | self.logger.exception(f'Something went poorly when caching {key}.')
70 | raise e
71 |
72 | def asns_global_ranking(self, date: Optional[Dates]=None, source: Union[list, str]='',
73 | ipversion: str='v4', limit: int=100):
74 | '''Aggregated ranking of all the ASNs known in the system, weighted by source.'''
75 | to_return: Dict[str, Any] = {
76 | 'meta': {'ipversion': ipversion, 'limit': limit},
77 | 'source': source,
78 | 'response': set()
79 | }
80 | d = self.__normalize_date(date)
81 | to_return['meta']['date'] = d
82 | if source:
83 | if isinstance(source, list):
84 | keys = []
85 | for s in source:
86 | key = f'{d}|{s}|asns|{ipversion}'
87 | self._ranking_cache_wrapper(key)
88 | keys.append(key)
89 | # union the ranked sets
90 | key = '|'.join(sorted(source)) + f'|{d}|asns|{ipversion}'
91 | if not self.cache.exists(key):
92 | self.cache.zunionstore(key, keys)
93 | else:
94 | key = f'{d}|{source}|asns|{ipversion}'
95 | else:
96 | key = f'{d}|asns|{ipversion}'
97 | self._ranking_cache_wrapper(key)
98 | to_return['response'] = self.cache.zrevrange(key, start=0, end=limit, withscores=True)
99 | return to_return
100 |
101 | def asn_details(self, asn: int, date: Optional[Dates]=None, source: Union[list, str]='',
102 | ipversion: str='v4'):
103 | '''Aggregated ranking of all the prefixes announced by the given ASN, weighted by source.'''
104 | to_return: Dict[str, Any] = {
105 | 'meta': {'asn': asn, 'ipversion': ipversion, 'source': source},
106 | 'response': set()
107 | }
108 |
109 | d = self.__normalize_date(date)
110 | to_return['meta']['date'] = d
111 | if source:
112 | if isinstance(source, list):
113 | keys = []
114 | for s in source:
115 | key = f'{d}|{s}|{asn}|{ipversion}|prefixes'
116 | self._ranking_cache_wrapper(key)
117 | keys.append(key)
118 | # union the ranked sets
119 | key = '|'.join(sorted(source)) + f'|{d}|{asn}|{ipversion}'
120 | if not self.cache.exists(key):
121 | self.cache.zunionstore(key, keys)
122 | else:
123 | key = f'{d}|{source}|{asn}|{ipversion}|prefixes'
124 | else:
125 | key = f'{d}|{asn}|{ipversion}'
126 | self._ranking_cache_wrapper(key)
127 | to_return['response'] = self.cache.zrevrange(key, start=0, end=-1, withscores=True)
128 | return to_return
129 |
130 | def asn_rank(self, asn: int, date: Optional[Dates]=None, source: Union[list, str]='',
131 | ipversion: str='v4', with_position: bool=False):
132 | '''Get the rank of a single ASN, weighted by source.'''
133 | to_return: Dict[str, Any] = {
134 | 'meta': {'asn': asn, 'ipversion': ipversion,
135 | 'source': source, 'with_position': with_position},
136 | 'response': 0.0
137 | }
138 |
139 | d = self.__normalize_date(date)
140 | to_return['meta']['date'] = d
141 | if source:
142 | to_return['meta']['source'] = source
143 | if isinstance(source, list):
144 | keys = []
145 | for s in source:
146 | key = f'{d}|{s}|{asn}|{ipversion}'
147 | self._ranking_cache_wrapper(key)
148 | keys.append(key)
149 | r = sum(float(self.cache.get(key)) for key in keys if self.cache.exists(key))
150 | else:
151 | key = f'{d}|{source}|{asn}|{ipversion}'
152 | self._ranking_cache_wrapper(key)
153 | r = self.cache.get(key)
154 | else:
155 | key = f'{d}|asns|{ipversion}'
156 | self._ranking_cache_wrapper(key)
157 | r = self.cache.zscore(key, asn)
158 | if not r:
159 | r = 0
160 | if with_position and not source:
161 | position = self.cache.zrevrank(key, asn)
162 | if position is not None:
163 | position += 1
164 | to_return['response'] = {'rank': float(r), 'position': position,
165 | 'total_known_asns': self.cache.zcard(key)}
166 | else:
167 | to_return['response'] = float(r)
168 | return to_return
169 |
170 | def get_sources(self, date: Optional[Dates]=None):
171 | '''Get the sources available for a specific day (default: today).'''
172 | to_return: Dict[str, Any] = {'meta': {}, 'response': set()}
173 |
174 | d = self.__normalize_date(date)
175 | to_return['meta']['date'] = d
176 | key = f'{d}|sources'
177 | to_return['response'] = self.storage.smembers(key)
178 | return to_return
179 |
180 | def get_asn_descriptions(self, asn: int, all_descriptions=False) -> Dict[str, Any]:
181 | to_return: Dict[str, Union[Dict, List, str]] = {
182 | 'meta': {'asn': asn, 'all_descriptions': all_descriptions},
183 | 'response': []
184 | }
185 | descriptions = self.asn_meta.hgetall(f'{asn}|descriptions')
186 | if all_descriptions or not descriptions:
187 | to_return['response'] = descriptions
188 | else:
189 | to_return['response'] = descriptions[sorted(descriptions.keys(), reverse=True)[0]]
190 | return to_return
191 |
192 | def get_prefix_ips(self, asn: int, prefix: str, date: Optional[Dates]=None,
193 | source: Union[list, str]='', ipversion: str='v4'):
194 | to_return: Dict[str, Any] = {
195 | 'meta': {'asn': asn, 'prefix': prefix, 'ipversion': ipversion,
196 | 'source': source},
197 | 'response': defaultdict(list)
198 | }
199 |
200 | d = self.__normalize_date(date)
201 | to_return['meta']['date'] = d
202 |
203 | if source:
204 | to_return['meta']['source'] = source
205 | if isinstance(source, list):
206 | sources = source
207 | else:
208 | sources = [source]
209 | else:
210 | sources = self.get_sources(d)['response']
211 |
212 | for source in sources:
213 | ips = set([ip_ts.split('|')[0]
214 | for ip_ts in self.storage.smembers(f'{d}|{source}|{asn}|{prefix}')])
215 | [to_return['response'][ip].append(source) for ip in ips]
216 | return to_return
217 |
218 | def get_asn_history(self, asn: int, period: int=100, source: Union[list, str]='',
219 | ipversion: str='v4', date: Optional[Dates]=None):
220 | to_return: Dict[str, Any] = {
221 | 'meta': {'asn': asn, 'period': period, 'ipversion': ipversion, 'source': source},
222 | 'response': []
223 | }
224 |
225 | if date is None:
226 | python_date: datetime.date = datetime.date.today()
227 | elif isinstance(date, str):
228 | python_date = parse(date).date()
229 | elif isinstance(date, datetime.datetime):
230 | python_date = date.date()
231 | else:
232 | python_date = date
233 |
234 | to_return['meta']['date'] = python_date.isoformat()
235 |
236 | for i in range(period):
237 | d = python_date - timedelta(days=i)
238 | rank = self.asn_rank(asn, d, source, ipversion)
239 | if 'response' not in rank:
240 | rank['response'] = 0
241 | to_return['response'].insert(0, (d.isoformat(), rank['response']))
242 | return to_return
243 |
244 | def country_rank(self, country: str, date: Optional[Dates]=None, source: Union[list, str]='',
245 | ipversion: str='v4'):
246 | to_return: Dict[str, Any] = {
247 | 'meta': {'country': country, 'ipversion': ipversion, 'source': source},
248 | 'response': []
249 | }
250 |
251 | d = self.__normalize_date(date)
252 | to_return['meta']['date'] = d
253 |
254 | ripe = StatsRIPE()
255 | response = ripe.country_asns(country, query_time=d, details=1)
256 | if (not response.get('data') or not response['data'].get('countries') or not
257 | response['data']['countries'][0].get('routed')):
258 | logging.warning(f'Invalid response: {response}')
259 | return {**to_return, 'response': [0, []]}
260 | routed_asns = re.findall(r"AsnSingle\(([\d]*)\)", response['data']['countries'][0]['routed'])
261 | ranks = [self.asn_rank(asn, d, source, ipversion)['response'] for asn in routed_asns]
262 | to_return['response'] = [sum(ranks), zip(routed_asns, ranks)]
263 | return to_return
264 |
265 | def country_history(self, country: Union[list, str], period: int=30, source: Union[list, str]='',
266 | ipversion: str='v4', date: Optional[Dates]=None):
267 | to_return: Dict[str, Any] = {
268 | 'meta': {'country': country, 'ipversion': ipversion, 'source': source},
269 | 'response': defaultdict(list)
270 | }
271 | if date is None:
272 | python_date: datetime.date = datetime.date.today()
273 | elif isinstance(date, str):
274 | python_date = parse(date).date()
275 | elif isinstance(date, datetime.datetime):
276 | python_date = date.date()
277 | else:
278 | python_date = date
279 |
280 | if isinstance(country, str):
281 | country = [country]
282 | for c in country:
283 | for i in range(period):
284 | d = python_date - timedelta(days=i)
285 | rank, details = self.country_rank(c, d, source, ipversion)['response']
286 | if rank is None:
287 | rank = 0
288 | to_return['response'][c].insert(0, (d.isoformat(), rank, list(details)))
289 | return to_return
290 |
291 | def get_source_config(self):
292 | pass
293 |
294 | def get_sources_configs(self):
295 | loaded = []
296 | for modulepath in get_modules():
297 | with open(modulepath) as f:
298 | loaded.append(json.load(f))
299 | return {'{}-{}'.format(config['vendor'], config['name']): config for config in loaded}
300 |
--------------------------------------------------------------------------------
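The methods above make up the read API over the cached rankings. A minimal usage sketch, assuming the class defining them is exposed as `BGPRanking` in `bgpranking/bgpranking.py` (the class name and import path are assumptions here) and that the cache and storage databases are running:

# Hypothetical usage of the query methods defined above.
from bgpranking.bgpranking import BGPRanking  # assumed import path

bgpranking = BGPRanking()

# Sources known for today (the default date).
print(bgpranking.get_sources()['response'])

# Top 10 ASNs, all sources aggregated, IPv4.
for asn, rank in bgpranking.asns_global_ranking(limit=10)['response']:
    print(asn, rank)

# Rank and position of a single ASN for today.
print(bgpranking.asn_rank(65536, with_position=True)['response'])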
/bgpranking/default/__init__.py:
--------------------------------------------------------------------------------
1 | env_global_name: str = 'BGPRANKING_HOME'
2 |
3 | from .exceptions import (BGPRankingException, FetcherException, ArchiveException, # noqa
4 | CreateDirectoryException, MissingEnv, InvalidDateFormat, # noqa
5 | MissingConfigFile, MissingConfigEntry, ThirdPartyUnreachable) # noqa
6 |
7 | # NOTE: the imports below are there to avoid overly long import paths in the
8 | # rest of the project, while keeping all of this in a subdirectory
9 | # and making it easy to update.
10 | # You should not have to change anything in this file below this line.
11 |
12 | from .abstractmanager import AbstractManager # noqa
13 |
14 | from .exceptions import MissingEnv, CreateDirectoryException, ConfigError # noqa
15 |
16 | from .helpers import get_homedir, load_configs, get_config, safe_create_dir, get_socket_path, try_make_file # noqa
17 |
--------------------------------------------------------------------------------
/bgpranking/default/abstractmanager.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import asyncio
5 | import logging
6 | import signal
7 | import time
8 | from abc import ABC
9 | from datetime import datetime, timedelta
10 | from subprocess import Popen
11 | from typing import List, Optional, Tuple
12 |
13 | from redis import Redis
14 | from redis.exceptions import ConnectionError
15 |
16 | from .helpers import get_socket_path
17 |
18 |
19 | class AbstractManager(ABC):
20 |
21 | script_name: str
22 |
23 | def __init__(self, loglevel: int=logging.DEBUG):
24 | self.loglevel = loglevel
25 | self.logger = logging.getLogger(f'{self.__class__.__name__}')
26 | self.logger.setLevel(loglevel)
27 | self.logger.info(f'Initializing {self.__class__.__name__}')
28 | self.process: Optional[Popen] = None
29 | self.__redis = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
30 |
31 | @staticmethod
32 | def is_running() -> List[Tuple[str, float]]:
33 | try:
34 | r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
35 | return r.zrangebyscore('running', '-inf', '+inf', withscores=True)
36 | except ConnectionError:
37 | print('Unable to connect to redis, the system is down.')
38 | return []
39 |
40 | @staticmethod
41 | def force_shutdown():
42 | try:
43 | r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
44 | r.set('shutdown', 1)
45 | except ConnectionError:
46 | print('Unable to connect to redis, the system is down.')
47 |
48 | def set_running(self) -> None:
49 | self.__redis.zincrby('running', 1, self.script_name)
50 |
51 | def unset_running(self) -> None:
52 | current_running = self.__redis.zincrby('running', -1, self.script_name)
53 | if int(current_running) <= 0:
54 | self.__redis.zrem('running', self.script_name)
55 |
56 | def long_sleep(self, sleep_in_sec: int, shutdown_check: int=10) -> bool:
57 | if shutdown_check > sleep_in_sec:
58 | shutdown_check = sleep_in_sec
59 | sleep_until = datetime.now() + timedelta(seconds=sleep_in_sec)
60 | while sleep_until > datetime.now():
61 | time.sleep(shutdown_check)
62 | if self.shutdown_requested():
63 | return False
64 | return True
65 |
66 | async def long_sleep_async(self, sleep_in_sec: int, shutdown_check: int=10) -> bool:
67 | if shutdown_check > sleep_in_sec:
68 | shutdown_check = sleep_in_sec
69 | sleep_until = datetime.now() + timedelta(seconds=sleep_in_sec)
70 | while sleep_until > datetime.now():
71 | await asyncio.sleep(shutdown_check)
72 | if self.shutdown_requested():
73 | return False
74 | return True
75 |
76 | def shutdown_requested(self) -> bool:
77 | try:
78 | return True if self.__redis.exists('shutdown') else False
79 | except ConnectionRefusedError:
80 | return True
81 | except ConnectionError:
82 | return True
83 |
84 | def _to_run_forever(self) -> None:
85 | pass
86 |
87 | def run(self, sleep_in_sec: int) -> None:
88 | self.logger.info(f'Launching {self.__class__.__name__}')
89 | try:
90 | while True:
91 | if self.shutdown_requested():
92 | break
93 | try:
94 | if self.process:
95 | if self.process.poll() is not None:
96 | self.logger.critical(f'Unable to start {self.script_name}.')
97 | break
98 | else:
99 | self.set_running()
100 | self._to_run_forever()
101 | except Exception:
102 | self.logger.exception(f'Something went terribly wrong in {self.__class__.__name__}.')
103 | finally:
104 | if not self.process:
105 | # self.process is set when we run an external script that stays up all the time,
106 | # so we do not unset the running flag between sleeps.
107 | self.unset_running()
108 | if not self.long_sleep(sleep_in_sec):
109 | break
110 | except KeyboardInterrupt:
111 | self.logger.warning(f'{self.script_name} killed by user.')
112 | finally:
113 | if self.process:
114 | try:
115 | # Killing everything if possible.
116 | self.process.send_signal(signal.SIGWINCH)
117 | self.process.send_signal(signal.SIGTERM)
118 | except Exception:
119 | pass
120 | try:
121 | self.unset_running()
122 | except Exception:
123 | # the services can already be down at that point.
124 | pass
125 | self.logger.info(f'Shutting down {self.__class__.__name__}')
126 |
127 | async def _to_run_forever_async(self) -> None:
128 | pass
129 |
130 | async def run_async(self, sleep_in_sec: int) -> None:
131 | self.logger.info(f'Launching {self.__class__.__name__}')
132 | try:
133 | while True:
134 | if self.shutdown_requested():
135 | break
136 | try:
137 | if self.process:
138 | if self.process.poll() is not None:
139 | self.logger.critical(f'Unable to start {self.script_name}.')
140 | break
141 | else:
142 | self.set_running()
143 | await self._to_run_forever_async()
144 | except Exception:
145 | self.logger.exception(f'Something went terribly wrong in {self.__class__.__name__}.')
146 | finally:
147 | if not self.process:
149 | # self.process is set when we run an external script that stays up all the time,
150 | # so we do not unset the running flag between sleeps.
150 | self.unset_running()
151 | if not await self.long_sleep_async(sleep_in_sec):
152 | break
153 | except KeyboardInterrupt:
154 | self.logger.warning(f'{self.script_name} killed by user.')
155 | finally:
156 | if self.process:
157 | try:
158 | # Killing everything if possible.
159 | self.process.send_signal(signal.SIGWINCH)
160 | self.process.send_signal(signal.SIGTERM)
161 | except Exception:
162 | pass
163 | try:
164 | self.unset_running()
165 | except Exception:
166 | # the services can already be down at that point.
167 | pass
168 | self.logger.info(f'Shutting down {self.__class__.__name__}')
169 |
--------------------------------------------------------------------------------
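AbstractManager is the base class every long-running script in `bin/` builds on: a subclass sets `script_name`, implements `_to_run_forever()` (or `_to_run_forever_async()`), and calls `run()` with a sleep interval; registration in the `running` sorted set and the shutdown checks come for free. A minimal sketch, with an illustrative worker name:

#!/usr/bin/env python3
# Illustrative AbstractManager subclass (the worker itself is made up).
import logging

from bgpranking.default import AbstractManager

logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
                    level=logging.INFO)


class ExampleWorker(AbstractManager):

    def __init__(self, loglevel: int=logging.INFO):
        super().__init__(loglevel)
        self.script_name = 'example_worker'

    def _to_run_forever(self):
        # One unit of work per loop; run() wraps this with set_running(),
        # unset_running() and the long_sleep() shutdown checks.
        self.logger.info('doing one pass of work')


if __name__ == '__main__':
    ExampleWorker().run(sleep_in_sec=60)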
/bgpranking/default/exceptions.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 |
5 | class BGPRankingException(Exception):
6 | pass
7 |
8 |
9 | class FetcherException(BGPRankingException):
10 | pass
11 |
12 |
13 | class ArchiveException(BGPRankingException):
14 | pass
15 |
16 |
17 | class CreateDirectoryException(BGPRankingException):
18 | pass
19 |
20 |
21 | class MissingEnv(BGPRankingException):
22 | pass
23 |
24 |
25 | class InvalidDateFormat(BGPRankingException):
26 | pass
27 |
28 |
29 | class MissingConfigFile(BGPRankingException):
30 | pass
31 |
32 |
33 | class MissingConfigEntry(BGPRankingException):
34 | pass
35 |
36 |
37 | class ThirdPartyUnreachable(BGPRankingException):
38 | pass
39 |
40 |
41 | class ConfigError(BGPRankingException):
42 | pass
43 |
--------------------------------------------------------------------------------
/bgpranking/default/helpers.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | import json
4 | import logging
5 | import os
6 | from functools import lru_cache
7 | from pathlib import Path
8 | from typing import Any, Dict, Optional, Union
9 |
10 | from . import env_global_name
11 | from .exceptions import ConfigError, CreateDirectoryException, MissingEnv
12 |
13 | configs: Dict[str, Dict[str, Any]] = {}
14 | logger = logging.getLogger('Helpers')
15 |
16 |
17 | @lru_cache(64)
18 | def get_homedir() -> Path:
19 | if not os.environ.get(env_global_name):
20 | # Try to open a .env file in the home directory if it exists.
21 | if (Path(__file__).resolve().parent.parent.parent / '.env').exists():
22 | with (Path(__file__).resolve().parent.parent.parent / '.env').open() as f:
23 | for line in f:
24 | key, value = line.strip().split('=', 1)
25 | if value[0] in ['"', "'"]:
26 | value = value[1:-1]
27 | os.environ[key] = value
28 |
29 | if not os.environ.get(env_global_name):
30 | guessed_home = Path(__file__).resolve().parent.parent.parent
31 | raise MissingEnv(f"{env_global_name} is missing. \
32 | Run the following command (assuming you run the code from the cloned repository):\
33 | export {env_global_name}='{guessed_home}'")
34 | return Path(os.environ[env_global_name])
35 |
36 |
37 | @lru_cache(64)
38 | def load_configs(path_to_config_files: Optional[Union[str, Path]]=None):
39 | global configs
40 | if configs:
41 | return
42 | if path_to_config_files:
43 | if isinstance(path_to_config_files, str):
44 | config_path = Path(path_to_config_files)
45 | else:
46 | config_path = path_to_config_files
47 | else:
48 | config_path = get_homedir() / 'config'
49 | if not config_path.exists():
50 | raise ConfigError(f'Configuration directory {config_path} does not exist.')
51 | elif not config_path.is_dir():
52 | raise ConfigError(f'Configuration directory {config_path} is not a directory.')
53 |
54 | configs = {}
55 | for path in config_path.glob('*.json'):
56 | with path.open() as _c:
57 | configs[path.stem] = json.load(_c)
58 |
59 |
60 | @lru_cache(64)
61 | def get_config(config_type: str, entry: str, quiet: bool=False) -> Any:
62 | """Get an entry from the given config_type file. Automatic fallback to the sample file"""
63 | global configs
64 | if not configs:
65 | load_configs()
66 | if config_type in configs:
67 | if entry in configs[config_type]:
68 | return configs[config_type][entry]
69 | else:
70 | if not quiet:
71 | logger.warning(f'Unable to find {entry} in config file.')
72 | else:
73 | if not quiet:
74 | logger.warning(f'No {config_type} config file available.')
75 | if not quiet:
76 | logger.warning(f'Falling back on sample config, please initialize the {config_type} config file.')
77 | with (get_homedir() / 'config' / f'{config_type}.json.sample').open() as _c:
78 | sample_config = json.load(_c)
79 | return sample_config[entry]
80 |
81 |
82 | def safe_create_dir(to_create: Path) -> None:
83 | if to_create.exists() and not to_create.is_dir():
84 | raise CreateDirectoryException(f'The path {to_create} already exists and is not a directory')
85 | to_create.mkdir(parents=True, exist_ok=True)
86 |
87 |
88 | def get_socket_path(name: str) -> str:
89 | mapping = {
90 | 'cache': Path('cache', 'cache.sock'),
91 | 'intake': Path('temp', 'intake.sock'),
92 | 'prepare': Path('temp', 'prepare.sock')
93 | }
94 | return str(get_homedir() / mapping[name])
95 |
96 |
97 | def try_make_file(filename: Path):
98 | try:
99 | filename.touch(exist_ok=False)
100 | return True
101 | except FileExistsError:
102 | return False
103 |
--------------------------------------------------------------------------------
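In short: `get_homedir()` resolves the checkout from the BGPRANKING_HOME environment variable (or a `.env` file at the repository root), and `get_config()` reads entries from `config/*.json`, falling back to the matching `.json.sample`. A small sketch of how they are typically combined (the entries queried are ones used elsewhere in this repository):

# Sketch: resolve the home directory and read a couple of config entries.
# Requires BGPRANKING_HOME to be set, otherwise get_homedir() raises MissingEnv.
from bgpranking.default import get_homedir, get_config, get_socket_path

home = get_homedir()                      # Path to the checkout
storage_port = get_config('generic', 'storage_db_port')
cache_socket = get_socket_path('cache')   # <home>/cache/cache.sock
print(home, storage_port, cache_socket)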
/bgpranking/helpers.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import json
5 | from functools import lru_cache
6 | from pathlib import Path
7 | from typing import Dict, List
8 |
9 | import requests
10 |
11 | from pyipasnhistory import IPASNHistory
12 |
13 | from .default import get_homedir, get_config, ThirdPartyUnreachable, safe_create_dir
14 |
15 |
16 | @lru_cache(64)
17 | def get_data_dir() -> Path:
18 | capture_dir = get_homedir() / 'rawdata'
19 | safe_create_dir(capture_dir)
20 | return capture_dir
21 |
22 |
23 | @lru_cache(64)
24 | def get_modules_dir() -> Path:
25 | modules_dir = get_homedir() / 'config' / 'modules'
26 | safe_create_dir(modules_dir)
27 | return modules_dir
28 |
29 |
30 | @lru_cache(64)
31 | def get_modules() -> List[Path]:
32 | return [modulepath for modulepath in get_modules_dir().glob('*.json')]
33 |
34 |
35 | def load_all_modules_configs() -> Dict[str, Dict]:
36 | configs = {}
37 | for p in get_modules():
38 | with p.open() as f:
39 | j = json.load(f)
40 | configs[f"{j['vendor']}-{j['name']}"] = j
41 | return configs
42 |
43 |
44 | def get_ipasn():
45 | ipasnhistory_url = get_config('generic', 'ipasnhistory_url')
46 | ipasn = IPASNHistory(ipasnhistory_url)
47 | if not ipasn.is_up:
48 | raise ThirdPartyUnreachable(f"Unable to reach IPASNHistory on {ipasnhistory_url}")
49 | return ipasn
50 |
51 |
52 | def sanity_check_ipasn(ipasn):
53 | try:
54 | meta = ipasn.meta()
55 | except requests.exceptions.ConnectionError:
56 | return False, "IP ASN History is not reachable, try again later."
57 |
58 | if 'error' in meta:
59 | raise ThirdPartyUnreachable(f'IP ASN History has a problem: {meta["error"]}')
60 |
61 | v4_percent = meta['cached_dates']['caida']['v4']['percent']
62 | v6_percent = meta['cached_dates']['caida']['v6']['percent']
63 | if v4_percent < 90 or v6_percent < 90: # (this way it works if we only load 10 days)
64 | # Try again later.
65 | return False, f"IP ASN History is not ready: v4 {v4_percent}% / v6 {v6_percent}% loaded"
66 | return True, f"IP ASN History is ready: v4 {v4_percent}% / v6 {v6_percent}% loaded"
67 |
--------------------------------------------------------------------------------
/bgpranking/parsers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/D4-project/BGP-Ranking/5f4e051f940b1c79ec5c6b432db1c7b2bdcbc781/bgpranking/parsers/__init__.py
--------------------------------------------------------------------------------
/bgpranking/parsers/abusech.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | from dateutil.parser import parse
5 | import re
6 | from io import BytesIO
7 |
8 | from typing import List
9 |
10 |
11 | def parse_raw_file(self, f: BytesIO) -> List[bytes]:
12 | self.datetime = parse(re.findall(b'# Last updated: (.*)#', f.getvalue())[0])
13 | return self.extract_ipv4(f.getvalue())
14 |
--------------------------------------------------------------------------------
/bgpranking/parsers/abusech_feodo.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | from dateutil.parser import parse
5 | import re
6 | from io import BytesIO
7 |
8 | from typing import List
9 |
10 |
11 | def parse_raw_file(self, f: BytesIO) -> List[bytes]:
12 | self.datetime = parse(re.findall(b'# Last updated: (.*)#', f.getvalue())[0])
13 | return self.extract_ipv4(f.getvalue())
14 |
--------------------------------------------------------------------------------
/bgpranking/parsers/abusech_threatfox.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import json
5 |
6 | from datetime import datetime
7 | from io import BytesIO
8 | from typing import List
9 |
10 |
11 | def parse_raw_file(self, f: BytesIO) -> List[str]:
12 | self.datetime = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
13 | to_return = []
14 | for entry in json.loads(f.getvalue().decode()).values():
15 | ip_port = entry[0]['ioc_value']
16 | to_return.append(ip_port.split(':')[0])
17 | return to_return
18 |
--------------------------------------------------------------------------------
/bgpranking/parsers/dshield.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | from dateutil.parser import parse
5 | import re
6 | from io import BytesIO
7 | from typing import List
8 |
9 |
10 | def parse_raw_file(self, f: BytesIO) -> List[bytes]:
11 | self.datetime = parse(re.findall(b'# updated (.*)\n', f.getvalue())[0])
12 | iplist = self.extract_ipv4(f.getvalue())
13 | # The IPs have leading 0s. Getting rid of them directly here.
14 | return self.strip_leading_zeros(iplist)
15 |
--------------------------------------------------------------------------------
/bgpranking/parsers/malc0de.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | from dateutil.parser import parse
5 | import re
6 | from io import BytesIO
7 |
8 | from typing import List
9 |
10 |
11 | def parse_raw_file(self, f: BytesIO) -> List[bytes]:
12 | self.datetime = parse(re.findall(b'// Last updated (.*)\n', f.getvalue())[0])
13 | return self.extract_ipv4(f.getvalue())
14 |
--------------------------------------------------------------------------------
/bgpranking/parsers/nothink.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | from dateutil.parser import parse
5 | import re
6 | from io import BytesIO
7 |
8 | from typing import List
9 |
10 | def parse_raw_file(self, f: BytesIO) -> List[bytes]:
11 | self.datetime = parse(re.findall(b'# Generated (.*)\n', f.getvalue())[0])
12 | return self.extract_ipv4(f.getvalue())
13 |
--------------------------------------------------------------------------------
/bgpranking/parsers/shadowserver.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | from dateutil.parser import parse
5 | from csv import DictReader
6 | from io import BytesIO, StringIO
7 | from typing import Tuple, Generator
8 | from datetime import datetime
9 |
10 |
11 | def parse_raw_file(self, f: BytesIO) -> Generator[Tuple[str, datetime], None, None]:
12 | default_ts = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
13 | reader = DictReader(StringIO(f.getvalue().decode()))
14 | for row in reader:
15 | if 'timestamp' in row:
16 | ts = parse(row['timestamp'])
17 | else:
18 | ts = default_ts
19 |
20 | if 'ip' in row:
21 | ip = row['ip']
22 | elif 'src_ip' in row:
23 | # For sinkhole6_http
24 | ip = row['src_ip']
25 | else:
26 | self.logger.critical(f'No IPs in the list {self.source}.')
27 | break
28 | yield ip, ts
29 |
--------------------------------------------------------------------------------
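Each parser module above follows the same contract: a module-level `parse_raw_file(self, f)` that `bin/parser.py` binds onto its `RawFilesParser` instance (via `types.MethodType`), sets `self.datetime`, and returns the IPs found in the raw file. A sketch of a new module written against that contract; the feed format and its `# Updated:` header are invented for illustration:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Hypothetical parser module: a plain-text feed with one IPv4 address per line
# and a '# Updated: <date>' header. Bound as a method of RawFilesParser, so
# self.extract_ipv4() and self.datetime are available.

from dateutil.parser import parse
import re
from io import BytesIO

from typing import List


def parse_raw_file(self, f: BytesIO) -> List[bytes]:
    self.datetime = parse(re.findall(b'# Updated: (.*)\n', f.getvalue())[0])
    return self.extract_ipv4(f.getvalue())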
/bgpranking/statsripe.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import copy
5 | import json
6 |
7 | from datetime import datetime, timedelta
8 | from enum import Enum
9 | from ipaddress import IPv4Address, IPv6Address, IPv4Network, IPv6Network
10 | from typing import TypeVar, Optional, Dict, Any
11 |
12 | import requests
13 |
14 | from dateutil.parser import parse
15 |
16 | from .helpers import get_homedir, safe_create_dir
17 |
18 | IPTypes = TypeVar('IPTypes', IPv4Address, IPv6Address, 'str')
19 | PrefixTypes = TypeVar('PrefixTypes', IPv4Network, IPv6Network, 'str')
20 | TimeTypes = TypeVar('TimeTypes', datetime, 'str')
21 |
22 |
23 | class ASNsTypes(Enum):
24 | transiting = 't'
25 | originating = 'o'
26 | all_types = 't,o'
27 | undefined = ''
28 |
29 |
30 | class AddressFamilies(Enum):
31 | ipv4 = 'v4'
32 | ipv6 = 'v6'
33 | all_families = 'v4,v6'
34 | undefined = ''
35 |
36 |
37 | class Noise(Enum):
38 | keep = 'keep'
39 | remove = 'filter'
40 |
41 |
42 | class StatsRIPE():
43 |
44 | def __init__(self, sourceapp='bgpranking-ng - CIRCL'):
45 | self.url = "https://stat.ripe.net/data/{method}/data.json?{parameters}"
46 | self.sourceapp = sourceapp
47 | self.cache_dir = get_homedir() / 'rawdata' / 'stats_ripe'
48 |
49 | def __time_to_text(self, query_time: TimeTypes) -> str:
50 | if isinstance(query_time, datetime):
51 | return query_time.isoformat()
52 | return query_time
53 |
54 | def _get_cache(self, method, parameters):
55 | '''The dataset is updated every 8 hours (midnight, 8, 16).
56 | If parameters has a 'query_time' key on one of these hours, try to get that dataset.
57 | If not, try to get the closest one.
58 | If there is no 'query_time' at all, assume none and try to get the closest timestamp.
59 | When caching, get query_time from response['data']['query_time']
60 | '''
61 | parameters = copy.copy(parameters)
62 | if not parameters.get('query_time'):
63 | # use timedelta because the generation of the new dataset takes a while.
64 | parameters['query_time'] = (datetime.now() - timedelta(hours=8)).isoformat()
65 |
66 | d = parse(parameters['query_time'])
67 | if d.hour == 8 and d.minute == 0 and d.second == 0:
68 | pass
69 | else:
70 | d = d.replace(hour=min([0, 8, 16], key=lambda x: abs(x - d.hour)),
71 | minute=0, second=0, microsecond=0)
72 | parameters['query_time'] = d.isoformat()
73 | cache_filename = '&'.join(['{}={}'.format(k, str(v).lower()) for k, v in parameters.items()])
74 | c_path = self.cache_dir / method / cache_filename
75 | if c_path.exists():
76 | with open(c_path, 'r') as f:
77 | return json.load(f)
78 | return False
79 |
80 | def _save_cache(self, method, parameters, response):
81 | parameters['query_time'] = response['data']['query_time']
82 | cache_filename = '&'.join(['{}={}'.format(k, str(v).lower()) for k, v in parameters.items()])
83 | safe_create_dir(self.cache_dir / method)
84 | c_path = self.cache_dir / method / cache_filename
85 | with open(c_path, 'w') as f:
86 | json.dump(response, f, indent=2)
87 |
88 | def _get(self, method: str, parameters: Dict) -> Dict:
89 | parameters['sourceapp'] = self.sourceapp
90 | cached = self._get_cache(method, parameters)
91 | if cached:
92 | return cached
93 | url = self.url.format(method=method, parameters='&'.join(['{}={}'.format(k, str(v).lower()) for k, v in parameters.items()]))
94 | response = requests.get(url)
95 | j_content = response.json()
96 | self._save_cache(method, parameters, j_content)
97 | return j_content
98 |
99 | def network_info(self, ip: IPTypes) -> dict:
100 | parameters = {'resource': ip}
101 | return self._get('network-info', parameters)
102 |
103 | def prefix_overview(self, prefix: PrefixTypes, min_peers_seeing: int= 0,
104 | max_related: int=0, query_time: Optional[TimeTypes]=None) -> dict:
105 | parameters: Dict[str, Any] = {'resource': prefix}
106 | if min_peers_seeing:
107 | parameters['min_peers_seeing'] = min_peers_seeing
108 | if max_related:
109 | parameters['max_related'] = max_related
110 | if query_time:
111 | parameters['query_time'] = self.__time_to_text(query_time)
112 | return self._get('prefix-overview', parameters)
113 |
114 | def ris_asns(self, query_time: Optional[TimeTypes]=None, list_asns: bool=False, asn_types: ASNsTypes=ASNsTypes.undefined):
115 | parameters: Dict[str, Any] = {}
116 | if list_asns:
117 | parameters['list_asns'] = list_asns
118 | if asn_types:
119 | parameters['asn_types'] = asn_types.value
120 | if query_time:
121 | parameters['query_time'] = self.__time_to_text(query_time)
122 | return self._get('ris-asns', parameters)
123 |
124 | def ris_prefixes(self, asn: int, query_time: Optional[TimeTypes]=None,
125 | list_prefixes: bool=False, types: ASNsTypes=ASNsTypes.undefined,
126 | af: AddressFamilies=AddressFamilies.undefined, noise: Noise=Noise.keep):
127 | parameters: Dict[str, Any] = {'resource': str(asn)}
128 | if query_time:
129 | parameters['query_time'] = self.__time_to_text(query_time)
130 | if list_prefixes:
131 | parameters['list_prefixes'] = list_prefixes
132 | if types:
133 | parameters['types'] = types.value
134 | if af:
135 | parameters['af'] = af.value
136 | if noise:
137 | parameters['noise'] = noise.value
138 | return self._get('ris-prefixes', parameters)
139 |
140 | def country_asns(self, country: str, details: int=0, query_time: Optional[TimeTypes]=None):
141 | parameters: Dict[str, Any] = {'resource': country}
142 | if details:
143 | parameters['lod'] = details
144 | if query_time:
145 | parameters['query_time'] = self.__time_to_text(query_time)
146 | return self._get('country-asns', parameters)
147 |
--------------------------------------------------------------------------------
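To make the `_get_cache()` rounding concrete: a `query_time` that does not fall exactly on one of the 8-hourly dataset boundaries is snapped to the closest of 00:00, 08:00 or 16:00 on the same day before the cache filename is built. A standalone sketch of that rounding:

# Sketch of the query_time rounding used by StatsRIPE._get_cache().
from dateutil.parser import parse

d = parse('2022-03-01T10:45:12')
d = d.replace(hour=min([0, 8, 16], key=lambda x: abs(x - d.hour)),
              minute=0, second=0, microsecond=0)
print(d.isoformat())  # 2022-03-01T08:00:00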
/bin/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/D4-project/BGP-Ranking/5f4e051f940b1c79ec5c6b432db1c7b2bdcbc781/bin/__init__.py
--------------------------------------------------------------------------------
/bin/archiver.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import json
5 | import logging
6 | import zipfile
7 |
8 | from collections import defaultdict
9 | from datetime import date
10 | from logging import Logger
11 | from pathlib import Path
12 |
13 | from dateutil import parser
14 | from dateutil.relativedelta import relativedelta
15 |
16 | from bgpranking.default import safe_create_dir, AbstractManager
17 | from bgpranking.helpers import get_modules, get_data_dir
18 |
19 |
20 | logger = logging.getLogger('Archiver')
21 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
22 | level=logging.INFO)
23 |
24 |
25 | class DeepArchive():
26 |
27 | def __init__(self, config_file: Path, logger: Logger):
28 | '''Archive every file older than 2 months.'''
29 | with config_file.open() as f:
30 | module_parameters = json.load(f)
31 | self.logger = logger
32 | self.vendor = module_parameters['vendor']
33 | self.listname = module_parameters['name']
34 | self.directory = get_data_dir() / self.vendor / self.listname / 'archive'
35 | safe_create_dir(self.directory)
36 | self.deep_archive = self.directory / 'deep'
37 | safe_create_dir(self.deep_archive)
38 |
39 | def archive(self):
40 | to_archive = defaultdict(list)
41 | today = date.today()
42 | last_day_to_keep = date(today.year, today.month, 1) - relativedelta(months=2)
43 | for p in self.directory.iterdir():
44 | if not p.is_file():
45 | continue
46 | filedate = parser.parse(p.name.split('.')[0]).date()
47 | if filedate >= last_day_to_keep:
48 | continue
49 | to_archive['{}.zip'.format(filedate.strftime('%Y%m'))].append(p)
50 | if to_archive:
51 | self.logger.info('Found old files. Archiving: {}'.format(', '.join(to_archive.keys())))
52 | else:
53 | self.logger.debug('No old files.')
54 | for archivename, path_list in to_archive.items():
55 | with zipfile.ZipFile(self.deep_archive / archivename, 'x', zipfile.ZIP_DEFLATED) as z:
56 | for f in path_list:
57 | z.write(f, f.name)
58 | # Delete all the files if the archiving worked out properly
59 | [f.unlink() for f in path_list]
60 |
61 |
62 | class ModulesArchiver(AbstractManager):
63 |
64 | def __init__(self, loglevel: int=logging.INFO):
65 | super().__init__(loglevel)
66 | self.script_name = 'archiver'
67 | self.modules = [DeepArchive(path, self.logger) for path in get_modules()]
68 |
69 | def _to_run_forever(self):
70 | [module.archive() for module in self.modules]
71 |
72 |
73 | def main():
74 | archiver = ModulesArchiver()
75 | archiver.run(sleep_in_sec=360000)
76 |
77 |
78 | if __name__ == '__main__':
79 | main()
80 |
--------------------------------------------------------------------------------
/bin/asn_descriptions.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import logging
5 | import re
6 | import requests
7 |
8 | from dateutil.parser import parse
9 | from redis import Redis
10 |
11 | from bgpranking.default import get_socket_path, safe_create_dir, AbstractManager, get_config
12 | from bgpranking.helpers import get_data_dir
13 |
14 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
15 | level=logging.INFO)
16 |
17 |
18 | class ASNDescriptions(AbstractManager):
19 |
20 | def __init__(self, loglevel: int=logging.INFO):
21 | super().__init__(loglevel)
22 | self.script_name = 'asn_descr'
23 | self.asn_meta = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port'), db=2, decode_responses=True)
24 | self.logger.debug('Starting ASN History')
25 | self.directory = get_data_dir() / 'asn_descriptions'
26 | safe_create_dir(self.directory)
27 | self.archives = self.directory / 'archive'
28 | safe_create_dir(self.archives)
29 | self.url = 'https://www.cidr-report.org/as2.0/autnums.html'
30 |
31 | def __update_available(self):
32 | r = requests.head(self.url)
33 | self.logger.debug(r.headers)
34 | current_last_modified = parse(r.headers['Last-Modified'])
35 | if not self.asn_meta.exists('ans_description_last_update'):
36 | return True
37 | last_update = parse(self.asn_meta.get('ans_description_last_update')) # type: ignore
38 | if last_update < current_last_modified:
39 | return True
40 | return False
41 |
42 | def load_descriptions(self):
43 | if not self.__update_available():
44 | self.logger.debug('No new file to import.')
45 | return
46 | self.logger.info('Importing new ASN descriptions.')
47 | r = requests.get(self.url)
48 | last_modified = parse(r.headers['Last-Modified']).isoformat()
49 | p = self.asn_meta.pipeline()
50 | new_asn = 0
51 | new_description = 0
52 | for asn, descr in re.findall('as=AS(.*)&.* (.*)\n', r.text):
53 | existing_descriptions = self.asn_meta.hgetall(f'{asn}|descriptions')
54 | if not existing_descriptions:
55 | self.logger.debug(f'New ASN: {asn} - {descr}')
56 | p.hset(f'{asn}|descriptions', last_modified, descr)
57 | new_asn += 1
58 | else:
59 | last_descr = sorted(existing_descriptions.keys(), reverse=True)[0]
60 | if descr != existing_descriptions[last_descr]:
61 | self.logger.debug(f'New description for {asn}: {existing_descriptions[last_descr]} -> {descr}')
62 | p.hset(f'{asn}|descriptions', last_modified, descr)
63 | new_description += 1
64 | p.set('ans_description_last_update', last_modified)
65 | p.execute()
66 | self.logger.info(f'Done with import. New ASNs: {new_asn}, new descriptions: {new_description}')
67 | if new_asn or new_description:
68 | with open(self.archives / f'{last_modified}.html', 'w') as f:
69 | f.write(r.text)
70 |
71 | def _to_run_forever(self):
72 | self.load_descriptions()
73 |
74 |
75 | def main():
76 | asnd_manager = ASNDescriptions()
77 | asnd_manager.run(sleep_in_sec=3600)
78 |
79 |
80 | if __name__ == '__main__':
81 | main()
82 |
--------------------------------------------------------------------------------
/bin/dbinsert.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import logging
5 | import time
6 |
7 | from typing import List
8 |
9 | from redis import Redis
10 |
11 | from bgpranking.default import get_socket_path, AbstractManager, get_config
12 | from bgpranking.helpers import get_ipasn, sanity_check_ipasn
13 |
14 |
15 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
16 | level=logging.INFO)
17 |
18 |
19 | class DBInsertManager(AbstractManager):
20 |
21 | def __init__(self, loglevel: int=logging.INFO):
22 | super().__init__(loglevel)
23 | self.script_name = 'db_insert'
24 | self.kvrocks_storage = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port'), decode_responses=True)
25 | self.redis_sanitized = Redis(unix_socket_path=get_socket_path('prepare'), db=0, decode_responses=True)
26 | self.ipasn = get_ipasn()
27 | self.logger.debug('Starting import')
28 |
29 | def _to_run_forever(self):
30 | ready, message = sanity_check_ipasn(self.ipasn)
31 | if not ready:
32 | # Try again later.
33 | self.logger.warning(message)
34 | return
35 | self.logger.debug(message)
36 |
37 | while True:
38 | if self.shutdown_requested():
39 | break
40 | try:
41 | if not self.ipasn.is_up:
42 | break
43 | except Exception:
44 | self.logger.warning('Unable to query ipasnhistory')
45 | time.sleep(10)
46 | continue
47 | uuids: List[str] = self.redis_sanitized.spop('to_insert', 100) # type: ignore
48 | if not uuids:
49 | break
50 | p = self.redis_sanitized.pipeline(transaction=False)
51 | [p.hgetall(uuid) for uuid in uuids]
52 | sanitized_data = p.execute()
53 |
54 | for_query = []
55 | for i, uuid in enumerate(uuids):
56 | data = sanitized_data[i]
57 | if not data:
58 | self.logger.warning(f"No data for UUID {uuid}. This should not happen, but let's move on.")
59 | continue
60 | for_query.append({'ip': data['ip'], 'address_family': data['address_family'],
61 | 'date': data['datetime'], 'precision_delta': {'days': 3}})
62 | try:
63 | responses = self.ipasn.mass_query(for_query)
64 | except Exception:
65 | self.logger.exception('Mass query in IPASN History failed, trying again later.')
66 | # Rollback the spop
67 | self.redis_sanitized.sadd('to_insert', *uuids)
68 | time.sleep(10)
69 | continue
70 | retry = []
71 | done = []
72 | ardb_pipeline = self.kvrocks_storage.pipeline(transaction=False)
73 | for i, uuid in enumerate(uuids):
74 | data = sanitized_data[i]
75 | if not data:
76 | self.logger.warning(f"No data for UUID {uuid}. This should not happen, but let's move on.")
77 | done.append(uuid)
78 | continue
79 | routing_info = responses['responses'][i]['response'] # our queries are on one single date, not a range
80 | # Data gathered from IPASN History:
81 | # * IP Block of the IP
82 | # * AS number
83 | if not routing_info:
84 | self.logger.warning(f"No response for {responses['responses'][i]}")
85 | done.append(uuid)
86 | continue
87 | if 'error' in routing_info:
88 | self.logger.warning(f"Unable to find routing information for {data['ip']} - {data['datetime']}: {routing_info['error']}")
89 | done.append(uuid)
90 | continue
91 | # Single date query, getting from the object
92 | datetime_routing = list(routing_info.keys())[0]
93 | entry = routing_info[datetime_routing]
94 | if not entry:
95 | # routing info is missing, need to try again later.
96 | retry.append(uuid)
97 | continue
98 | if 'asn' in entry and entry['asn'] in [None, '0']:
99 | self.logger.warning(f"Unable to find the AS number associated to {data['ip']} - {data['datetime']} (got {entry['asn']}) - {entry}.")
100 | done.append(uuid)
101 | continue
102 | if 'prefix' in entry and entry['prefix'] in [None, '0.0.0.0/0', '::/0']:
103 | self.logger.warning(f"Unable to find the prefix associated to {data['ip']} - {data['datetime']} (got {entry['prefix']}).")
104 | done.append(uuid)
105 | continue
106 |
107 | # Format: <date>|sources -> set([<source>, ...])
108 | ardb_pipeline.sadd(f"{data['date']}|sources", data['source'])
109 |
110 | # Format: <date>|<source> -> set([<asn>, ...])
111 | ardb_pipeline.sadd(f"{data['date']}|{data['source']}", entry['asn'])
112 | # Format: <date>|<source>|<asn> -> set([<prefix>, ...])
113 | ardb_pipeline.sadd(f"{data['date']}|{data['source']}|{entry['asn']}", entry['prefix'])
114 |
115 | # Format: <date>|<source>|<asn>|<prefix> -> set([<ip>|<datetime>, ...])
116 | ardb_pipeline.sadd(f"{data['date']}|{data['source']}|{entry['asn']}|{entry['prefix']}",
117 | f"{data['ip']}|{data['datetime']}")
118 | done.append(uuid)
119 | ardb_pipeline.execute()
120 | p = self.redis_sanitized.pipeline(transaction=False)
121 | if done:
122 | p.delete(*done)
123 | if retry:
124 | p.sadd('to_insert', *retry)
125 | p.execute()
126 |
127 |
128 | def main():
129 | dbinsert = DBInsertManager()
130 | dbinsert.run(sleep_in_sec=120)
131 |
132 |
133 | if __name__ == '__main__':
134 | main()
135 |
--------------------------------------------------------------------------------
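Put together, one sanitized record produces four set entries in the storage database, one per level of the key hierarchy shown in the comments above. A walkthrough with made-up values:

# Illustration of the storage keys written by DBInsertManager for one record
# (all concrete values below are invented).
date, source = '2022-03-01', 'example-feed'
asn, prefix = '65536', '192.0.2.0/24'
ip, seen = '192.0.2.10', '2022-03-01T06:00:00'

keys = {
    f'{date}|sources': source,                          # sources seen that day
    f'{date}|{source}': asn,                            # ASNs per source
    f'{date}|{source}|{asn}': prefix,                   # prefixes per ASN
    f'{date}|{source}|{asn}|{prefix}': f'{ip}|{seen}',  # ip|datetime entries
}
for key, member in keys.items():
    print(f'SADD {key} {member}')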
/bin/fetcher.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import json
5 | import asyncio
6 | import logging
7 |
8 | from datetime import datetime, date
9 | from hashlib import sha512 # Faster than sha256 on 64b machines.
10 | from logging import Logger
11 | from pathlib import Path
12 |
13 | import aiohttp
14 | from dateutil import parser
15 | from pid import PidFile, PidFileError # type: ignore
16 |
17 | from bgpranking.default import AbstractManager, safe_create_dir
18 | from bgpranking.helpers import get_modules, get_data_dir, get_modules_dir
19 |
20 |
21 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
22 | level=logging.INFO)
23 |
24 |
25 | class Fetcher():
26 |
27 | def __init__(self, config_file: Path, logger: Logger):
28 | '''Load `config_file`, and store the fetched data in the module's data directory.
29 | Note: if the `config_file` does not provide a URL (the file is
30 | gathered by some other means), the fetcher is automatically stopped.'''
31 | with open(config_file, 'r') as f:
32 | module_parameters = json.load(f)
33 | self.vendor = module_parameters['vendor']
34 | self.listname = module_parameters['name']
35 | self.logger = logger
36 | self.fetcher = True
37 | if 'url' not in module_parameters:
38 | self.logger.info(f'{self.vendor}-{self.listname}: No URL to fetch, breaking.')
39 | self.fetcher = False
40 | return
41 | self.url = module_parameters['url']
42 | self.logger.debug(f'{self.vendor}-{self.listname}: Starting fetcher on {self.url}')
43 | self.directory = get_data_dir() / self.vendor / self.listname
44 | safe_create_dir(self.directory)
45 | self.meta = self.directory / 'meta'
46 | safe_create_dir(self.meta)
47 | self.archive_dir = self.directory / 'archive'
48 | safe_create_dir(self.archive_dir)
49 | self.first_fetch = True
50 |
51 | async def __get_last_modified(self):
52 | async with aiohttp.ClientSession() as session:
53 | async with session.head(self.url) as r:
54 | headers = r.headers
55 | if 'Last-Modified' in headers:
56 | return parser.parse(headers['Last-Modified'])
57 | return None
58 |
59 | async def __newer(self):
60 | '''Check if the file available for download is newer than the one
61 | already downloaded by checking the `Last-Modified` header.
62 | Note: return False if the file containing the last header content
63 | is not existing, or the header doesn't have this key.
64 | '''
65 | last_modified_path = self.meta / 'lastmodified'
66 | if not last_modified_path.exists():
67 | # The local 'lastmodified' marker file doesn't exist
68 | if not self.first_fetch:
69 | # The URL has no Last-Modified header, we cannot use it.
70 | self.logger.debug(f'{self.vendor}-{self.listname}: No Last-Modified header available')
71 | return True
72 | self.first_fetch = False
73 | last_modified = await self.__get_last_modified()
74 | if last_modified:
75 | self.logger.debug(f'{self.vendor}-{self.listname}: Last-Modified header available')
76 | with last_modified_path.open('w') as f:
77 | f.write(last_modified.isoformat())
78 | else:
79 | self.logger.debug(f'{self.vendor}-{self.listname}: No Last-Modified header available')
80 | return True
81 | with last_modified_path.open() as f:
82 | file_content = f.read()
83 | last_modified_file = parser.parse(file_content)
84 | last_modified = await self.__get_last_modified()
85 | if not last_modified:
86 | # No more Last-Modified header Oo
87 | self.logger.warning(f'{self.vendor}-{self.listname}: Last-Modified header was present, isn\'t anymore!')
88 | last_modified_path.unlink()
89 | return True
90 | if last_modified > last_modified_file:
91 | self.logger.info(f'{self.vendor}-{self.listname}: Got a new file.')
92 | with last_modified_path.open('w') as f:
93 | f.write(last_modified.isoformat())
94 | return True
95 | return False
96 |
97 | def __same_as_last(self, downloaded):
98 | '''Figure out the last downloaded file and check if it is the same as the
99 | newly downloaded one. Returns True if an identical file has already been
100 | downloaded today.
101 | Note: we check the new and the archive directory because we may have backlog
102 | and the newest file is always the first one we process
103 | '''
104 | to_check = []
105 | to_check_new = sorted([f for f in self.directory.iterdir() if f.is_file()])
106 | if to_check_new:
107 | # we have files waiting to be processed
108 | self.logger.debug(f'{self.vendor}-{self.listname}: {len(to_check_new)} file(s) are waiting to be processed')
109 | to_check.append(to_check_new[-1])
110 | to_check_archive = sorted([f for f in self.archive_dir.iterdir() if f.is_file()])
111 | if to_check_archive:
112 | # we have files already processed, in the archive
113 | self.logger.debug(f'{self.vendor}-{self.listname}: {len(to_check_archive)} file(s) have been processed')
114 | to_check.append(to_check_archive[-1])
115 | if not to_check:
116 | self.logger.debug(f'{self.vendor}-{self.listname}: New list, no historical files')
117 | # nothing has been downloaded ever, moving on
118 | return False
119 | dl_hash = sha512(downloaded)
120 | for last_file in to_check:
121 | with last_file.open('rb') as f:
122 | last_hash = sha512(f.read())
123 | if (dl_hash.digest() == last_hash.digest()
124 | and parser.parse(last_file.name.split('.')[0]).date() == date.today()):
125 | self.logger.debug(f'{self.vendor}-{self.listname}: Same file already downloaded today.')
126 | return True
127 | return False
128 |
129 | async def fetch_list(self):
130 | '''Fetch & store the list'''
131 | if not self.fetcher:
132 | return
133 | try:
134 | with PidFile(f'{self.listname}.pid', piddir=self.meta):
135 | if not await self.__newer():
136 | return
137 | async with aiohttp.ClientSession() as session:
138 | async with session.get(self.url) as r:
139 | content = await r.content.read()
140 | if self.__same_as_last(content):
141 | return
142 | self.logger.info(f'{self.vendor}-{self.listname}: Got a new file!')
143 | with (self.directory / '{}.txt'.format(datetime.now().isoformat())).open('wb') as f:
144 | f.write(content)
145 | except PidFileError:
146 | self.logger.info(f'{self.vendor}-{self.listname}: Fetcher already running')
147 |
148 |
149 | class ModulesManager(AbstractManager):
150 |
151 | def __init__(self, loglevel: int=logging.DEBUG):
152 | super().__init__(loglevel)
153 | self.script_name = 'modules_manager'
154 | self.modules_paths = get_modules()
155 | self.modules = [Fetcher(path, self.logger) for path in self.modules_paths]
156 |
157 | async def _to_run_forever_async(self):
158 | # Check if there are new config files
159 | new_modules_paths = [modulepath for modulepath in get_modules_dir().glob('*.json') if modulepath not in self.modules_paths]
160 | self.modules += [Fetcher(path, self.logger) for path in new_modules_paths]
161 | self.modules_paths += new_modules_paths
162 |
163 | if self.modules:
164 | for module in self.modules:
165 | if module.fetcher:
166 | await module.fetch_list()
167 | else:
168 | self.logger.info('No config files were found so there are no fetchers running yet. Will try again later.')
169 |
170 |
171 | def main():
172 | m = ModulesManager()
173 | asyncio.run(m.run_async(sleep_in_sec=3600))
174 |
175 |
176 | if __name__ == '__main__':
177 | main()
178 |
--------------------------------------------------------------------------------
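The fetcher only writes a new raw file when two checks pass: the remote `Last-Modified` header is newer than the stored one (when that header exists), and the downloaded content does not hash to the same SHA-512 as a file already fetched today. A condensed sketch of the second check; the helper name and the candidate list are illustrative:

# Sketch of the duplicate detection done in Fetcher.__same_as_last().
from datetime import date
from hashlib import sha512
from pathlib import Path
from typing import List

from dateutil import parser


def already_downloaded_today(downloaded: bytes, candidates: List[Path]) -> bool:
    # `candidates` would be the newest file in the download and archive directories.
    dl_digest = sha512(downloaded).digest()
    for last_file in candidates:
        if (sha512(last_file.read_bytes()).digest() == dl_digest
                and parser.parse(last_file.name.split('.')[0]).date() == date.today()):
            return True
    return False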
/bin/manual_ranking.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import argparse
5 | import logging
6 | from dateutil.parser import parse
7 | from datetime import timedelta
8 |
9 | from bgpranking.helpers import load_all_modules_configs
10 | from .ranking import Ranking
11 |
12 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
13 | level=logging.INFO)
14 |
15 |
16 | if __name__ == '__main__':
17 | parser = argparse.ArgumentParser(description='Manually force the ranking of a day or a time interval.')
18 | group = parser.add_mutually_exclusive_group(required=True)
19 | group.add_argument('-d', '--day', type=str, help='Day to rank (Format: YYYY-MM-DD).')
20 | group.add_argument('-i', '--interval', type=str, nargs=2, help='Interval to rank, first to last (Format: YYYY-MM-DD YYYY-MM-DD).')
21 | args = parser.parse_args()
22 |
23 | ranking = Ranking(loglevel=logging.DEBUG)
24 | config_files = load_all_modules_configs()
25 | if args.day:
26 | day = parse(args.day).date().isoformat()
27 | ranking.rank_a_day(day)
28 | else:
29 | current = parse(args.interval[1]).date()
30 | stop_date = parse(args.interval[0]).date()
31 | while current >= stop_date:
32 | ranking.rank_a_day(current.isoformat())
33 | current -= timedelta(days=1)
34 |
--------------------------------------------------------------------------------
/bin/parser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import importlib
5 | import json
6 | import logging
7 | import re
8 | import types
9 |
10 | from datetime import datetime
11 | from io import BytesIO
12 | from logging import Logger
13 | from pathlib import Path
14 | from typing import List, Union, Tuple
15 | from uuid import uuid4
16 |
17 | from redis import Redis
18 |
19 | from bgpranking.default import AbstractManager, safe_create_dir, get_socket_path
20 | from bgpranking.helpers import get_modules, get_data_dir, get_modules_dir
21 |
22 |
23 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
24 | level=logging.INFO)
25 |
26 |
27 | class RawFilesParser():
28 |
29 | def __init__(self, config_file: Path, logger: Logger) -> None:
30 | self.logger = logger
31 | with open(config_file, 'r') as f:
32 | module_parameters = json.load(f)
33 | self.vendor = module_parameters['vendor']
34 | self.listname = module_parameters['name']
35 | if 'parser' in module_parameters:
36 | self.parse_raw_file = types.MethodType(importlib.import_module(module_parameters['parser'], 'bgpranking').parse_raw_file, self) # type: ignore
37 | self.source = f'{self.vendor}-{self.listname}'
38 | self.directory = get_data_dir() / self.vendor / self.listname
39 | safe_create_dir(self.directory)
40 | self.unparsable_dir = self.directory / 'unparsable'
41 | safe_create_dir(self.unparsable_dir)
42 | self.redis_intake = Redis(unix_socket_path=get_socket_path('intake'), db=0)
43 | self.logger.debug(f'{self.source}: Starting intake.')
44 |
45 | @property
46 | def files_to_parse(self) -> List[Path]:
47 | return sorted([f for f in self.directory.iterdir() if f.is_file()], reverse=True)
48 |
49 | def extract_ipv4(self, bytestream: bytes) -> List[Union[bytes, Tuple[bytes, datetime]]]:
50 | return re.findall(rb'[0-9]+(?:\.[0-9]+){3}', bytestream)
51 |
52 | def strip_leading_zeros(self, ips: List[bytes]) -> List[bytes]:
53 | '''Helper to get rid of leading 0s in an IP list.
54 | Only run it when needed, it is nasty and slow'''
55 | return ['.'.join(str(int(part)) for part in ip.split(b'.')).encode() for ip in ips]
56 |
57 | def parse_raw_file(self, f: BytesIO) -> List[Union[bytes, Tuple[bytes, datetime]]]:
58 | # If the list doesn't provide a time, fallback to current day, midnight
59 | self.datetime = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
60 | return self.extract_ipv4(f.getvalue())
61 |
62 | def parse_raw_files(self) -> None:
63 | nb_unparsable_files = len([f for f in self.unparsable_dir.iterdir() if f.is_file()])
64 | if nb_unparsable_files:
65 | self.logger.warning(f'{self.source}: Was unable to parse {nb_unparsable_files} files.')
66 | try:
67 | for filepath in self.files_to_parse:
68 | self.logger.debug(f'{self.source}: Parsing {filepath}, {len(self.files_to_parse) - 1} to go.')
69 | with open(filepath, 'rb') as f:
70 | to_parse = BytesIO(f.read())
71 | p = self.redis_intake.pipeline()
72 | for line in self.parse_raw_file(to_parse):
73 | if isinstance(line, tuple):
74 | ip, datetime = line
75 | else:
76 | ip = line
77 | datetime = self.datetime
78 | uuid = uuid4()
79 | p.hmset(str(uuid), {'ip': ip, 'source': self.source,
80 | 'datetime': datetime.isoformat()})
81 | p.sadd('intake', str(uuid))
82 | p.execute()
83 | self._archive(filepath)
84 | except Exception as e:
85 | self.logger.exception(f"{self.source}: That didn't go well: {e}")
86 | self._unparsable(filepath)
87 |
88 | def _archive(self, filepath: Path) -> None:
89 | '''After processing, move file to the archive directory'''
90 | filepath.rename(self.directory / 'archive' / filepath.name)
91 |
92 | def _unparsable(self, filepath: Path) -> None:
93 | '''If a file could not be parsed, move it to the unparsable directory'''
94 | filepath.rename(self.unparsable_dir / filepath.name)
95 |
96 |
97 | class ParserManager(AbstractManager):
98 |
99 | def __init__(self, loglevel: int=logging.DEBUG):
100 | super().__init__(loglevel)
101 | self.script_name = 'parser'
102 | self.modules_paths = get_modules()
103 | self.modules = [RawFilesParser(path, self.logger) for path in self.modules_paths]
104 |
105 | def _to_run_forever(self):
106 | # Check if there are new config files
107 | new_modules_paths = [modulepath for modulepath in get_modules_dir().glob('*.json') if modulepath not in self.modules_paths]
108 | self.modules += [RawFilesParser(path, self.logger) for path in new_modules_paths]
109 | self.modules_paths += new_modules_paths
110 |
111 | if self.modules:
112 | for module in self.modules:
113 | module.parse_raw_files()
114 | else:
115 | self.logger.warning('No config files were found so there are no parsers running yet. Will try again later.')
116 |
117 |
118 | def main():
119 | parser_manager = ParserManager()
120 | parser_manager.run(sleep_in_sec=120)
121 |
122 |
123 | if __name__ == '__main__':
124 | main()
125 |
--------------------------------------------------------------------------------
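The `parser` entry in a module config is what drives the `types.MethodType` binding above: when present, the named module's `parse_raw_file` replaces the generic one on that `RawFilesParser` instance. A stripped-down, self-contained sketch of the same mechanism (the class and function here are illustrative, not the project's):

# Minimal illustration of swapping in a feed-specific parse_raw_file(),
# the way RawFilesParser does it with importlib + types.MethodType.
import types


def custom_parse_raw_file(self, f):
    # Stand-in for a parser module's function; `self` is the consumer instance.
    return [b'192.0.2.1']


class Consumer:

    def parse_raw_file(self, f):
        # Generic fallback, replaced when a module provides its own parser.
        return []


c = Consumer()
c.parse_raw_file = types.MethodType(custom_parse_raw_file, c)
print(c.parse_raw_file(None))  # [b'192.0.2.1']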
/bin/ranking.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import logging
5 |
6 | from datetime import datetime, date, timedelta
7 | from ipaddress import ip_network
8 | from typing import Dict, Any
9 |
10 | from redis import Redis
11 | import requests
12 |
13 | from bgpranking.default import AbstractManager, get_config
14 | from bgpranking.helpers import get_ipasn, sanity_check_ipasn, load_all_modules_configs
15 |
16 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
17 | level=logging.INFO)
18 |
19 |
20 | class Ranking(AbstractManager):
21 |
22 | def __init__(self, loglevel: int=logging.INFO):
23 | super().__init__(loglevel)
24 | self.script_name = 'ranking'
25 | self.storage = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port'), decode_responses=True)
26 | self.ranking = Redis(get_config('generic', 'ranking_db_hostname'), get_config('generic', 'ranking_db_port'), decode_responses=True)
27 | self.ipasn = get_ipasn()
28 |
29 | def rank_a_day(self, day: str):
30 | asns_aggregation_key_v4 = f'{day}|asns|v4'
31 | asns_aggregation_key_v6 = f'{day}|asns|v6'
32 | to_delete = set([asns_aggregation_key_v4, asns_aggregation_key_v6])
33 | r_pipeline = self.ranking.pipeline()
34 | cached_meta: Dict[str, Dict[str, Any]] = {}
35 | config_files = load_all_modules_configs()
36 | for source in self.storage.smembers(f'{day}|sources'):
37 | if source not in config_files:
38 | # reload the configs, in case a new source config was added after startup
39 | config_files = load_all_modules_configs()
40 | self.logger.info(f'{day} - Ranking source: {source}')
41 | source_aggregation_key_v4 = f'{day}|{source}|asns|v4'
42 | source_aggregation_key_v6 = f'{day}|{source}|asns|v6'
43 | to_delete.update([source_aggregation_key_v4, source_aggregation_key_v6])
44 | for asn in self.storage.smembers(f'{day}|{source}'):
45 | prefixes_aggregation_key_v4 = f'{day}|{asn}|v4'
46 | prefixes_aggregation_key_v6 = f'{day}|{asn}|v6'
47 | to_delete.update([prefixes_aggregation_key_v4, prefixes_aggregation_key_v6])
48 | if asn == '0':
49 | # Default ASN when no matches. Probably spoofed.
50 | continue
51 | self.logger.debug(f'{day} - Ranking source: {source} / ASN: {asn}')
52 | asn_rank_v4 = 0.0
53 | asn_rank_v6 = 0.0
54 | for prefix in self.storage.smembers(f'{day}|{source}|{asn}'):
55 | if prefix == 'None':
56 | # This should not happen and requires a DB cleanup.
57 |                         self.logger.critical(f'Invalid prefix in "{day}|{source}|{asn}"')
58 | continue
59 | ips = set([ip_ts.split('|')[0]
60 | for ip_ts in self.storage.smembers(f'{day}|{source}|{asn}|{prefix}')])
61 | py_prefix = ip_network(prefix)
62 | prefix_rank = float(len(ips)) / py_prefix.num_addresses
63 | r_pipeline.zadd(f'{day}|{source}|{asn}|v{py_prefix.version}|prefixes', {prefix: prefix_rank})
64 | if py_prefix.version == 4:
65 | asn_rank_v4 += len(ips) * config_files[source]['impact']
66 | r_pipeline.zincrby(prefixes_aggregation_key_v4, prefix_rank * config_files[source]['impact'], prefix)
67 | else:
68 | asn_rank_v6 += len(ips) * config_files[source]['impact']
69 | r_pipeline.zincrby(prefixes_aggregation_key_v6, prefix_rank * config_files[source]['impact'], prefix)
70 | if asn in cached_meta:
71 | v4info = cached_meta[asn]['v4']
72 | v6info = cached_meta[asn]['v6']
73 | else:
74 | retry = 3
75 | while retry:
76 | try:
77 | v4info = self.ipasn.asn_meta(asn=asn, source='caida', address_family='v4', date=day)
78 | v6info = self.ipasn.asn_meta(asn=asn, source='caida', address_family='v6', date=day)
79 | break
80 | except requests.exceptions.ConnectionError:
81 |                             # Sometimes ipasnhistory is unreachable; try again a few times
82 | retry -= 1
83 | else:
84 | # if it keeps failing, the ASN will be ranked on next run.
85 | continue
86 |
87 | cached_meta[asn] = {'v4': v4info, 'v6': v6info}
88 | ipasnhistory_date_v4 = list(v4info['response'].keys())[0]
89 | v4count = v4info['response'][ipasnhistory_date_v4][asn]['ipcount']
90 | ipasnhistory_date_v6 = list(v6info['response'].keys())[0]
91 | v6count = v6info['response'][ipasnhistory_date_v6][asn]['ipcount']
92 | if v4count:
93 | asn_rank_v4 /= float(v4count)
94 | if asn_rank_v4:
95 | r_pipeline.set(f'{day}|{source}|{asn}|v4', asn_rank_v4)
96 | r_pipeline.zincrby(asns_aggregation_key_v4, asn_rank_v4, asn)
97 | r_pipeline.zadd(source_aggregation_key_v4, {asn: asn_rank_v4})
98 | if v6count:
99 | asn_rank_v6 /= float(v6count)
100 | if asn_rank_v6:
101 | r_pipeline.set(f'{day}|{source}|{asn}|v6', asn_rank_v6)
102 | r_pipeline.zincrby(asns_aggregation_key_v6, asn_rank_v6, asn)
103 | r_pipeline.zadd(source_aggregation_key_v6, {asn: asn_rank_v6})
104 | self.ranking.delete(*to_delete)
105 | r_pipeline.execute()
106 |
107 | def compute(self):
108 | ready, message = sanity_check_ipasn(self.ipasn)
109 | if not ready:
110 | # Try again later.
111 | self.logger.warning(message)
112 | return
113 | self.logger.debug(message)
114 |
115 | self.logger.info('Start ranking')
116 | today = date.today()
117 | now = datetime.now()
118 | today12am = now.replace(hour=12, minute=0, second=0, microsecond=0)
119 | if now < today12am:
120 | # Compute yesterday and today's ranking (useful when we have lists generated only once a day)
121 | self.rank_a_day((today - timedelta(days=1)).isoformat())
122 | self.rank_a_day(today.isoformat())
123 | self.logger.info('Ranking done.')
124 |
125 | def _to_run_forever(self):
126 | self.compute()
127 |
128 |
129 | def main():
130 | ranking = Ranking()
131 | ranking.run(sleep_in_sec=3600)
132 |
133 |
134 | if __name__ == '__main__':
135 | main()
136 |
--------------------------------------------------------------------------------
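
In `rank_a_day` above, a prefix's rank is the share of its address space reported by a feed, and an ASN's rank is the impact-weighted number of reported IPs divided by the number of addresses the ASN announces (the `ipcount` returned by IP ASN History). A minimal, hypothetical sketch of that arithmetic, with illustrative helper names that are not part of the codebase:

```python
# Hypothetical, simplified sketch of the scoring in rank_a_day() above
# (helper names are illustrative only): a prefix rank is the share of the
# prefix's addresses reported by a feed; an ASN rank is the impact-weighted
# number of reported IPs divided by the number of addresses the ASN announces.
from ipaddress import ip_network
from typing import Dict, Set


def prefix_rank(prefix: str, reported_ips: Set[str]) -> float:
    '''Fraction of the prefix's address space reported by a feed.'''
    return len(reported_ips) / ip_network(prefix).num_addresses


def asn_rank(reported_ips_per_prefix: Dict[str, Set[str]], impact: float, asn_ipcount: int) -> float:
    '''Impact-weighted count of reported IPs, normalised by the size of the ASN.'''
    if not asn_ipcount:
        return 0.0
    total_reported = sum(len(ips) for ips in reported_ips_per_prefix.values())
    return total_reported * impact / asn_ipcount


if __name__ == '__main__':
    # 3 IPs reported in a /24 (256 addresses) by a feed with impact 5,
    # for an ASN announcing 65536 IPv4 addresses in total.
    ips = {'192.0.2.1', '192.0.2.10', '192.0.2.200'}
    print(prefix_rank('192.0.2.0/24', ips))           # 0.01171875
    print(asn_rank({'192.0.2.0/24': ips}, 5, 65536))  # ~0.00023
```
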
/bin/run_backend.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import argparse
5 | import os
6 | import time
7 | from pathlib import Path
8 | from subprocess import Popen
9 | from typing import Optional, Dict
10 |
11 | from redis import Redis
12 | from redis.exceptions import ConnectionError
13 |
14 | from bgpranking.default import get_homedir, get_socket_path, get_config
15 |
16 |
17 | def check_running(name: str) -> bool:
18 | if name == "storage":
19 | r = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port'))
20 | elif name == "ranking":
21 | r = Redis(get_config('generic', 'ranking_db_hostname'), get_config('generic', 'ranking_db_port'))
22 | else:
23 | socket_path = get_socket_path(name)
24 | if not os.path.exists(socket_path):
25 | return False
26 | r = Redis(unix_socket_path=socket_path)
27 | try:
28 | return True if r.ping() else False
29 | except ConnectionError:
30 | return False
31 |
32 |
33 | def launch_cache(storage_directory: Optional[Path]=None):
34 | if not storage_directory:
35 | storage_directory = get_homedir()
36 | if not check_running('cache'):
37 | Popen(["./run_redis.sh"], cwd=(storage_directory / 'cache'))
38 |
39 |
40 | def shutdown_cache(storage_directory: Optional[Path]=None):
41 | if not storage_directory:
42 | storage_directory = get_homedir()
43 | r = Redis(unix_socket_path=get_socket_path('cache'))
44 | r.shutdown(save=True)
45 | print('Redis cache database shutdown.')
46 |
47 |
48 | def launch_temp(storage_directory: Optional[Path]=None):
49 | if not storage_directory:
50 | storage_directory = get_homedir()
51 | if not check_running('intake') and not check_running('prepare'):
52 | Popen(["./run_redis.sh"], cwd=(storage_directory / 'temp'))
53 |
54 |
55 | def shutdown_temp(storage_directory: Optional[Path]=None):
56 | if not storage_directory:
57 | storage_directory = get_homedir()
58 | r = Redis(unix_socket_path=get_socket_path('intake'))
59 | r.shutdown(save=True)
60 | print('Redis intake database shutdown.')
61 | r = Redis(unix_socket_path=get_socket_path('prepare'))
62 | r.shutdown(save=True)
63 | print('Redis prepare database shutdown.')
64 |
65 |
66 | def launch_storage(storage_directory: Optional[Path]=None):
67 | if not storage_directory:
68 | storage_directory = get_homedir()
69 | if not check_running('storage'):
70 | Popen(["./run_kvrocks.sh"], cwd=(storage_directory / 'storage'))
71 |
72 |
73 | def shutdown_storage(storage_directory: Optional[Path]=None):
74 | redis = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port'))
75 | redis.shutdown()
76 |
77 |
78 | def launch_ranking(storage_directory: Optional[Path]=None):
79 | if not storage_directory:
80 | storage_directory = get_homedir()
81 | if not check_running('ranking'):
82 | Popen(["./run_kvrocks.sh"], cwd=(storage_directory / 'ranking'))
83 |
84 |
85 | def shutdown_ranking(storage_directory: Optional[Path]=None):
86 | redis = Redis(get_config('generic', 'ranking_db_hostname'), get_config('generic', 'ranking_db_port'))
87 | redis.shutdown()
88 |
89 |
90 | def launch_all():
91 | launch_cache()
92 | launch_temp()
93 | launch_storage()
94 | launch_ranking()
95 |
96 |
97 | def check_all(stop: bool=False):
98 | backends: Dict[str, bool] = {'cache': False, 'storage': False, 'ranking': False,
99 | 'intake': False, 'prepare': False}
100 | while True:
101 | for db_name in backends.keys():
102 | print(backends[db_name])
103 | try:
104 | backends[db_name] = check_running(db_name)
105 | except Exception:
106 | backends[db_name] = False
107 | if stop:
108 | if not any(running for running in backends.values()):
109 | break
110 | else:
111 | if all(running for running in backends.values()):
112 | break
113 | for db_name, running in backends.items():
114 | if not stop and not running:
115 | print(f"Waiting on {db_name} to start")
116 | if stop and running:
117 | print(f"Waiting on {db_name} to stop")
118 | time.sleep(1)
119 |
120 |
121 | def stop_all():
122 | shutdown_cache()
123 | shutdown_temp()
124 | shutdown_storage()
125 | shutdown_ranking()
126 |
127 |
128 | def main():
129 | parser = argparse.ArgumentParser(description='Manage backend DBs.')
130 | parser.add_argument("--start", action='store_true', default=False, help="Start all")
131 | parser.add_argument("--stop", action='store_true', default=False, help="Stop all")
132 | parser.add_argument("--status", action='store_true', default=True, help="Show status")
133 | args = parser.parse_args()
134 |
135 | if args.start:
136 | launch_all()
137 | if args.stop:
138 | stop_all()
139 | if not args.stop and args.status:
140 | check_all()
141 |
142 |
143 | if __name__ == '__main__':
144 | main()
145 |
--------------------------------------------------------------------------------
/bin/sanitizer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import ipaddress
5 | import logging
6 | import time
7 |
8 | from datetime import timezone
9 | from typing import Optional, List, Dict
10 |
11 | from dateutil import parser
12 | from redis import Redis
13 | import requests
14 |
15 | from bgpranking.default import AbstractManager, get_socket_path
16 | from bgpranking.helpers import get_ipasn, sanity_check_ipasn
17 |
18 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
19 | level=logging.INFO)
20 |
21 |
22 | class Sanitizer(AbstractManager):
23 |
24 | def __init__(self, loglevel: int=logging.INFO):
25 | super().__init__(loglevel)
26 | self.script_name = 'sanitizer'
27 | self.redis_intake = Redis(unix_socket_path=get_socket_path('intake'), db=0, decode_responses=True)
28 | self.redis_sanitized = Redis(unix_socket_path=get_socket_path('prepare'), db=0, decode_responses=True)
29 | self.ipasn = get_ipasn()
30 | self.logger.debug('Starting import')
31 |
32 | def _sanitize_ip(self, pipeline: Redis, uuid: str, data: Dict) -> Optional[Dict]:
33 | try:
34 | ip = ipaddress.ip_address(data['ip'])
35 | if isinstance(ip, ipaddress.IPv6Address):
36 | address_family = 'v6'
37 | else:
38 | address_family = 'v4'
39 | except ValueError:
40 | self.logger.info(f"Invalid IP address: {data['ip']}")
41 | return None
42 | except KeyError:
43 | self.logger.info(f"Invalid entry {data}")
44 | return None
45 |
46 | if not ip.is_global:
47 | self.logger.info(f"The IP address {data['ip']} is not global")
48 | return None
49 |
50 | datetime = parser.parse(data['datetime'])
51 | if datetime.tzinfo:
52 |             # Convert to UTC, then strip the timezone info so the datetime is naive.
53 | datetime = datetime.astimezone(timezone.utc).replace(tzinfo=None)
54 |
55 |         # Add to the temporary DB for further processing
56 | pipeline.hmset(uuid, {'ip': str(ip), 'source': data['source'], 'address_family': address_family,
57 | 'date': datetime.date().isoformat(), 'datetime': datetime.isoformat()})
58 | pipeline.sadd('to_insert', uuid)
59 |
60 | return {'ip': str(ip), 'address_family': address_family,
61 | 'date': datetime.isoformat(), 'precision_delta': {'days': 3}}
62 |
63 | def _sanitize_network(self, pipeline: Redis, uuid: str, data: Dict) -> List[Dict]:
64 | try:
65 | network = ipaddress.ip_network(data['ip'])
66 | if isinstance(network, ipaddress.IPv6Network):
67 | address_family = 'v6'
68 | else:
69 | address_family = 'v4'
70 | except ValueError:
71 | self.logger.info(f"Invalid IP network: {data['ip']}")
72 | return []
73 | except KeyError:
74 | self.logger.info(f"Invalid entry {data}")
75 | return []
76 |
77 | datetime = parser.parse(data['datetime'])
78 | if datetime.tzinfo:
79 |             # Convert to UTC, then strip the timezone info so the datetime is naive.
80 | datetime = datetime.astimezone(timezone.utc).replace(tzinfo=None)
81 |
82 | for_cache = []
83 | for ip in network.hosts():
84 | if not ip.is_global:
85 | self.logger.info(f"The IP address {ip} is not global")
86 | continue
87 |
88 |             # Add to the temporary DB for further processing
89 | pipeline.hmset(uuid, {'ip': str(ip), 'source': data['source'], 'address_family': address_family,
90 | 'date': datetime.date().isoformat(), 'datetime': datetime.isoformat()})
91 | pipeline.sadd('to_insert', uuid)
92 |
93 | for_cache.append({'ip': str(ip), 'address_family': address_family,
94 | 'date': datetime.isoformat(), 'precision_delta': {'days': 3}})
95 | return for_cache
96 |
97 | def sanitize(self):
98 | ready, message = sanity_check_ipasn(self.ipasn)
99 | if not ready:
100 | # Try again later.
101 | self.logger.warning(message)
102 | return
103 | self.logger.debug(message)
104 |
105 | while True:
106 | try:
107 | if self.shutdown_requested() or not self.ipasn.is_up:
108 | break
109 | except requests.exceptions.ConnectionError:
110 | # Temporary issue with ipasnhistory
111 | self.logger.info('Temporary issue with ipasnhistory, trying again later.')
112 | time.sleep(10)
113 | continue
114 | uuids: Optional[List[str]] = self.redis_intake.spop('intake', 100) # type: ignore
115 | if not uuids:
116 | break
117 | for_cache = []
118 | pipeline = self.redis_sanitized.pipeline(transaction=False)
119 | for uuid in uuids:
120 | data = self.redis_intake.hgetall(uuid)
121 | if not data:
122 | continue
123 | if '/' in data['ip']:
124 | entries_for_cache = self._sanitize_network(pipeline, uuid, data)
125 | if entries_for_cache:
126 | for_cache += entries_for_cache
127 | else:
128 | entry_for_cache = self._sanitize_ip(pipeline, uuid, data)
129 | if entry_for_cache:
130 | for_cache.append(entry_for_cache)
131 |
132 | pipeline.execute()
133 | self.redis_intake.delete(*uuids)
134 |
135 | try:
136 | # Just cache everything so the lookup scripts can do their thing.
137 | self.ipasn.mass_cache(for_cache)
138 | except Exception:
139 | self.logger.info('Mass cache in IPASN History failed, trying again later.')
140 | # Rollback the spop
141 | self.redis_intake.sadd('intake', *uuids)
142 | break
143 |
144 | def _to_run_forever(self):
145 | self.sanitize()
146 |
147 |
148 | def main():
149 | sanitizer = Sanitizer()
150 | sanitizer.run(sleep_in_sec=120)
151 |
152 |
153 | if __name__ == '__main__':
154 | main()
155 |
--------------------------------------------------------------------------------
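
The sanitizer above drops anything that is not a global IP address and normalises every timestamp to a naive UTC datetime before the entry is queued for insertion and cached in IP ASN History. A minimal, self-contained sketch of that per-entry check (the function name is illustrative, not part of the codebase):

```python
# Minimal, hypothetical sketch of the per-entry validation in
# Sanitizer._sanitize_ip above: reject anything that is not a global IP
# address and normalise the timestamp to a naive UTC datetime.
import ipaddress
from datetime import timezone
from typing import Dict, Optional

from dateutil import parser


def sanitize_entry(data: Dict[str, str]) -> Optional[Dict[str, str]]:
    try:
        ip = ipaddress.ip_address(data['ip'])
    except (KeyError, ValueError):
        return None  # missing or malformed IP
    if not ip.is_global:
        return None  # private / reserved addresses are ignored
    dt = parser.parse(data['datetime'])
    if dt.tzinfo:
        # Convert to UTC, then drop the timezone so the datetime is naive.
        dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
    return {'ip': str(ip),
            'address_family': 'v6' if ip.version == 6 else 'v4',
            'date': dt.date().isoformat(),
            'datetime': dt.isoformat()}


if __name__ == '__main__':
    print(sanitize_entry({'ip': '8.8.8.8', 'datetime': '2024-07-01T12:00:00+02:00'}))
    print(sanitize_entry({'ip': '10.0.0.1', 'datetime': '2024-07-01T12:00:00+02:00'}))  # None
```
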
/bin/shutdown.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import time
5 |
6 | from bgpranking.default import AbstractManager
7 |
8 |
9 | def main():
10 | AbstractManager.force_shutdown()
11 | time.sleep(5)
12 | while True:
13 | try:
14 | running = AbstractManager.is_running()
15 | except FileNotFoundError:
16 | print('Redis is already down.')
17 | break
18 | if not running:
19 | break
20 | print(running)
21 | time.sleep(5)
22 |
23 |
24 | if __name__ == '__main__':
25 | main()
26 |
--------------------------------------------------------------------------------
/bin/ssfetcher.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import logging
5 | from logging import Logger
6 | import json
7 | import asyncio
8 |
9 | from typing import Tuple, Dict, List, Optional, TypeVar, Any
10 | from datetime import datetime, date
11 | from pathlib import Path
12 |
13 | import aiohttp
14 | from bs4 import BeautifulSoup # type: ignore
15 | from dateutil.parser import parse
16 |
17 | from bgpranking.default import AbstractManager, get_homedir, safe_create_dir
18 | from bgpranking.helpers import get_data_dir, get_modules_dir
19 |
20 |
21 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
22 | level=logging.INFO)
23 |
24 |
25 | Dates = TypeVar('Dates', datetime, date, str)
26 |
27 |
28 | class ShadowServerFetcher():
29 |
30 | def __init__(self, user, password, logger: Logger) -> None:
31 | self.logger = logger
32 | self.storage_directory = get_data_dir()
33 | self.config_path_modules = get_modules_dir()
34 | self.user = user
35 | self.password = password
36 | self.index_page = 'https://dl.shadowserver.org/reports/index.php'
37 | self.vendor = 'shadowserver'
38 | self.known_list_types = ('blacklist', 'blocklist', 'botnet', 'cc', 'cisco', 'cwsandbox',
39 | 'device', 'drone', 'event4', 'malware', 'scan6', 'event6', 'netis',
40 | 'microsoft', 'scan', 'sinkhole6', 'sinkhole', 'outdated',
41 | 'compromised', 'hp', 'darknet', 'ddos')
42 | self.first_available_day: Optional[date] = None
43 | self.last_available_day: date
44 | self.available_entries: Dict[str, List[Tuple[str, str]]] = {}
45 |
46 | async def __get_index(self):
47 | auth_details = {'user': self.user, 'password': self.password, 'login': 'Login'}
48 | async with aiohttp.ClientSession() as s:
49 | self.logger.debug('Fetching the index.')
50 | async with s.post(self.index_page, data=auth_details) as r:
51 | return await r.text()
52 |
53 | async def __build_daily_dict(self):
54 | html_index = await self.__get_index()
55 | soup = BeautifulSoup(html_index, 'html.parser')
56 | treeview = soup.find(id='treemenu1')
57 | for y in treeview.select(':scope > li'):
58 | year = y.contents[0]
59 | for m in y.contents[1].select(':scope > li'):
60 | month = m.contents[0]
61 | for d in m.contents[1].select(':scope > li'):
62 | day = d.contents[0]
63 | date = parse(f'{year} {month} {day}').date()
64 | self.available_entries[date.isoformat()] = []
65 | for a in d.contents[1].find_all('a', href=True):
66 | if not self.first_available_day:
67 | self.first_available_day = date
68 | self.last_available_day = date
69 | self.available_entries[date.isoformat()].append((a['href'], a.string))
70 | self.logger.debug('Dictionary created.')
71 |
72 | def __normalize_day(self, day: Optional[Dates]=None) -> str:
73 | if not day:
74 | if not self.last_available_day:
75 | raise Exception('Unable to figure out the last available day. You need to run build_daily_dict first')
76 | to_return = self.last_available_day
77 | else:
78 | if isinstance(day, str):
79 | to_return = parse(day).date()
80 | elif isinstance(day, datetime):
81 | to_return = day.date()
82 | return to_return.isoformat()
83 |
84 | def __split_name(self, name):
85 | type_content, country, list_type = name.split('-')
86 | if '_' in type_content:
87 | type_content, details_type = type_content.split('_', maxsplit=1)
88 | if '_' in details_type:
89 | details_type, sub = details_type.split('_', maxsplit=1)
90 | return list_type, country, (type_content, details_type, sub)
91 | return list_type, country, (type_content, details_type)
92 | return list_type, country, (type_content)
93 |
94 | def __check_config(self, filename: str) -> Optional[Path]:
95 | self.logger.debug(f'Working on config for {filename}.')
96 | config: Dict[str, Any] = {'vendor': 'shadowserver', 'parser': '.parsers.shadowserver'}
97 | type_content, _, type_details = self.__split_name(filename)
98 | prefix = type_content.split('.')[0]
99 |
100 | if isinstance(type_details, str):
101 | main_type = type_details
102 | config['name'] = '{}-{}'.format(prefix, type_details)
103 | else:
104 | main_type = type_details[0]
105 | config['name'] = '{}-{}'.format(prefix, '_'.join(type_details))
106 |
107 | if main_type not in self.known_list_types:
108 | self.logger.warning(f'Unknown type: {main_type}. Please update the config creator script.')
109 | return None
110 |
111 | if main_type == 'blacklist':
112 | config['impact'] = 5
113 | elif main_type == 'blocklist':
114 | config['impact'] = 5
115 | elif main_type == 'botnet':
116 | config['impact'] = 2
117 | elif main_type == 'malware':
118 | config['impact'] = 2
119 | elif main_type == 'cc':
120 | config['impact'] = 5
121 | elif main_type == 'cisco':
122 | config['impact'] = 3
123 | elif main_type == 'cwsandbox':
124 | config['impact'] = 5
125 | elif main_type == 'drone':
126 | config['impact'] = 2
127 | elif main_type == 'microsoft':
128 | config['impact'] = 3
129 | elif main_type == 'scan':
130 | config['impact'] = 1
131 | elif main_type == 'scan6':
132 | config['impact'] = 1
133 | elif main_type == 'sinkhole6':
134 | config['impact'] = 2
135 | elif main_type == 'sinkhole':
136 | config['impact'] = 2
137 | elif main_type == 'device':
138 | config['impact'] = 1
139 | elif main_type == 'event4':
140 | config['impact'] = 2
141 | elif main_type == 'event6':
142 | config['impact'] = 2
143 | elif main_type == 'netis':
144 | config['impact'] = 2
145 | else:
146 | config['impact'] = 1
147 |
148 | if not (self.config_path_modules / f"{config['vendor']}_{config['name']}.json").exists():
149 | self.logger.debug(f'Creating config file for {filename}.')
150 | with open(self.config_path_modules / f"{config['vendor']}_{config['name']}.json", 'w') as f:
151 | json.dump(config, f, indent=2)
152 | else:
153 | with open(self.config_path_modules / f"{config['vendor']}_{config['name']}.json", 'r') as f:
154 | # Validate new config file with old
155 | config_current = json.load(f)
156 | if config_current != config:
157 | self.logger.warning('The config file created by this script is different from the one on disk: \n{}\n{}'.format(json.dumps(config), json.dumps(config_current)))
158 | # Init list directory
159 | directory = self.storage_directory / config['vendor'] / config['name']
160 | safe_create_dir(directory)
161 | meta = directory / 'meta'
162 | safe_create_dir(meta)
163 | archive_dir = directory / 'archive'
164 | safe_create_dir(archive_dir)
165 | self.logger.debug(f'Done with config for {filename}.')
166 | return directory
167 |
168 | async def download_daily_entries(self, day: Optional[Dates]=None):
169 | await self.__build_daily_dict()
170 | for url, filename in self.available_entries[self.__normalize_day(day)]:
171 | storage_dir = self.__check_config(filename)
172 | if not storage_dir:
173 | continue
174 | # Check if the file we're trying to download has already been downloaded. Skip if True.
175 | uuid = url.split('/')[-1]
176 | if (storage_dir / 'meta' / 'last_download').exists():
177 | with open(storage_dir / 'meta' / 'last_download') as _fr:
178 | last_download_uuid = _fr.read()
179 | if last_download_uuid == uuid:
180 | self.logger.debug(f'Already downloaded: {url}.')
181 | continue
182 | async with aiohttp.ClientSession() as s:
183 | async with s.get(url) as r:
184 | self.logger.info(f'Downloading {url}.')
185 | content = await r.content.read()
186 | with (storage_dir / f'{datetime.now().isoformat()}.txt').open('wb') as _fw:
187 | _fw.write(content)
188 | with (storage_dir / 'meta' / 'last_download').open('w') as _fwt:
189 | _fwt.write(uuid)
190 |
191 |
192 | class ShadowServerManager(AbstractManager):
193 |
194 | def __init__(self, loglevel: int=logging.INFO):
195 | super().__init__(loglevel)
196 | self.script_name = 'shadowserver_fetcher'
197 | shadow_server_config_file = get_homedir() / 'config' / 'shadowserver.json'
198 | self.config = True
199 | if not shadow_server_config_file.exists():
200 | self.config = False
201 | self.logger.warning(f'No config file available {shadow_server_config_file}, the shadow server module will not be launched.')
202 | return
203 | with shadow_server_config_file.open() as f:
204 | ss_config = json.load(f)
205 | self.fetcher = ShadowServerFetcher(ss_config['user'], ss_config['password'], self.logger)
206 |
207 | async def _to_run_forever_async(self):
208 | await self.fetcher.download_daily_entries()
209 |
210 |
211 | def main():
212 | modules_manager = ShadowServerManager()
213 | if modules_manager.config:
214 | asyncio.run(modules_manager.run_async(sleep_in_sec=3600))
215 |
216 |
217 | if __name__ == '__main__':
218 | main()
219 |
--------------------------------------------------------------------------------
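
The long `if`/`elif` chain in `ShadowServerFetcher.__check_config` above maps a report type to an impact score. A table-driven sketch of the same mapping (the values mirror the chain, with 1 as the default for the remaining known types):

```python
# Table-driven sketch of the impact mapping used in
# ShadowServerFetcher.__check_config above; the values mirror the
# if/elif chain, with 1 as the default for any other known type.
IMPACT_BY_TYPE = {
    'blacklist': 5, 'blocklist': 5, 'cc': 5, 'cwsandbox': 5,
    'cisco': 3, 'microsoft': 3,
    'botnet': 2, 'malware': 2, 'drone': 2, 'sinkhole': 2, 'sinkhole6': 2,
    'event4': 2, 'event6': 2, 'netis': 2,
    'scan': 1, 'scan6': 1, 'device': 1,
}


def impact_for(main_type: str) -> int:
    return IMPACT_BY_TYPE.get(main_type, 1)
```
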
/bin/start.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | from subprocess import Popen, run
5 |
6 | from bgpranking.default import get_homedir
7 |
8 |
9 | def main():
10 | # Just fail if the env isn't set.
11 | get_homedir()
12 | print('Start backend (redis)...')
13 | p = run(['run_backend', '--start'])
14 | p.check_returncode()
15 | print('done.')
16 | Popen(['fetcher'])
17 | # Popen(['ssfetcher'])
18 | Popen(['parser'])
19 | Popen(['sanitizer'])
20 | Popen(['dbinsert'])
21 | Popen(['ranking'])
22 | Popen(['asn_descriptions'])
23 | print('Start website...')
24 | Popen(['start_website'])
25 | print('done.')
26 |
27 |
28 | if __name__ == '__main__':
29 | main()
30 |
--------------------------------------------------------------------------------
/bin/start_website.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import logging
5 | from subprocess import Popen
6 |
7 | from bgpranking.default import AbstractManager
8 | from bgpranking.default import get_config, get_homedir
9 |
10 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
11 | level=logging.INFO)
12 |
13 |
14 | class Website(AbstractManager):
15 |
16 | def __init__(self, loglevel: int=logging.INFO):
17 | super().__init__(loglevel)
18 | self.script_name = 'website'
19 | self.process = self._launch_website()
20 | self.set_running()
21 |
22 | def _launch_website(self):
23 | website_dir = get_homedir() / 'website'
24 | ip = get_config('generic', 'website_listen_ip')
25 | port = get_config('generic', 'website_listen_port')
26 | return Popen(['gunicorn', '-w', '10',
27 | '--graceful-timeout', '2', '--timeout', '300',
28 | '-b', f'{ip}:{port}',
29 | '--log-level', 'info',
30 | 'web:app'],
31 | cwd=website_dir)
32 |
33 |
34 | def main():
35 | w = Website()
36 | w.run(sleep_in_sec=10)
37 |
38 |
39 | if __name__ == '__main__':
40 | main()
41 |
--------------------------------------------------------------------------------
/bin/stop.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | from subprocess import Popen, run
5 |
6 | from redis import Redis
7 | from redis.exceptions import ConnectionError
8 |
9 | from bgpranking.default import get_homedir, get_socket_path
10 |
11 |
12 | def main():
13 | get_homedir()
14 | p = Popen(['shutdown'])
15 | p.wait()
16 | try:
17 | r = Redis(unix_socket_path=get_socket_path('cache'), db=1)
18 | r.delete('shutdown')
19 | print('Shutting down databases...')
20 | p_backend = run(['run_backend', '--stop'])
21 | p_backend.check_returncode()
22 | print('done.')
23 | except ConnectionError:
24 | # Already down, skip the stacktrace
25 | pass
26 |
27 |
28 | if __name__ == '__main__':
29 | main()
30 |
--------------------------------------------------------------------------------
/bin/update.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import argparse
5 | import hashlib
6 | import logging
7 | import platform
8 | import shlex
9 | import subprocess
10 | import sys
11 | from pathlib import Path
12 |
13 | from bgpranking.default import get_homedir, get_config
14 |
15 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
16 | level=logging.INFO)
17 |
18 |
19 | def compute_hash_self():
20 | m = hashlib.sha256()
21 | with (get_homedir() / 'bin' / 'update.py').open('rb') as f:
22 | m.update(f.read())
23 | return m.digest()
24 |
25 |
26 | def keep_going(ignore=False):
27 | if ignore:
28 | return
29 | keep_going = input('Continue? (y/N) ')
30 | if keep_going.lower() != 'y':
31 | print('Okay, quitting.')
32 | sys.exit()
33 |
34 |
35 | def run_command(command, expect_fail: bool=False, capture_output: bool=True):
36 | args = shlex.split(command)
37 | homedir = get_homedir()
38 | process = subprocess.run(args, cwd=homedir, capture_output=capture_output)
39 | if capture_output:
40 | print(process.stdout.decode())
41 | if process.returncode and not expect_fail:
42 | print(process.stderr.decode())
43 | sys.exit()
44 |
45 |
46 | def check_poetry_version():
47 | args = shlex.split("poetry self -V")
48 | homedir = get_homedir()
49 | process = subprocess.run(args, cwd=homedir, capture_output=True)
50 | poetry_version_str = process.stdout.decode()
51 | version = poetry_version_str.split()[2]
52 | version = version.strip(')')
53 | version_details = tuple(int(i) for i in version.split('.'))
54 | if version_details < (1, 1, 0):
55 | print('The project requires poetry >= 1.1.0, please update.')
56 | print('If you installed with "pip install --user poetry", run "pip install --user -U poetry"')
57 | print('If you installed via the recommended method, use "poetry self update"')
58 | print('More details: https://github.com/python-poetry/poetry#updating-poetry')
59 | sys.exit()
60 |
61 |
62 | def main():
63 | parser = argparse.ArgumentParser(description='Pull latest release, update dependencies, update and validate the config files, update 3rd deps for the website.')
64 | parser.add_argument('--yes', default=False, action='store_true', help='Run all commands without asking.')
65 | args = parser.parse_args()
66 |
67 | old_hash = compute_hash_self()
68 |
69 | print('* Update repository.')
70 | keep_going(args.yes)
71 | run_command('git pull')
72 | new_hash = compute_hash_self()
73 | if old_hash != new_hash:
74 |         print('Update script changed, please run "poetry run update"')
75 | sys.exit()
76 |
77 | check_poetry_version()
78 |
79 | print('* Install/update dependencies.')
80 | keep_going(args.yes)
81 | run_command('poetry install')
82 |
83 | print('* Validate configuration files.')
84 | keep_going(args.yes)
85 | run_command(f'poetry run {(Path("tools") / "validate_config_files.py").as_posix()} --check')
86 |
87 | print('* Update configuration files.')
88 | keep_going(args.yes)
89 | run_command(f'poetry run {(Path("tools") / "validate_config_files.py").as_posix()} --update')
90 |
91 | print('* Restarting')
92 | keep_going(args.yes)
93 | if platform.system() == 'Windows':
94 | print('Restarting with poetry...')
95 | run_command('poetry run stop', expect_fail=True)
96 | run_command('poetry run start', capture_output=False)
97 | print('Started.')
98 | else:
99 | service = get_config('generic', 'systemd_service_name')
100 | p = subprocess.run(["systemctl", "is-active", "--quiet", service])
101 | try:
102 | p.check_returncode()
103 | print('Restarting with systemd...')
104 | run_command(f'sudo service {service} restart')
105 | print('done.')
106 | except subprocess.CalledProcessError:
107 | print('Restarting with poetry...')
108 | run_command('poetry run stop', expect_fail=True)
109 | run_command('poetry run start', capture_output=False)
110 | print('Started.')
111 |
112 |
113 | if __name__ == '__main__':
114 | main()
115 |
--------------------------------------------------------------------------------
/cache/run_redis.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 | set -x
5 |
6 | if [ -f ../../valkey/src/valkey-server ]; then
7 | ../../valkey/src/valkey-server ./cache.conf
8 | elif [ -f ../../redis/src/redis-server ]; then
9 | ../../redis/src/redis-server ./cache.conf
10 | else
11 | echo "Warning: using system redis-server. Valkey-server or redis-server from source is recommended." >&2
12 | /usr/bin/redis-server ./cache.conf
13 | fi
14 |
--------------------------------------------------------------------------------
/cache/shutdown_redis.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set -e
4 | set -x
5 |
6 | ../../redis/src/redis-cli -s ./cache.sock shutdown
7 |
--------------------------------------------------------------------------------
/config/generic.json.sample:
--------------------------------------------------------------------------------
1 | {
2 | "loglevel": "INFO",
3 | "website_listen_ip": "0.0.0.0",
4 | "website_listen_port": 5005,
5 | "systemd_service_name": "bgpranking",
6 | "storage_db_hostname": "127.0.0.1",
7 | "storage_db_port": 5188,
8 | "ranking_db_hostname": "127.0.0.1",
9 | "ranking_db_port": 5189,
10 | "ipasnhistory_url": "https://ipasnhistory.circl.lu/",
11 | "_notes": {
12 |     "loglevel": "Can be one of the values listed here: https://docs.python.org/3/library/logging.html#levels",
13 | "website_listen_ip": "IP Flask will listen on. Defaults to 0.0.0.0, meaning all interfaces.",
14 | "website_listen_port": "Port Flask will listen on.",
15 | "systemd_service_name": "(Optional) Name of the systemd service if your project has one.",
16 | "storage_db_hostname": "Hostname of the storage database (kvrocks)",
17 | "storage_db_port": "Port of the storage database (kvrocks)",
18 | "ranking_db_hostname": "Hostname of the ranking database (kvrocks)",
19 | "ranking_db_port": "Port of the ranking database (kvrocks)",
20 | "ipasnhistory_url": "URL of the IP ASN History service, defaults to the public one."
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
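
The keys under `_notes` document the matching top-level keys; the scripts read the values through `get_config('generic', <key>)` from `bgpranking.default`. A rough, hypothetical equivalent, assuming `BGPRANKING_HOME` points at the project root and falling back to the sample file for missing keys (the fallback behaviour is an assumption, not taken from the codebase):

```python
# Rough, hypothetical equivalent of get_config('generic', key): read
# config/generic.json and fall back to the sample file when a key is missing.
# The BGPRANKING_HOME environment variable is assumed to point at the checkout.
import json
import os
from pathlib import Path
from typing import Any


def read_generic_config(key: str) -> Any:
    homedir = Path(os.environ['BGPRANKING_HOME'])
    config_file = homedir / 'config' / 'generic.json'
    sample_file = homedir / 'config' / 'generic.json.sample'
    if config_file.exists():
        with config_file.open() as f:
            config = json.load(f)
        if key in config:
            return config[key]
    # Fall back to the sample so a fresh checkout still gets defaults.
    with sample_file.open() as f:
        return json.load(f)[key]


if __name__ == '__main__':
    print(read_generic_config('storage_db_port'))  # 5188 with the sample above
```
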
/config/modules/Alienvault.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "http://reputation.alienvault.com/reputation.generic",
3 | "vendor": "alienvault",
4 | "name": "reputation.generic",
5 | "impact": 0.01
6 | }
7 |
--------------------------------------------------------------------------------
/config/modules/BlocklistDeApache.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "http://www.blocklist.de/lists/apache.txt",
3 | "vendor": "blocklist_de",
4 | "name": "apache",
5 | "impact": 0.1
6 | }
7 |
--------------------------------------------------------------------------------
/config/modules/BlocklistDeBots.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "http://www.blocklist.de/lists/bots.txt",
3 | "vendor": "blocklist_de",
4 | "name": "bots",
5 | "impact": 3
6 | }
7 |
--------------------------------------------------------------------------------
/config/modules/BlocklistDeFTP.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "http://www.blocklist.de/lists/ftp.txt",
3 | "vendor": "blocklist_de",
4 | "name": "ftp",
5 | "impact": 3
6 | }
7 |
--------------------------------------------------------------------------------
/config/modules/BlocklistDeIMAP.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "http://www.blocklist.de/lists/imap.txt",
3 | "vendor": "blocklist_de",
4 | "name": "imap",
5 | "impact": 3
6 | }
7 |
--------------------------------------------------------------------------------
/config/modules/BlocklistDeMail.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "http://www.blocklist.de/lists/mail.txt",
3 | "vendor": "blocklist_de",
4 | "name": "mail",
5 | "impact": 0.1
6 | }
7 |
--------------------------------------------------------------------------------
/config/modules/BlocklistDeSIP.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "http://www.blocklist.de/lists/sip.txt",
3 | "vendor": "blocklist_de",
4 | "name": "sip",
5 | "impact": 3
6 | }
7 |
--------------------------------------------------------------------------------
/config/modules/BlocklistDeSSH.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "http://www.blocklist.de/lists/ssh.txt",
3 | "vendor": "blocklist_de",
4 | "name": "ssh",
5 | "impact": 3
6 | }
7 |
--------------------------------------------------------------------------------
/config/modules/BlocklistDeStrong.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "http://www.blocklist.de/lists/strongips.txt",
3 | "vendor": "blocklist_de",
4 | "name": "strong",
5 | "impact": 6
6 | }
7 |
--------------------------------------------------------------------------------
/config/modules/CIArmy.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "http://www.ciarmy.com/list/ci-badguys.txt",
3 | "vendor": "ciarmy",
4 | "name": "ip",
5 | "impact": 5
6 | }
7 |
--------------------------------------------------------------------------------
/config/modules/CleanMXMalwares.json:
--------------------------------------------------------------------------------
1 | {
2 | "vendor": "cleanmx",
3 | "name": "malwares",
4 | "impact": 5
5 | }
6 |
--------------------------------------------------------------------------------
/config/modules/CleanMXPhishing.json:
--------------------------------------------------------------------------------
1 | {
2 | "vendor": "cleanmx",
3 | "name": "phishing",
4 | "impact": 5
5 | }
6 |
--------------------------------------------------------------------------------
/config/modules/CleanMXPortals.json:
--------------------------------------------------------------------------------
1 | {
2 | "vendor": "cleanmx",
3 | "name": "portals",
4 | "impact": 5
5 | }
6 |
--------------------------------------------------------------------------------
/config/modules/CoinBlockerLists.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://zerodot1.gitlab.io/CoinBlockerLists/MiningServerIPList.txt",
3 | "vendor": "ZeroDot1",
4 | "name": "CoinBlockerLists",
5 | "impact": 3
6 | }
7 |
--------------------------------------------------------------------------------
/config/modules/DshieldDaily.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "http://www.dshield.org/feeds/daily_sources",
3 | "vendor": "dshield",
4 | "name": "daily",
5 | "impact": 0.1,
6 | "parser": ".parsers.dshield"
7 | }
8 |
--------------------------------------------------------------------------------
/config/modules/DshieldTopIPs.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "http://www.dshield.org/feeds/topips.txt",
3 | "vendor": "dshield",
4 | "name": "topips",
5 | "impact": 1
6 | }
7 |
--------------------------------------------------------------------------------
/config/modules/EmergingThreatsCompromized.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "http://rules.emergingthreats.net/blockrules/compromised-ips.txt",
3 | "vendor": "emergingthreats",
4 | "name": "compromized",
5 | "impact": 5
6 | }
7 |
--------------------------------------------------------------------------------
/config/modules/FeodotrackerIPBlockList.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://feodotracker.abuse.ch/downloads/ipblocklist.txt",
3 | "vendor": "feodotracker",
4 | "name": "ipblocklist",
5 | "impact": 5,
6 | "parser": ".parsers.abusech_feodo"
7 | }
8 |
--------------------------------------------------------------------------------
/config/modules/Malc0de.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "http://malc0de.com/bl/IP_Blacklist.txt",
3 | "vendor": "malc0de",
4 | "name": "blocklist",
5 | "impact": 5,
6 | "parser": ".parsers.malc0de"
7 | }
8 |
--------------------------------------------------------------------------------
/config/modules/MalwareDomainListIP.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "http://www.malwaredomainlist.com/hostslist/ip.txt",
3 | "vendor": "malwaredomainlist",
4 | "name": "ip",
5 | "impact": 5
6 | }
7 |
--------------------------------------------------------------------------------
/config/modules/SSLBlacklist.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://sslbl.abuse.ch/blacklist/sslipblacklist.txt",
3 | "vendor": "abuse.ch",
4 | "name": "sslblacklist",
5 | "impact": 7,
6 | "parser": ".parsers.abusech"
7 | }
8 |
--------------------------------------------------------------------------------
/config/modules/ThreatFoxIOC.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://threatfox.abuse.ch/export/json/ip-port/recent/",
3 | "vendor": "abuse.ch",
4 | "name": "threatfox",
5 | "impact": 5,
6 | "parser": ".parsers.abusech_threatfox"
7 | }
8 |
--------------------------------------------------------------------------------
/config/modules/greensnow.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://blocklist.greensnow.co/greensnow.txt",
3 | "vendor": "greensnow",
4 | "name": "blocklist",
5 | "impact": 3
6 | }
7 |
--------------------------------------------------------------------------------
/config/modules/jq_all_the_things.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 | set -x
5 |
6 | # Requires sponge, from moreutils
7 |
8 | for dir in ./*.json
9 | do
10 | cat ${dir} | jq . | sponge ${dir}
11 | done
12 |
--------------------------------------------------------------------------------
/config/modules/module.schema:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/schema#",
3 | "title": "BGP Ranking NG module",
4 | "id": "https://www.github.com/CIRCL/bgpranking-ng/modules.json",
5 | "type": "object",
6 | "additionalProperties": false,
7 | "properties": {
8 | "url": {
9 | "type": "string"
10 | },
11 | "vendor": {
12 | "type": "string"
13 | },
14 | "name": {
15 | "type": "string"
16 | },
17 | "impact": {
18 | "type": "number"
19 | },
20 | "parser": {
21 | "type": "string"
22 | },
23 | "tags": {
24 | "type": "array",
25 | "uniqueItems": true,
26 | "items": {
27 | "type": "string"
28 | }
29 | }
30 | },
31 | "required": [
32 | "name",
33 | "vendor",
34 | "impact"
35 | ]
36 | }
37 |
--------------------------------------------------------------------------------
/config/modules/pop3gropers.json:
--------------------------------------------------------------------------------
1 | {
2 | "url": "https://home.nuug.no/~peter/pop3gropers.txt",
3 | "vendor": "bsdly",
4 | "name": "pop3gropers",
5 | "impact": 3
6 | }
7 |
--------------------------------------------------------------------------------
/config/modules/shadowserver_only.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 | set -x
5 |
6 | find . -maxdepth 1 -type f -name "*.json" ! -iname "shadowserver*.json" -delete
7 |
--------------------------------------------------------------------------------
/config/modules/validate_all.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 | set -x
5 |
6 | # remove the exec flag on the json files
7 | find -name "*.json" -exec chmod -x "{}" \;
8 |
9 | diffs=`git status --porcelain | wc -l`
10 |
11 | if ! [ $diffs -eq 0 ]; then
12 |     echo "Please make sure you remove the executable flag on the json files before committing: find -name \"*.json\" -exec chmod -x \"{}\" \\;"
13 | # exit 1
14 | fi
15 |
16 | ./jq_all_the_things.sh
17 |
18 | diffs=`git status --porcelain | wc -l`
19 |
20 | if ! [ $diffs -eq 0 ]; then
21 |     echo "Please make sure you run ./jq_all_the_things.sh before committing."
22 | # exit 1
23 | fi
24 |
25 | for dir in ./*.json
26 | do
27 | echo -n "${dir}: "
28 | jsonschema -i ${dir} module.schema
29 | echo ''
30 | done
31 |
--------------------------------------------------------------------------------
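
`validate_all.sh` above checks every module config against `module.schema` with the `jsonschema` command-line tool. A rough Python sketch of the same check, run from `config/modules` (it assumes the third-party `jsonschema` package is installed):

```python
# Sketch of the check validate_all.sh performs with the `jsonschema` CLI:
# validate every module config in this directory against module.schema.
import json
from pathlib import Path

from jsonschema import ValidationError, validate

modules_dir = Path('.')  # assumes the script is run from config/modules

with (modules_dir / 'module.schema').open() as f:
    schema = json.load(f)

for config_file in sorted(modules_dir.glob('*.json')):
    with config_file.open() as f:
        module = json.load(f)
    try:
        validate(instance=module, schema=schema)
        print(f'{config_file.name}: OK')
    except ValidationError as e:
        print(f'{config_file.name}: {e.message}')
```
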
/config/shadowserver.json.sample:
--------------------------------------------------------------------------------
1 | {
2 | "user": "[USERNAME]",
3 | "password": "[PASSWORD]"
4 | }
5 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "bgpranking"
3 | version = "2.0"
4 | description = "BGP Ranking is software that ranks AS numbers based on their malicious activities."
5 | authors = ["Raphaël Vinot <raphael.vinot@circl.lu>"]
6 | license = "AGPLv3"
7 |
8 | [tool.poetry.scripts]
9 | start = "bin.start:main"
10 | stop = "bin.stop:main"
11 | update = "bin.update:main"
12 | shutdown = "bin.shutdown:main"
13 | run_backend = "bin.run_backend:main"
14 | start_website = "bin.start_website:main"
15 |
16 | archiver = "bin.archiver:main"
17 | asn_descriptions = "bin.asn_descriptions:main"
18 | dbinsert = "bin.dbinsert:main"
19 | fetcher = "bin.fetcher:main"
20 | parser = "bin.parser:main"
21 | ranking = "bin.ranking:main"
22 | sanitizer = "bin.sanitizer:main"
23 | ssfetcher = "bin.ssfetcher:main"
24 |
25 |
26 | [tool.poetry.dependencies]
27 | python = "^3.8.1"
28 | redis = {version = "^5.0.7", extras = ["hiredis"]}
29 | flask-restx = "^1.3.0"
30 | gunicorn = "^22.0.0"
31 | python-dateutil = "^2.9.0.post0"
32 | pyipasnhistory = "^2.1.2"
33 | pycountry = "^23.12.11"
34 | beautifulsoup4 = "^4.12.3"
35 | aiohttp = "^3.9.5"
36 | Bootstrap-Flask = "^2.4.0"
37 | pid = "^3.0.4"
38 | pybgpranking2 = "^2.0.1"
39 |
40 | [tool.poetry.dev-dependencies]
41 | ipython = [
42 | {version = "<8.13.0", python = "<3.9"},
43 | {version = "^8.18.0", python = ">=3.9"},
44 | {version = "^8.24.0", python = ">=3.10"}
45 | ]
46 | mypy = "^1.10.1"
47 | types-setuptools = "^70.2.0.20240704"
48 | types-redis = "^4.6.0.20240425"
49 | types-requests = "^2.32.0.20240622"
50 | types-python-dateutil = "^2.9.0.20240316"
51 |
52 | [build-system]
53 | requires = ["poetry-core"]
54 | build-backend = "poetry.core.masonry.api"
55 |
56 | [tool.mypy]
57 | python_version = 3.8
58 | check_untyped_defs = true
59 | ignore_errors = false
60 | ignore_missing_imports = false
61 | strict_optional = true
62 | no_implicit_optional = true
63 | warn_unused_ignores = true
64 | warn_redundant_casts = true
65 | warn_unused_configs = true
66 | warn_unreachable = true
67 |
68 | show_error_context = true
69 | pretty = true
70 |
--------------------------------------------------------------------------------
/ranking/run_kvrocks.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 | set -x
5 |
6 | ../../kvrocks/build/kvrocks -c kvrocks.conf
7 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | from setuptools import setup
4 |
5 |
6 | setup(
7 | name='bgpranking',
8 | version='0.1',
9 | author='Raphaël Vinot',
10 | author_email='raphael.vinot@circl.lu',
11 | maintainer='Raphaël Vinot',
12 | url='https://github.com/D4-project/BGP-Ranking',
13 | description='BGP Ranking, the new one.',
14 | packages=['bgpranking'],
15 | scripts=['bin/archiver.py', 'bin/dbinsert.py', 'bin/fetcher.py', 'bin/parser.py',
16 | 'bin/sanitizer.py', 'bin/run_backend.py', 'bin/ssfetcher.py', 'bin/start_website.py',
17 | 'bin/monitor.py', 'bin/ranking.py', 'bin/asn_descriptions.py',
18 | 'bin/manual_ranking.py',
19 | 'bin/start.py', 'bin/stop.py', 'bin/shutdown.py'],
20 | classifiers=[
21 | 'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',
22 | 'Development Status :: 3 - Alpha',
23 | 'Environment :: Console',
24 | 'Operating System :: POSIX :: Linux',
25 | 'Intended Audience :: Science/Research',
26 | 'Intended Audience :: Telecommunications Industry',
27 | 'Intended Audience :: Information Technology',
28 | 'Programming Language :: Python :: 3',
29 | 'Topic :: Security',
30 | 'Topic :: Internet',
31 | ],
32 | include_package_data=True,
33 | package_data={'config': ['config/*/*.conf',
34 | 'config/modules/*.json']},
35 | )
36 |
--------------------------------------------------------------------------------
/storage/run_kvrocks.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 | set -x
5 |
6 | ../../kvrocks/build/kvrocks -c kvrocks.conf
7 |
--------------------------------------------------------------------------------
/temp/run_redis.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e
4 | set -x
5 |
6 | if [ -f ../../valkey/src/valkey-server ]; then
7 | ../../valkey/src/valkey-server ./intake.conf
8 | ../../valkey/src/valkey-server ./prepare.conf
9 | elif [ -f ../../redis/src/redis-server ]; then
10 | ../../redis/src/redis-server ./intake.conf
11 | ../../redis/src/redis-server ./prepare.conf
12 | else
13 | echo "Warning: using system redis-server. Valkey-server or redis-server from source is recommended." >&2
14 | /usr/bin/redis-server ./intake.conf
15 | /usr/bin/redis-server ./prepare.conf
16 | fi
17 |
--------------------------------------------------------------------------------
/temp/shutdown_redis.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # set -e
4 | set -x
5 |
6 | ../../redis/src/redis-cli -s ./intake.sock shutdown
7 | ../../redis/src/redis-cli -s ./prepare.sock shutdown
8 |
--------------------------------------------------------------------------------
/tools/3rdparty.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import requests
5 |
6 | from bgpranking.default import get_homedir
7 |
8 | d3js_version = '7'
9 | bootstrap_select_version = "1.14.0-beta3"
10 | jquery_version = "3.7.1"
11 |
12 | if __name__ == '__main__':
13 | dest_dir = get_homedir() / 'website' / 'web' / 'static'
14 |
15 | d3 = requests.get(f'https://d3js.org/d3.v{d3js_version}.min.js')
16 | with (dest_dir / f'd3.v{d3js_version}.min.js').open('wb') as f:
17 | f.write(d3.content)
18 | print(f'Downloaded d3js v{d3js_version}.')
19 |
20 | bootstrap_select_js = requests.get(f'https://cdn.jsdelivr.net/npm/bootstrap-select@{bootstrap_select_version}/dist/js/bootstrap-select.min.js')
21 | with (dest_dir / 'bootstrap-select.min.js').open('wb') as f:
22 | f.write(bootstrap_select_js.content)
23 | print(f'Downloaded bootstrap_select js v{bootstrap_select_version}.')
24 |
25 | bootstrap_select_css = requests.get(f'https://cdn.jsdelivr.net/npm/bootstrap-select@{bootstrap_select_version}/dist/css/bootstrap-select.min.css')
26 | with (dest_dir / 'bootstrap-select.min.css').open('wb') as f:
27 | f.write(bootstrap_select_css.content)
28 | print(f'Downloaded bootstrap_select css v{bootstrap_select_version}.')
29 |
30 | jquery = requests.get(f'https://code.jquery.com/jquery-{jquery_version}.min.js')
31 | with (dest_dir / 'jquery.min.js').open('wb') as f:
32 | f.write(jquery.content)
33 | print(f'Downloaded jquery v{jquery_version}.')
34 |
35 | print('All 3rd party modules for the website were downloaded.')
36 |
--------------------------------------------------------------------------------
/tools/clear_prepare_db.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import uuid
4 |
5 | from redis import Redis
6 | from bgpranking.default import get_socket_path
7 |
8 | redis_sanitized = Redis(unix_socket_path=get_socket_path('prepare'), db=0, decode_responses=True)
9 | to_delete = []
10 | for name in redis_sanitized.scan_iter(_type='HASH', count=100):
11 | try:
12 | uuid.UUID(name)
13 | except Exception as e:
14 | continue
15 | if not redis_sanitized.sismember('to_insert', name):
16 | to_delete.append(name)
17 | if len(to_delete) >= 100000:
18 | redis_sanitized.delete(*to_delete)
19 | to_delete = []
20 | if to_delete:
21 | redis_sanitized.delete(*to_delete)
22 |
--------------------------------------------------------------------------------
/tools/migrate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | from datetime import datetime
5 | from typing import Set
6 |
7 | from redis import Redis
8 |
9 | redis_src = Redis(unix_socket_path='../storage/storage.sock', db=0)
10 | redis_dst = Redis('127.0.0.1', 5188)
11 |
12 | chunk_size = 100000
13 |
14 |
15 | def process_chunk(src: Redis, dst: Redis, keys: Set[str]):
16 | src_pipeline = src.pipeline()
17 | [src_pipeline.type(key) for key in keys]
18 | to_process = {key: key_type for key, key_type in zip(keys, src_pipeline.execute())}
19 |
20 | src_pipeline = src.pipeline()
21 | for key, key_type in to_process.items():
22 | if key_type == b"string":
23 | src_pipeline.get(key)
24 | elif key_type == b"list":
25 | raise Exception(f'Lists should not be used: {key}.')
26 | elif key_type == b"set":
27 | src_pipeline.smembers(key)
28 | elif key_type == b"zset":
29 | src_pipeline.zrangebyscore(key, '-Inf', '+Inf', withscores=True)
30 | elif key_type == b"hash":
31 | src_pipeline.hgetall(key)
32 | else:
33 | raise Exception(f'{key_type} not supported {key}.')
34 |
35 | dest_pipeline = dst.pipeline()
36 | for key, content in zip(to_process.keys(), src_pipeline.execute()):
37 | if to_process[key] == b"string":
38 | dest_pipeline.set(key, content)
39 | elif to_process[key] == b"set":
40 | dest_pipeline.sadd(key, *content)
41 | elif to_process[key] == b"zset":
42 | dest_pipeline.zadd(key, {value: rank for value, rank in content})
43 | elif to_process[key] == b"hash":
44 | dest_pipeline.hmset(key, content)
45 |
46 | dest_pipeline.execute()
47 |
48 |
49 | def migrate(src: Redis, dst: Redis):
50 | keys = set()
51 | pos = 0
52 | for key in src.scan_iter(count=chunk_size, match='2017*'):
53 | keys.add(key)
54 |
55 | if len(keys) == chunk_size:
56 | process_chunk(src, dst, keys)
57 | pos += len(keys)
58 | print(f'{datetime.now()} - {pos} keys done.')
59 | keys = set()
60 |
61 | # migrate remaining keys
62 | process_chunk(src, dst, keys)
63 | pos += len(keys)
64 | print(f'{datetime.now()} - {pos} keys done.')
65 |
66 |
67 | if __name__ == '__main__':
68 | migrate(redis_src, redis_dst)
69 |
--------------------------------------------------------------------------------
/tools/monitoring.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import json
4 |
5 | from redis import Redis
6 | from bgpranking.default import get_socket_path
7 | from bgpranking.helpers import get_ipasn
8 |
9 |
10 | class Monitor():
11 |
12 | def __init__(self):
13 | self.intake = Redis(unix_socket_path=get_socket_path('intake'), db=0, decode_responses=True)
14 | self.sanitize = Redis(unix_socket_path=get_socket_path('prepare'), db=0, decode_responses=True)
15 | self.cache = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
16 | self.ipasn = get_ipasn()
17 |
18 | def get_values(self):
19 | ips_in_intake = self.intake.scard('intake')
20 | ready_to_insert = self.sanitize.scard('to_insert')
21 | ipasn_meta = self.ipasn.meta()
22 | if len(ipasn_meta['cached_dates']['caida']['v4']['cached']) > 15:
23 | ipasn_meta['cached_dates']['caida']['v4']['cached'] = 'Too many entries'
24 | if len(ipasn_meta['cached_dates']['caida']['v6']['cached']) > 15:
25 | ipasn_meta['cached_dates']['caida']['v6']['cached'] = 'Too many entries'
26 | return json.dumps({'Non-parsed IPs': ips_in_intake, 'Parsed IPs': ready_to_insert,
27 | 'running': self.cache.zrangebyscore('running', '-inf', '+inf', withscores=True),
28 | 'IPASN History': ipasn_meta},
29 | indent=2)
30 |
31 |
32 | if __name__ == '__main__':
33 | m = Monitor()
34 | print(m.get_values())
35 |
--------------------------------------------------------------------------------
/tools/validate_config_files.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import json
5 | import logging
6 | import argparse
7 |
8 | from bgpranking.default import get_homedir
9 |
10 |
11 | def validate_generic_config_file():
12 | user_config = get_homedir() / 'config' / 'generic.json'
13 | with user_config.open() as f:
14 | generic_config = json.load(f)
15 | with (get_homedir() / 'config' / 'generic.json.sample').open() as f:
16 | generic_config_sample = json.load(f)
17 | # Check documentation
18 | for key in generic_config_sample.keys():
19 | if key == '_notes':
20 | continue
21 | if key not in generic_config_sample['_notes']:
22 | raise Exception(f'###### - Documentation missing for {key}')
23 |
24 | # Check all entries in the sample files are in the user file, and they have the same type
25 | for key in generic_config_sample.keys():
26 | if key == '_notes':
27 | continue
28 | if generic_config.get(key) is None:
29 | logger.warning(f'Entry missing in user config file: {key}. Will default to: {generic_config_sample[key]}')
30 | continue
31 | if not isinstance(generic_config[key], type(generic_config_sample[key])):
32 | raise Exception(f'Invalid type for {key}. Got: {type(generic_config[key])} ({generic_config[key]}), expected: {type(generic_config_sample[key])} ({generic_config_sample[key]})')
33 |
34 | if isinstance(generic_config[key], dict):
35 | # Check entries
36 | for sub_key in generic_config_sample[key].keys():
37 | if sub_key not in generic_config[key]:
38 |                     raise Exception(f'{sub_key} is missing in {key} in the user config file. Default from sample file: {generic_config_sample[key][sub_key]}')
39 | if not isinstance(generic_config[key][sub_key], type(generic_config_sample[key][sub_key])):
40 | raise Exception(f'Invalid type for {sub_key} in {key}. Got: {type(generic_config[key][sub_key])} ({generic_config[key][sub_key]}), expected: {type(generic_config_sample[key][sub_key])} ({generic_config_sample[key][sub_key]})')
41 |
42 | # Make sure the user config file doesn't have entries missing in the sample config
43 | for key in generic_config.keys():
44 | if key not in generic_config_sample:
45 | raise Exception(f'{key} is missing in the sample config file. You need to compare {user_config} with {user_config}.sample.')
46 |
47 | return True
48 |
49 |
50 | def update_user_configs():
51 | for file_name in ['generic']:
52 | with (get_homedir() / 'config' / f'{file_name}.json').open() as f:
53 | try:
54 | generic_config = json.load(f)
55 | except Exception:
56 | generic_config = {}
57 | with (get_homedir() / 'config' / f'{file_name}.json.sample').open() as f:
58 | generic_config_sample = json.load(f)
59 |
60 | has_new_entry = False
61 | for key in generic_config_sample.keys():
62 | if key == '_notes':
63 | continue
64 | if generic_config.get(key) is None:
65 | print(f'{key} was missing in {file_name}, adding it.')
66 | print(f"Description: {generic_config_sample['_notes'][key]}")
67 | generic_config[key] = generic_config_sample[key]
68 | has_new_entry = True
69 | elif isinstance(generic_config[key], dict):
70 | for sub_key in generic_config_sample[key].keys():
71 | if sub_key not in generic_config[key]:
72 | print(f'{sub_key} was missing in {key} from {file_name}, adding it.')
73 | generic_config[key][sub_key] = generic_config_sample[key][sub_key]
74 | has_new_entry = True
75 | if has_new_entry:
76 | with (get_homedir() / 'config' / f'{file_name}.json').open('w') as fw:
77 | json.dump(generic_config, fw, indent=2, sort_keys=True)
78 | return has_new_entry
79 |
80 |
81 | if __name__ == '__main__':
82 | logger = logging.getLogger('Config validator')
83 | parser = argparse.ArgumentParser(description='Check the config files.')
84 | parser.add_argument('--check', default=False, action='store_true', help='Check if the sample config and the user config are in-line')
85 | parser.add_argument('--update', default=False, action='store_true', help='Update the user config with the entries from the sample config if entries are missing')
86 | args = parser.parse_args()
87 |
88 | if args.check:
89 | if validate_generic_config_file():
90 | print(f"The entries in {get_homedir() / 'config' / 'generic.json'} are valid.")
91 |
92 | if args.update:
93 | if not update_user_configs():
94 | print(f"No updates needed in {get_homedir() / 'config' / 'generic.json'}.")
95 |
--------------------------------------------------------------------------------
/website/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/D4-project/BGP-Ranking/5f4e051f940b1c79ec5c6b432db1c7b2bdcbc781/website/__init__.py
--------------------------------------------------------------------------------
/website/readme.md:
--------------------------------------------------------------------------------
1 | # Usage
2 |
3 | Run:
4 |
5 | ```bash
6 | start_website.py
7 | ```
8 |
9 | In debug mode:
10 |
11 | ```bash
12 | export FLASK_APP=${BGPRANKING_HOME}/website/web/__init__.py
13 | flask run -h 0.0.0.0 -p 5005
14 | ```
15 |
16 |
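17 | Once the server is up, the API defined in `website/web/genericapi.py` can be used for a
18 | quick smoke test. The port matches the debug command above; the ASN below is only an
19 | example, adjust as needed:
20 |
21 | ```bash
22 | # Is the backend reachable?
23 | curl "http://127.0.0.1:5005/redis_up"
24 |
25 | # Ranking and description of a single ASN (example ASN)
26 | curl -X POST "http://127.0.0.1:5005/json/asn" --data '{"asn": "5577"}'
27 | ```
28 |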
--------------------------------------------------------------------------------
/website/web/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import pkg_resources
5 |
6 | from collections import defaultdict
7 | from datetime import date, timedelta
8 | from typing import Dict, Any, Tuple, List, Optional
9 |
10 | from flask import Flask, render_template, request, session, redirect, url_for
11 | from flask_bootstrap import Bootstrap5 # type: ignore
12 | from flask_restx import Api # type: ignore
13 |
14 | from bgpranking.bgpranking import BGPRanking
15 | from bgpranking.helpers import get_ipasn
16 |
17 | from .genericapi import api as generic_api
18 | from .helpers import get_secret_key, load_session, get_country_codes
19 | from .proxied import ReverseProxied
20 |
21 | app = Flask(__name__)
22 |
23 | app.wsgi_app = ReverseProxied(app.wsgi_app) # type: ignore
24 |
25 | app.config['SECRET_KEY'] = get_secret_key()
26 |
27 | Bootstrap5(app)
28 | app.config['BOOTSTRAP_SERVE_LOCAL'] = True
29 |
30 | bgpranking = BGPRanking()
31 |
32 |
33 | # ############# Web UI #############
34 |
35 | @app.route('/', methods=['GET', 'POST'])
36 | def index():
37 | if request.method == 'HEAD':
38 | # Just returns ack if the webserver is running
39 | return 'Ack'
40 | load_session()
41 | sources = bgpranking.get_sources(date=session['date'])['response']
42 | session.pop('asn', None)
43 | session.pop('country', None)
44 | ranks = bgpranking.asns_global_ranking(limit=100, **session)['response']
45 | r = [(asn, rank, bgpranking.get_asn_descriptions(int(asn))['response']) for asn, rank in ranks]
46 | return render_template('index.html', ranks=r, sources=sources, countries=get_country_codes(), **session)
47 |
48 |
49 | @app.route('/asn', methods=['GET', 'POST'])
50 | def asn_details():
51 | load_session()
52 | if 'asn' not in session:
53 | return redirect(url_for('index'))
54 | asn_descriptions = bgpranking.get_asn_descriptions(asn=session['asn'], all_descriptions=True)['response']
55 | sources = bgpranking.get_sources(date=session['date'])['response']
56 | prefix = session.pop('prefix', None)
57 | ranks = bgpranking.asn_details(**session)['response']
58 | if prefix:
59 | prefix_ips = bgpranking.get_prefix_ips(prefix=prefix, **session)['response']
60 | prefix_ips = [(ip, sorted(sources)) for ip, sources in prefix_ips.items()]
61 | prefix_ips.sort(key=lambda entry: len(entry[1]), reverse=True)
62 | else:
63 | prefix_ips = []
64 | return render_template('asn.html', sources=sources, ranks=ranks,
65 | prefix_ips=prefix_ips, asn_descriptions=asn_descriptions, **session)
66 |
67 |
68 | @app.route('/country', methods=['GET', 'POST'])
69 | def country():
70 | load_session()
71 | sources = bgpranking.get_sources(date=session['date'])['response']
72 | return render_template('country.html', sources=sources, countries=get_country_codes(), **session)
73 |
74 |
75 | @app.route('/country_history_callback', methods=['GET', 'POST'])
76 | def country_history_callback():
77 | history_data: Dict[str, Tuple[str, str, List[Any]]]
78 | history_data = request.get_json(force=True)
79 | to_display = []
80 | mapping: Dict[str, Any] = defaultdict(dict)
81 | dates = []
82 | all_asns = set([])
83 | for country, foo in history_data.items():
84 | for d, r_sum, details in foo:
85 | dates.append(d)
86 | for detail in details:
87 | asn, r = detail
88 | all_asns.add(asn)
89 | mapping[asn][d] = r
90 |
91 | to_display_temp = [[country] + dates]
92 | for a in sorted(list(all_asns), key=int):
93 | line = [a]
94 | for d in dates:
95 | if mapping[a].get(d) is not None:
96 | line.append(round(mapping[a].get(d), 3))
97 | else:
98 | line.append('N/A')
99 | to_display_temp.append(line)
100 | to_display.append(to_display_temp)
101 | return render_template('country_asn_map.html', to_display=to_display)
102 |
103 |
104 | @app.route('/ipasn', methods=['GET', 'POST'])
105 | def ipasn():
106 | d: Optional[Dict] = None
107 | if request.method == 'POST':
108 | d = request.form
109 | elif request.method == 'GET':
110 | d = request.args
111 |
112 | if not d or 'ip' not in d:
113 | return render_template('ipasn.html')
114 | else:
115 | if isinstance(d['ip'], list):
116 | ip = d['ip'][0]
117 | else:
118 | ip = d['ip']
119 | ipasn = get_ipasn()
120 | response = ipasn.query(first=(date.today() - timedelta(days=60)).isoformat(),
121 | aggregate=True, ip=ip)
122 | for r in response['response']:
123 | r['asn_descriptions'] = []
124 | asn_descriptions = bgpranking.get_asn_descriptions(asn=r['asn'], all_descriptions=True)['response']
125 | for timestamp in sorted(asn_descriptions.keys()):
126 | if r['first_seen'] <= timestamp <= r['last_seen']:
127 | r['asn_descriptions'].append(asn_descriptions[timestamp])
128 |             # No description inside [first_seen, last_seen]: fall back to the most recent earlier one.
129 |             if not r['asn_descriptions'] and asn_descriptions and timestamp <= r['last_seen']:
130 | r['asn_descriptions'].append(asn_descriptions[timestamp])
131 |
132 | return render_template('ipasn.html', ipasn_details=response['response'],
133 | **response['meta'])
134 |
135 |
136 | # ############# Web UI #############
137 |
138 | # Query API
139 |
140 | api = Api(app, title='BGP Ranking API',
141 | description='API to query BGP Ranking.',
142 | doc='/doc/',
143 | version=pkg_resources.get_distribution('bgpranking').version)
144 |
145 | api.add_namespace(generic_api)
146 |
--------------------------------------------------------------------------------
/website/web/genericapi.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | from typing import Dict, Any, Union
5 | from urllib.parse import urljoin
6 |
7 | import requests
8 |
9 | from flask import request, session
10 | from flask_restx import Namespace, Resource, fields # type: ignore
11 |
12 | from bgpranking.default import get_config
13 | from bgpranking.bgpranking import BGPRanking
14 |
15 | from .helpers import load_session
16 |
17 | api = Namespace('BGP Ranking API', description='API to query BGP Ranking.', path='/')
18 |
19 | bgpranking: BGPRanking = BGPRanking()
20 |
21 |
22 | @api.route('/redis_up')
23 | @api.doc(description='Check if redis is up and running')
24 | class RedisUp(Resource):
25 |
26 | def get(self):
27 | return bgpranking.check_redis_up()
28 |
29 |
30 | @api.route('/ipasn_history/<path:path>')
31 | @api.route('/ipasn_history/')
32 | class IPASNProxy(Resource):
33 |
34 |     def _proxy_url(self):
35 |         if request.full_path[-1] == '?':  # full_path ends with a bare '?' when there is no query string
36 |             full_path = request.full_path[:-1]
37 |         else:
38 |             full_path = request.full_path
39 |         path_for_ipasnhistory = full_path.replace('/ipasn_history/', '')
40 |         if path_for_ipasnhistory.startswith('?'):
41 |             path_for_ipasnhistory = path_for_ipasnhistory.replace('?', 'ip?')  # query-string-only requests go to the 'ip' path
42 |         if not path_for_ipasnhistory:
43 |             path_for_ipasnhistory = 'ip'
44 |         return urljoin(get_config('generic', 'ipasnhistory_url'), path_for_ipasnhistory)
45 |
46 | def get(self, path=''):
47 | url = self._proxy_url()
48 | return requests.get(url).json()
49 |
50 | def post(self, path=''):
51 | url = self._proxy_url()
52 | return requests.post(url, data=request.data).json()
53 |
54 |
55 | # TODO: Add other parameters for asn_rank
56 | asn_query_fields = api.model('ASNQueryFields', {
57 |     'asn': fields.String(description='The Autonomous System Number to search', required=True)
58 | })
59 |
60 |
61 | @api.route('/json/asn')
62 | class ASNRank(Resource):
63 |
64 | @api.doc(body=asn_query_fields)
65 | def post(self):
66 | # TODO
67 |         # * Filter on date => if only returning one description, return the description at that date
68 | query: Dict[str, Any] = request.get_json(force=True)
69 | to_return: Dict[str, Union[str, Dict[str, Any]]] = {'meta': query, 'response': {}}
70 | if 'asn' not in query:
71 | to_return['error'] = f'You need to pass an asn - {query}'
72 | return to_return
73 |
74 | asn_description_query = {'asn': query['asn']}
75 | responses = bgpranking.get_asn_descriptions(**asn_description_query)['response']
76 | to_return['response']['asn_description'] = responses # type: ignore
77 |
78 | asn_rank_query = {'asn': query['asn']}
79 | if 'date' in query:
80 | asn_rank_query['date'] = query['date']
81 | if 'source' in query:
82 | asn_rank_query['source'] = query['source']
83 | else:
84 | asn_rank_query['with_position'] = True
85 | if 'ipversion' in query:
86 | asn_rank_query['ipversion'] = query['ipversion']
87 |
88 | to_return['response']['ranking'] = bgpranking.asn_rank(**asn_rank_query)['response'] # type: ignore
89 | return to_return
90 |
91 |
92 | asn_descr_fields = api.model('ASNDescriptionsFields', {
93 |     'asn': fields.String(description='The Autonomous System Number to search', required=True),
94 | 'all_descriptions': fields.Boolean(description='If true, returns all the descriptions instead of only the last one', default=False)
95 | })
96 |
97 |
98 | @api.route('/json/asn_descriptions')
99 | class ASNDescription(Resource):
100 |
101 | @api.doc(body=asn_descr_fields)
102 | def post(self):
103 | query: Dict = request.get_json(force=True)
104 | to_return: Dict[str, Union[str, Dict[str, Any]]] = {'meta': query, 'response': {}}
105 | if 'asn' not in query:
106 | to_return['error'] = f'You need to pass an asn - {query}'
107 | return to_return
108 |
109 | to_return['response']['asn_descriptions'] = bgpranking.get_asn_descriptions(**query)['response'] # type: ignore
110 | return to_return
111 |
112 |
113 | # TODO: Add other parameters for get_asn_history
114 | asn_history_fields = api.model('ASNHistoryFields', {
115 |     'asn': fields.String(description='The Autonomous System Number to search', required=True)
116 | })
117 |
118 |
119 | @api.route('/json/asn_history')
120 | class ASNHistory(Resource):
121 |
122 | def get(self):
123 | load_session()
124 | if 'asn' in session:
125 | return bgpranking.get_asn_history(**session)
126 |
127 | @api.doc(body=asn_history_fields)
128 | def post(self):
129 | query: Dict = request.get_json(force=True)
130 | to_return: Dict[str, Union[str, Dict[str, Any]]] = {'meta': query, 'response': {}}
131 | if 'asn' not in query:
132 | to_return['error'] = f'You need to pass an asn - {query}'
133 | return to_return
134 |
135 | to_return['response']['asn_history'] = bgpranking.get_asn_history(**query)['response'] # type: ignore
136 | return to_return
137 |
138 |
139 | # TODO: Add other parameters for country_history
140 | country_history_fields = api.model('CountryHistoryFields', {
141 | 'country': fields.String(description='The Country Code', required=True)
142 | })
143 |
144 |
145 | @api.route('/json/country_history')
146 | class CountryHistory(Resource):
147 |
148 | def get(self):
149 | load_session()
150 | return bgpranking.country_history(**session)
151 |
152 |     @api.doc(body=country_history_fields)
153 | def post(self):
154 | query: Dict = request.get_json(force=True)
155 | to_return: Dict[str, Union[str, Dict[str, Any]]] = {'meta': query, 'response': {}}
156 | to_return['response']['country_history'] = bgpranking.country_history(**query)['response'] # type: ignore
157 | return to_return
158 |
159 |
160 | # TODO: Add other parameters for asns_global_ranking
161 | asns_global_ranking_fields = api.model('ASNsGlobalRankingFields', {
162 | 'date': fields.String(description='The date')
163 | })
164 |
165 |
166 | @api.route('/json/asns_global_ranking')
167 | class ASNsGlobalRanking(Resource):
168 |
169 | @api.doc(body=asns_global_ranking_fields)
170 | def post(self):
171 | query: Dict = request.get_json(force=True)
172 | to_return: Dict[str, Union[str, Dict[str, Any]]] = {'meta': query, 'response': {}}
173 | to_return['response'] = bgpranking.asns_global_ranking(**query)['response']
174 | return to_return
175 |
--------------------------------------------------------------------------------
/website/web/helpers.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import os
5 |
6 | from datetime import date, timedelta
7 | from functools import lru_cache
8 | from pathlib import Path
9 |
10 | import pycountry
11 |
12 | from flask import request, session
13 |
14 | from bgpranking.default import get_homedir
15 |
16 |
17 | def src_request_ip(request) -> str:
18 | # NOTE: X-Real-IP is the IP passed by the reverse proxy in the headers.
19 | real_ip = request.headers.get('X-Real-IP')
20 | if not real_ip:
21 | real_ip = request.remote_addr
22 | return real_ip
23 |
24 |
25 | @lru_cache(64)
26 | def get_secret_key() -> bytes:
27 | secret_file_path: Path = get_homedir() / 'secret_key'
28 |     if not secret_file_path.exists() or secret_file_path.stat().st_size < 64:
29 |         # Generate a fresh key if it is missing or too short.
30 |         with secret_file_path.open('wb') as f:
31 |             f.write(os.urandom(64))
32 |     with secret_file_path.open('rb') as f:
33 |         return f.read()
34 |
35 |
36 | def load_session():
37 | if request.method == 'POST':
38 | d = request.form
39 | elif request.method == 'GET':
40 | d = request.args # type: ignore
41 |
42 | for key in d:
43 | if '_all' in d.getlist(key):
44 | session.pop(key, None)
45 | else:
46 | values = [v for v in d.getlist(key) if v]
47 | if values:
48 | if len(values) == 1:
49 | session[key] = values[0]
50 | else:
51 | session[key] = values
52 |
53 | # Edge cases
54 | if 'asn' in session:
55 | session.pop('country', None)
56 | elif 'country' in session:
57 | session.pop('asn', None)
58 | if 'date' not in session:
59 | session['date'] = (date.today() - timedelta(days=1)).isoformat()
60 |
61 |
62 | def get_country_codes():
63 | for c in pycountry.countries:
64 | yield c.alpha_2, c.name
65 |
--------------------------------------------------------------------------------
/website/web/proxied.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | from typing import Any, MutableMapping
4 |
5 |
6 | class ReverseProxied():
7 | def __init__(self, app: Any) -> None:
8 | self.app = app
9 |
10 | def __call__(self, environ: MutableMapping[str, Any], start_response: Any) -> Any:
11 | scheme = environ.get('HTTP_X_FORWARDED_PROTO')
12 | if not scheme:
13 | scheme = environ.get('HTTP_X_SCHEME')
14 |
15 | if scheme:
16 | environ['wsgi.url_scheme'] = scheme
17 | return self.app(environ, start_response)
18 |
--------------------------------------------------------------------------------
/website/web/static/forkme_right_darkblue_121621.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/D4-project/BGP-Ranking/5f4e051f940b1c79ec5c6b432db1c7b2bdcbc781/website/web/static/forkme_right_darkblue_121621.png
--------------------------------------------------------------------------------
/website/web/static/linegraph.css:
--------------------------------------------------------------------------------
1 |
2 | body { font: 12px Arial;}
3 |
4 | path {
5 | stroke: steelblue;
6 | stroke-width: 2;
7 | fill: none;
8 | }
9 |
10 | .axis path,
11 | .axis line {
12 | fill: none;
13 | stroke: grey;
14 | stroke-width: 1;
15 | shape-rendering: crispEdges;
16 | }
17 |
--------------------------------------------------------------------------------
/website/web/static/linegraph.js:
--------------------------------------------------------------------------------
1 | function linegraph(call_path) {
2 | var canvas = document.querySelector("canvas"),
3 | context = canvas.getContext("2d");
4 |
5 | // set the dimensions and margins of the graph
6 | var margin = {top: 20, right: 20, bottom: 30, left: 50},
7 | width = canvas.width - margin.left - margin.right,
8 | height = canvas.height - margin.top - margin.bottom;
9 |
10 | // parse the date / time
11 | var parseTime = d3.timeParse("%Y-%m-%d");
12 |
13 | // set the ranges
14 | var x = d3.scaleTime().range([0, width]);
15 | var y = d3.scaleLinear().range([height, 0]);
16 |
17 | // define the line
18 | var line = d3.line()
19 | .x(function(d) { return x(parseTime(d[0])); })
20 | .y(function(d) { return y(d[1]); })
21 | .curve(d3.curveStep)
22 | .context(context);
23 |
24 | context.translate(margin.left, margin.top);
25 |
26 | // Get the data
27 | d3.json(call_path, {credentials: 'same-origin'}).then(function(data) {
28 | x.domain(d3.extent(data.response, function(d) { return parseTime(d[0]); }));
29 | y.domain(d3.extent(data.response, function(d) { return d[1]; }));
30 |
31 | xAxis();
32 | yAxis();
33 |
34 | context.beginPath();
35 | line(data.response);
36 | context.lineWidth = 1.5;
37 | context.strokeStyle = "steelblue";
38 | context.stroke();
39 | });
40 |
41 | function xAxis() {
42 | var tickCount = 10,
43 | tickSize = .1,
44 | ticks = x.ticks(tickCount),
45 | tickFormat = x.tickFormat();
46 |
47 | context.beginPath();
48 | ticks.forEach(function(d) {
49 | context.moveTo(x(d), height);
50 | context.lineTo(x(d), height + tickSize);
51 | });
52 | context.strokeStyle = "black";
53 | context.stroke();
54 |
55 | context.textAlign = "center";
56 | context.textBaseline = "top";
57 | ticks.forEach(function(d) {
58 | context.fillText(tickFormat(d), x(d), height + tickSize);
59 | });
60 | }
61 |
62 | function yAxis() {
63 | var tickCount = 20,
64 | tickSize = 1,
65 | tickPadding = 1,
66 | ticks = y.ticks(tickCount),
67 | tickFormat = y.tickFormat(tickCount);
68 |
69 | context.beginPath();
70 | ticks.forEach(function(d) {
71 | context.moveTo(0, y(d));
72 | context.lineTo(-6, y(d));
73 | });
74 | context.strokeStyle = "black";
75 | context.stroke();
76 |
77 | context.beginPath();
78 | context.moveTo(-tickSize, 0);
79 | context.lineTo(0.5, 0);
80 | context.lineTo(0.5, height);
81 | context.lineTo(-tickSize, height);
82 | context.strokeStyle = "black";
83 | context.stroke();
84 |
85 | context.textAlign = "right";
86 | context.textBaseline = "middle";
87 | ticks.forEach(function(d) {
88 | context.fillText(tickFormat(d), -tickSize - tickPadding, y(d));
89 | });
90 |
91 | context.save();
92 | context.rotate(-Math.PI / 2);
93 | context.textAlign = "right";
94 | context.textBaseline = "top";
95 | context.font = "bold 10px sans-serif";
96 | context.fillText("Rank", -10, 10);
97 | context.restore();
98 | }
99 | }
100 |
--------------------------------------------------------------------------------
/website/web/static/linegraph_country.css:
--------------------------------------------------------------------------------
1 | .axis--x path {
2 | display: none;
3 | }
4 |
5 | .line {
6 | fill: none;
7 | stroke: steelblue;
8 | stroke-width: 1.5px;
9 | }
10 |
--------------------------------------------------------------------------------
/website/web/static/linegraph_country.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | function linegraph(call_path) {
4 | var svg = d3.select("svg"),
5 | margin = {top: 20, right: 80, bottom: 30, left: 50},
6 | width = svg.attr("width") - margin.left - margin.right,
7 | height = svg.attr("height") - margin.top - margin.bottom,
8 | g = svg.append("g").attr("transform", "translate(" + margin.left + "," + margin.top + ")");
9 |
10 | var parseTime = d3.timeParse("%Y-%m-%d");
11 |
12 | var x = d3.scaleTime().range([0, width]),
13 | y = d3.scaleLinear().range([height, 0]),
14 | z = d3.scaleOrdinal(d3.schemeCategory10);
15 |
16 | var line = d3.line()
17 | .curve(d3.curveLinear)
18 | .x(function(d) { return x(d.date); })
19 | .y(function(d) { return y(d.rank); });
20 |
21 | d3.json(call_path, {credentials: 'same-origin'}).then(data => {
22 | var country_ranks = $.map(data.response, function(value, key) {
23 | return {
24 | country: key,
25 | values: $.map(value, function(d) {
26 | return {date: parseTime(d[0]), rank: d[1]};
27 | })
28 | };
29 | });
30 |
31 | x.domain(d3.extent(country_ranks[0].values, function(d) { return d.date; }));
32 | y.domain([
33 | d3.min(country_ranks, function(c) { return d3.min(c.values, function(d) { return d.rank; }); }),
34 | d3.max(country_ranks, function(c) { return d3.max(c.values, function(d) { return d.rank; }); })
35 | ]);
36 |
37 | z.domain(country_ranks.map(function(c) { return c.country; }));
38 |
39 | g.append("g")
40 | .attr("class", "axis axis--x")
41 | .attr("transform", "translate(0," + height + ")")
42 | .call(d3.axisBottom(x));
43 |
44 | g.append("g")
45 | .attr("class", "axis axis--y")
46 | .call(d3.axisLeft(y))
47 | .append("text")
48 | .attr("transform", "rotate(-90)")
49 | .attr("y", 6)
50 | .attr("dy", "0.71em")
51 | .attr("fill", "#000")
52 | .text("Rank");
53 |
54 | var country = g.selectAll(".country")
55 | .data(country_ranks)
56 | .enter().append("g")
57 | .attr("class", "country");
58 |
59 | country.append("path")
60 | .attr("class", "line")
61 | .attr("d", function(d) { return line(d.values); })
62 | .style("stroke", function(d) { return z(d.country); });
63 |
64 | country.append("text")
65 | .datum(function(d) { return {id: d.country, value: d.values[d.values.length - 1]}; })
66 | .attr("transform", function(d) { return "translate(" + x(d.value.date) + "," + y(d.value.rank) + ")"; })
67 | .attr("x", 3)
68 | .attr("dy", "0.35em")
69 | .style("font", "10px sans-serif")
70 | .text(function(d) { return d.id; });
71 |
72 | d3.text('/country_history_callback',
73 | {credentials: 'same-origin',
74 | method: 'POST',
75 | body: JSON.stringify(data.response),
76 | })
77 | .then(function(data) {
78 | d3.select('#asn_details').html(data);
79 | });
80 | });
81 | };
82 |
--------------------------------------------------------------------------------
/website/web/templates/asn.html:
--------------------------------------------------------------------------------
1 | {% extends "main.html" %}
2 |
3 | {% block head %}
4 | {{ super() }}
5 | {% endblock %}
6 |
7 |
8 | {% block title %}
9 | Ranking - {{ asn }}
10 | {% endblock %}
11 |
12 | {% block scripts %}
13 | {{ super() }}
14 |
15 |
16 | {% endblock %}
17 |
18 | {% block content %}
19 |
20 | Ranking - {{asn}}
21 |
26 |
27 | {% include ['top_forms.html'] %}
28 |
29 |
30 | Timestamp
31 | ASN Description
32 |
33 | {% for timestamp in asn_descriptions.keys()|sort %}
34 |