├── .gitignore ├── LICENSE ├── README.md ├── bgpranking ├── __init__.py ├── bgpranking.py ├── default │ ├── __init__.py │ ├── abstractmanager.py │ ├── exceptions.py │ └── helpers.py ├── helpers.py ├── parsers │ ├── __init__.py │ ├── abusech.py │ ├── abusech_feodo.py │ ├── abusech_threatfox.py │ ├── dshield.py │ ├── malc0de.py │ ├── nothink.py │ └── shadowserver.py └── statsripe.py ├── bin ├── __init__.py ├── archiver.py ├── asn_descriptions.py ├── dbinsert.py ├── fetcher.py ├── manual_ranking.py ├── parser.py ├── ranking.py ├── run_backend.py ├── sanitizer.py ├── shutdown.py ├── ssfetcher.py ├── start.py ├── start_website.py ├── stop.py └── update.py ├── cache ├── cache.conf ├── run_redis.sh └── shutdown_redis.sh ├── config ├── generic.json.sample ├── modules │ ├── Alienvault.json │ ├── BlocklistDeApache.json │ ├── BlocklistDeBots.json │ ├── BlocklistDeFTP.json │ ├── BlocklistDeIMAP.json │ ├── BlocklistDeMail.json │ ├── BlocklistDeSIP.json │ ├── BlocklistDeSSH.json │ ├── BlocklistDeStrong.json │ ├── CIArmy.json │ ├── CleanMXMalwares.json │ ├── CleanMXPhishing.json │ ├── CleanMXPortals.json │ ├── CoinBlockerLists.json │ ├── DshieldDaily.json │ ├── DshieldTopIPs.json │ ├── EmergingThreatsCompromized.json │ ├── FeodotrackerIPBlockList.json │ ├── Malc0de.json │ ├── MalwareDomainListIP.json │ ├── SSLBlacklist.json │ ├── ThreatFoxIOC.json │ ├── greensnow.json │ ├── jq_all_the_things.sh │ ├── module.schema │ ├── pop3gropers.json │ ├── shadowserver_only.sh │ └── validate_all.sh └── shadowserver.json.sample ├── poetry.lock ├── pyproject.toml ├── ranking ├── kvrocks.conf └── run_kvrocks.sh ├── setup.py ├── storage ├── kvrocks.conf └── run_kvrocks.sh ├── temp ├── intake.conf ├── prepare.conf ├── run_redis.sh └── shutdown_redis.sh ├── tools ├── 3rdparty.py ├── clear_prepare_db.py ├── migrate.py ├── monitoring.py └── validate_config_files.py └── website ├── __init__.py ├── readme.md └── web ├── __init__.py ├── genericapi.py ├── helpers.py ├── proxied.py ├── static ├── forkme_right_darkblue_121621.png ├── linegraph.css ├── linegraph.js ├── linegraph_country.css └── linegraph_country.js └── templates ├── asn.html ├── country.html ├── country_asn_map.html ├── index.html ├── ipasn.html ├── main.html └── top_forms.html /.gitignore: -------------------------------------------------------------------------------- 1 | # Local exclude 2 | scraped/ 3 | *.swp 4 | lookyloo/ete3_webserver/webapi.py 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | env/ 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | .hypothesis/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # dotenv 88 | .env 89 | 90 | # virtualenv 91 | .venv 92 | venv/ 93 | ENV/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | 108 | 109 | # web 110 | secret_key 111 | 112 | cache.pid 113 | *.rdb 114 | 115 | # Local config files 116 | config/*.json 117 | config/*.json.bkp 118 | 119 | rawdata 120 | 121 | storage/db/ 122 | storage/kvrocks* 123 | ranking/db/ 124 | ranking/kvrocks* 125 | website/web/static/d3.*.js 126 | website/web/static/bootstrap-select.min.* 127 | 128 | *.pid 129 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU AFFERO GENERAL PUBLIC LICENSE 2 | Version 3, 19 November 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU Affero General Public License is a free, copyleft license for 11 | software and other kinds of works, specifically designed to ensure 12 | cooperation with the community in the case of network server software. 13 | 14 | The licenses for most software and other practical works are designed 15 | to take away your freedom to share and change the works. By contrast, 16 | our General Public Licenses are intended to guarantee your freedom to 17 | share and change all versions of a program--to make sure it remains free 18 | software for all its users. 19 | 20 | When we speak of free software, we are referring to freedom, not 21 | price. Our General Public Licenses are designed to make sure that you 22 | have the freedom to distribute copies of free software (and charge for 23 | them if you wish), that you receive source code or can get it if you 24 | want it, that you can change the software or use pieces of it in new 25 | free programs, and that you know you can do these things. 26 | 27 | Developers that use our General Public Licenses protect your rights 28 | with two steps: (1) assert copyright on the software, and (2) offer 29 | you this License which gives you legal permission to copy, distribute 30 | and/or modify the software. 31 | 32 | A secondary benefit of defending all users' freedom is that 33 | improvements made in alternate versions of the program, if they 34 | receive widespread use, become available for other developers to 35 | incorporate. Many developers of free software are heartened and 36 | encouraged by the resulting cooperation. 
However, in the case of 37 | software used on network servers, this result may fail to come about. 38 | The GNU General Public License permits making a modified version and 39 | letting the public access it on a server without ever releasing its 40 | source code to the public. 41 | 42 | The GNU Affero General Public License is designed specifically to 43 | ensure that, in such cases, the modified source code becomes available 44 | to the community. It requires the operator of a network server to 45 | provide the source code of the modified version running there to the 46 | users of that server. Therefore, public use of a modified version, on 47 | a publicly accessible server, gives the public access to the source 48 | code of the modified version. 49 | 50 | An older license, called the Affero General Public License and 51 | published by Affero, was designed to accomplish similar goals. This is 52 | a different license, not a version of the Affero GPL, but Affero has 53 | released a new version of the Affero GPL which permits relicensing under 54 | this license. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | TERMS AND CONDITIONS 60 | 61 | 0. Definitions. 62 | 63 | "This License" refers to version 3 of the GNU Affero General Public License. 64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. "Object code" means any non-source 104 | form of a work. 
105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 
162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 
222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. Conveying Non-Source Forms. 234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 
284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. 
If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 374 | those licensors and authors. 375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 
402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. 
The 463 | work thus licensed is called the contributor's "contributor version". 464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. "Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. 
You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. 
If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 611 | 612 | If the disclaimer of warranty and limitation of liability provided 613 | above cannot be given local legal effect according to their terms, 614 | reviewing courts shall apply local law that most closely approximates 615 | an absolute waiver of all civil liability in connection with the 616 | Program, unless a warranty or assumption of liability accompanies a 617 | copy of the Program in return for a fee. 618 | 619 | END OF TERMS AND CONDITIONS 620 | 621 | How to Apply These Terms to Your New Programs 622 | 623 | If you develop a new program, and you want it to be of the greatest 624 | possible use to the public, the best way to achieve this is to make it 625 | free software which everyone can redistribute and change under these terms. 626 | 627 | To do so, attach the following notices to the program. It is safest 628 | to attach them to the start of each source file to most effectively 629 | state the exclusion of warranty; and each file should have at least 630 | the "copyright" line and a pointer to where the full notice is found. 
631 | 632 | <one line to give the program's name and a brief idea of what it does.> 633 | Copyright (C) <year> <name of author> 634 | 635 | This program is free software: you can redistribute it and/or modify 636 | it under the terms of the GNU Affero General Public License as published 637 | by the Free Software Foundation, either version 3 of the License, or 638 | (at your option) any later version. 639 | 640 | This program is distributed in the hope that it will be useful, 641 | but WITHOUT ANY WARRANTY; without even the implied warranty of 642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 643 | GNU Affero General Public License for more details. 644 | 645 | You should have received a copy of the GNU Affero General Public License 646 | along with this program. If not, see <https://www.gnu.org/licenses/>. 647 | 648 | Also add information on how to contact you by electronic and paper mail. 649 | 650 | If your software can interact with users remotely through a computer 651 | network, you should also make sure that it provides a way for users to 652 | get its source. For example, if your program is a web application, its 653 | interface could display a "Source" link that leads users to an archive 654 | of the code. There are many ways you could offer source, and different 655 | solutions will be better for different programs; see section 13 for the 656 | specific requirements. 657 | 658 | You should also get your employer (if you work as a programmer) or school, 659 | if any, to sign a "copyright disclaimer" for the program, if necessary. 660 | For more information on this, and how to apply and follow the GNU AGPL, see 661 | <https://www.gnu.org/licenses/>. 662 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BGP Ranking 2 | 3 | For an Internet Service Provider (ISP), AS numbers are a logical representation of 4 | the other ISPs peering or communicating with its autonomous system. ISP customers 5 | use the capacity of the Internet Service Provider to reach Internet 6 | services over other ASes. Some of those communications can be malicious (e.g. due 7 | to malware activity on end-user equipment) and hosted at specific AS locations. 8 | 9 | In order to provide an improved security view of those AS numbers, a trust ranking 10 | scheme is implemented based on existing datasets of compromised systems, 11 | malware C&C IPs and similar sources. BGP Ranking provides a way to collect 12 | such malicious activities, aggregate the information per ASN and provide a ranking 13 | model to rank the ASNs from the most malicious to the least malicious. 14 | 15 | The official website of the project is: [https://github.com/D4-project/bgp-ranking/](https://github.com/D4-project/bgp-ranking/) 16 | 17 | There is a public BGP Ranking at [http://bgpranking.circl.lu/](http://bgpranking.circl.lu/) 18 | 19 | BGP Ranking is free software licensed under the GNU Affero General Public License. 20 | 21 | BGP Ranking is software that ranks AS numbers based on their malicious activities. 22 | 23 | # Python client 24 | 25 | ```bash 26 | $ pip install git+https://github.com/D4-project/BGP-Ranking.git/#egg=pybgpranking\&subdirectory=client 27 | $ bgpranking --help 28 | usage: bgpranking [-h] [--url URL] (--asn ASN | --ip IP) 29 | 30 | Run a query against BGP Ranking 31 | 32 | optional arguments: 33 | -h, --help show this help message and exit 34 | --url URL URL of the instance.
35 | --asn ASN ASN to lookup 36 | --ip IP IP to lookup 37 | ``` 38 | 39 | ## History 40 | 41 | - The first version of BGP Ranking was done in 2010 by [Raphael Vinot](https://github.com/Rafiot) with the support of [Alexandre Dulaunoy](https://github.com/adulau/). 42 | CIRCL supported the project from the very beginning and set up an online version to share information about the malicious ranking of ISPs. 43 | 44 | - In late 2018, within the scope of the D4 Project (a CIRCL project co-funded by INEA under the CEF Telecom program), a new version of BGP Ranking was completely rewritten in Python 3.6+ with an ARDB back-end. 45 | 46 | - In January 2022, BGP Ranking version 2.0 was released, including a new backend based on [kvrocks](https://github.com/KvrocksLabs/kvrocks) and many improvements. 47 | 48 | # Online service 49 | 50 | The BGP Ranking service is available online at [http://bgpranking.circl.lu/](http://bgpranking.circl.lu/). 51 | 52 | A Python library and client software are [available](https://github.com/D4-project/BGP-Ranking/tree/master/client), using the default API available from bgpranking.circl.lu. 53 | 54 | # CURL Example 55 | 56 | ## Get the ASN from an IP or a prefix 57 | ```bash 58 | curl https://bgpranking-ng.circl.lu/ipasn_history/?ip=143.255.153.0/24 59 | ``` 60 | 61 | ## Response 62 | 63 | ```json 64 | { 65 | "meta": { 66 | "address_family": "v4", 67 | "ip": "143.255.153.0/24", 68 | "source": "caida" 69 | }, 70 | "response": { 71 | "2019-05-19T12:00:00": { 72 | "asn": "264643", 73 | "prefix": "143.255.153.0/24" 74 | } 75 | } 76 | } 77 | ``` 78 | 79 | ## Get the ranking of the AS 80 | ``` 81 | curl -X POST -d '{"asn": "5577", "date": "2019-05-19"}' https://bgpranking-ng.circl.lu/json/asn 82 | ``` 83 | 84 | Note: `date` isn't required. 85 | 86 | ### Response 87 | 88 | ```json 89 | { 90 | "meta": { 91 | "asn": "5577" 92 | }, 93 | "response": { 94 | "asn_description": "ROOT, LU", 95 | "ranking": { 96 | "rank": 0.0004720052083333333, 97 | "position": 7084, 98 | "total_known_asns": 15375 99 | } 100 | } 101 | } 102 | ``` 103 | 104 | ## Get historical information for an ASN 105 | 106 | ``` 107 | curl -X POST -d '{"asn": "5577", "period": 5}' https://bgpranking-ng.circl.lu/json/asn_history 108 | ``` 109 | 110 | ### Response 111 | 112 | ```json 113 | { 114 | "meta": { 115 | "asn": "5577", 116 | "period": 5 117 | }, 118 | "response": { 119 | "asn_history": [ 120 | [ 121 | "2019-11-10", 122 | 0.00036458333333333335 123 | ], 124 | [ 125 | "2019-11-11", 126 | 0.00036168981481481485 127 | ], 128 | [ 129 | "2019-11-12", 130 | 0.0003761574074074074 131 | ], 132 | [ 133 | "2019-11-13", 134 | 0.0003530092592592593 135 | ], 136 | [ 137 | "2019-11-14", 138 | 0.0003559027777777778 139 | ] 140 | ] 141 | } 142 | } 143 | ``` 144 | 145 | 146 | # Server Installation (if you want to run your own) 147 | 148 | **IMPORTANT**: Use [poetry](https://github.com/python-poetry/poetry#installation). 149 | 150 | **NOTE**: Yes, it requires Python 3.6+. No, it will never support anything older. 151 | 152 | ## Install redis 153 | 154 | ```bash 155 | git clone https://github.com/antirez/redis.git 156 | cd redis 157 | git checkout 5.0 158 | make 159 | make test 160 | cd .. 161 | ``` 162 | **Note**: If it fails, have a look at [the documentation](https://github.com/redis/redis#building-redis). 163 | 164 | ## Install kvrocks 165 | 166 | ```bash 167 | git clone https://github.com/KvrocksLabs/kvrocks.git 168 | cd kvrocks 169 | git checkout 2.5 170 | ./x.py build 171 | cd ..
172 | ``` 173 | **Note**: If it fails, have a look at [the documentation](https://github.com/apache/kvrocks#build-and-run-kvrocks). 174 | 175 | ## Install & run BGP Ranking 176 | 177 | ```bash 178 | git clone https://github.com/D4-project/BGP-Ranking.git 179 | cd BGP-Ranking 180 | poetry install 181 | echo BGPRANKING_HOME="'`pwd`'" > .env 182 | poetry shell 183 | # Starts all the backend 184 | start 185 | ``` 186 | 187 | ## Shutdown BGP Ranking 188 | 189 | ```bash 190 | stop 191 | ``` 192 | 193 | # Directory structure 194 | 195 | *Config files*: `bgpranking / config / *.json` 196 | 197 | *Per-module parsers*: `bgpranking / parsers` 198 | 199 | *Libraries*: `bgpranking / libs` 200 | 201 | # Raw dataset directory structure 202 | 203 | ## Files to import 204 | 205 | *Note*: The default location of `<storage_directory>` is the root directory of the repo. 206 | 207 | `<storage_directory> / <vendor> / <listname>` 208 | 209 | ## Last modified date (if possible) and lock file 210 | 211 | `<storage_directory> / <vendor> / <listname> / meta` 212 | 213 | ## Imported files less than 2 months old 214 | 215 | `<storage_directory> / <vendor> / <listname> / archive` 216 | 217 | ## Imported files more than 2 months old 218 | 219 | `<storage_directory> / <vendor> / <listname> / archive / deep` 220 | 221 | # Databases 222 | 223 | ## Intake (redis, port 6579) 224 | 225 | *Usage*: All the modules push their entries into this database. 226 | 227 | Creates the following hashes: 228 | 229 | ```python 230 | UUID = {'ip': <ip>, 'source': <source>, 'datetime': <datetime>} 231 | ``` 232 | 233 | Creates a set `intake` for further processing containing all the UUIDs. 234 | 235 | 236 | ## Pre-Insert (redis, port 6580) 237 | 238 | 239 | *Usage*: Make sure the IPs are global and validate the input from the intake module. 240 | 241 | Pop UUIDs from `intake`, get the hashes with that key. 242 | 243 | Creates the following hashes: 244 | 245 | ```python 246 | UUID = {'ip': <ip>, 'source': <source>, 'datetime': <datetime>, 'date': <date>} 247 | ``` 248 | 249 | Creates a set `to_insert` for further processing containing all the UUIDs. 250 | 251 | Creates a set `for_ris_lookup` for lookups against the RIS database. Contains all the IPs. 252 | 253 | ## Routing Information Service cache (redis, port 6581) 254 | 255 | *Usage*: Look up IPs against RIPE's RIS database. 256 | 257 | Pop IPs from `for_ris_lookup`. 258 | 259 | Creates the following hashes: 260 | 261 | ```python 262 | IP = {'asn': <asn>, 'prefix': <prefix>, 'description': <description>} 263 | ``` 264 | 265 | ## Ranking Information cache (redis, port 6582) 266 | 267 | *Usage*: Store the current list of known ASNs at RIPE, and the prefixes originating from them. 268 | 269 | Creates the following sets: 270 | 271 | ```python 272 | asns = set([<asn>, ...]) 273 | <asn>|v4 = set([<prefix>, ...]) 274 | <asn>|v6 = set([<prefix>, ...]) 275 | ``` 276 | 277 | And the following keys: 278 | 279 | ```python 280 | <asn>|v4|ipcount = <number of IPs covered by the v4 prefixes> 281 | <asn>|v6|ipcount = <number of IPs covered by the v6 prefixes> 282 | ``` 283 | 284 | ## Long term storage (kvrocks, port 5188) 285 | 286 | *Usage*: Stores the IPs with the metadata required for ranking. 287 | 288 | Pop UUIDs from `to_insert`, get the hashes with that key. 289 | 290 | Use the IP from that hash to get the RIS information (see the illustrative sketch below).
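For illustration, here is a minimal, hypothetical sketch of that insert step, assuming plain redis-py clients on the ports listed above. The actual implementation lives in `bin/dbinsert.py` and uses the project's config files and Unix sockets; the connection setup and variable names below are assumptions, not the project's code.

```python
import redis

# Hypothetical sketch of the insert step (not the real bin/dbinsert.py code).
prepare = redis.Redis(port=6580, decode_responses=True)    # Pre-Insert database
ris_cache = redis.Redis(port=6581, decode_responses=True)  # RIS lookup cache
storage = redis.Redis(port=5188, decode_responses=True)    # long term storage (kvrocks)

uuid = prepare.spop('to_insert')
if uuid:
    entry = prepare.hgetall(uuid)          # {'ip': ..., 'source': ..., 'datetime': ..., 'date': ...}
    ris = ris_cache.hgetall(entry['ip'])   # {'asn': ..., 'prefix': ..., 'description': ...}
    if ris:
        date, source = entry['date'], entry['source']
        asn, prefix = ris['asn'], ris['prefix']
        pipe = storage.pipeline()
        # Build the per-day key hierarchy documented below.
        pipe.sadd(f'{date}|sources', source)
        pipe.sadd(f'{date}|{source}', asn)
        pipe.sadd(f'{date}|{source}|{asn}', prefix)
        pipe.sadd(f'{date}|{source}|{asn}|{prefix}', f"{entry['ip']}|{entry['datetime']}")
        pipe.execute()
```

The resulting key layout is the one documented below.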
291 | 292 | Creates the following sets: 293 | 294 | ```python 295 | # All the sources, by day 296 | <YYYY-MM-DD>|sources = set([<source>, ...]) 297 | # All the ASNs by source, by day 298 | <YYYY-MM-DD>|<source> -> set([<asn>, ...]) 299 | # All the prefixes, by ASN, by source, by day 300 | <YYYY-MM-DD>|<source>|<asn> -> set([<prefix>, ...]) 301 | # All the tuples (ip, datetime), by prefixes, by ASN, by source, by day 302 | <YYYY-MM-DD>|<source>|<asn>|<prefix> -> set([<ip>|<datetime>, ...]) 303 | ``` 304 | -------------------------------------------------------------------------------- /bgpranking/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D4-project/BGP-Ranking/5f4e051f940b1c79ec5c6b432db1c7b2bdcbc781/bgpranking/__init__.py -------------------------------------------------------------------------------- /bgpranking/bgpranking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import logging 5 | import re 6 | 7 | from redis import ConnectionPool, Redis 8 | from redis.connection import UnixDomainSocketConnection 9 | 10 | from .default import get_config, get_socket_path 11 | 12 | from typing import TypeVar, Union, Optional, Dict, Any, List, Tuple 13 | import datetime 14 | from datetime import timedelta 15 | from dateutil.parser import parse 16 | from collections import defaultdict 17 | 18 | import json 19 | 20 | from .default import InvalidDateFormat 21 | from .helpers import get_modules 22 | from .statsripe import StatsRIPE 23 | 24 | Dates = TypeVar('Dates', datetime.datetime, datetime.date, str) 25 | 26 | 27 | class BGPRanking(): 28 | 29 | def __init__(self) -> None: 30 | self.logger = logging.getLogger(f'{self.__class__.__name__}') 31 | self.logger.setLevel(get_config('generic', 'loglevel')) 32 | 33 | self.cache_pool: ConnectionPool = ConnectionPool(connection_class=UnixDomainSocketConnection, 34 | path=get_socket_path('cache'), decode_responses=True) 35 | 36 | self.storage = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port'), decode_responses=True) 37 | self.asn_meta = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port'), decode_responses=True) 38 | self.ranking = Redis(get_config('generic', 'ranking_db_hostname'), get_config('generic', 'ranking_db_port')) 39 | 40 | @property 41 | def cache(self): 42 | return Redis(connection_pool=self.cache_pool, db=1) 43 | 44 | def check_redis_up(self) -> bool: 45 | return self.cache.ping() 46 | 47 | def __normalize_date(self, date: Optional[Dates]) -> str: 48 | if not date: 49 | return datetime.date.today().isoformat() 50 | if isinstance(date, datetime.datetime): 51 | return date.date().isoformat() 52 | elif isinstance(date, datetime.date): 53 | return date.isoformat() 54 | elif isinstance(date, str): 55 | try: 56 | return parse(date).date().isoformat() 57 | except ValueError: 58 | raise InvalidDateFormat('Unable to parse the date.
Should be YYYY-MM-DD.') 59 | 60 | def _ranking_cache_wrapper(self, key): 61 | if not self.cache.exists(key): 62 | if self.ranking.exists(key): 63 | try: 64 | content: List[Tuple[bytes, float]] = self.ranking.zrangebyscore(key, '-Inf', '+Inf', withscores=True) 65 | # Cache for 10 hours 66 | self.cache.zadd(key, {value: rank for value, rank in content}) 67 | self.cache.expire(key, 36000) 68 | except Exception as e: 69 | self.logger.exception(f'Something went poorly when caching {key}.') 70 | raise e 71 | 72 | def asns_global_ranking(self, date: Optional[Dates]=None, source: Union[list, str]='', 73 | ipversion: str='v4', limit: int=100): 74 | '''Aggregated ranking of all the ASNs known in the system, weighted by source.''' 75 | to_return: Dict[str, Any] = { 76 | 'meta': {'ipversion': ipversion, 'limit': limit}, 77 | 'source': source, 78 | 'response': set() 79 | } 80 | d = self.__normalize_date(date) 81 | to_return['meta']['date'] = d 82 | if source: 83 | if isinstance(source, list): 84 | keys = [] 85 | for s in source: 86 | key = f'{d}|{s}|asns|{ipversion}' 87 | self._ranking_cache_wrapper(key) 88 | keys.append(key) 89 | # union the ranked sets 90 | key = '|'.join(sorted(source)) + f'|{d}|asns|{ipversion}' 91 | if not self.cache.exists(key): 92 | self.cache.zunionstore(key, keys) 93 | else: 94 | key = f'{d}|{source}|asns|{ipversion}' 95 | else: 96 | key = f'{d}|asns|{ipversion}' 97 | self._ranking_cache_wrapper(key) 98 | to_return['response'] = self.cache.zrevrange(key, start=0, end=limit, withscores=True) 99 | return to_return 100 | 101 | def asn_details(self, asn: int, date: Optional[Dates]=None, source: Union[list, str]='', 102 | ipversion: str='v4'): 103 | '''Aggregated ranking of all the prefixes anounced by the given ASN, weighted by source.''' 104 | to_return: Dict[str, Any] = { 105 | 'meta': {'asn': asn, 'ipversion': ipversion, 'source': source}, 106 | 'response': set() 107 | } 108 | 109 | d = self.__normalize_date(date) 110 | to_return['meta']['date'] = d 111 | if source: 112 | if isinstance(source, list): 113 | keys = [] 114 | for s in source: 115 | key = f'{d}|{s}|{asn}|{ipversion}|prefixes' 116 | self._ranking_cache_wrapper(key) 117 | keys.append(key) 118 | # union the ranked sets 119 | key = '|'.join(sorted(source)) + f'|{d}|{asn}|{ipversion}' 120 | if not self.cache.exists(key): 121 | self.cache.zunionstore(key, keys) 122 | else: 123 | key = f'{d}|{source}|{asn}|{ipversion}|prefixes' 124 | else: 125 | key = f'{d}|{asn}|{ipversion}' 126 | self._ranking_cache_wrapper(key) 127 | to_return['response'] = self.cache.zrevrange(key, start=0, end=-1, withscores=True) 128 | return to_return 129 | 130 | def asn_rank(self, asn: int, date: Optional[Dates]=None, source: Union[list, str]='', 131 | ipversion: str='v4', with_position: bool=False): 132 | '''Get the rank of a single ASN, weighted by source.''' 133 | to_return: Dict[str, Any] = { 134 | 'meta': {'asn': asn, 'ipversion': ipversion, 135 | 'source': source, 'with_position': with_position}, 136 | 'response': 0.0 137 | } 138 | 139 | d = self.__normalize_date(date) 140 | to_return['meta']['date'] = d 141 | if source: 142 | to_return['meta']['source'] = source 143 | if isinstance(source, list): 144 | keys = [] 145 | for s in source: 146 | key = f'{d}|{s}|{asn}|{ipversion}' 147 | self._ranking_cache_wrapper(key) 148 | keys.append(key) 149 | r = sum(float(self.cache.get(key)) for key in keys if self.cache.exists(key)) 150 | else: 151 | key = f'{d}|{source}|{asn}|{ipversion}' 152 | self._ranking_cache_wrapper(key) 153 | r = 
self.cache.get(key) 154 | else: 155 | key = f'{d}|asns|{ipversion}' 156 | self._ranking_cache_wrapper(key) 157 | r = self.cache.zscore(key, asn) 158 | if not r: 159 | r = 0 160 | if with_position and not source: 161 | position = self.cache.zrevrank(key, asn) 162 | if position is not None: 163 | position += 1 164 | to_return['response'] = {'rank': float(r), 'position': position, 165 | 'total_known_asns': self.cache.zcard(key)} 166 | else: 167 | to_return['response'] = float(r) 168 | return to_return 169 | 170 | def get_sources(self, date: Optional[Dates]=None): 171 | '''Get the sources availables for a specific day (default: today).''' 172 | to_return: Dict[str, Any] = {'meta': {}, 'response': set()} 173 | 174 | d = self.__normalize_date(date) 175 | to_return['meta']['date'] = d 176 | key = f'{d}|sources' 177 | to_return['response'] = self.storage.smembers(key) 178 | return to_return 179 | 180 | def get_asn_descriptions(self, asn: int, all_descriptions=False) -> Dict[str, Any]: 181 | to_return: Dict[str, Union[Dict, List, str]] = { 182 | 'meta': {'asn': asn, 'all_descriptions': all_descriptions}, 183 | 'response': [] 184 | } 185 | descriptions = self.asn_meta.hgetall(f'{asn}|descriptions') 186 | if all_descriptions or not descriptions: 187 | to_return['response'] = descriptions 188 | else: 189 | to_return['response'] = descriptions[sorted(descriptions.keys(), reverse=True)[0]] 190 | return to_return 191 | 192 | def get_prefix_ips(self, asn: int, prefix: str, date: Optional[Dates]=None, 193 | source: Union[list, str]='', ipversion: str='v4'): 194 | to_return: Dict[str, Any] = { 195 | 'meta': {'asn': asn, 'prefix': prefix, 'ipversion': ipversion, 196 | 'source': source}, 197 | 'response': defaultdict(list) 198 | } 199 | 200 | d = self.__normalize_date(date) 201 | to_return['meta']['date'] = d 202 | 203 | if source: 204 | to_return['meta']['source'] = source 205 | if isinstance(source, list): 206 | sources = source 207 | else: 208 | sources = [source] 209 | else: 210 | sources = self.get_sources(d)['response'] 211 | 212 | for source in sources: 213 | ips = set([ip_ts.split('|')[0] 214 | for ip_ts in self.storage.smembers(f'{d}|{source}|{asn}|{prefix}')]) 215 | [to_return['response'][ip].append(source) for ip in ips] 216 | return to_return 217 | 218 | def get_asn_history(self, asn: int, period: int=100, source: Union[list, str]='', 219 | ipversion: str='v4', date: Optional[Dates]=None): 220 | to_return: Dict[str, Any] = { 221 | 'meta': {'asn': asn, 'period': period, 'ipversion': ipversion, 'source': source}, 222 | 'response': [] 223 | } 224 | 225 | if date is None: 226 | python_date: datetime.date = datetime.date.today() 227 | elif isinstance(date, str): 228 | python_date = parse(date).date() 229 | elif isinstance(date, datetime.datetime): 230 | python_date = date.date() 231 | else: 232 | python_date = date 233 | 234 | to_return['meta']['date'] = python_date.isoformat() 235 | 236 | for i in range(period): 237 | d = python_date - timedelta(days=i) 238 | rank = self.asn_rank(asn, d, source, ipversion) 239 | if 'response' not in rank: 240 | rank = 0 241 | to_return['response'].insert(0, (d.isoformat(), rank['response'])) 242 | return to_return 243 | 244 | def country_rank(self, country: str, date: Optional[Dates]=None, source: Union[list, str]='', 245 | ipversion: str='v4'): 246 | to_return: Dict[str, Any] = { 247 | 'meta': {'country': country, 'ipversion': ipversion, 'source': source}, 248 | 'response': [] 249 | } 250 | 251 | d = self.__normalize_date(date) 252 | to_return['meta']['date'] = d 253 
| 254 | ripe = StatsRIPE() 255 | response = ripe.country_asns(country, query_time=d, details=1) 256 | if (not response.get('data') or not response['data'].get('countries') or not 257 | response['data']['countries'][0].get('routed')): 258 | logging.warning(f'Invalid response: {response}') 259 | return {'meta': to_return['meta'], 'response': [0, [(0, 0)]]} 260 | routed_asns = re.findall(r"AsnSingle\(([\d]*)\)", response['data']['countries'][0]['routed']) 261 | ranks = [self.asn_rank(asn, d, source, ipversion)['response'] for asn in routed_asns] 262 | to_return['response'] = [sum(ranks), zip(routed_asns, ranks)] 263 | return to_return 264 | 265 | def country_history(self, country: Union[list, str], period: int=30, source: Union[list, str]='', 266 | ipversion: str='v4', date: Optional[Dates]=None): 267 | to_return: Dict[str, Any] = { 268 | 'meta': {'country': country, 'ipversion': ipversion, 'source': source}, 269 | 'response': defaultdict(list) 270 | } 271 | if date is None: 272 | python_date: datetime.date = datetime.date.today() 273 | elif isinstance(date, str): 274 | python_date = parse(date).date() 275 | elif isinstance(date, datetime.datetime): 276 | python_date = date.date() 277 | else: 278 | python_date = date 279 | 280 | if isinstance(country, str): 281 | country = [country] 282 | for c in country: 283 | for i in range(period): 284 | d = python_date - timedelta(days=i) 285 | rank, details = self.country_rank(c, d, source, ipversion)['response'] 286 | if rank is None: 287 | rank = 0 288 | to_return['response'][c].insert(0, (d.isoformat(), rank, list(details))) 289 | return to_return 290 | 291 | def get_source_config(self): 292 | pass 293 | 294 | def get_sources_configs(self): 295 | loaded = [] 296 | for modulepath in get_modules(): 297 | with open(modulepath) as f: 298 | loaded.append(json.load(f)) 299 | return {'{}-{}'.format(config['vendor'], config['name']): config for config in loaded} 300 | -------------------------------------------------------------------------------- /bgpranking/default/__init__.py: -------------------------------------------------------------------------------- 1 | env_global_name: str = 'BGPRANKING_HOME' 2 | 3 | from .exceptions import (BGPRankingException, FetcherException, ArchiveException, # noqa 4 | CreateDirectoryException, MissingEnv, InvalidDateFormat, # noqa 5 | MissingConfigFile, MissingConfigEntry, ThirdPartyUnreachable) # noqa 6 | 7 | # NOTE: the imports below are there to avoid overly long import paths when using the 8 | # classes/methods in the rest of the project, while keeping all of that in a subdirectory 9 | # and allowing them to be updated easily. 10 | # You should not have to change anything in this file below this line.
11 | 12 | from .abstractmanager import AbstractManager # noqa 13 | 14 | from .exceptions import MissingEnv, CreateDirectoryException, ConfigError # noqa 15 | 16 | from .helpers import get_homedir, load_configs, get_config, safe_create_dir, get_socket_path, try_make_file # noqa 17 | -------------------------------------------------------------------------------- /bgpranking/default/abstractmanager.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import asyncio 5 | import logging 6 | import signal 7 | import time 8 | from abc import ABC 9 | from datetime import datetime, timedelta 10 | from subprocess import Popen 11 | from typing import List, Optional, Tuple 12 | 13 | from redis import Redis 14 | from redis.exceptions import ConnectionError 15 | 16 | from .helpers import get_socket_path 17 | 18 | 19 | class AbstractManager(ABC): 20 | 21 | script_name: str 22 | 23 | def __init__(self, loglevel: int=logging.DEBUG): 24 | self.loglevel = loglevel 25 | self.logger = logging.getLogger(f'{self.__class__.__name__}') 26 | self.logger.setLevel(loglevel) 27 | self.logger.info(f'Initializing {self.__class__.__name__}') 28 | self.process: Optional[Popen] = None 29 | self.__redis = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) 30 | 31 | @staticmethod 32 | def is_running() -> List[Tuple[str, float]]: 33 | try: 34 | r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) 35 | return r.zrangebyscore('running', '-inf', '+inf', withscores=True) 36 | except ConnectionError: 37 | print('Unable to connect to redis, the system is down.') 38 | return [] 39 | 40 | @staticmethod 41 | def force_shutdown(): 42 | try: 43 | r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) 44 | r.set('shutdown', 1) 45 | except ConnectionError: 46 | print('Unable to connect to redis, the system is down.') 47 | 48 | def set_running(self) -> None: 49 | self.__redis.zincrby('running', 1, self.script_name) 50 | 51 | def unset_running(self) -> None: 52 | current_running = self.__redis.zincrby('running', -1, self.script_name) 53 | if int(current_running) <= 0: 54 | self.__redis.zrem('running', self.script_name) 55 | 56 | def long_sleep(self, sleep_in_sec: int, shutdown_check: int=10) -> bool: 57 | if shutdown_check > sleep_in_sec: 58 | shutdown_check = sleep_in_sec 59 | sleep_until = datetime.now() + timedelta(seconds=sleep_in_sec) 60 | while sleep_until > datetime.now(): 61 | time.sleep(shutdown_check) 62 | if self.shutdown_requested(): 63 | return False 64 | return True 65 | 66 | async def long_sleep_async(self, sleep_in_sec: int, shutdown_check: int=10) -> bool: 67 | if shutdown_check > sleep_in_sec: 68 | shutdown_check = sleep_in_sec 69 | sleep_until = datetime.now() + timedelta(seconds=sleep_in_sec) 70 | while sleep_until > datetime.now(): 71 | await asyncio.sleep(shutdown_check) 72 | if self.shutdown_requested(): 73 | return False 74 | return True 75 | 76 | def shutdown_requested(self) -> bool: 77 | try: 78 | return True if self.__redis.exists('shutdown') else False 79 | except ConnectionRefusedError: 80 | return True 81 | except ConnectionError: 82 | return True 83 | 84 | def _to_run_forever(self) -> None: 85 | pass 86 | 87 | def run(self, sleep_in_sec: int) -> None: 88 | self.logger.info(f'Launching {self.__class__.__name__}') 89 | try: 90 | while True: 91 | if self.shutdown_requested(): 92 | break 93 | try: 94 | if self.process: 95 | if 
self.process.poll() is not None: 96 | self.logger.critical(f'Unable to start {self.script_name}.') 97 | break 98 | else: 99 | self.set_running() 100 | self._to_run_forever() 101 | except Exception: 102 | self.logger.exception(f'Something went terribly wrong in {self.__class__.__name__}.') 103 | finally: 104 | if not self.process: 105 | # self.process means we run an external script, all the time, 106 | # do not unset between sleep. 107 | self.unset_running() 108 | if not self.long_sleep(sleep_in_sec): 109 | break 110 | except KeyboardInterrupt: 111 | self.logger.warning(f'{self.script_name} killed by user.') 112 | finally: 113 | if self.process: 114 | try: 115 | # Killing everything if possible. 116 | self.process.send_signal(signal.SIGWINCH) 117 | self.process.send_signal(signal.SIGTERM) 118 | except Exception: 119 | pass 120 | try: 121 | self.unset_running() 122 | except Exception: 123 | # the services can already be down at that point. 124 | pass 125 | self.logger.info(f'Shutting down {self.__class__.__name__}') 126 | 127 | async def _to_run_forever_async(self) -> None: 128 | pass 129 | 130 | async def run_async(self, sleep_in_sec: int) -> None: 131 | self.logger.info(f'Launching {self.__class__.__name__}') 132 | try: 133 | while True: 134 | if self.shutdown_requested(): 135 | break 136 | try: 137 | if self.process: 138 | if self.process.poll() is not None: 139 | self.logger.critical(f'Unable to start {self.script_name}.') 140 | break 141 | else: 142 | self.set_running() 143 | await self._to_run_forever_async() 144 | except Exception: 145 | self.logger.exception(f'Something went terribly wrong in {self.__class__.__name__}.') 146 | finally: 147 | if not self.process: 148 | # self.process means we run an external script, all the time, 149 | # do not unset between sleep. 150 | self.unset_running() 151 | if not await self.long_sleep_async(sleep_in_sec): 152 | break 153 | except KeyboardInterrupt: 154 | self.logger.warning(f'{self.script_name} killed by user.') 155 | finally: 156 | if self.process: 157 | try: 158 | # Killing everything if possible. 159 | self.process.send_signal(signal.SIGWINCH) 160 | self.process.send_signal(signal.SIGTERM) 161 | except Exception: 162 | pass 163 | try: 164 | self.unset_running() 165 | except Exception: 166 | # the services can already be down at that point. 
167 | pass 168 | self.logger.info(f'Shutting down {self.__class__.__name__}') 169 | -------------------------------------------------------------------------------- /bgpranking/default/exceptions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | class BGPRankingException(Exception): 6 | pass 7 | 8 | 9 | class FetcherException(BGPRankingException): 10 | pass 11 | 12 | 13 | class ArchiveException(BGPRankingException): 14 | pass 15 | 16 | 17 | class CreateDirectoryException(BGPRankingException): 18 | pass 19 | 20 | 21 | class MissingEnv(BGPRankingException): 22 | pass 23 | 24 | 25 | class InvalidDateFormat(BGPRankingException): 26 | pass 27 | 28 | 29 | class MissingConfigFile(BGPRankingException): 30 | pass 31 | 32 | 33 | class MissingConfigEntry(BGPRankingException): 34 | pass 35 | 36 | 37 | class ThirdPartyUnreachable(BGPRankingException): 38 | pass 39 | 40 | 41 | class ConfigError(BGPRankingException): 42 | pass 43 | -------------------------------------------------------------------------------- /bgpranking/default/helpers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import json 4 | import logging 5 | import os 6 | from functools import lru_cache 7 | from pathlib import Path 8 | from typing import Any, Dict, Optional, Union 9 | 10 | from . import env_global_name 11 | from .exceptions import ConfigError, CreateDirectoryException, MissingEnv 12 | 13 | configs: Dict[str, Dict[str, Any]] = {} 14 | logger = logging.getLogger('Helpers') 15 | 16 | 17 | @lru_cache(64) 18 | def get_homedir() -> Path: 19 | if not os.environ.get(env_global_name): 20 | # Try to open a .env file in the home directory if it exists. 21 | if (Path(__file__).resolve().parent.parent.parent / '.env').exists(): 22 | with (Path(__file__).resolve().parent.parent.parent / '.env').open() as f: 23 | for line in f: 24 | key, value = line.strip().split('=', 1) 25 | if value[0] in ['"', "'"]: 26 | value = value[1:-1] 27 | os.environ[key] = value 28 | 29 | if not os.environ.get(env_global_name): 30 | guessed_home = Path(__file__).resolve().parent.parent.parent 31 | raise MissingEnv(f"{env_global_name} is missing. \ 32 | Run the following command (assuming you run the code from the cloned repository):\ 33 | export {env_global_name}='{guessed_home}'") 34 | return Path(os.environ[env_global_name]) 35 | 36 | 37 | @lru_cache(64) 38 | def load_configs(path_to_config_files: Optional[Union[str, Path]]=None): 39 | global configs 40 | if configs: 41 | return 42 | if path_to_config_files: 43 | if isinstance(path_to_config_files, str): 44 | config_path = Path(path_to_config_files) 45 | else: 46 | config_path = path_to_config_files 47 | else: 48 | config_path = get_homedir() / 'config' 49 | if not config_path.exists(): 50 | raise ConfigError(f'Configuration directory {config_path} does not exist.') 51 | elif not config_path.is_dir(): 52 | raise ConfigError(f'Configuration directory {config_path} is not a directory.') 53 | 54 | configs = {} 55 | for path in config_path.glob('*.json'): 56 | with path.open() as _c: 57 | configs[path.stem] = json.load(_c) 58 | 59 | 60 | @lru_cache(64) 61 | def get_config(config_type: str, entry: str, quiet: bool=False) -> Any: 62 | """Get an entry from the given config_type file. 
Automatic fallback to the sample file""" 63 | global configs 64 | if not configs: 65 | load_configs() 66 | if config_type in configs: 67 | if entry in configs[config_type]: 68 | return configs[config_type][entry] 69 | else: 70 | if not quiet: 71 | logger.warning(f'Unable to find {entry} in config file.') 72 | else: 73 | if not quiet: 74 | logger.warning(f'No {config_type} config file available.') 75 | if not quiet: 76 | logger.warning(f'Falling back on sample config, please initialize the {config_type} config file.') 77 | with (get_homedir() / 'config' / f'{config_type}.json.sample').open() as _c: 78 | sample_config = json.load(_c) 79 | return sample_config[entry] 80 | 81 | 82 | def safe_create_dir(to_create: Path) -> None: 83 | if to_create.exists() and not to_create.is_dir(): 84 | raise CreateDirectoryException(f'The path {to_create} already exists and is not a directory') 85 | to_create.mkdir(parents=True, exist_ok=True) 86 | 87 | 88 | def get_socket_path(name: str) -> str: 89 | mapping = { 90 | 'cache': Path('cache', 'cache.sock'), 91 | 'intake': Path('temp', 'intake.sock'), 92 | 'prepare': Path('temp', 'prepare.sock') 93 | } 94 | return str(get_homedir() / mapping[name]) 95 | 96 | 97 | def try_make_file(filename: Path): 98 | try: 99 | filename.touch(exist_ok=False) 100 | return True 101 | except FileExistsError: 102 | return False 103 | -------------------------------------------------------------------------------- /bgpranking/helpers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | from functools import lru_cache 6 | from pathlib import Path 7 | from typing import Dict, List 8 | 9 | import requests 10 | 11 | from pyipasnhistory import IPASNHistory 12 | 13 | from .default import get_homedir, get_config, ThirdPartyUnreachable, safe_create_dir 14 | 15 | 16 | @lru_cache(64) 17 | def get_data_dir() -> Path: 18 | capture_dir = get_homedir() / 'rawdata' 19 | safe_create_dir(capture_dir) 20 | return capture_dir 21 | 22 | 23 | @lru_cache(64) 24 | def get_modules_dir() -> Path: 25 | modules_dir = get_homedir() / 'config' / 'modules' 26 | safe_create_dir(modules_dir) 27 | return modules_dir 28 | 29 | 30 | @lru_cache(64) 31 | def get_modules() -> List[Path]: 32 | return [modulepath for modulepath in get_modules_dir().glob('*.json')] 33 | 34 | 35 | def load_all_modules_configs() -> Dict[str, Dict]: 36 | configs = {} 37 | for p in get_modules(): 38 | with p.open() as f: 39 | j = json.load(f) 40 | configs[f"{j['vendor']}-{j['name']}"] = j 41 | return configs 42 | 43 | 44 | def get_ipasn(): 45 | ipasnhistory_url = get_config('generic', 'ipasnhistory_url') 46 | ipasn = IPASNHistory(ipasnhistory_url) 47 | if not ipasn.is_up: 48 | raise ThirdPartyUnreachable(f"Unable to reach IPASNHistory on {ipasnhistory_url}") 49 | return ipasn 50 | 51 | 52 | def sanity_check_ipasn(ipasn): 53 | try: 54 | meta = ipasn.meta() 55 | except requests.exceptions.ConnectionError: 56 | return False, "IP ASN History is not reachable, try again later." 57 | 58 | if 'error' in meta: 59 | raise ThirdPartyUnreachable(f'IP ASN History has a problem: {meta["error"]}') 60 | 61 | v4_percent = meta['cached_dates']['caida']['v4']['percent'] 62 | v6_percent = meta['cached_dates']['caida']['v6']['percent'] 63 | if v4_percent < 90 or v6_percent < 90: # (this way it works if we only load 10 days) 64 | # Try again later. 
65 | return False, f"IP ASN History is not ready: v4 {v4_percent}% / v6 {v6_percent}% loaded" 66 | return True, f"IP ASN History is ready: v4 {v4_percent}% / v6 {v6_percent}% loaded" 67 | -------------------------------------------------------------------------------- /bgpranking/parsers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D4-project/BGP-Ranking/5f4e051f940b1c79ec5c6b432db1c7b2bdcbc781/bgpranking/parsers/__init__.py -------------------------------------------------------------------------------- /bgpranking/parsers/abusech.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from dateutil.parser import parse 5 | import re 6 | from io import BytesIO 7 | 8 | from typing import List 9 | 10 | 11 | def parse_raw_file(self, f: BytesIO) -> List[bytes]: 12 | self.datetime = parse(re.findall(b'# Last updated: (.*)#', f.getvalue())[0]) 13 | return self.extract_ipv4(f.getvalue()) 14 | -------------------------------------------------------------------------------- /bgpranking/parsers/abusech_feodo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from dateutil.parser import parse 5 | import re 6 | from io import BytesIO 7 | 8 | from typing import List 9 | 10 | 11 | def parse_raw_file(self, f: BytesIO) -> List[bytes]: 12 | self.datetime = parse(re.findall(b'# Last updated: (.*)#', f.getvalue())[0]) 13 | return self.extract_ipv4(f.getvalue()) 14 | -------------------------------------------------------------------------------- /bgpranking/parsers/abusech_threatfox.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | 6 | from datetime import datetime 7 | from io import BytesIO 8 | from typing import List 9 | 10 | 11 | def parse_raw_file(self, f: BytesIO) -> List[str]: 12 | self.datetime = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) 13 | to_return = [] 14 | for entry in json.loads(f.getvalue().decode()).values(): 15 | ip_port = entry[0]['ioc_value'] 16 | to_return.append(ip_port.split(':')[0]) 17 | return to_return 18 | -------------------------------------------------------------------------------- /bgpranking/parsers/dshield.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from dateutil.parser import parse 5 | import re 6 | from io import BytesIO 7 | from typing import List 8 | 9 | 10 | def parse_raw_file(self, f: BytesIO) -> List[bytes]: 11 | self.datetime = parse(re.findall(b'# updated (.*)\n', f.getvalue())[0]) 12 | iplist = self.extract_ipv4(f.getvalue()) 13 | # The IPs have leading 0s. Getting rid of them directly here. 
14 | return self.strip_leading_zeros(iplist) 15 | -------------------------------------------------------------------------------- /bgpranking/parsers/malc0de.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from dateutil.parser import parse 5 | import re 6 | from io import BytesIO 7 | 8 | from typing import List 9 | 10 | 11 | def parse_raw_file(self, f: BytesIO) -> List[bytes]: 12 | self.datetime = parse(re.findall(b'// Last updated (.*)\n', f.getvalue())[0]) 13 | return self.extract_ipv4(f.getvalue()) 14 | -------------------------------------------------------------------------------- /bgpranking/parsers/nothink.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from dateutil.parser import parse 5 | import re 6 | from io import BytesIO 7 | 8 | from typing import List 9 | 10 | def parse_raw_file(self, f: BytesIO) -> List[bytes]: 11 | self.datetime = parse(re.findall(b'# Generated (.*)\n', f.getvalue())[0]) 12 | return self.extract_ipv4(f.getvalue()) 13 | -------------------------------------------------------------------------------- /bgpranking/parsers/shadowserver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from dateutil.parser import parse 5 | from csv import DictReader 6 | from io import BytesIO, StringIO 7 | from typing import Tuple, Generator 8 | from datetime import datetime 9 | 10 | 11 | def parse_raw_file(self, f: BytesIO) -> Generator[Tuple[str, datetime], None, None]: 12 | default_ts = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) 13 | reader = DictReader(StringIO(f.getvalue().decode())) 14 | for row in reader: 15 | if 'timestamp' in row: 16 | ts = parse(row['timestamp']) 17 | else: 18 | ts = default_ts 19 | 20 | if 'ip' in row: 21 | ip = row['ip'] 22 | elif 'src_ip' in row: 23 | # For sinkhole6_http 24 | ip = row['src_ip'] 25 | else: 26 | self.logger.critical(f'No IPs in the list {self.source}.') 27 | break 28 | yield ip, ts 29 | -------------------------------------------------------------------------------- /bgpranking/statsripe.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import copy 5 | import json 6 | 7 | from datetime import datetime, timedelta 8 | from enum import Enum 9 | from ipaddress import IPv4Address, IPv6Address, IPv4Network, IPv6Network 10 | from typing import TypeVar, Optional, Dict, Any 11 | 12 | import requests 13 | 14 | from dateutil.parser import parse 15 | 16 | from .helpers import get_homedir, safe_create_dir 17 | 18 | IPTypes = TypeVar('IPTypes', IPv4Address, IPv6Address, 'str') 19 | PrefixTypes = TypeVar('PrefixTypes', IPv4Network, IPv6Network, 'str') 20 | TimeTypes = TypeVar('TimeTypes', datetime, 'str') 21 | 22 | 23 | class ASNsTypes(Enum): 24 | transiting = 't' 25 | originating = 'o' 26 | all_types = 't,o' 27 | undefined = '' 28 | 29 | 30 | class AddressFamilies(Enum): 31 | ipv4 = 'v4' 32 | ipv6 = 'v6' 33 | all_families = 'v4,v6' 34 | undefined = '' 35 | 36 | 37 | class Noise(Enum): 38 | keep = 'keep' 39 | remove = 'filter' 40 | 41 | 42 | class StatsRIPE(): 43 | 44 | def __init__(self, sourceapp='bgpranking-ng - CIRCL'): 45 | self.url = "https://stat.ripe.net/data/{method}/data.json?{parameters}" 46 | self.sourceapp = sourceapp 47 | 
self.cache_dir = get_homedir() / 'rawdata' / 'stats_ripe' 48 | 49 | def __time_to_text(self, query_time: TimeTypes) -> str: 50 | if isinstance(query_time, datetime): 51 | return query_time.isoformat() 52 | return query_time 53 | 54 | def _get_cache(self, method, parameters): 55 | '''The dataset is updated every 8 hours (midnight, 8, 16). 56 | If parameters has a key 'query_time' on any of these hours, try to get it. 57 | If not, try to get the closest one. 58 | If it has nothing, assume non and try to get the closest timestamp 59 | When caching, get query_time from response['data']['query_time'] 60 | ''' 61 | parameters = copy.copy(parameters) 62 | if not parameters.get('query_time'): 63 | # use timedelta because the generation of the new dataset takes a while. 64 | parameters['query_time'] = (datetime.now() - timedelta(hours=8)).isoformat() 65 | 66 | d = parse(parameters['query_time']) 67 | if d.hour == 8 and d.minute == 0 and d.second == 0: 68 | pass 69 | else: 70 | d = d.replace(hour=min([0, 8, 16], key=lambda x: abs(x - d.hour)), 71 | minute=0, second=0, microsecond=0) 72 | parameters['query_time'] = d.isoformat() 73 | cache_filename = '&'.join(['{}={}'.format(k, str(v).lower()) for k, v in parameters.items()]) 74 | c_path = self.cache_dir / method / cache_filename 75 | if c_path.exists(): 76 | with open(c_path, 'r') as f: 77 | return json.load(f) 78 | return False 79 | 80 | def _save_cache(self, method, parameters, response): 81 | parameters['query_time'] = response['data']['query_time'] 82 | cache_filename = '&'.join(['{}={}'.format(k, str(v).lower()) for k, v in parameters.items()]) 83 | safe_create_dir(self.cache_dir / method) 84 | c_path = self.cache_dir / method / cache_filename 85 | with open(c_path, 'w') as f: 86 | json.dump(response, f, indent=2) 87 | 88 | def _get(self, method: str, parameters: Dict) -> Dict: 89 | parameters['sourceapp'] = self.sourceapp 90 | cached = self._get_cache(method, parameters) 91 | if cached: 92 | return cached 93 | url = self.url.format(method=method, parameters='&'.join(['{}={}'.format(k, str(v).lower()) for k, v in parameters.items()])) 94 | response = requests.get(url) 95 | j_content = response.json() 96 | self._save_cache(method, parameters, j_content) 97 | return j_content 98 | 99 | def network_info(self, ip: IPTypes) -> dict: 100 | parameters = {'resource': ip} 101 | return self._get('network-info', parameters) 102 | 103 | def prefix_overview(self, prefix: PrefixTypes, min_peers_seeing: int= 0, 104 | max_related: int=0, query_time: Optional[TimeTypes]=None) -> dict: 105 | parameters: Dict[str, Any] = {'resource': prefix} 106 | if min_peers_seeing: 107 | parameters['min_peers_seeing'] = min_peers_seeing 108 | if max_related: 109 | parameters['max_related'] = max_related 110 | if query_time: 111 | parameters['query_time'] = self.__time_to_text(query_time) 112 | return self._get('prefix-overview', parameters) 113 | 114 | def ris_asns(self, query_time: Optional[TimeTypes]=None, list_asns: bool=False, asn_types: ASNsTypes=ASNsTypes.undefined): 115 | parameters: Dict[str, Any] = {} 116 | if list_asns: 117 | parameters['list_asns'] = list_asns 118 | if asn_types: 119 | parameters['asn_types'] = asn_types.value 120 | if query_time: 121 | parameters['query_time'] = self.__time_to_text(query_time) 122 | return self._get('ris-asns', parameters) 123 | 124 | def ris_prefixes(self, asn: int, query_time: Optional[TimeTypes]=None, 125 | list_prefixes: bool=False, types: ASNsTypes=ASNsTypes.undefined, 126 | af: AddressFamilies=AddressFamilies.undefined, 
noise: Noise=Noise.keep): 127 | parameters: Dict[str, Any] = {'resource': str(asn)} 128 | if query_time: 129 | parameters['query_time'] = self.__time_to_text(query_time) 130 | if list_prefixes: 131 | parameters['list_prefixes'] = list_prefixes 132 | if types: 133 | parameters['types'] = types.value 134 | if af: 135 | parameters['af'] = af.value 136 | if noise: 137 | parameters['noise'] = noise.value 138 | return self._get('ris-prefixes', parameters) 139 | 140 | def country_asns(self, country: str, details: int=0, query_time: Optional[TimeTypes]=None): 141 | parameters: Dict[str, Any] = {'resource': country} 142 | if details: 143 | parameters['lod'] = details 144 | if query_time: 145 | parameters['query_time'] = self.__time_to_text(query_time) 146 | return self._get('country-asns', parameters) 147 | -------------------------------------------------------------------------------- /bin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D4-project/BGP-Ranking/5f4e051f940b1c79ec5c6b432db1c7b2bdcbc781/bin/__init__.py -------------------------------------------------------------------------------- /bin/archiver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | import logging 6 | import zipfile 7 | 8 | from collections import defaultdict 9 | from datetime import date 10 | from logging import Logger 11 | from pathlib import Path 12 | 13 | from dateutil import parser 14 | from dateutil.relativedelta import relativedelta 15 | 16 | from bgpranking.default import safe_create_dir, AbstractManager 17 | from bgpranking.helpers import get_modules, get_data_dir 18 | 19 | 20 | logger = logging.getLogger('Archiver') 21 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 22 | level=logging.INFO) 23 | 24 | 25 | class DeepArchive(): 26 | 27 | def __init__(self, config_file: Path, logger: Logger): 28 | '''Archive everyfile older than 2 month.''' 29 | with config_file.open() as f: 30 | module_parameters = json.load(f) 31 | self.logger = logger 32 | self.vendor = module_parameters['vendor'] 33 | self.listname = module_parameters['name'] 34 | self.directory = get_data_dir() / self.vendor / self.listname / 'archive' 35 | safe_create_dir(self.directory) 36 | self.deep_archive = self.directory / 'deep' 37 | safe_create_dir(self.deep_archive) 38 | 39 | def archive(self): 40 | to_archive = defaultdict(list) 41 | today = date.today() 42 | last_day_to_keep = date(today.year, today.month, 1) - relativedelta(months=2) 43 | for p in self.directory.iterdir(): 44 | if not p.is_file(): 45 | continue 46 | filedate = parser.parse(p.name.split('.')[0]).date() 47 | if filedate >= last_day_to_keep: 48 | continue 49 | to_archive['{}.zip'.format(filedate.strftime('%Y%m'))].append(p) 50 | if to_archive: 51 | self.logger.info('Found old files. 
Archiving: {}'.format(', '.join(to_archive.keys()))) 52 | else: 53 | self.logger.debug('No old files.') 54 | for archivename, path_list in to_archive.items(): 55 | with zipfile.ZipFile(self.deep_archive / archivename, 'x', zipfile.ZIP_DEFLATED) as z: 56 | for f in path_list: 57 | z.write(f, f.name) 58 | # Delete all the files if the archiving worked out properly 59 | [f.unlink() for f in path_list] 60 | 61 | 62 | class ModulesArchiver(AbstractManager): 63 | 64 | def __init__(self, loglevel: int=logging.INFO): 65 | super().__init__(loglevel) 66 | self.script_name = 'archiver' 67 | self.modules = [DeepArchive(path, self.logger) for path in get_modules()] 68 | 69 | def _to_run_forever(self): 70 | [module.archive() for module in self.modules] 71 | 72 | 73 | def main(): 74 | archiver = ModulesArchiver() 75 | archiver.run(sleep_in_sec=360000) 76 | 77 | 78 | if __name__ == '__main__': 79 | main() 80 | -------------------------------------------------------------------------------- /bin/asn_descriptions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import logging 5 | import re 6 | import requests 7 | 8 | from dateutil.parser import parse 9 | from redis import Redis 10 | 11 | from bgpranking.default import get_socket_path, safe_create_dir, AbstractManager, get_config 12 | from bgpranking.helpers import get_data_dir 13 | 14 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 15 | level=logging.INFO) 16 | 17 | 18 | class ASNDescriptions(AbstractManager): 19 | 20 | def __init__(self, loglevel: int=logging.INFO): 21 | super().__init__(loglevel) 22 | self.script_name = 'asn_descr' 23 | self.asn_meta = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port'), db=2, decode_responses=True) 24 | self.logger.debug('Starting ASN History') 25 | self.directory = get_data_dir() / 'asn_descriptions' 26 | safe_create_dir(self.directory) 27 | self.archives = self.directory / 'archive' 28 | safe_create_dir(self.archives) 29 | self.url = 'https://www.cidr-report.org/as2.0/autnums.html' 30 | 31 | def __update_available(self): 32 | r = requests.head(self.url) 33 | print(r.headers) 34 | current_last_modified = parse(r.headers['Last-Modified']) 35 | if not self.asn_meta.exists('ans_description_last_update'): 36 | return True 37 | last_update = parse(self.asn_meta.get('ans_description_last_update')) # type: ignore 38 | if last_update < current_last_modified: 39 | return True 40 | return False 41 | 42 | def load_descriptions(self): 43 | if not self.__update_available(): 44 | self.logger.debug('No new file to import.') 45 | return 46 | self.logger.info('Importing new ASN descriptions.') 47 | r = requests.get(self.url) 48 | last_modified = parse(r.headers['Last-Modified']).isoformat() 49 | p = self.asn_meta.pipeline() 50 | new_asn = 0 51 | new_description = 0 52 | for asn, descr in re.findall('as=AS(.*)&.* (.*)\n', r.text): 53 | existing_descriptions = self.asn_meta.hgetall(f'{asn}|descriptions') 54 | if not existing_descriptions: 55 | self.logger.debug(f'New ASN: {asn} - {descr}') 56 | p.hset(f'{asn}|descriptions', last_modified, descr) 57 | new_asn += 1 58 | else: 59 | last_descr = sorted(existing_descriptions.keys(), reverse=True)[0] 60 | if descr != existing_descriptions[last_descr]: 61 | self.logger.debug(f'New description for {asn}: {existing_descriptions[last_descr]} -> {descr}') 62 | p.hset(f'{asn}|descriptions', last_modified, descr) 63 | new_description 
+= 1 64 | p.set('ans_description_last_update', last_modified) 65 | p.execute() 66 | self.logger.info(f'Done with import. New ASNs: {new_asn}, new descriptions: {new_description}') 67 | if new_asn or new_description: 68 | with open(self.archives / f'{last_modified}.html', 'w') as f: 69 | f.write(r.text) 70 | 71 | def _to_run_forever(self): 72 | self.load_descriptions() 73 | 74 | 75 | def main(): 76 | asnd_manager = ASNDescriptions() 77 | asnd_manager.run(sleep_in_sec=3600) 78 | 79 | 80 | if __name__ == '__main__': 81 | main() 82 | -------------------------------------------------------------------------------- /bin/dbinsert.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import logging 5 | import time 6 | 7 | from typing import List 8 | 9 | from redis import Redis 10 | 11 | from bgpranking.default import get_socket_path, AbstractManager, get_config 12 | from bgpranking.helpers import get_ipasn, sanity_check_ipasn 13 | 14 | 15 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 16 | level=logging.INFO) 17 | 18 | 19 | class DBInsertManager(AbstractManager): 20 | 21 | def __init__(self, loglevel: int=logging.INFO): 22 | super().__init__(loglevel) 23 | self.script_name = 'db_insert' 24 | self.kvrocks_storage = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port'), decode_responses=True) 25 | self.redis_sanitized = Redis(unix_socket_path=get_socket_path('prepare'), db=0, decode_responses=True) 26 | self.ipasn = get_ipasn() 27 | self.logger.debug('Starting import') 28 | 29 | def _to_run_forever(self): 30 | ready, message = sanity_check_ipasn(self.ipasn) 31 | if not ready: 32 | # Try again later. 33 | self.logger.warning(message) 34 | return 35 | self.logger.debug(message) 36 | 37 | while True: 38 | if self.shutdown_requested(): 39 | break 40 | try: 41 | if not self.ipasn.is_up: 42 | break 43 | except Exception: 44 | self.logger.warning('Unable to query ipasnhistory') 45 | time.sleep(10) 46 | continue 47 | uuids: List[str] = self.redis_sanitized.spop('to_insert', 100) # type: ignore 48 | if not uuids: 49 | break 50 | p = self.redis_sanitized.pipeline(transaction=False) 51 | [p.hgetall(uuid) for uuid in uuids] 52 | sanitized_data = p.execute() 53 | 54 | for_query = [] 55 | for i, uuid in enumerate(uuids): 56 | data = sanitized_data[i] 57 | if not data: 58 | self.logger.warning(f'No data for UUID {uuid}. This should not happen, but lets move on.') 59 | continue 60 | for_query.append({'ip': data['ip'], 'address_family': data['address_family'], 61 | 'date': data['datetime'], 'precision_delta': {'days': 3}}) 62 | try: 63 | responses = self.ipasn.mass_query(for_query) 64 | except Exception: 65 | self.logger.exception('Mass query in IPASN History failed, trying again later.') 66 | # Rollback the spop 67 | self.redis_sanitized.sadd('to_insert', *uuids) 68 | time.sleep(10) 69 | continue 70 | retry = [] 71 | done = [] 72 | ardb_pipeline = self.kvrocks_storage.pipeline(transaction=False) 73 | for i, uuid in enumerate(uuids): 74 | data = sanitized_data[i] 75 | if not data: 76 | self.logger.warning(f'No data for UUID {uuid}. 
This should not happen, but lets move on.') 77 | done.append(uuid) 78 | continue 79 | routing_info = responses['responses'][i]['response'] # our queries are on one single date, not a range 80 | # Data gathered from IPASN History: 81 | # * IP Block of the IP 82 | # * AS number 83 | if not routing_info: 84 | self.logger.warning(f"No response for {responses['responses'][i]}") 85 | done.append(uuid) 86 | continue 87 | if 'error' in routing_info: 88 | self.logger.warning(f"Unable to find routing information for {data['ip']} - {data['datetime']}: {routing_info['error']}") 89 | done.append(uuid) 90 | continue 91 | # Single date query, getting from the object 92 | datetime_routing = list(routing_info.keys())[0] 93 | entry = routing_info[datetime_routing] 94 | if not entry: 95 | # routing info is missing, need to try again later. 96 | retry.append(uuid) 97 | continue 98 | if 'asn' in entry and entry['asn'] in [None, '0']: 99 | self.logger.warning(f"Unable to find the AS number associated to {data['ip']} - {data['datetime']} (got {entry['asn']}) - {entry}.") 100 | done.append(uuid) 101 | continue 102 | if 'prefix' in entry and entry['prefix'] in [None, '0.0.0.0/0', '::/0']: 103 | self.logger.warning(f"Unable to find the prefix associated to {data['ip']} - {data['datetime']} (got {entry['prefix']}).") 104 | done.append(uuid) 105 | continue 106 | 107 | # Format: |sources -> set([, ...]) 108 | ardb_pipeline.sadd(f"{data['date']}|sources", data['source']) 109 | 110 | # Format: | -> set([, ...]) 111 | ardb_pipeline.sadd(f"{data['date']}|{data['source']}", entry['asn']) 112 | # Format: || -> set([, ...]) 113 | ardb_pipeline.sadd(f"{data['date']}|{data['source']}|{entry['asn']}", entry['prefix']) 114 | 115 | # Format: ||| -> set([|, ...]) 116 | ardb_pipeline.sadd(f"{data['date']}|{data['source']}|{entry['asn']}|{entry['prefix']}", 117 | f"{data['ip']}|{data['datetime']}") 118 | done.append(uuid) 119 | ardb_pipeline.execute() 120 | p = self.redis_sanitized.pipeline(transaction=False) 121 | if done: 122 | p.delete(*done) 123 | if retry: 124 | p.sadd('to_insert', *retry) 125 | p.execute() 126 | 127 | 128 | def main(): 129 | dbinsert = DBInsertManager() 130 | dbinsert.run(sleep_in_sec=120) 131 | 132 | 133 | if __name__ == '__main__': 134 | main() 135 | -------------------------------------------------------------------------------- /bin/fetcher.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | import asyncio 6 | import logging 7 | 8 | from datetime import datetime, date 9 | from hashlib import sha512 # Faster than sha256 on 64b machines. 
10 | from logging import Logger 11 | from pathlib import Path 12 | 13 | import aiohttp 14 | from dateutil import parser 15 | from pid import PidFile, PidFileError  # type: ignore 16 | 17 | from bgpranking.default import AbstractManager, safe_create_dir 18 | from bgpranking.helpers import get_modules, get_data_dir, get_modules_dir 19 | 20 | 21 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 22 | level=logging.INFO) 23 | 24 | 25 | class Fetcher(): 26 | 27 | def __init__(self, config_file: Path, logger: Logger): 28 | '''Load `config_file`, and store the fetched data into `storage_directory`. 29 | Note: if the `config_file` does not provide a URL (the file is 30 | gathered by some other means), the fetcher is automatically stopped.''' 31 | with open(config_file, 'r') as f: 32 | module_parameters = json.load(f) 33 | self.vendor = module_parameters['vendor'] 34 | self.listname = module_parameters['name'] 35 | self.logger = logger 36 | self.fetcher = True 37 | if 'url' not in module_parameters: 38 | self.logger.info(f'{self.vendor}-{self.listname}: No URL to fetch, breaking.') 39 | self.fetcher = False 40 | return 41 | self.url = module_parameters['url'] 42 | self.logger.debug(f'{self.vendor}-{self.listname}: Starting fetcher on {self.url}') 43 | self.directory = get_data_dir() / self.vendor / self.listname 44 | safe_create_dir(self.directory) 45 | self.meta = self.directory / 'meta' 46 | safe_create_dir(self.meta) 47 | self.archive_dir = self.directory / 'archive' 48 | safe_create_dir(self.archive_dir) 49 | self.first_fetch = True 50 | 51 | async def __get_last_modified(self): 52 | async with aiohttp.ClientSession() as session: 53 | async with session.head(self.url) as r: 54 | headers = r.headers 55 | if 'Last-Modified' in headers: 56 | return parser.parse(headers['Last-Modified']) 57 | return None 58 | 59 | async def __newer(self): 60 | '''Check if the file available for download is newer than the one 61 | already downloaded by checking the `Last-Modified` header. 62 | Note: return False if the file containing the last header content 63 | does not exist, or the header doesn't have this key. 64 | ''' 65 | last_modified_path = self.meta / 'lastmodified' 66 | if not last_modified_path.exists(): 67 | # The file doesn't exist 68 | if not self.first_fetch: 69 | # The URL has no Last-Modified header, we cannot use it. 
70 | self.logger.debug(f'{self.vendor}-{self.listname}: No Last-Modified header available') 71 | return True 72 | self.first_fetch = False 73 | last_modified = await self.__get_last_modified() 74 | if last_modified: 75 | self.logger.debug(f'{self.vendor}-{self.listname}: Last-Modified header available') 76 | with last_modified_path.open('w') as f: 77 | f.write(last_modified.isoformat()) 78 | else: 79 | self.logger.debug(f'{self.vendor}-{self.listname}: No Last-Modified header available') 80 | return True 81 | with last_modified_path.open() as f: 82 | file_content = f.read() 83 | last_modified_file = parser.parse(file_content) 84 | last_modified = await self.__get_last_modified() 85 | if not last_modified: 86 | # No more Last-Modified header Oo 87 | self.logger.warning(f'{self.vendor}-{self.listname}: Last-Modified header was present, isn\'t anymore!') 88 | last_modified_path.unlink() 89 | return True 90 | if last_modified > last_modified_file: 91 | self.logger.info(f'{self.vendor}-{self.listname}: Got a new file.') 92 | with last_modified_path.open('w') as f: 93 | f.write(last_modified.isoformat()) 94 | return True 95 | return False 96 | 97 | def __same_as_last(self, downloaded): 98 | '''Figure out the last downloaded file, check if it is the same as the 99 | newly downloaded one. Returns true if both files have been downloaded the 100 | same day. 101 | Note: we check the new and the archive directory because we may have backlog 102 | and the newest file is always the first one we process 103 | ''' 104 | to_check = [] 105 | to_check_new = sorted([f for f in self.directory.iterdir() if f.is_file()]) 106 | if to_check_new: 107 | # we have files waiting to be processed 108 | self.logger.debug(f'{self.vendor}-{self.listname}: {len(to_check_new)} file(s) are waiting to be processed') 109 | to_check.append(to_check_new[-1]) 110 | to_check_archive = sorted([f for f in self.archive_dir.iterdir() if f.is_file()]) 111 | if to_check_archive: 112 | # we have files already processed, in the archive 113 | self.logger.debug(f'{self.vendor}-{self.listname}: {len(to_check_archive)} file(s) have been processed') 114 | to_check.append(to_check_archive[-1]) 115 | if not to_check: 116 | self.logger.debug(f'{self.vendor}-{self.listname}: New list, no hisorical files') 117 | # nothing has been downloaded ever, moving on 118 | return False 119 | dl_hash = sha512(downloaded) 120 | for last_file in to_check: 121 | with last_file.open('rb') as f: 122 | last_hash = sha512(f.read()) 123 | if (dl_hash.digest() == last_hash.digest() 124 | and parser.parse(last_file.name.split('.')[0]).date() == date.today()): 125 | self.logger.debug(f'{self.vendor}-{self.listname}: Same file already downloaded today.') 126 | return True 127 | return False 128 | 129 | async def fetch_list(self): 130 | '''Fetch & store the list''' 131 | if not self.fetcher: 132 | return 133 | try: 134 | with PidFile(f'{self.listname}.pid', piddir=self.meta): 135 | if not await self.__newer(): 136 | return 137 | async with aiohttp.ClientSession() as session: 138 | async with session.get(self.url) as r: 139 | content = await r.content.read() 140 | if self.__same_as_last(content): 141 | return 142 | self.logger.info(f'{self.vendor}-{self.listname}: Got a new file!') 143 | with (self.directory / '{}.txt'.format(datetime.now().isoformat())).open('wb') as f: 144 | f.write(content) 145 | except PidFileError: 146 | self.logger.info(f'{self.vendor}-{self.listname}: Fetcher already running') 147 | 148 | 149 | class ModulesManager(AbstractManager): 150 | 151 | def 
__init__(self, loglevel: int=logging.DEBUG): 152 | super().__init__(loglevel) 153 | self.script_name = 'modules_manager' 154 | self.modules_paths = get_modules() 155 | self.modules = [Fetcher(path, self.logger) for path in self.modules_paths] 156 | 157 | async def _to_run_forever_async(self): 158 | # Check if there are new config files 159 | new_modules_paths = [modulepath for modulepath in get_modules_dir().glob('*.json') if modulepath not in self.modules_paths] 160 | self.modules += [Fetcher(path, self.logger) for path in new_modules_paths] 161 | self.modules_paths += new_modules_paths 162 | 163 | if self.modules: 164 | for module in self.modules: 165 | if module.fetcher: 166 | await module.fetch_list() 167 | else: 168 | self.logger.info('No config files were found so there are no fetchers running yet. Will try again later.') 169 | 170 | 171 | def main(): 172 | m = ModulesManager() 173 | asyncio.run(m.run_async(sleep_in_sec=3600)) 174 | 175 | 176 | if __name__ == '__main__': 177 | main() 178 | -------------------------------------------------------------------------------- /bin/manual_ranking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import argparse 5 | import logging 6 | from dateutil.parser import parse 7 | from datetime import timedelta 8 | 9 | from bgpranking.helpers import load_all_modules_configs 10 | from .ranking import Ranking 11 | 12 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 13 | level=logging.INFO) 14 | 15 | 16 | if __name__ == '__main__': 17 | parser = argparse.ArgumentParser(description='Manually force the ranking of a day or a time interval.') 18 | group = parser.add_mutually_exclusive_group(required=True) 19 | group.add_argument('-d', '--day', type=str, help='Day to rank (Format: YYYY-MM-DD).') 20 | group.add_argument('-i', '--interval', type=str, nargs=2, help='Interval to rank, first to last (Format: YYYY-MM-DD YYYY-MM-DD).') 21 | args = parser.parse_args() 22 | 23 | ranking = Ranking(loglevel=logging.DEBUG) 24 | config_files = load_all_modules_configs() 25 | if args.day: 26 | day = parse(args.day).date().isoformat() 27 | ranking.rank_a_day(day) 28 | else: 29 | current = parse(args.interval[1]).date() 30 | stop_date = parse(args.interval[0]).date() 31 | while current >= stop_date: 32 | ranking.rank_a_day(current.isoformat()) 33 | current -= timedelta(days=1) 34 | -------------------------------------------------------------------------------- /bin/parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import importlib 5 | import json 6 | import logging 7 | import re 8 | import types 9 | 10 | from datetime import datetime 11 | from io import BytesIO 12 | from logging import Logger 13 | from pathlib import Path 14 | from typing import List, Union, Tuple 15 | from uuid import uuid4 16 | 17 | from redis import Redis 18 | 19 | from bgpranking.default import AbstractManager, safe_create_dir, get_socket_path 20 | from bgpranking.helpers import get_modules, get_data_dir, get_modules_dir 21 | 22 | 23 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 24 | level=logging.INFO) 25 | 26 | 27 | class RawFilesParser(): 28 | 29 | def __init__(self, config_file: Path, logger: Logger) -> None: 30 | self.logger = logger 31 | with open(config_file, 'r') as f: 32 | module_parameters = json.load(f) 33 | self.vendor = 
module_parameters['vendor'] 34 | self.listname = module_parameters['name'] 35 | if 'parser' in module_parameters: 36 | self.parse_raw_file = types.MethodType(importlib.import_module(module_parameters['parser'], 'bgpranking').parse_raw_file, self)  # type: ignore 37 | self.source = f'{self.vendor}-{self.listname}' 38 | self.directory = get_data_dir() / self.vendor / self.listname 39 | safe_create_dir(self.directory) 40 | self.unparsable_dir = self.directory / 'unparsable' 41 | safe_create_dir(self.unparsable_dir) 42 | self.redis_intake = Redis(unix_socket_path=get_socket_path('intake'), db=0) 43 | self.logger.debug(f'{self.source}: Starting intake.') 44 | 45 | @property 46 | def files_to_parse(self) -> List[Path]: 47 | return sorted([f for f in self.directory.iterdir() if f.is_file()], reverse=True) 48 | 49 | def extract_ipv4(self, bytestream: bytes) -> List[Union[bytes, Tuple[bytes, datetime]]]: 50 | return re.findall(rb'[0-9]+(?:\.[0-9]+){3}', bytestream) 51 | 52 | def strip_leading_zeros(self, ips: List[bytes]) -> List[bytes]: 53 | '''Helper to get rid of leading 0s in an IP list. 54 | Only run it when needed, it is nasty and slow''' 55 | return ['.'.join(str(int(part)) for part in ip.split(b'.')).encode() for ip in ips] 56 | 57 | def parse_raw_file(self, f: BytesIO) -> List[Union[bytes, Tuple[bytes, datetime]]]: 58 | # If the list doesn't provide a time, fall back to the current day, midnight 59 | self.datetime = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) 60 | return self.extract_ipv4(f.getvalue()) 61 | 62 | def parse_raw_files(self) -> None: 63 | nb_unparsable_files = len([f for f in self.unparsable_dir.iterdir() if f.is_file()]) 64 | if nb_unparsable_files: 65 | self.logger.warning(f'{self.source}: Was unable to parse {nb_unparsable_files} files.') 66 | try: 67 | for filepath in self.files_to_parse: 68 | self.logger.debug(f'{self.source}: Parsing {filepath}, {len(self.files_to_parse) - 1} to go.') 69 | with open(filepath, 'rb') as f: 70 | to_parse = BytesIO(f.read()) 71 | p = self.redis_intake.pipeline() 72 | for line in self.parse_raw_file(to_parse): 73 | if isinstance(line, tuple): 74 | ip, datetime = line 75 | else: 76 | ip = line 77 | datetime = self.datetime 78 | uuid = uuid4() 79 | p.hmset(str(uuid), {'ip': ip, 'source': self.source, 80 | 'datetime': datetime.isoformat()}) 81 | p.sadd('intake', str(uuid)) 82 | p.execute() 83 | self._archive(filepath) 84 | except Exception as e: 85 | self.logger.exception(f"{self.source}: That didn't go well: {e}") 86 | self._unparsable(filepath) 87 | 88 | def _archive(self, filepath: Path) -> None: 89 | '''After processing, move the file to the archive directory''' 90 | filepath.rename(self.directory / 'archive' / filepath.name) 91 | 92 | def _unparsable(self, filepath: Path) -> None: 93 | '''If parsing failed, move the file to the unparsable directory''' 94 | filepath.rename(self.unparsable_dir / filepath.name) 95 | 96 | 97 | class ParserManager(AbstractManager): 98 | 99 | def __init__(self, loglevel: int=logging.DEBUG): 100 | super().__init__(loglevel) 101 | self.script_name = 'parser' 102 | self.modules_paths = get_modules() 103 | self.modules = [RawFilesParser(path, self.logger) for path in self.modules_paths] 104 | 105 | def _to_run_forever(self): 106 | # Check if there are new config files 107 | new_modules_paths = [modulepath for modulepath in get_modules_dir().glob('*.json') if modulepath not in self.modules_paths] 108 | self.modules += [RawFilesParser(path, self.logger) for path in new_modules_paths] 109 | self.modules_paths += 
new_modules_paths 110 | 111 | if self.modules: 112 | for module in self.modules: 113 | module.parse_raw_files() 114 | else: 115 | self.logger.warning('No config files were found so there are no parsers running yet. Will try again later.') 116 | 117 | 118 | def main(): 119 | parser_manager = ParserManager() 120 | parser_manager.run(sleep_in_sec=120) 121 | 122 | 123 | if __name__ == '__main__': 124 | main() 125 | -------------------------------------------------------------------------------- /bin/ranking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import logging 5 | 6 | from datetime import datetime, date, timedelta 7 | from ipaddress import ip_network 8 | from typing import Dict, Any 9 | 10 | from redis import Redis 11 | import requests 12 | 13 | from bgpranking.default import AbstractManager, get_config 14 | from bgpranking.helpers import get_ipasn, sanity_check_ipasn, load_all_modules_configs 15 | 16 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 17 | level=logging.INFO) 18 | 19 | 20 | class Ranking(AbstractManager): 21 | 22 | def __init__(self, loglevel: int=logging.INFO): 23 | super().__init__(loglevel) 24 | self.script_name = 'ranking' 25 | self.storage = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port'), decode_responses=True) 26 | self.ranking = Redis(get_config('generic', 'ranking_db_hostname'), get_config('generic', 'ranking_db_port'), decode_responses=True) 27 | self.ipasn = get_ipasn() 28 | 29 | def rank_a_day(self, day: str): 30 | asns_aggregation_key_v4 = f'{day}|asns|v4' 31 | asns_aggregation_key_v6 = f'{day}|asns|v6' 32 | to_delete = set([asns_aggregation_key_v4, asns_aggregation_key_v6]) 33 | r_pipeline = self.ranking.pipeline() 34 | cached_meta: Dict[str, Dict[str, Any]] = {} 35 | config_files = load_all_modules_configs() 36 | for source in self.storage.smembers(f'{day}|sources'): 37 | if source not in config_files: 38 | # get it again, just in case it is created after we open them 39 | config_files = load_all_modules_configs() 40 | self.logger.info(f'{day} - Ranking source: {source}') 41 | source_aggregation_key_v4 = f'{day}|{source}|asns|v4' 42 | source_aggregation_key_v6 = f'{day}|{source}|asns|v6' 43 | to_delete.update([source_aggregation_key_v4, source_aggregation_key_v6]) 44 | for asn in self.storage.smembers(f'{day}|{source}'): 45 | prefixes_aggregation_key_v4 = f'{day}|{asn}|v4' 46 | prefixes_aggregation_key_v6 = f'{day}|{asn}|v6' 47 | to_delete.update([prefixes_aggregation_key_v4, prefixes_aggregation_key_v6]) 48 | if asn == '0': 49 | # Default ASN when no matches. Probably spoofed. 50 | continue 51 | self.logger.debug(f'{day} - Ranking source: {source} / ASN: {asn}') 52 | asn_rank_v4 = 0.0 53 | asn_rank_v6 = 0.0 54 | for prefix in self.storage.smembers(f'{day}|{source}|{asn}'): 55 | if prefix == 'None': 56 | # This should not happen and requires a DB cleanup. 
57 | self.logger.critical(f'Fucked up prefix in "{day}|{source}|{asn}"') 58 | continue 59 | ips = set([ip_ts.split('|')[0] 60 | for ip_ts in self.storage.smembers(f'{day}|{source}|{asn}|{prefix}')]) 61 | py_prefix = ip_network(prefix) 62 | prefix_rank = float(len(ips)) / py_prefix.num_addresses 63 | r_pipeline.zadd(f'{day}|{source}|{asn}|v{py_prefix.version}|prefixes', {prefix: prefix_rank}) 64 | if py_prefix.version == 4: 65 | asn_rank_v4 += len(ips) * config_files[source]['impact'] 66 | r_pipeline.zincrby(prefixes_aggregation_key_v4, prefix_rank * config_files[source]['impact'], prefix) 67 | else: 68 | asn_rank_v6 += len(ips) * config_files[source]['impact'] 69 | r_pipeline.zincrby(prefixes_aggregation_key_v6, prefix_rank * config_files[source]['impact'], prefix) 70 | if asn in cached_meta: 71 | v4info = cached_meta[asn]['v4'] 72 | v6info = cached_meta[asn]['v6'] 73 | else: 74 | retry = 3 75 | while retry: 76 | try: 77 | v4info = self.ipasn.asn_meta(asn=asn, source='caida', address_family='v4', date=day) 78 | v6info = self.ipasn.asn_meta(asn=asn, source='caida', address_family='v6', date=day) 79 | break 80 | except requests.exceptions.ConnectionError: 81 | # Sometimes, ipasnhistory is unreachable try again a few times 82 | retry -= 1 83 | else: 84 | # if it keeps failing, the ASN will be ranked on next run. 85 | continue 86 | 87 | cached_meta[asn] = {'v4': v4info, 'v6': v6info} 88 | ipasnhistory_date_v4 = list(v4info['response'].keys())[0] 89 | v4count = v4info['response'][ipasnhistory_date_v4][asn]['ipcount'] 90 | ipasnhistory_date_v6 = list(v6info['response'].keys())[0] 91 | v6count = v6info['response'][ipasnhistory_date_v6][asn]['ipcount'] 92 | if v4count: 93 | asn_rank_v4 /= float(v4count) 94 | if asn_rank_v4: 95 | r_pipeline.set(f'{day}|{source}|{asn}|v4', asn_rank_v4) 96 | r_pipeline.zincrby(asns_aggregation_key_v4, asn_rank_v4, asn) 97 | r_pipeline.zadd(source_aggregation_key_v4, {asn: asn_rank_v4}) 98 | if v6count: 99 | asn_rank_v6 /= float(v6count) 100 | if asn_rank_v6: 101 | r_pipeline.set(f'{day}|{source}|{asn}|v6', asn_rank_v6) 102 | r_pipeline.zincrby(asns_aggregation_key_v6, asn_rank_v6, asn) 103 | r_pipeline.zadd(source_aggregation_key_v6, {asn: asn_rank_v6}) 104 | self.ranking.delete(*to_delete) 105 | r_pipeline.execute() 106 | 107 | def compute(self): 108 | ready, message = sanity_check_ipasn(self.ipasn) 109 | if not ready: 110 | # Try again later. 
111 | self.logger.warning(message) 112 | return 113 | self.logger.debug(message) 114 | 115 | self.logger.info('Start ranking') 116 | today = date.today() 117 | now = datetime.now() 118 | today12am = now.replace(hour=12, minute=0, second=0, microsecond=0) 119 | if now < today12am: 120 | # Compute yesterday and today's ranking (useful when we have lists generated only once a day) 121 | self.rank_a_day((today - timedelta(days=1)).isoformat()) 122 | self.rank_a_day(today.isoformat()) 123 | self.logger.info('Ranking done.') 124 | 125 | def _to_run_forever(self): 126 | self.compute() 127 | 128 | 129 | def main(): 130 | ranking = Ranking() 131 | ranking.run(sleep_in_sec=3600) 132 | 133 | 134 | if __name__ == '__main__': 135 | main() 136 | -------------------------------------------------------------------------------- /bin/run_backend.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import argparse 5 | import os 6 | import time 7 | from pathlib import Path 8 | from subprocess import Popen 9 | from typing import Optional, Dict 10 | 11 | from redis import Redis 12 | from redis.exceptions import ConnectionError 13 | 14 | from bgpranking.default import get_homedir, get_socket_path, get_config 15 | 16 | 17 | def check_running(name: str) -> bool: 18 | if name == "storage": 19 | r = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port')) 20 | elif name == "ranking": 21 | r = Redis(get_config('generic', 'ranking_db_hostname'), get_config('generic', 'ranking_db_port')) 22 | else: 23 | socket_path = get_socket_path(name) 24 | if not os.path.exists(socket_path): 25 | return False 26 | r = Redis(unix_socket_path=socket_path) 27 | try: 28 | return True if r.ping() else False 29 | except ConnectionError: 30 | return False 31 | 32 | 33 | def launch_cache(storage_directory: Optional[Path]=None): 34 | if not storage_directory: 35 | storage_directory = get_homedir() 36 | if not check_running('cache'): 37 | Popen(["./run_redis.sh"], cwd=(storage_directory / 'cache')) 38 | 39 | 40 | def shutdown_cache(storage_directory: Optional[Path]=None): 41 | if not storage_directory: 42 | storage_directory = get_homedir() 43 | r = Redis(unix_socket_path=get_socket_path('cache')) 44 | r.shutdown(save=True) 45 | print('Redis cache database shutdown.') 46 | 47 | 48 | def launch_temp(storage_directory: Optional[Path]=None): 49 | if not storage_directory: 50 | storage_directory = get_homedir() 51 | if not check_running('intake') and not check_running('prepare'): 52 | Popen(["./run_redis.sh"], cwd=(storage_directory / 'temp')) 53 | 54 | 55 | def shutdown_temp(storage_directory: Optional[Path]=None): 56 | if not storage_directory: 57 | storage_directory = get_homedir() 58 | r = Redis(unix_socket_path=get_socket_path('intake')) 59 | r.shutdown(save=True) 60 | print('Redis intake database shutdown.') 61 | r = Redis(unix_socket_path=get_socket_path('prepare')) 62 | r.shutdown(save=True) 63 | print('Redis prepare database shutdown.') 64 | 65 | 66 | def launch_storage(storage_directory: Optional[Path]=None): 67 | if not storage_directory: 68 | storage_directory = get_homedir() 69 | if not check_running('storage'): 70 | Popen(["./run_kvrocks.sh"], cwd=(storage_directory / 'storage')) 71 | 72 | 73 | def shutdown_storage(storage_directory: Optional[Path]=None): 74 | redis = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port')) 75 | redis.shutdown() 76 | 77 | 78 | def 
launch_ranking(storage_directory: Optional[Path]=None): 79 | if not storage_directory: 80 | storage_directory = get_homedir() 81 | if not check_running('ranking'): 82 | Popen(["./run_kvrocks.sh"], cwd=(storage_directory / 'ranking')) 83 | 84 | 85 | def shutdown_ranking(storage_directory: Optional[Path]=None): 86 | redis = Redis(get_config('generic', 'ranking_db_hostname'), get_config('generic', 'ranking_db_port')) 87 | redis.shutdown() 88 | 89 | 90 | def launch_all(): 91 | launch_cache() 92 | launch_temp() 93 | launch_storage() 94 | launch_ranking() 95 | 96 | 97 | def check_all(stop: bool=False): 98 | backends: Dict[str, bool] = {'cache': False, 'storage': False, 'ranking': False, 99 | 'intake': False, 'prepare': False} 100 | while True: 101 | for db_name in backends.keys(): 102 | print(backends[db_name]) 103 | try: 104 | backends[db_name] = check_running(db_name) 105 | except Exception: 106 | backends[db_name] = False 107 | if stop: 108 | if not any(running for running in backends.values()): 109 | break 110 | else: 111 | if all(running for running in backends.values()): 112 | break 113 | for db_name, running in backends.items(): 114 | if not stop and not running: 115 | print(f"Waiting on {db_name} to start") 116 | if stop and running: 117 | print(f"Waiting on {db_name} to stop") 118 | time.sleep(1) 119 | 120 | 121 | def stop_all(): 122 | shutdown_cache() 123 | shutdown_temp() 124 | shutdown_storage() 125 | shutdown_ranking() 126 | 127 | 128 | def main(): 129 | parser = argparse.ArgumentParser(description='Manage backend DBs.') 130 | parser.add_argument("--start", action='store_true', default=False, help="Start all") 131 | parser.add_argument("--stop", action='store_true', default=False, help="Stop all") 132 | parser.add_argument("--status", action='store_true', default=True, help="Show status") 133 | args = parser.parse_args() 134 | 135 | if args.start: 136 | launch_all() 137 | if args.stop: 138 | stop_all() 139 | if not args.stop and args.status: 140 | check_all() 141 | 142 | 143 | if __name__ == '__main__': 144 | main() 145 | -------------------------------------------------------------------------------- /bin/sanitizer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import ipaddress 5 | import logging 6 | import time 7 | 8 | from datetime import timezone 9 | from typing import Optional, List, Dict 10 | 11 | from dateutil import parser 12 | from redis import Redis 13 | import requests 14 | 15 | from bgpranking.default import AbstractManager, get_socket_path 16 | from bgpranking.helpers import get_ipasn, sanity_check_ipasn 17 | 18 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 19 | level=logging.INFO) 20 | 21 | 22 | class Sanitizer(AbstractManager): 23 | 24 | def __init__(self, loglevel: int=logging.INFO): 25 | super().__init__(loglevel) 26 | self.script_name = 'sanitizer' 27 | self.redis_intake = Redis(unix_socket_path=get_socket_path('intake'), db=0, decode_responses=True) 28 | self.redis_sanitized = Redis(unix_socket_path=get_socket_path('prepare'), db=0, decode_responses=True) 29 | self.ipasn = get_ipasn() 30 | self.logger.debug('Starting import') 31 | 32 | def _sanitize_ip(self, pipeline: Redis, uuid: str, data: Dict) -> Optional[Dict]: 33 | try: 34 | ip = ipaddress.ip_address(data['ip']) 35 | if isinstance(ip, ipaddress.IPv6Address): 36 | address_family = 'v6' 37 | else: 38 | address_family = 'v4' 39 | except ValueError: 40 | self.logger.info(f"Invalid 
IP address: {data['ip']}") 41 | return None 42 | except KeyError: 43 | self.logger.info(f"Invalid entry {data}") 44 | return None 45 | 46 | if not ip.is_global: 47 | self.logger.info(f"The IP address {data['ip']} is not global") 48 | return None 49 | 50 | datetime = parser.parse(data['datetime']) 51 | if datetime.tzinfo: 52 | # Make sure the datetime is in UTC and not TZ aware. 53 | datetime = datetime.astimezone(timezone.utc).replace(tzinfo=None) 54 | 55 | # Add to temporary DB for further processing 56 | pipeline.hmset(uuid, {'ip': str(ip), 'source': data['source'], 'address_family': address_family, 57 | 'date': datetime.date().isoformat(), 'datetime': datetime.isoformat()}) 58 | pipeline.sadd('to_insert', uuid) 59 | 60 | return {'ip': str(ip), 'address_family': address_family, 61 | 'date': datetime.isoformat(), 'precision_delta': {'days': 3}} 62 | 63 | def _sanitize_network(self, pipeline: Redis, uuid: str, data: Dict) -> List[Dict]: 64 | try: 65 | network = ipaddress.ip_network(data['ip']) 66 | if isinstance(network, ipaddress.IPv6Network): 67 | address_family = 'v6' 68 | else: 69 | address_family = 'v4' 70 | except ValueError: 71 | self.logger.info(f"Invalid IP network: {data['ip']}") 72 | return [] 73 | except KeyError: 74 | self.logger.info(f"Invalid entry {data}") 75 | return [] 76 | 77 | datetime = parser.parse(data['datetime']) 78 | if datetime.tzinfo: 79 | # Make sure the datetime is in UTC and not TZ aware. 80 | datetime = datetime.astimezone(timezone.utc).replace(tzinfo=None) 81 | 82 | for_cache = [] 83 | for ip in network.hosts(): 84 | if not ip.is_global: 85 | self.logger.info(f"The IP address {ip} is not global") 86 | continue 87 | 88 | # Add to temporary DB for further processing 89 | pipeline.hmset(uuid, {'ip': str(ip), 'source': data['source'], 'address_family': address_family, 90 | 'date': datetime.date().isoformat(), 'datetime': datetime.isoformat()}) 91 | pipeline.sadd('to_insert', uuid) 92 | 93 | for_cache.append({'ip': str(ip), 'address_family': address_family, 94 | 'date': datetime.isoformat(), 'precision_delta': {'days': 3}}) 95 | return for_cache 96 | 97 | def sanitize(self): 98 | ready, message = sanity_check_ipasn(self.ipasn) 99 | if not ready: 100 | # Try again later. 101 | self.logger.warning(message) 102 | return 103 | self.logger.debug(message) 104 | 105 | while True: 106 | try: 107 | if self.shutdown_requested() or not self.ipasn.is_up: 108 | break 109 | except requests.exceptions.ConnectionError: 110 | # Temporary issue with ipasnhistory 111 | self.logger.info('Temporary issue with ipasnhistory, trying again later.') 112 | time.sleep(10) 113 | continue 114 | uuids: Optional[List[str]] = self.redis_intake.spop('intake', 100) # type: ignore 115 | if not uuids: 116 | break 117 | for_cache = [] 118 | pipeline = self.redis_sanitized.pipeline(transaction=False) 119 | for uuid in uuids: 120 | data = self.redis_intake.hgetall(uuid) 121 | if not data: 122 | continue 123 | if '/' in data['ip']: 124 | entries_for_cache = self._sanitize_network(pipeline, uuid, data) 125 | if entries_for_cache: 126 | for_cache += entries_for_cache 127 | else: 128 | entry_for_cache = self._sanitize_ip(pipeline, uuid, data) 129 | if entry_for_cache: 130 | for_cache.append(entry_for_cache) 131 | 132 | pipeline.execute() 133 | self.redis_intake.delete(*uuids) 134 | 135 | try: 136 | # Just cache everything so the lookup scripts can do their thing.
137 | self.ipasn.mass_cache(for_cache) 138 | except Exception: 139 | self.logger.info('Mass cache in IPASN History failed, trying again later.') 140 | # Rollback the spop 141 | self.redis_intake.sadd('intake', *uuids) 142 | break 143 | 144 | def _to_run_forever(self): 145 | self.sanitize() 146 | 147 | 148 | def main(): 149 | sanitizer = Sanitizer() 150 | sanitizer.run(sleep_in_sec=120) 151 | 152 | 153 | if __name__ == '__main__': 154 | main() 155 | -------------------------------------------------------------------------------- /bin/shutdown.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import time 5 | 6 | from bgpranking.default import AbstractManager 7 | 8 | 9 | def main(): 10 | AbstractManager.force_shutdown() 11 | time.sleep(5) 12 | while True: 13 | try: 14 | running = AbstractManager.is_running() 15 | except FileNotFoundError: 16 | print('Redis is already down.') 17 | break 18 | if not running: 19 | break 20 | print(running) 21 | time.sleep(5) 22 | 23 | 24 | if __name__ == '__main__': 25 | main() 26 | -------------------------------------------------------------------------------- /bin/ssfetcher.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import logging 5 | from logging import Logger 6 | import json 7 | import asyncio 8 | 9 | from typing import Tuple, Dict, List, Optional, TypeVar, Any 10 | from datetime import datetime, date 11 | from pathlib import Path 12 | 13 | import aiohttp 14 | from bs4 import BeautifulSoup # type: ignore 15 | from dateutil.parser import parse 16 | 17 | from bgpranking.default import AbstractManager, get_homedir, safe_create_dir 18 | from bgpranking.helpers import get_data_dir, get_modules_dir 19 | 20 | 21 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 22 | level=logging.INFO) 23 | 24 | 25 | Dates = TypeVar('Dates', datetime, date, str) 26 | 27 | 28 | class ShadowServerFetcher(): 29 | 30 | def __init__(self, user, password, logger: Logger) -> None: 31 | self.logger = logger 32 | self.storage_directory = get_data_dir() 33 | self.config_path_modules = get_modules_dir() 34 | self.user = user 35 | self.password = password 36 | self.index_page = 'https://dl.shadowserver.org/reports/index.php' 37 | self.vendor = 'shadowserver' 38 | self.known_list_types = ('blacklist', 'blocklist', 'botnet', 'cc', 'cisco', 'cwsandbox', 39 | 'device', 'drone', 'event4', 'malware', 'scan6', 'event6', 'netis', 40 | 'microsoft', 'scan', 'sinkhole6', 'sinkhole', 'outdated', 41 | 'compromised', 'hp', 'darknet', 'ddos') 42 | self.first_available_day: Optional[date] = None 43 | self.last_available_day: date 44 | self.available_entries: Dict[str, List[Tuple[str, str]]] = {} 45 | 46 | async def __get_index(self): 47 | auth_details = {'user': self.user, 'password': self.password, 'login': 'Login'} 48 | async with aiohttp.ClientSession() as s: 49 | self.logger.debug('Fetching the index.') 50 | async with s.post(self.index_page, data=auth_details) as r: 51 | return await r.text() 52 | 53 | async def __build_daily_dict(self): 54 | html_index = await self.__get_index() 55 | soup = BeautifulSoup(html_index, 'html.parser') 56 | treeview = soup.find(id='treemenu1') 57 | for y in treeview.select(':scope > li'): 58 | year = y.contents[0] 59 | for m in y.contents[1].select(':scope > li'): 60 | month = m.contents[0] 61 | for d in m.contents[1].select(':scope > li'): 62 | 
day = d.contents[0] 63 | date = parse(f'{year} {month} {day}').date() 64 | self.available_entries[date.isoformat()] = [] 65 | for a in d.contents[1].find_all('a', href=True): 66 | if not self.first_available_day: 67 | self.first_available_day = date 68 | self.last_available_day = date 69 | self.available_entries[date.isoformat()].append((a['href'], a.string)) 70 | self.logger.debug('Dictionary created.') 71 | 72 | def __normalize_day(self, day: Optional[Dates]=None) -> str: 73 | if not day: 74 | if not self.last_available_day: 75 | raise Exception('Unable to figure out the last available day. You need to run build_daily_dict first') 76 | to_return = self.last_available_day 77 | else: 78 | if isinstance(day, str): 79 | to_return = parse(day).date() 80 | else: # day is already a datetime or a date 81 | to_return = day.date() if isinstance(day, datetime) else day 82 | return to_return.isoformat() 83 | 84 | def __split_name(self, name): 85 | type_content, country, list_type = name.split('-') 86 | if '_' in type_content: 87 | type_content, details_type = type_content.split('_', maxsplit=1) 88 | if '_' in details_type: 89 | details_type, sub = details_type.split('_', maxsplit=1) 90 | return list_type, country, (type_content, details_type, sub) 91 | return list_type, country, (type_content, details_type) 92 | return list_type, country, (type_content) 93 | 94 | def __check_config(self, filename: str) -> Optional[Path]: 95 | self.logger.debug(f'Working on config for {filename}.') 96 | config: Dict[str, Any] = {'vendor': 'shadowserver', 'parser': '.parsers.shadowserver'} 97 | type_content, _, type_details = self.__split_name(filename) 98 | prefix = type_content.split('.')[0] 99 | 100 | if isinstance(type_details, str): 101 | main_type = type_details 102 | config['name'] = '{}-{}'.format(prefix, type_details) 103 | else: 104 | main_type = type_details[0] 105 | config['name'] = '{}-{}'.format(prefix, '_'.join(type_details)) 106 | 107 | if main_type not in self.known_list_types: 108 | self.logger.warning(f'Unknown type: {main_type}.
Please update the config creator script.') 109 | return None 110 | 111 | if main_type == 'blacklist': 112 | config['impact'] = 5 113 | elif main_type == 'blocklist': 114 | config['impact'] = 5 115 | elif main_type == 'botnet': 116 | config['impact'] = 2 117 | elif main_type == 'malware': 118 | config['impact'] = 2 119 | elif main_type == 'cc': 120 | config['impact'] = 5 121 | elif main_type == 'cisco': 122 | config['impact'] = 3 123 | elif main_type == 'cwsandbox': 124 | config['impact'] = 5 125 | elif main_type == 'drone': 126 | config['impact'] = 2 127 | elif main_type == 'microsoft': 128 | config['impact'] = 3 129 | elif main_type == 'scan': 130 | config['impact'] = 1 131 | elif main_type == 'scan6': 132 | config['impact'] = 1 133 | elif main_type == 'sinkhole6': 134 | config['impact'] = 2 135 | elif main_type == 'sinkhole': 136 | config['impact'] = 2 137 | elif main_type == 'device': 138 | config['impact'] = 1 139 | elif main_type == 'event4': 140 | config['impact'] = 2 141 | elif main_type == 'event6': 142 | config['impact'] = 2 143 | elif main_type == 'netis': 144 | config['impact'] = 2 145 | else: 146 | config['impact'] = 1 147 | 148 | if not (self.config_path_modules / f"{config['vendor']}_{config['name']}.json").exists(): 149 | self.logger.debug(f'Creating config file for {filename}.') 150 | with open(self.config_path_modules / f"{config['vendor']}_{config['name']}.json", 'w') as f: 151 | json.dump(config, f, indent=2) 152 | else: 153 | with open(self.config_path_modules / f"{config['vendor']}_{config['name']}.json", 'r') as f: 154 | # Validate new config file with old 155 | config_current = json.load(f) 156 | if config_current != config: 157 | self.logger.warning('The config file created by this script is different from the one on disk: \n{}\n{}'.format(json.dumps(config), json.dumps(config_current))) 158 | # Init list directory 159 | directory = self.storage_directory / config['vendor'] / config['name'] 160 | safe_create_dir(directory) 161 | meta = directory / 'meta' 162 | safe_create_dir(meta) 163 | archive_dir = directory / 'archive' 164 | safe_create_dir(archive_dir) 165 | self.logger.debug(f'Done with config for {filename}.') 166 | return directory 167 | 168 | async def download_daily_entries(self, day: Optional[Dates]=None): 169 | await self.__build_daily_dict() 170 | for url, filename in self.available_entries[self.__normalize_day(day)]: 171 | storage_dir = self.__check_config(filename) 172 | if not storage_dir: 173 | continue 174 | # Check if the file we're trying to download has already been downloaded. Skip if True. 
175 | uuid = url.split('/')[-1] 176 | if (storage_dir / 'meta' / 'last_download').exists(): 177 | with open(storage_dir / 'meta' / 'last_download') as _fr: 178 | last_download_uuid = _fr.read() 179 | if last_download_uuid == uuid: 180 | self.logger.debug(f'Already downloaded: {url}.') 181 | continue 182 | async with aiohttp.ClientSession() as s: 183 | async with s.get(url) as r: 184 | self.logger.info(f'Downloading {url}.') 185 | content = await r.content.read() 186 | with (storage_dir / f'{datetime.now().isoformat()}.txt').open('wb') as _fw: 187 | _fw.write(content) 188 | with (storage_dir / 'meta' / 'last_download').open('w') as _fwt: 189 | _fwt.write(uuid) 190 | 191 | 192 | class ShadowServerManager(AbstractManager): 193 | 194 | def __init__(self, loglevel: int=logging.INFO): 195 | super().__init__(loglevel) 196 | self.script_name = 'shadowserver_fetcher' 197 | shadow_server_config_file = get_homedir() / 'config' / 'shadowserver.json' 198 | self.config = True 199 | if not shadow_server_config_file.exists(): 200 | self.config = False 201 | self.logger.warning(f'No config file available {shadow_server_config_file}, the shadow server module will not be launched.') 202 | return 203 | with shadow_server_config_file.open() as f: 204 | ss_config = json.load(f) 205 | self.fetcher = ShadowServerFetcher(ss_config['user'], ss_config['password'], self.logger) 206 | 207 | async def _to_run_forever_async(self): 208 | await self.fetcher.download_daily_entries() 209 | 210 | 211 | def main(): 212 | modules_manager = ShadowServerManager() 213 | if modules_manager.config: 214 | asyncio.run(modules_manager.run_async(sleep_in_sec=3600)) 215 | 216 | 217 | if __name__ == '__main__': 218 | main() 219 | -------------------------------------------------------------------------------- /bin/start.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from subprocess import Popen, run 5 | 6 | from bgpranking.default import get_homedir 7 | 8 | 9 | def main(): 10 | # Just fail if the env isn't set. 
11 | get_homedir() 12 | print('Start backend (redis)...') 13 | p = run(['run_backend', '--start']) 14 | p.check_returncode() 15 | print('done.') 16 | Popen(['fetcher']) 17 | # Popen(['ssfetcher']) 18 | Popen(['parser']) 19 | Popen(['sanitizer']) 20 | Popen(['dbinsert']) 21 | Popen(['ranking']) 22 | Popen(['asn_descriptions']) 23 | print('Start website...') 24 | Popen(['start_website']) 25 | print('done.') 26 | 27 | 28 | if __name__ == '__main__': 29 | main() 30 | -------------------------------------------------------------------------------- /bin/start_website.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import logging 5 | from subprocess import Popen 6 | 7 | from bgpranking.default import AbstractManager 8 | from bgpranking.default import get_config, get_homedir 9 | 10 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 11 | level=logging.INFO) 12 | 13 | 14 | class Website(AbstractManager): 15 | 16 | def __init__(self, loglevel: int=logging.INFO): 17 | super().__init__(loglevel) 18 | self.script_name = 'website' 19 | self.process = self._launch_website() 20 | self.set_running() 21 | 22 | def _launch_website(self): 23 | website_dir = get_homedir() / 'website' 24 | ip = get_config('generic', 'website_listen_ip') 25 | port = get_config('generic', 'website_listen_port') 26 | return Popen(['gunicorn', '-w', '10', 27 | '--graceful-timeout', '2', '--timeout', '300', 28 | '-b', f'{ip}:{port}', 29 | '--log-level', 'info', 30 | 'web:app'], 31 | cwd=website_dir) 32 | 33 | 34 | def main(): 35 | w = Website() 36 | w.run(sleep_in_sec=10) 37 | 38 | 39 | if __name__ == '__main__': 40 | main() 41 | -------------------------------------------------------------------------------- /bin/stop.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from subprocess import Popen, run 5 | 6 | from redis import Redis 7 | from redis.exceptions import ConnectionError 8 | 9 | from bgpranking.default import get_homedir, get_socket_path 10 | 11 | 12 | def main(): 13 | get_homedir() 14 | p = Popen(['shutdown']) 15 | p.wait() 16 | try: 17 | r = Redis(unix_socket_path=get_socket_path('cache'), db=1) 18 | r.delete('shutdown') 19 | print('Shutting down databases...') 20 | p_backend = run(['run_backend', '--stop']) 21 | p_backend.check_returncode() 22 | print('done.') 23 | except ConnectionError: 24 | # Already down, skip the stacktrace 25 | pass 26 | 27 | 28 | if __name__ == '__main__': 29 | main() 30 | -------------------------------------------------------------------------------- /bin/update.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import argparse 5 | import hashlib 6 | import logging 7 | import platform 8 | import shlex 9 | import subprocess 10 | import sys 11 | from pathlib import Path 12 | 13 | from bgpranking.default import get_homedir, get_config 14 | 15 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 16 | level=logging.INFO) 17 | 18 | 19 | def compute_hash_self(): 20 | m = hashlib.sha256() 21 | with (get_homedir() / 'bin' / 'update.py').open('rb') as f: 22 | m.update(f.read()) 23 | return m.digest() 24 | 25 | 26 | def keep_going(ignore=False): 27 | if ignore: 28 | return 29 | keep_going = input('Continue? 
(y/N) ') 30 | if keep_going.lower() != 'y': 31 | print('Okay, quitting.') 32 | sys.exit() 33 | 34 | 35 | def run_command(command, expect_fail: bool=False, capture_output: bool=True): 36 | args = shlex.split(command) 37 | homedir = get_homedir() 38 | process = subprocess.run(args, cwd=homedir, capture_output=capture_output) 39 | if capture_output: 40 | print(process.stdout.decode()) 41 | if process.returncode and not expect_fail: 42 | print(process.stderr.decode()) 43 | sys.exit() 44 | 45 | 46 | def check_poetry_version(): 47 | args = shlex.split("poetry self -V") 48 | homedir = get_homedir() 49 | process = subprocess.run(args, cwd=homedir, capture_output=True) 50 | poetry_version_str = process.stdout.decode() 51 | version = poetry_version_str.split()[2] 52 | version = version.strip(')') 53 | version_details = tuple(int(i) for i in version.split('.')) 54 | if version_details < (1, 1, 0): 55 | print('The project requires poetry >= 1.1.0, please update.') 56 | print('If you installed with "pip install --user poetry", run "pip install --user -U poetry"') 57 | print('If you installed via the recommended method, use "poetry self update"') 58 | print('More details: https://github.com/python-poetry/poetry#updating-poetry') 59 | sys.exit() 60 | 61 | 62 | def main(): 63 | parser = argparse.ArgumentParser(description='Pull latest release, update dependencies, update and validate the config files, update 3rd deps for the website.') 64 | parser.add_argument('--yes', default=False, action='store_true', help='Run all commands without asking.') 65 | args = parser.parse_args() 66 | 67 | old_hash = compute_hash_self() 68 | 69 | print('* Update repository.') 70 | keep_going(args.yes) 71 | run_command('git pull') 72 | new_hash = compute_hash_self() 73 | if old_hash != new_hash: 74 | print('Update script changed, please do "poetry run update"') 75 | sys.exit() 76 | 77 | check_poetry_version() 78 | 79 | print('* Install/update dependencies.') 80 | keep_going(args.yes) 81 | run_command('poetry install') 82 | 83 | print('* Validate configuration files.') 84 | keep_going(args.yes) 85 | run_command(f'poetry run {(Path("tools") / "validate_config_files.py").as_posix()} --check') 86 | 87 | print('* Update configuration files.') 88 | keep_going(args.yes) 89 | run_command(f'poetry run {(Path("tools") / "validate_config_files.py").as_posix()} --update') 90 | 91 | print('* Restarting') 92 | keep_going(args.yes) 93 | if platform.system() == 'Windows': 94 | print('Restarting with poetry...') 95 | run_command('poetry run stop', expect_fail=True) 96 | run_command('poetry run start', capture_output=False) 97 | print('Started.') 98 | else: 99 | service = get_config('generic', 'systemd_service_name') 100 | p = subprocess.run(["systemctl", "is-active", "--quiet", service]) 101 | try: 102 | p.check_returncode() 103 | print('Restarting with systemd...') 104 | run_command(f'sudo service {service} restart') 105 | print('done.') 106 | except subprocess.CalledProcessError: 107 | print('Restarting with poetry...') 108 | run_command('poetry run stop', expect_fail=True) 109 | run_command('poetry run start', capture_output=False) 110 | print('Started.') 111 | 112 | 113 | if __name__ == '__main__': 114 | main() 115 | -------------------------------------------------------------------------------- /cache/run_redis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | if [ -f ../../valkey/src/valkey-server ]; then 7 | ../../valkey/src/valkey-server ./cache.conf 
8 | elif [ -f ../../redis/src/redis-server ]; then 9 | ../../redis/src/redis-server ./cache.conf 10 | else 11 | echo "Warning: using system redis-server. Valkey-server or redis-server from source is recommended." >&2 12 | /usr/bin/redis-server ./cache.conf 13 | fi 14 | -------------------------------------------------------------------------------- /cache/shutdown_redis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # set -e 4 | set -x 5 | 6 | ../../redis/src/redis-cli -s ./cache.sock shutdown 7 | -------------------------------------------------------------------------------- /config/generic.json.sample: -------------------------------------------------------------------------------- 1 | { 2 | "loglevel": "INFO", 3 | "website_listen_ip": "0.0.0.0", 4 | "website_listen_port": 5005, 5 | "systemd_service_name": "bgpranking", 6 | "storage_db_hostname": "127.0.0.1", 7 | "storage_db_port": 5188, 8 | "ranking_db_hostname": "127.0.0.1", 9 | "ranking_db_port": 5189, 10 | "ipasnhistory_url": "https://ipasnhistory.circl.lu/", 11 | "_notes": { 12 | "loglevel": "Can be one of the values listed here: https://docs.python.org/3/library/logging.html#levels", 13 | "website_listen_ip": "IP Flask will listen on. Defaults to 0.0.0.0, meaning all interfaces.", 14 | "website_listen_port": "Port Flask will listen on.", 15 | "systemd_service_name": "(Optional) Name of the systemd service if your project has one.", 16 | "storage_db_hostname": "Hostname of the storage database (kvrocks)", 17 | "storage_db_port": "Port of the storage database (kvrocks)", 18 | "ranking_db_hostname": "Hostname of the ranking database (kvrocks)", 19 | "ranking_db_port": "Port of the ranking database (kvrocks)", 20 | "ipasnhistory_url": "URL of the IP ASN History service, defaults to the public one."
21 | } 22 | } 23 | -------------------------------------------------------------------------------- /config/modules/Alienvault.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://reputation.alienvault.com/reputation.generic", 3 | "vendor": "alienvault", 4 | "name": "reputation.generic", 5 | "impact": 0.01 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/BlocklistDeApache.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.blocklist.de/lists/apache.txt", 3 | "vendor": "blocklist_de", 4 | "name": "apache", 5 | "impact": 0.1 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/BlocklistDeBots.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.blocklist.de/lists/bots.txt", 3 | "vendor": "blocklist_de", 4 | "name": "bots", 5 | "impact": 3 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/BlocklistDeFTP.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.blocklist.de/lists/ftp.txt", 3 | "vendor": "blocklist_de", 4 | "name": "ftp", 5 | "impact": 3 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/BlocklistDeIMAP.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.blocklist.de/lists/imap.txt", 3 | "vendor": "blocklist_de", 4 | "name": "imap", 5 | "impact": 3 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/BlocklistDeMail.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.blocklist.de/lists/mail.txt", 3 | "vendor": "blocklist_de", 4 | "name": "mail", 5 | "impact": 0.1 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/BlocklistDeSIP.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.blocklist.de/lists/sip.txt", 3 | "vendor": "blocklist_de", 4 | "name": "sip", 5 | "impact": 3 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/BlocklistDeSSH.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.blocklist.de/lists/ssh.txt", 3 | "vendor": "blocklist_de", 4 | "name": "ssh", 5 | "impact": 3 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/BlocklistDeStrong.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.blocklist.de/lists/strongips.txt", 3 | "vendor": "blocklist_de", 4 | "name": "strong", 5 | "impact": 6 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/CIArmy.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.ciarmy.com/list/ci-badguys.txt", 3 | "vendor": "ciarmy", 4 | "name": "ip", 5 | "impact": 5 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/CleanMXMalwares.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "vendor": "cleanmx", 3 | "name": "malwares", 4 | "impact": 5 5 | } 6 | -------------------------------------------------------------------------------- /config/modules/CleanMXPhishing.json: -------------------------------------------------------------------------------- 1 | { 2 | "vendor": "cleanmx", 3 | "name": "phishing", 4 | "impact": 5 5 | } 6 | -------------------------------------------------------------------------------- /config/modules/CleanMXPortals.json: -------------------------------------------------------------------------------- 1 | { 2 | "vendor": "cleanmx", 3 | "name": "portals", 4 | "impact": 5 5 | } 6 | -------------------------------------------------------------------------------- /config/modules/CoinBlockerLists.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://zerodot1.gitlab.io/CoinBlockerLists/MiningServerIPList.txt", 3 | "vendor": "ZeroDot1", 4 | "name": "CoinBlockerLists", 5 | "impact": 3 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/DshieldDaily.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.dshield.org/feeds/daily_sources", 3 | "vendor": "dshield", 4 | "name": "daily", 5 | "impact": 0.1, 6 | "parser": ".parsers.dshield" 7 | } 8 | -------------------------------------------------------------------------------- /config/modules/DshieldTopIPs.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.dshield.org/feeds/topips.txt", 3 | "vendor": "dshield", 4 | "name": "topips", 5 | "impact": 1 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/EmergingThreatsCompromized.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://rules.emergingthreats.net/blockrules/compromised-ips.txt", 3 | "vendor": "emergingthreats", 4 | "name": "compromized", 5 | "impact": 5 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/FeodotrackerIPBlockList.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://feodotracker.abuse.ch/downloads/ipblocklist.txt", 3 | "vendor": "feodotracker", 4 | "name": "ipblocklist", 5 | "impact": 5, 6 | "parser": ".parsers.abusech_feodo" 7 | } 8 | -------------------------------------------------------------------------------- /config/modules/Malc0de.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://malc0de.com/bl/IP_Blacklist.txt", 3 | "vendor": "malc0de", 4 | "name": "blocklist", 5 | "impact": 5, 6 | "parser": ".parsers.malc0de" 7 | } 8 | -------------------------------------------------------------------------------- /config/modules/MalwareDomainListIP.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.malwaredomainlist.com/hostslist/ip.txt", 3 | "vendor": "malwaredomainlist", 4 | "name": "ip", 5 | "impact": 5 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/SSLBlacklist.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": 
"https://sslbl.abuse.ch/blacklist/sslipblacklist.txt", 3 | "vendor": "abuse.ch", 4 | "name": "sslblacklist", 5 | "impact": 7, 6 | "parser": ".parsers.abusech" 7 | } 8 | -------------------------------------------------------------------------------- /config/modules/ThreatFoxIOC.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://threatfox.abuse.ch/export/json/ip-port/recent/", 3 | "vendor": "abuse.ch", 4 | "name": "threatfox", 5 | "impact": 5, 6 | "parser": ".parsers.abusech_threatfox" 7 | } 8 | -------------------------------------------------------------------------------- /config/modules/greensnow.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://blocklist.greensnow.co/greensnow.txt", 3 | "vendor": "greensnow", 4 | "name": "blocklist", 5 | "impact": 3 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/jq_all_the_things.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | # Seeds sponge, from moreutils 7 | 8 | for dir in ./*.json 9 | do 10 | cat ${dir} | jq . | sponge ${dir} 11 | done 12 | -------------------------------------------------------------------------------- /config/modules/module.schema: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/schema#", 3 | "title": "BGP Ranking NG module", 4 | "id": "https://www.github.com/CIRCL/bgpranking-ng/modules.json", 5 | "type": "object", 6 | "additionalProperties": false, 7 | "properties": { 8 | "url": { 9 | "type": "string" 10 | }, 11 | "vendor": { 12 | "type": "string" 13 | }, 14 | "name": { 15 | "type": "string" 16 | }, 17 | "impact": { 18 | "type": "number" 19 | }, 20 | "parser": { 21 | "type": "string" 22 | }, 23 | "tags": { 24 | "type": "array", 25 | "uniqueItems": true, 26 | "items": { 27 | "type": "string" 28 | } 29 | } 30 | }, 31 | "required": [ 32 | "name", 33 | "vendor", 34 | "impact" 35 | ] 36 | } 37 | -------------------------------------------------------------------------------- /config/modules/pop3gropers.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://home.nuug.no/~peter/pop3gropers.txt", 3 | "vendor": "bsdly", 4 | "name": "pop3gropers", 5 | "impact": 3 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/shadowserver_only.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | find . -maxdepth 1 -type f -name "*.json" ! -iname "shadowserver*.json" -delete 7 | -------------------------------------------------------------------------------- /config/modules/validate_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | # remove the exec flag on the json files 7 | find -name "*.json" -exec chmod -x "{}" \; 8 | 9 | diffs=`git status --porcelain | wc -l` 10 | 11 | if ! [ $diffs -eq 0 ]; then 12 | echo "Please make sure you run remove the executable flag on the json files before commiting: find -name "*.json" -exec chmod -x \"{}\" \\;" 13 | # exit 1 14 | fi 15 | 16 | ./jq_all_the_things.sh 17 | 18 | diffs=`git status --porcelain | wc -l` 19 | 20 | if ! 
[ $diffs -eq 0 ]; then 21 | echo "Please make sure you run ./jq_all_the_things.sh before committing." 22 | # exit 1 23 | fi 24 | 25 | for dir in ./*.json 26 | do 27 | echo -n "${dir}: " 28 | jsonschema -i ${dir} module.schema 29 | echo '' 30 | done 31 | -------------------------------------------------------------------------------- /config/shadowserver.json.sample: -------------------------------------------------------------------------------- 1 | { 2 | "user": "[USERNAME]", 3 | "password": "[PASSWORD]" 4 | } 5 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "bgpranking" 3 | version = "2.0" 4 | description = "BGP Ranking is software to rank AS numbers based on their malicious activities." 5 | authors = ["Raphaël Vinot <raphael.vinot@circl.lu>"] 6 | license = "AGPLv3" 7 | 8 | [tool.poetry.scripts] 9 | start = "bin.start:main" 10 | stop = "bin.stop:main" 11 | update = "bin.update:main" 12 | shutdown = "bin.shutdown:main" 13 | run_backend = "bin.run_backend:main" 14 | start_website = "bin.start_website:main" 15 | 16 | archiver = "bin.archiver:main" 17 | asn_descriptions = "bin.asn_descriptions:main" 18 | dbinsert = "bin.dbinsert:main" 19 | fetcher = "bin.fetcher:main" 20 | parser = "bin.parser:main" 21 | ranking = "bin.ranking:main" 22 | sanitizer = "bin.sanitizer:main" 23 | ssfetcher = "bin.ssfetcher:main" 24 | 25 | 26 | [tool.poetry.dependencies] 27 | python = "^3.8.1" 28 | redis = {version = "^5.0.7", extras = ["hiredis"]} 29 | flask-restx = "^1.3.0" 30 | gunicorn = "^22.0.0" 31 | python-dateutil = "^2.9.0.post0" 32 | pyipasnhistory = "^2.1.2" 33 | pycountry = "^23.12.11" 34 | beautifulsoup4 = "^4.12.3" 35 | aiohttp = "^3.9.5" 36 | Bootstrap-Flask = "^2.4.0" 37 | pid = "^3.0.4" 38 | pybgpranking2 = "^2.0.1" 39 | 40 | [tool.poetry.dev-dependencies] 41 | ipython = [ 42 | {version = "<8.13.0", python = "<3.9"}, 43 | {version = "^8.18.0", python = ">=3.9"}, 44 | {version = "^8.24.0", python = ">=3.10"} 45 | ] 46 | mypy = "^1.10.1" 47 | types-setuptools = "^70.2.0.20240704" 48 | types-redis = "^4.6.0.20240425" 49 | types-requests = "^2.32.0.20240622" 50 | types-python-dateutil = "^2.9.0.20240316" 51 | 52 | [build-system] 53 | requires = ["poetry-core"] 54 | build-backend = "poetry.core.masonry.api" 55 | 56 | [tool.mypy] 57 | python_version = 3.8 58 | check_untyped_defs = true 59 | ignore_errors = false 60 | ignore_missing_imports = false 61 | strict_optional = true 62 | no_implicit_optional = true 63 | warn_unused_ignores = true 64 | warn_redundant_casts = true 65 | warn_unused_configs = true 66 | warn_unreachable = true 67 | 68 | show_error_context = true 69 | pretty = true 70 | -------------------------------------------------------------------------------- /ranking/run_kvrocks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | ../../kvrocks/build/kvrocks -c kvrocks.conf 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from setuptools import setup 4 | 5 | 6 | setup( 7 | name='bgpranking', 8 | version='0.1', 9 | author='Raphaël Vinot', 10 | author_email='raphael.vinot@circl.lu', 11 | maintainer='Raphaël Vinot', 12 | url='https://github.com/D4-project/BGP-Ranking', 13 | description='BGP
Ranking, the new one.', 14 | packages=['bgpranking'], 15 | scripts=['bin/archiver.py', 'bin/dbinsert.py', 'bin/fetcher.py', 'bin/parser.py', 16 | 'bin/sanitizer.py', 'bin/run_backend.py', 'bin/ssfetcher.py', 'bin/start_website.py', 17 | 'bin/monitor.py', 'bin/ranking.py', 'bin/asn_descriptions.py', 18 | 'bin/manual_ranking.py', 19 | 'bin/start.py', 'bin/stop.py', 'bin/shutdown.py'], 20 | classifiers=[ 21 | 'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)', 22 | 'Development Status :: 3 - Alpha', 23 | 'Environment :: Console', 24 | 'Operating System :: POSIX :: Linux', 25 | 'Intended Audience :: Science/Research', 26 | 'Intended Audience :: Telecommunications Industry', 27 | 'Intended Audience :: Information Technology', 28 | 'Programming Language :: Python :: 3', 29 | 'Topic :: Security', 30 | 'Topic :: Internet', 31 | ], 32 | include_package_data=True, 33 | package_data={'config': ['config/*/*.conf', 34 | 'config/modules/*.json']}, 35 | ) 36 | -------------------------------------------------------------------------------- /storage/run_kvrocks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | ../../kvrocks/build/kvrocks -c kvrocks.conf 7 | -------------------------------------------------------------------------------- /temp/run_redis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | if [ -f ../../valkey/src/valkey-server ]; then 7 | ../../valkey/src/valkey-server ./intake.conf 8 | ../../valkey/src/valkey-server ./prepare.conf 9 | elif [ -f ../../redis/src/redis-server ]; then 10 | ../../redis/src/redis-server ./intake.conf 11 | ../../redis/src/redis-server ./prepare.conf 12 | else 13 | echo "Warning: using system redis-server. Valkey-server or redis-server from source is recommended." 
>&2 14 | /usr/bin/redis-server ./intake.conf 15 | /usr/bin/redis-server ./prepare.conf 16 | fi 17 | -------------------------------------------------------------------------------- /temp/shutdown_redis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # set -e 4 | set -x 5 | 6 | ../../redis/src/redis-cli -s ./intake.sock shutdown 7 | ../../redis/src/redis-cli -s ./prepare.sock shutdown 8 | -------------------------------------------------------------------------------- /tools/3rdparty.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import requests 5 | 6 | from bgpranking.default import get_homedir 7 | 8 | d3js_version = '7' 9 | bootstrap_select_version = "1.14.0-beta3" 10 | jquery_version = "3.7.1" 11 | 12 | if __name__ == '__main__': 13 | dest_dir = get_homedir() / 'website' / 'web' / 'static' 14 | 15 | d3 = requests.get(f'https://d3js.org/d3.v{d3js_version}.min.js') 16 | with (dest_dir / f'd3.v{d3js_version}.min.js').open('wb') as f: 17 | f.write(d3.content) 18 | print(f'Downloaded d3js v{d3js_version}.') 19 | 20 | bootstrap_select_js = requests.get(f'https://cdn.jsdelivr.net/npm/bootstrap-select@{bootstrap_select_version}/dist/js/bootstrap-select.min.js') 21 | with (dest_dir / 'bootstrap-select.min.js').open('wb') as f: 22 | f.write(bootstrap_select_js.content) 23 | print(f'Downloaded bootstrap_select js v{bootstrap_select_version}.') 24 | 25 | bootstrap_select_css = requests.get(f'https://cdn.jsdelivr.net/npm/bootstrap-select@{bootstrap_select_version}/dist/css/bootstrap-select.min.css') 26 | with (dest_dir / 'bootstrap-select.min.css').open('wb') as f: 27 | f.write(bootstrap_select_css.content) 28 | print(f'Downloaded bootstrap_select css v{bootstrap_select_version}.') 29 | 30 | jquery = requests.get(f'https://code.jquery.com/jquery-{jquery_version}.min.js') 31 | with (dest_dir / 'jquery.min.js').open('wb') as f: 32 | f.write(jquery.content) 33 | print(f'Downloaded jquery v{jquery_version}.') 34 | 35 | print('All 3rd party modules for the website were downloaded.') 36 | -------------------------------------------------------------------------------- /tools/clear_prepare_db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import uuid 4 | 5 | from redis import Redis 6 | from bgpranking.default import get_socket_path 7 | 8 | redis_sanitized = Redis(unix_socket_path=get_socket_path('prepare'), db=0, decode_responses=True) 9 | to_delete = [] 10 | for name in redis_sanitized.scan_iter(_type='HASH', count=100): 11 | try: 12 | uuid.UUID(name) 13 | except Exception as e: 14 | continue 15 | if not redis_sanitized.sismember('to_insert', name): 16 | to_delete.append(name) 17 | if len(to_delete) >= 100000: 18 | redis_sanitized.delete(*to_delete) 19 | to_delete = [] 20 | if to_delete: 21 | redis_sanitized.delete(*to_delete) 22 | -------------------------------------------------------------------------------- /tools/migrate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from datetime import datetime 5 | from typing import Set 6 | 7 | from redis import Redis 8 | 9 | redis_src = Redis(unix_socket_path='../storage/storage.sock', db=0) 10 | redis_dst = Redis('127.0.0.1', 5188) 11 | 12 | chunk_size = 100000 13 | 14 | 15 | def process_chunk(src: Redis, dst: Redis, keys: Set[str]): 
16 | src_pipeline = src.pipeline() 17 | [src_pipeline.type(key) for key in keys] 18 | to_process = {key: key_type for key, key_type in zip(keys, src_pipeline.execute())} 19 | 20 | src_pipeline = src.pipeline() 21 | for key, key_type in to_process.items(): 22 | if key_type == b"string": 23 | src_pipeline.get(key) 24 | elif key_type == b"list": 25 | raise Exception(f'Lists should not be used: {key}.') 26 | elif key_type == b"set": 27 | src_pipeline.smembers(key) 28 | elif key_type == b"zset": 29 | src_pipeline.zrangebyscore(key, '-Inf', '+Inf', withscores=True) 30 | elif key_type == b"hash": 31 | src_pipeline.hgetall(key) 32 | else: 33 | raise Exception(f'{key_type} not supported {key}.') 34 | 35 | dest_pipeline = dst.pipeline() 36 | for key, content in zip(to_process.keys(), src_pipeline.execute()): 37 | if to_process[key] == b"string": 38 | dest_pipeline.set(key, content) 39 | elif to_process[key] == b"set": 40 | dest_pipeline.sadd(key, *content) 41 | elif to_process[key] == b"zset": 42 | dest_pipeline.zadd(key, {value: rank for value, rank in content}) 43 | elif to_process[key] == b"hash": 44 | dest_pipeline.hmset(key, content) 45 | 46 | dest_pipeline.execute() 47 | 48 | 49 | def migrate(src: Redis, dst: Redis): 50 | keys = set() 51 | pos = 0 52 | for key in src.scan_iter(count=chunk_size, match='2017*'): 53 | keys.add(key) 54 | 55 | if len(keys) == chunk_size: 56 | process_chunk(src, dst, keys) 57 | pos += len(keys) 58 | print(f'{datetime.now()} - {pos} keys done.') 59 | keys = set() 60 | 61 | # migrate remaining keys 62 | process_chunk(src, dst, keys) 63 | pos += len(keys) 64 | print(f'{datetime.now()} - {pos} keys done.') 65 | 66 | 67 | if __name__ == '__main__': 68 | migrate(redis_src, redis_dst) 69 | -------------------------------------------------------------------------------- /tools/monitoring.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import json 4 | 5 | from redis import Redis 6 | from bgpranking.default import get_socket_path 7 | from bgpranking.helpers import get_ipasn 8 | 9 | 10 | class Monitor(): 11 | 12 | def __init__(self): 13 | self.intake = Redis(unix_socket_path=get_socket_path('intake'), db=0, decode_responses=True) 14 | self.sanitize = Redis(unix_socket_path=get_socket_path('prepare'), db=0, decode_responses=True) 15 | self.cache = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) 16 | self.ipasn = get_ipasn() 17 | 18 | def get_values(self): 19 | ips_in_intake = self.intake.scard('intake') 20 | ready_to_insert = self.sanitize.scard('to_insert') 21 | ipasn_meta = self.ipasn.meta() 22 | if len(ipasn_meta['cached_dates']['caida']['v4']['cached']) > 15: 23 | ipasn_meta['cached_dates']['caida']['v4']['cached'] = 'Too many entries' 24 | if len(ipasn_meta['cached_dates']['caida']['v6']['cached']) > 15: 25 | ipasn_meta['cached_dates']['caida']['v6']['cached'] = 'Too many entries' 26 | return json.dumps({'Non-parsed IPs': ips_in_intake, 'Parsed IPs': ready_to_insert, 27 | 'running': self.cache.zrangebyscore('running', '-inf', '+inf', withscores=True), 28 | 'IPASN History': ipasn_meta}, 29 | indent=2) 30 | 31 | 32 | if __name__ == '__main__': 33 | m = Monitor() 34 | print(m.get_values()) 35 | -------------------------------------------------------------------------------- /tools/validate_config_files.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | 
import json 5 | import logging 6 | import argparse 7 | 8 | from bgpranking.default import get_homedir 9 | 10 | 11 | def validate_generic_config_file(): 12 | user_config = get_homedir() / 'config' / 'generic.json' 13 | with user_config.open() as f: 14 | generic_config = json.load(f) 15 | with (get_homedir() / 'config' / 'generic.json.sample').open() as f: 16 | generic_config_sample = json.load(f) 17 | # Check documentation 18 | for key in generic_config_sample.keys(): 19 | if key == '_notes': 20 | continue 21 | if key not in generic_config_sample['_notes']: 22 | raise Exception(f'###### - Documentation missing for {key}') 23 | 24 | # Check all entries in the sample files are in the user file, and they have the same type 25 | for key in generic_config_sample.keys(): 26 | if key == '_notes': 27 | continue 28 | if generic_config.get(key) is None: 29 | logger.warning(f'Entry missing in user config file: {key}. Will default to: {generic_config_sample[key]}') 30 | continue 31 | if not isinstance(generic_config[key], type(generic_config_sample[key])): 32 | raise Exception(f'Invalid type for {key}. Got: {type(generic_config[key])} ({generic_config[key]}), expected: {type(generic_config_sample[key])} ({generic_config_sample[key]})') 33 | 34 | if isinstance(generic_config[key], dict): 35 | # Check entries 36 | for sub_key in generic_config_sample[key].keys(): 37 | if sub_key not in generic_config[key]: 38 | raise Exception(f'{sub_key} is missing in generic_config[key]. Default from sample file: {generic_config_sample[key][sub_key]}') 39 | if not isinstance(generic_config[key][sub_key], type(generic_config_sample[key][sub_key])): 40 | raise Exception(f'Invalid type for {sub_key} in {key}. Got: {type(generic_config[key][sub_key])} ({generic_config[key][sub_key]}), expected: {type(generic_config_sample[key][sub_key])} ({generic_config_sample[key][sub_key]})') 41 | 42 | # Make sure the user config file doesn't have entries missing in the sample config 43 | for key in generic_config.keys(): 44 | if key not in generic_config_sample: 45 | raise Exception(f'{key} is missing in the sample config file. 
You need to compare {user_config} with {user_config}.sample.') 46 | 47 | return True 48 | 49 | 50 | def update_user_configs(): 51 | for file_name in ['generic']: 52 | with (get_homedir() / 'config' / f'{file_name}.json').open() as f: 53 | try: 54 | generic_config = json.load(f) 55 | except Exception: 56 | generic_config = {} 57 | with (get_homedir() / 'config' / f'{file_name}.json.sample').open() as f: 58 | generic_config_sample = json.load(f) 59 | 60 | has_new_entry = False 61 | for key in generic_config_sample.keys(): 62 | if key == '_notes': 63 | continue 64 | if generic_config.get(key) is None: 65 | print(f'{key} was missing in {file_name}, adding it.') 66 | print(f"Description: {generic_config_sample['_notes'][key]}") 67 | generic_config[key] = generic_config_sample[key] 68 | has_new_entry = True 69 | elif isinstance(generic_config[key], dict): 70 | for sub_key in generic_config_sample[key].keys(): 71 | if sub_key not in generic_config[key]: 72 | print(f'{sub_key} was missing in {key} from {file_name}, adding it.') 73 | generic_config[key][sub_key] = generic_config_sample[key][sub_key] 74 | has_new_entry = True 75 | if has_new_entry: 76 | with (get_homedir() / 'config' / f'{file_name}.json').open('w') as fw: 77 | json.dump(generic_config, fw, indent=2, sort_keys=True) 78 | return has_new_entry 79 | 80 | 81 | if __name__ == '__main__': 82 | logger = logging.getLogger('Config validator') 83 | parser = argparse.ArgumentParser(description='Check the config files.') 84 | parser.add_argument('--check', default=False, action='store_true', help='Check if the sample config and the user config are in-line') 85 | parser.add_argument('--update', default=False, action='store_true', help='Update the user config with the entries from the sample config if entries are missing') 86 | args = parser.parse_args() 87 | 88 | if args.check: 89 | if validate_generic_config_file(): 90 | print(f"The entries in {get_homedir() / 'config' / 'generic.json'} are valid.") 91 | 92 | if args.update: 93 | if not update_user_configs(): 94 | print(f"No updates needed in {get_homedir() / 'config' / 'generic.json'}.") 95 | -------------------------------------------------------------------------------- /website/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D4-project/BGP-Ranking/5f4e051f940b1c79ec5c6b432db1c7b2bdcbc781/website/__init__.py -------------------------------------------------------------------------------- /website/readme.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | Run: 4 | 5 | ```bash 6 | start_website.py 7 | ``` 8 | 9 | In debug mode: 10 | 11 | ```bash 12 | export FLASK_APP=${BGPRANKING_HOME}/website/web/__init__.py 13 | flask run -h 0.0.0.0 -p 5005 14 | ``` 15 | 16 | -------------------------------------------------------------------------------- /website/web/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import pkg_resources 5 | 6 | from collections import defaultdict 7 | from datetime import date, timedelta 8 | from typing import Dict, Any, Tuple, List, Optional 9 | 10 | from flask import Flask, render_template, request, session, redirect, url_for 11 | from flask_bootstrap import Bootstrap5 # type: ignore 12 | from flask_restx import Api # type: ignore 13 | 14 | from bgpranking.bgpranking import BGPRanking 15 | from bgpranking.helpers import get_ipasn 16 | 17
from .genericapi import api as generic_api
from .helpers import get_secret_key, load_session, get_country_codes
from .proxied import ReverseProxied

app = Flask(__name__)

app.wsgi_app = ReverseProxied(app.wsgi_app)  # type: ignore

app.config['SECRET_KEY'] = get_secret_key()

Bootstrap5(app)
app.config['BOOTSTRAP_SERVE_LOCAL'] = True

bgpranking = BGPRanking()


# ############# Web UI #############

@app.route('/', methods=['GET', 'POST'])
def index():
    if request.method == 'HEAD':
        # Just returns ack if the webserver is running
        return 'Ack'
    load_session()
    sources = bgpranking.get_sources(date=session['date'])['response']
    session.pop('asn', None)
    session.pop('country', None)
    ranks = bgpranking.asns_global_ranking(limit=100, **session)['response']
    r = [(asn, rank, bgpranking.get_asn_descriptions(int(asn))['response']) for asn, rank in ranks]
    return render_template('index.html', ranks=r, sources=sources, countries=get_country_codes(), **session)


@app.route('/asn', methods=['GET', 'POST'])
def asn_details():
    load_session()
    if 'asn' not in session:
        return redirect(url_for('index'))
    asn_descriptions = bgpranking.get_asn_descriptions(asn=session['asn'], all_descriptions=True)['response']
    sources = bgpranking.get_sources(date=session['date'])['response']
    prefix = session.pop('prefix', None)
    ranks = bgpranking.asn_details(**session)['response']
    if prefix:
        prefix_ips = bgpranking.get_prefix_ips(prefix=prefix, **session)['response']
        prefix_ips = [(ip, sorted(sources)) for ip, sources in prefix_ips.items()]
        prefix_ips.sort(key=lambda entry: len(entry[1]), reverse=True)
    else:
        prefix_ips = []
    return render_template('asn.html', sources=sources, ranks=ranks,
                           prefix_ips=prefix_ips, asn_descriptions=asn_descriptions, **session)


@app.route('/country', methods=['GET', 'POST'])
def country():
    load_session()
    sources = bgpranking.get_sources(date=session['date'])['response']
    return render_template('country.html', sources=sources, countries=get_country_codes(), **session)


@app.route('/country_history_callback', methods=['GET', 'POST'])
def country_history_callback():
    history_data: Dict[str, Tuple[str, str, List[Any]]]
    history_data = request.get_json(force=True)
    to_display = []
    mapping: Dict[str, Any] = defaultdict(dict)
    dates = []
    all_asns = set([])
    for country, foo in history_data.items():
        for d, r_sum, details in foo:
            dates.append(d)
            for detail in details:
                asn, r = detail
                all_asns.add(asn)
                mapping[asn][d] = r

        to_display_temp = [[country] + dates]
        for a in sorted(list(all_asns), key=int):
            line = [a]
            for d in dates:
                if mapping[a].get(d) is not None:
                    line.append(round(mapping[a].get(d), 3))
                else:
                    line.append('N/A')
            to_display_temp.append(line)
        to_display.append(to_display_temp)
    return render_template('country_asn_map.html', to_display=to_display)


@app.route('/ipasn', methods=['GET', 'POST'])
def ipasn():
    d: Optional[Dict] = None
    if request.method == 'POST':
        d = request.form
    elif request.method == 'GET':
        d = request.args

    if not d or 'ip' not in d:
        return render_template('ipasn.html')
    else:
        if isinstance(d['ip'], list):
            ip = d['ip'][0]
        else:
            ip = d['ip']
    ipasn = get_ipasn()
    response = ipasn.query(first=(date.today() - timedelta(days=60)).isoformat(),
                           aggregate=True, ip=ip)
    for r in response['response']:
        r['asn_descriptions'] = []
        asn_descriptions = bgpranking.get_asn_descriptions(asn=r['asn'], all_descriptions=True)['response']
        for timestamp in sorted(asn_descriptions.keys()):
            if r['first_seen'] <= timestamp <= r['last_seen']:
                r['asn_descriptions'].append(asn_descriptions[timestamp])

        if not r['asn_descriptions'] and timestamp <= r['last_seen']:
            r['asn_descriptions'].append(asn_descriptions[timestamp])

    return render_template('ipasn.html', ipasn_details=response['response'],
                           **response['meta'])


# ############# Web UI #############

# Query API

api = Api(app, title='BGP Ranking API',
          description='API to query BGP Ranking.',
          doc='/doc/',
          version=pkg_resources.get_distribution('bgpranking').version)

api.add_namespace(generic_api)
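The module above builds the Flask application: it wraps the WSGI app in ReverseProxied, loads the secret key, enables Bootstrap, and registers both the UI routes and the flask-restx API under /doc/. A minimal sketch for serving it locally with Flask's development server follows; it assumes the bgpranking package is installed and its backend stores are already running, and the host, port and debug flag are illustrative choices rather than the project's own launcher (bin/start_website.py in this tree presumably covers that).

# Illustrative local run only, assuming the bgpranking backend (Redis/kvrocks)
# and configuration are in place; host and port are arbitrary.
from website.web import app

if __name__ == '__main__':
    app.run(host='127.0.0.1', port=5005, debug=True)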
--------------------------------------------------------------------------------
/website/web/genericapi.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from typing import Dict, Any, Union
from urllib.parse import urljoin

import requests

from flask import request, session
from flask_restx import Namespace, Resource, fields  # type: ignore

from bgpranking.default import get_config
from bgpranking.bgpranking import BGPRanking

from .helpers import load_session

api = Namespace('BGP Ranking API', description='API to query BGP Ranking.', path='/')

bgpranking: BGPRanking = BGPRanking()


@api.route('/redis_up')
@api.doc(description='Check if redis is up and running')
class RedisUp(Resource):

    def get(self):
        return bgpranking.check_redis_up()


# Transparent proxy to the IPASN History instance configured as
# 'ipasnhistory_url' in the generic config.
@api.route('/ipasn_history/')
@api.route('/ipasn_history/<path:path>')
class IPASNProxy(Resource):

    def _proxy_url(self):
        if request.full_path[-1] == '?':
            full_path = request.full_path[:-1]
        else:
            full_path = request.full_path
        path_for_ipasnhistory = full_path.replace('/ipasn_history/', '')
        if path_for_ipasnhistory.startswith('?'):
            path_for_ipasnhistory = path_for_ipasnhistory.replace('?', 'ip?')
        if not path_for_ipasnhistory:
            path_for_ipasnhistory = 'ip'
        return urljoin(get_config('generic', 'ipasnhistory_url'), path_for_ipasnhistory)

    def get(self, path=''):
        url = self._proxy_url()
        return requests.get(url).json()

    def post(self, path=''):
        url = self._proxy_url()
        return requests.post(url, data=request.data).json()


# TODO: Add other parameters for asn_rank
asn_query_fields = api.model('ASNQueryFields', {
    'asn': fields.String(description='The Autonomous System Number to search', required=True)
})


@api.route('/json/asn')
class ASNRank(Resource):

    @api.doc(body=asn_query_fields)
    def post(self):
        # TODO
        # * Filter on date => if only returning one descr, return the description at that date
        query: Dict[str, Any] = request.get_json(force=True)
        to_return: Dict[str, Union[str, Dict[str, Any]]] = {'meta': query, 'response': {}}
        if 'asn' not in query:
            to_return['error'] = f'You need to pass an asn - {query}'
            return to_return

        asn_description_query = {'asn': query['asn']}
        responses = bgpranking.get_asn_descriptions(**asn_description_query)['response']
        to_return['response']['asn_description'] = responses  # type: ignore

        asn_rank_query = {'asn': query['asn']}
        if 'date' in query:
            asn_rank_query['date'] = query['date']
        if 'source' in query:
            asn_rank_query['source'] = query['source']
        else:
            asn_rank_query['with_position'] = True
        if 'ipversion' in query:
            asn_rank_query['ipversion'] = query['ipversion']

        to_return['response']['ranking'] = bgpranking.asn_rank(**asn_rank_query)['response']  # type: ignore
        return to_return


asn_descr_fields = api.model('ASNDescriptionsFields', {
    'asn': fields.String(description='The Autonomous System Number to search', required=True),
    'all_descriptions': fields.Boolean(description='If true, returns all the descriptions instead of only the last one', default=False)
})


@api.route('/json/asn_descriptions')
class ASNDescription(Resource):

    @api.doc(body=asn_descr_fields)
    def post(self):
        query: Dict = request.get_json(force=True)
        to_return: Dict[str, Union[str, Dict[str, Any]]] = {'meta': query, 'response': {}}
        if 'asn' not in query:
            to_return['error'] = f'You need to pass an asn - {query}'
            return to_return

        to_return['response']['asn_descriptions'] = bgpranking.get_asn_descriptions(**query)['response']  # type: ignore
        return to_return


# TODO: Add other parameters for get_asn_history
asn_history_fields = api.model('ASNHistoryFields', {
    'asn': fields.String(description='The Autonomous System Number to search', required=True)
})


@api.route('/json/asn_history')
class ASNHistory(Resource):

    def get(self):
        load_session()
        if 'asn' in session:
            return bgpranking.get_asn_history(**session)

    @api.doc(body=asn_history_fields)
    def post(self):
        query: Dict = request.get_json(force=True)
        to_return: Dict[str, Union[str, Dict[str, Any]]] = {'meta': query, 'response': {}}
        if 'asn' not in query:
            to_return['error'] = f'You need to pass an asn - {query}'
            return to_return

        to_return['response']['asn_history'] = bgpranking.get_asn_history(**query)['response']  # type: ignore
        return to_return


# TODO: Add other parameters for country_history
country_history_fields = api.model('CountryHistoryFields', {
    'country': fields.String(description='The Country Code', required=True)
})


@api.route('/json/country_history')
class CountryHistory(Resource):

    def get(self):
        load_session()
        return bgpranking.country_history(**session)

    @api.doc(body=country_history_fields)
    def post(self):
        query: Dict = request.get_json(force=True)
        to_return: Dict[str, Union[str, Dict[str, Any]]] = {'meta': query, 'response': {}}
        to_return['response']['country_history'] = bgpranking.country_history(**query)['response']  # type: ignore
        return to_return


# TODO: Add other parameters for asns_global_ranking
asns_global_ranking_fields = api.model('ASNsGlobalRankingFields', {
    'date': fields.String(description='The date')
})


@api.route('/json/asns_global_ranking')
class ASNsGlobalRanking(Resource):

    @api.doc(body=asns_global_ranking_fields)
    def post(self):
        query: Dict = request.get_json(force=True)
        to_return: Dict[str, Union[str, Dict[str, Any]]] = {'meta': query, 'response': {}}
        to_return['response'] = bgpranking.asns_global_ranking(**query)['response']
        return to_return
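The namespace above exposes the JSON endpoints (/json/asn, /json/asn_descriptions, /json/asn_history, /json/country_history, /json/asns_global_ranking) and proxies /ipasn_history/ to the configured IPASN History instance. A hedged client-side sketch querying /json/asn with requests follows; the base URL, ASN and date are placeholders, while the accepted fields (asn, date, source, ipversion) mirror ASNRank.post() above.

# Placeholder base URL, ASN and date; adjust to a running instance.
import requests

reply = requests.post('http://127.0.0.1:5005/json/asn',
                      json={'asn': '65536', 'date': '2022-03-01'}).json()
print(reply['response']['asn_description'])
print(reply['response']['ranking'])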
--------------------------------------------------------------------------------
/website/web/helpers.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os

from datetime import date, timedelta
from functools import lru_cache
from pathlib import Path

import pycountry

from flask import request, session

from bgpranking.default import get_homedir


def src_request_ip(request) -> str:
    # NOTE: X-Real-IP is the IP passed by the reverse proxy in the headers.
    real_ip = request.headers.get('X-Real-IP')
    if not real_ip:
        real_ip = request.remote_addr
    return real_ip


@lru_cache(64)
def get_secret_key() -> bytes:
    secret_file_path: Path = get_homedir() / 'secret_key'
    if not secret_file_path.exists() or secret_file_path.stat().st_size < 64:
        if not secret_file_path.exists() or secret_file_path.stat().st_size < 64:
            with secret_file_path.open('wb') as f:
                f.write(os.urandom(64))
    with secret_file_path.open('rb') as f:
        return f.read()


def load_session():
    if request.method == 'POST':
        d = request.form
    elif request.method == 'GET':
        d = request.args  # type: ignore

    for key in d:
        if '_all' in d.getlist(key):
            session.pop(key, None)
        else:
            values = [v for v in d.getlist(key) if v]
            if values:
                if len(values) == 1:
                    session[key] = values[0]
                else:
                    session[key] = values

    # Edge cases
    if 'asn' in session:
        session.pop('country', None)
    elif 'country' in session:
        session.pop('asn', None)
    if 'date' not in session:
        session['date'] = (date.today() - timedelta(days=1)).isoformat()


def get_country_codes():
    for c in pycountry.countries:
        yield c.alpha_2, c.name
--------------------------------------------------------------------------------
/website/web/proxied.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from typing import Any, MutableMapping


class ReverseProxied():
    def __init__(self, app: Any) -> None:
        self.app = app

    def __call__(self, environ: MutableMapping[str, Any], start_response: Any) -> Any:
        scheme = environ.get('HTTP_X_FORWARDED_PROTO')
        if not scheme:
            scheme = environ.get('HTTP_X_SCHEME')

        if scheme:
            environ['wsgi.url_scheme'] = scheme
        return self.app(environ, start_response)
--------------------------------------------------------------------------------
/website/web/static/forkme_right_darkblue_121621.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/D4-project/BGP-Ranking/5f4e051f940b1c79ec5c6b432db1c7b2bdcbc781/website/web/static/forkme_right_darkblue_121621.png
--------------------------------------------------------------------------------
/website/web/static/linegraph.css:
--------------------------------------------------------------------------------
1 | 2 | body { font: 12px Arial;} 3 |
4 | path { 5 | stroke: steelblue; 6 | stroke-width: 2; 7 | fill: none; 8 | } 9 | 10 | .axis path, 11 | .axis line { 12 | fill: none; 13 | stroke: grey; 14 | stroke-width: 1; 15 | shape-rendering: crispEdges; 16 | } 17 | -------------------------------------------------------------------------------- /website/web/static/linegraph.js: -------------------------------------------------------------------------------- 1 | function linegraph(call_path) { 2 | var canvas = document.querySelector("canvas"), 3 | context = canvas.getContext("2d"); 4 | 5 | // set the dimensions and margins of the graph 6 | var margin = {top: 20, right: 20, bottom: 30, left: 50}, 7 | width = canvas.width - margin.left - margin.right, 8 | height = canvas.height - margin.top - margin.bottom; 9 | 10 | // parse the date / time 11 | var parseTime = d3.timeParse("%Y-%m-%d"); 12 | 13 | // set the ranges 14 | var x = d3.scaleTime().range([0, width]); 15 | var y = d3.scaleLinear().range([height, 0]); 16 | 17 | // define the line 18 | var line = d3.line() 19 | .x(function(d) { return x(parseTime(d[0])); }) 20 | .y(function(d) { return y(d[1]); }) 21 | .curve(d3.curveStep) 22 | .context(context); 23 | 24 | context.translate(margin.left, margin.top); 25 | 26 | // Get the data 27 | d3.json(call_path, {credentials: 'same-origin'}).then(function(data) { 28 | x.domain(d3.extent(data.response, function(d) { return parseTime(d[0]); })); 29 | y.domain(d3.extent(data.response, function(d) { return d[1]; })); 30 | 31 | xAxis(); 32 | yAxis(); 33 | 34 | context.beginPath(); 35 | line(data.response); 36 | context.lineWidth = 1.5; 37 | context.strokeStyle = "steelblue"; 38 | context.stroke(); 39 | }); 40 | 41 | function xAxis() { 42 | var tickCount = 10, 43 | tickSize = .1, 44 | ticks = x.ticks(tickCount), 45 | tickFormat = x.tickFormat(); 46 | 47 | context.beginPath(); 48 | ticks.forEach(function(d) { 49 | context.moveTo(x(d), height); 50 | context.lineTo(x(d), height + tickSize); 51 | }); 52 | context.strokeStyle = "black"; 53 | context.stroke(); 54 | 55 | context.textAlign = "center"; 56 | context.textBaseline = "top"; 57 | ticks.forEach(function(d) { 58 | context.fillText(tickFormat(d), x(d), height + tickSize); 59 | }); 60 | } 61 | 62 | function yAxis() { 63 | var tickCount = 20, 64 | tickSize = 1, 65 | tickPadding = 1, 66 | ticks = y.ticks(tickCount), 67 | tickFormat = y.tickFormat(tickCount); 68 | 69 | context.beginPath(); 70 | ticks.forEach(function(d) { 71 | context.moveTo(0, y(d)); 72 | context.lineTo(-6, y(d)); 73 | }); 74 | context.strokeStyle = "black"; 75 | context.stroke(); 76 | 77 | context.beginPath(); 78 | context.moveTo(-tickSize, 0); 79 | context.lineTo(0.5, 0); 80 | context.lineTo(0.5, height); 81 | context.lineTo(-tickSize, height); 82 | context.strokeStyle = "black"; 83 | context.stroke(); 84 | 85 | context.textAlign = "right"; 86 | context.textBaseline = "middle"; 87 | ticks.forEach(function(d) { 88 | context.fillText(tickFormat(d), -tickSize - tickPadding, y(d)); 89 | }); 90 | 91 | context.save(); 92 | context.rotate(-Math.PI / 2); 93 | context.textAlign = "right"; 94 | context.textBaseline = "top"; 95 | context.font = "bold 10px sans-serif"; 96 | context.fillText("Rank", -10, 10); 97 | context.restore(); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /website/web/static/linegraph_country.css: -------------------------------------------------------------------------------- 1 | .axis--x path { 2 | display: none; 3 | } 4 | 5 | .line { 6 | fill: none; 7 | stroke: 
steelblue; 8 | stroke-width: 1.5px; 9 | } 10 | -------------------------------------------------------------------------------- /website/web/static/linegraph_country.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | function linegraph(call_path) { 4 | var svg = d3.select("svg"), 5 | margin = {top: 20, right: 80, bottom: 30, left: 50}, 6 | width = svg.attr("width") - margin.left - margin.right, 7 | height = svg.attr("height") - margin.top - margin.bottom, 8 | g = svg.append("g").attr("transform", "translate(" + margin.left + "," + margin.top + ")"); 9 | 10 | var parseTime = d3.timeParse("%Y-%m-%d"); 11 | 12 | var x = d3.scaleTime().range([0, width]), 13 | y = d3.scaleLinear().range([height, 0]), 14 | z = d3.scaleOrdinal(d3.schemeCategory10); 15 | 16 | var line = d3.line() 17 | .curve(d3.curveLinear) 18 | .x(function(d) { return x(d.date); }) 19 | .y(function(d) { return y(d.rank); }); 20 | 21 | d3.json(call_path, {credentials: 'same-origin'}).then(data => { 22 | var country_ranks = $.map(data.response, function(value, key) { 23 | return { 24 | country: key, 25 | values: $.map(value, function(d) { 26 | return {date: parseTime(d[0]), rank: d[1]}; 27 | }) 28 | }; 29 | }); 30 | 31 | x.domain(d3.extent(country_ranks[0].values, function(d) { return d.date; })); 32 | y.domain([ 33 | d3.min(country_ranks, function(c) { return d3.min(c.values, function(d) { return d.rank; }); }), 34 | d3.max(country_ranks, function(c) { return d3.max(c.values, function(d) { return d.rank; }); }) 35 | ]); 36 | 37 | z.domain(country_ranks.map(function(c) { return c.country; })); 38 | 39 | g.append("g") 40 | .attr("class", "axis axis--x") 41 | .attr("transform", "translate(0," + height + ")") 42 | .call(d3.axisBottom(x)); 43 | 44 | g.append("g") 45 | .attr("class", "axis axis--y") 46 | .call(d3.axisLeft(y)) 47 | .append("text") 48 | .attr("transform", "rotate(-90)") 49 | .attr("y", 6) 50 | .attr("dy", "0.71em") 51 | .attr("fill", "#000") 52 | .text("Rank"); 53 | 54 | var country = g.selectAll(".country") 55 | .data(country_ranks) 56 | .enter().append("g") 57 | .attr("class", "country"); 58 | 59 | country.append("path") 60 | .attr("class", "line") 61 | .attr("d", function(d) { return line(d.values); }) 62 | .style("stroke", function(d) { return z(d.country); }); 63 | 64 | country.append("text") 65 | .datum(function(d) { return {id: d.country, value: d.values[d.values.length - 1]}; }) 66 | .attr("transform", function(d) { return "translate(" + x(d.value.date) + "," + y(d.value.rank) + ")"; }) 67 | .attr("x", 3) 68 | .attr("dy", "0.35em") 69 | .style("font", "10px sans-serif") 70 | .text(function(d) { return d.id; }); 71 | 72 | d3.text('/country_history_callback', 73 | {credentials: 'same-origin', 74 | method: 'POST', 75 | body: JSON.stringify(data.response), 76 | }) 77 | .then(function(data) { 78 | d3.select('#asn_details').html(data); 79 | }); 80 | }); 81 | }; 82 | -------------------------------------------------------------------------------- /website/web/templates/asn.html: -------------------------------------------------------------------------------- 1 | {% extends "main.html" %} 2 | 3 | {% block head %} 4 | {{ super() }} 5 | {% endblock %} 6 | 7 | 8 | {% block title %} 9 | Ranking - {{ asn }} 10 | {% endblock %} 11 | 12 | {% block scripts %} 13 | {{ super() }} 14 | 15 | 16 | {% endblock %} 17 | 18 | {% block content %} 19 |
20 |

Ranking - {{ asn }}



21 | 26 |
27 | {% include ['top_forms.html'] %} 28 | 29 | 30 | 31 | 32 | 33 | {% for timestamp in asn_descriptions.keys()|sort %} 34 | 35 | 36 | 37 | 38 | {% endfor %} 39 |
TimestampASN Description
{{ timestamp }}{{ asn_descriptions[timestamp] }}
40 | 41 | 42 | 43 | 44 | 45 | {% for prefix, rank in ranks %} 46 | 47 | 48 | 49 | 50 | {% endfor %} 51 |
PrefixRank
{{ prefix }}{{ rank }}
52 | 53 | {% if prefix_ips %} 54 | 55 | 56 | 57 | 58 | 59 | {% for ip, sources in prefix_ips %} 60 | 61 | 62 | 63 | 64 | {% endfor %} 65 |
IPSource(s)
{{ ip }}{{ ', '.join(sources) }}
66 | {% endif %} 67 | {% endblock %} 68 | -------------------------------------------------------------------------------- /website/web/templates/country.html: -------------------------------------------------------------------------------- 1 | {% extends "main.html" %} 2 | 3 | {% block head %} 4 | {{ super() }} 5 | 6 | {% endblock %} 7 | 8 | 9 | {% block title %}Ranking - {{ ' '.join(country) }}{% endblock %} 10 | 11 | {% block scripts %} 12 | {{ super() }} 13 | 14 | 15 | {% endblock %} 16 | 17 | {% block content %} 18 |
19 |

Ranking - {{ ' '.join(country) }}



20 |
21 | {% include ['top_forms.html'] %} 22 | 23 |
24 | {% endblock %} 25 | -------------------------------------------------------------------------------- /website/web/templates/country_asn_map.html: -------------------------------------------------------------------------------- 1 | {% for to_display_country in to_display%} 2 | 3 | 4 | {% for date in to_display_country[0] %} 5 | 6 | {% endfor %} 7 | 8 | {% for line in to_display_country[1:] %} 9 | 10 | 11 | {% for rank in line[1:] %} 12 | 13 | {% endfor %} 14 | 15 | {% endfor %} 16 |
{{ date }}
{{ line[0] }}{{ rank }}
17 | {% endfor %} 18 | -------------------------------------------------------------------------------- /website/web/templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "main.html" %} 2 | 3 | {% block title %}BGP Ranking{% endblock %} 4 | 5 | {% block content %} 6 | 7 | Fork me on GitHub 8 | 9 |
10 |

BGP Ranking



11 | 16 |
17 | {% include ['top_forms.html'] %} 18 | 19 | 20 | 21 | 22 | 23 | 24 | {% for asn, rank, description in ranks %} 25 | 26 | 27 | 28 | 29 | 30 | {% endfor %} 31 |
ASNRankDescription
{{ asn }}{{ rank }}{{ description }}
32 | {% endblock %} 33 | -------------------------------------------------------------------------------- /website/web/templates/ipasn.html: -------------------------------------------------------------------------------- 1 | {% extends "main.html" %} 2 | 3 | {% block head %} 4 | {{ super() }} 5 | {% endblock %} 6 | 7 | 8 | {% block title %} IP-ASN History {% endblock %} 9 | 10 | {% block scripts %} 11 | {{ super() }} 12 | {% endblock %} 13 | 14 | {% block content %} 15 |
16 |

IP-ASN History

17 |
18 |

19 |

20 | 21 | 22 | 23 |
24 |

25 | {% if ipasn_details %} 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | {% for entry in ipasn_details %} 35 | 36 | 37 | 38 | 39 | 44 | 45 | 46 | {% endfor %} 47 |
First SeenLast SeenASNASN DescriptionPrefix
{{ entry['first_seen'] }}{{ entry['last_seen'] }}{{ entry['asn'] }} 40 | {% for asn_description in entry['asn_descriptions'] %} 41 | {{ asn_description }}
42 | {% endfor %} 43 |
{{ entry['prefix'] }}
48 | {% endif %} 49 | {% endblock %} 50 | -------------------------------------------------------------------------------- /website/web/templates/main.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | {% block head %} 5 | 6 | 7 | {% block styles %} 8 | {{ bootstrap.load_css() }} 9 | 10 | {% endblock %} 11 | {% endblock %} 12 | 13 | 14 |
15 | {% block content %}{% endblock%} 16 |
17 | {% block scripts %} 18 | {{ bootstrap.load_js() }} 19 | 20 | 21 | 22 | 27 | {% endblock %} 28 | 29 | 30 | -------------------------------------------------------------------------------- /website/web/templates/top_forms.html: -------------------------------------------------------------------------------- 1 |

2 |

3 | 4 | 5 |
6 |
7 | 11 | 12 |
13 |
14 | 20 | 21 |
22 |
23 | 24 | 25 |
26 |
27 | 33 | 34 |
35 |
36 | 37 | 38 |
39 |

40 |
41 | --------------------------------------------------------------------------------