├── .gitignore ├── LICENSE ├── README.md ├── bgpranking ├── __init__.py ├── bgpranking.py ├── default │ ├── __init__.py │ ├── abstractmanager.py │ ├── exceptions.py │ └── helpers.py ├── helpers.py ├── parsers │ ├── __init__.py │ ├── abusech.py │ ├── abusech_feodo.py │ ├── abusech_threatfox.py │ ├── dshield.py │ ├── malc0de.py │ ├── nothink.py │ └── shadowserver.py └── statsripe.py ├── bin ├── __init__.py ├── archiver.py ├── asn_descriptions.py ├── dbinsert.py ├── fetcher.py ├── manual_ranking.py ├── parser.py ├── ranking.py ├── run_backend.py ├── sanitizer.py ├── shutdown.py ├── ssfetcher.py ├── start.py ├── start_website.py ├── stop.py └── update.py ├── cache ├── cache.conf ├── run_redis.sh └── shutdown_redis.sh ├── config ├── generic.json.sample ├── modules │ ├── Alienvault.json │ ├── BlocklistDeApache.json │ ├── BlocklistDeBots.json │ ├── BlocklistDeFTP.json │ ├── BlocklistDeIMAP.json │ ├── BlocklistDeMail.json │ ├── BlocklistDeSIP.json │ ├── BlocklistDeSSH.json │ ├── BlocklistDeStrong.json │ ├── CIArmy.json │ ├── CleanMXMalwares.json │ ├── CleanMXPhishing.json │ ├── CleanMXPortals.json │ ├── CoinBlockerLists.json │ ├── DshieldDaily.json │ ├── DshieldTopIPs.json │ ├── EmergingThreatsCompromized.json │ ├── FeodotrackerIPBlockList.json │ ├── Malc0de.json │ ├── MalwareDomainListIP.json │ ├── SSLBlacklist.json │ ├── ThreatFoxIOC.json │ ├── greensnow.json │ ├── jq_all_the_things.sh │ ├── module.schema │ ├── pop3gropers.json │ ├── shadowserver_only.sh │ └── validate_all.sh └── shadowserver.json.sample ├── poetry.lock ├── pyproject.toml ├── ranking ├── kvrocks.conf └── run_kvrocks.sh ├── setup.py ├── storage ├── kvrocks.conf └── run_kvrocks.sh ├── temp ├── intake.conf ├── prepare.conf ├── run_redis.sh └── shutdown_redis.sh ├── tools ├── 3rdparty.py ├── clear_prepare_db.py ├── migrate.py ├── monitoring.py └── validate_config_files.py └── website ├── __init__.py ├── readme.md └── web ├── __init__.py ├── genericapi.py ├── helpers.py ├── proxied.py ├── static ├── forkme_right_darkblue_121621.png ├── linegraph.css ├── linegraph.js ├── linegraph_country.css └── linegraph_country.js └── templates ├── asn.html ├── country.html ├── country_asn_map.html ├── index.html ├── ipasn.html ├── main.html └── top_forms.html /.gitignore: -------------------------------------------------------------------------------- 1 | # Local exclude 2 | scraped/ 3 | *.swp 4 | lookyloo/ete3_webserver/webapi.py 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | env/ 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | .hypothesis/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # dotenv 88 | .env 89 | 90 | # virtualenv 91 | .venv 92 | venv/ 93 | ENV/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | 108 | 109 | # web 110 | secret_key 111 | 112 | cache.pid 113 | *.rdb 114 | 115 | # Local config files 116 | config/*.json 117 | config/*.json.bkp 118 | 119 | rawdata 120 | 121 | storage/db/ 122 | storage/kvrocks* 123 | ranking/db/ 124 | ranking/kvrocks* 125 | website/web/static/d3.*.js 126 | website/web/static/bootstrap-select.min.* 127 | 128 | *.pid 129 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU AFFERO GENERAL PUBLIC LICENSE 2 | Version 3, 19 November 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU Affero General Public License is a free, copyleft license for 11 | software and other kinds of works, specifically designed to ensure 12 | cooperation with the community in the case of network server software. 13 | 14 | The licenses for most software and other practical works are designed 15 | to take away your freedom to share and change the works. By contrast, 16 | our General Public Licenses are intended to guarantee your freedom to 17 | share and change all versions of a program--to make sure it remains free 18 | software for all its users. 19 | 20 | When we speak of free software, we are referring to freedom, not 21 | price. Our General Public Licenses are designed to make sure that you 22 | have the freedom to distribute copies of free software (and charge for 23 | them if you wish), that you receive source code or can get it if you 24 | want it, that you can change the software or use pieces of it in new 25 | free programs, and that you know you can do these things. 26 | 27 | Developers that use our General Public Licenses protect your rights 28 | with two steps: (1) assert copyright on the software, and (2) offer 29 | you this License which gives you legal permission to copy, distribute 30 | and/or modify the software. 31 | 32 | A secondary benefit of defending all users' freedom is that 33 | improvements made in alternate versions of the program, if they 34 | receive widespread use, become available for other developers to 35 | incorporate. Many developers of free software are heartened and 36 | encouraged by the resulting cooperation. 
However, in the case of 37 | software used on network servers, this result may fail to come about. 38 | The GNU General Public License permits making a modified version and 39 | letting the public access it on a server without ever releasing its 40 | source code to the public. 41 | 42 | The GNU Affero General Public License is designed specifically to 43 | ensure that, in such cases, the modified source code becomes available 44 | to the community. It requires the operator of a network server to 45 | provide the source code of the modified version running there to the 46 | users of that server. Therefore, public use of a modified version, on 47 | a publicly accessible server, gives the public access to the source 48 | code of the modified version. 49 | 50 | An older license, called the Affero General Public License and 51 | published by Affero, was designed to accomplish similar goals. This is 52 | a different license, not a version of the Affero GPL, but Affero has 53 | released a new version of the Affero GPL which permits relicensing under 54 | this license. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | TERMS AND CONDITIONS 60 | 61 | 0. Definitions. 62 | 63 | "This License" refers to version 3 of the GNU Affero General Public License. 64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. "Object code" means any non-source 104 | form of a work. 
105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 
162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 
222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. Conveying Non-Source Forms. 234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 
284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. 
If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 374 | those licensors and authors. 375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 
402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. 
The 463 | work thus licensed is called the contributor's "contributor version". 464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. "Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. 
You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. 
If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 611 | 612 | If the disclaimer of warranty and limitation of liability provided 613 | above cannot be given local legal effect according to their terms, 614 | reviewing courts shall apply local law that most closely approximates 615 | an absolute waiver of all civil liability in connection with the 616 | Program, unless a warranty or assumption of liability accompanies a 617 | copy of the Program in return for a fee. 618 | 619 | END OF TERMS AND CONDITIONS 620 | 621 | How to Apply These Terms to Your New Programs 622 | 623 | If you develop a new program, and you want it to be of the greatest 624 | possible use to the public, the best way to achieve this is to make it 625 | free software which everyone can redistribute and change under these terms. 626 | 627 | To do so, attach the following notices to the program. It is safest 628 | to attach them to the start of each source file to most effectively 629 | state the exclusion of warranty; and each file should have at least 630 | the "copyright" line and a pointer to where the full notice is found. 
631 | 632 | <one line to give the program's name and a brief idea of what it does.> 633 | Copyright (C) <year> <name of author> 634 | 635 | This program is free software: you can redistribute it and/or modify 636 | it under the terms of the GNU Affero General Public License as published 637 | by the Free Software Foundation, either version 3 of the License, or 638 | (at your option) any later version. 639 | 640 | This program is distributed in the hope that it will be useful, 641 | but WITHOUT ANY WARRANTY; without even the implied warranty of 642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 643 | GNU Affero General Public License for more details. 644 | 645 | You should have received a copy of the GNU Affero General Public License 646 | along with this program. If not, see <https://www.gnu.org/licenses/>. 647 | 648 | Also add information on how to contact you by electronic and paper mail. 649 | 650 | If your software can interact with users remotely through a computer 651 | network, you should also make sure that it provides a way for users to 652 | get its source. For example, if your program is a web application, its 653 | interface could display a "Source" link that leads users to an archive 654 | of the code. There are many ways you could offer source, and different 655 | solutions will be better for different programs; see section 13 for the 656 | specific requirements. 657 | 658 | You should also get your employer (if you work as a programmer) or school, 659 | if any, to sign a "copyright disclaimer" for the program, if necessary. 660 | For more information on this, and how to apply and follow the GNU AGPL, see 661 | <https://www.gnu.org/licenses/>. 662 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BGP Ranking 2 | 3 | For an Internet Service Provider (ISP), AS numbers are a logical representation of 4 | the other ISPs peering or communicating with its autonomous system. ISP customers 5 | use the capacity of the Internet Service Provider to reach Internet 6 | services over other ASes. Some of those communications can be malicious (e.g. due 7 | to malware activity on end-user equipment) and hosted at specific AS locations. 8 | 9 | In order to provide an improved security view of those AS numbers, a trust ranking 10 | scheme is implemented based on existing datasets of compromised systems, 11 | malware C&C IPs and similar sources. BGP Ranking provides a way to collect 12 | such malicious activities, aggregate the information per ASN and provide a ranking 13 | model to rank the ASNs from the most malicious to the least malicious. 14 | 15 | The official website of the project is: [https://github.com/D4-project/bgp-ranking/](https://github.com/D4-project/bgp-ranking/) 16 | 17 | There is a public BGP Ranking at [http://bgpranking.circl.lu/](http://bgpranking.circl.lu/) 18 | 19 | BGP Ranking is free software licensed under the GNU Affero General Public License. 20 | 21 | BGP Ranking is software that ranks AS numbers based on their malicious activities. 22 | 23 | # Python client 24 | 25 | ```bash 26 | $ pip install git+https://github.com/D4-project/BGP-Ranking.git/#egg=pybgpranking\&subdirectory=client 27 | $ bgpranking --help 28 | usage: bgpranking [-h] [--url URL] (--asn ASN | --ip IP) 29 | 30 | Run a query against BGP Ranking 31 | 32 | optional arguments: 33 | -h, --help show this help message and exit 34 | --url URL URL of the instance.
35 | --asn ASN ASN to lookup 36 | --ip IP IP to lookup 37 | ``` 38 | 39 | ## History 40 | 41 | - The first version of BGP Ranking was done in 2010 by [Raphael Vinot](https://github.com/Rafiot) with the support of [Alexandre Dulaunoy](https://github.com/adulau/). 42 | CIRCL supported the project from the very beginning and set up an online version to share information about the malicious ranking of ISPs. 43 | 44 | - In late 2018, within the scope of the D4 Project (a CIRCL project co-funded by INEA under the CEF Telecom program), a new version of BGP Ranking was completely rewritten in Python 3.6+ with an ARDB back-end. 45 | 46 | - In January 2022, BGP Ranking version 2.0 was released, including a new backend based on [kvrocks](https://github.com/KvrocksLabs/kvrocks) and many improvements. 47 | 48 | # Online service 49 | 50 | The BGP Ranking service is available online at [http://bgpranking.circl.lu/](http://bgpranking.circl.lu/). 51 | 52 | A Python library and client software are [available](https://github.com/D4-project/BGP-Ranking/tree/master/client), using the default API available from bgpranking.circl.lu. 53 | 54 | # CURL Example 55 | 56 | ## Get the ASN from an IP or a prefix 57 | ```bash 58 | curl https://bgpranking-ng.circl.lu/ipasn_history/?ip=143.255.153.0/24 59 | ``` 60 | 61 | ## Response 62 | 63 | ```json 64 | { 65 | "meta": { 66 | "address_family": "v4", 67 | "ip": "143.255.153.0/24", 68 | "source": "caida" 69 | }, 70 | "response": { 71 | "2019-05-19T12:00:00": { 72 | "asn": "264643", 73 | "prefix": "143.255.153.0/24" 74 | } 75 | } 76 | } 77 | ``` 78 | 79 | ## Get the ranking of the AS 80 | ``` 81 | curl -X POST -d '{"asn": "5577", "date": "2019-05-19"}' https://bgpranking-ng.circl.lu/json/asn 82 | ``` 83 | 84 | Note: `date` isn't required. 85 | 86 | ### Response 87 | 88 | ```json 89 | { 90 | "meta": { 91 | "asn": "5577" 92 | }, 93 | "response": { 94 | "asn_description": "ROOT, LU", 95 | "ranking": { 96 | "rank": 0.0004720052083333333, 97 | "position": 7084, 98 | "total_known_asns": 15375 99 | } 100 | } 101 | } 102 | ``` 103 | 104 | ## Get historical information for an ASN 105 | 106 | ``` 107 | curl -X POST -d '{"asn": "5577", "period": 5}' https://bgpranking-ng.circl.lu/json/asn_history 108 | ``` 109 | 110 | ### Response 111 | 112 | ```json 113 | { 114 | "meta": { 115 | "asn": "5577", 116 | "period": 5 117 | }, 118 | "response": { 119 | "asn_history": [ 120 | [ 121 | "2019-11-10", 122 | 0.00036458333333333335 123 | ], 124 | [ 125 | "2019-11-11", 126 | 0.00036168981481481485 127 | ], 128 | [ 129 | "2019-11-12", 130 | 0.0003761574074074074 131 | ], 132 | [ 133 | "2019-11-13", 134 | 0.0003530092592592593 135 | ], 136 | [ 137 | "2019-11-14", 138 | 0.0003559027777777778 139 | ] 140 | ] 141 | } 142 | } 143 | ``` 144 | 145 | 146 | # Server Installation (if you want to run your own) 147 | 148 | **IMPORTANT**: Use [poetry](https://github.com/python-poetry/poetry#installation). 149 | 150 | **NOTE**: Yes, it requires Python 3.6+. No, it will never support anything older. 151 | 152 | ## Install redis 153 | 154 | ```bash 155 | git clone https://github.com/antirez/redis.git 156 | cd redis 157 | git checkout 5.0 158 | make 159 | make test 160 | cd .. 161 | ``` 162 | **Note**: If it fails, have a look at [the documentation](https://github.com/redis/redis#building-redis). 163 | 164 | ## Install kvrocks 165 | 166 | ```bash 167 | git clone https://github.com/KvrocksLabs/kvrocks.git 168 | cd kvrocks 169 | git checkout 2.5 170 | ./x.py build 171 | cd ..
172 | ``` 173 | **Note**: If it fails, have a look at [the documentation](https://github.com/apache/kvrocks#build-and-run-kvrocks). 174 | 175 | ## Install & run BGP Ranking 176 | 177 | ```bash 178 | git clone https://github.com/D4-project/BGP-Ranking.git 179 | cd BGP-Ranking 180 | poetry install 181 | echo BGPRANKING_HOME="'`pwd`'" > .env 182 | poetry shell 183 | # Starts all the backend 184 | start 185 | ``` 186 | 187 | ## Shutdown BGP Ranking 188 | 189 | ```bash 190 | stop 191 | ``` 192 | 193 | # Directory structure 194 | 195 | *Config files*: `bgpranking / config / *.json` 196 | 197 | *Per-module parsers*: `bgpranking / parsers` 198 | 199 | *Libraries*: `bgpranking / libs` 200 | 201 | # Raw dataset directory structure 202 | 203 | ## Files to import 204 | 205 | *Note*: The default location of `<storage_directory>` is the root directory of the repo. 206 | 207 | `<storage_directory> / <vendor> / <listname>` 208 | 209 | ## Last modified date (if possible) and lock file 210 | 211 | `<storage_directory> / <vendor> / <listname> / meta` 212 | 213 | ## Imported files less than 2 months old 214 | 215 | `<storage_directory> / <vendor> / <listname> / archive` 216 | 217 | ## Imported files more than 2 months old 218 | 219 | `<storage_directory> / <vendor> / <listname> / archive / deep` 220 | 221 | # Databases 222 | 223 | ## Intake (redis, port 6579) 224 | 225 | *Usage*: All the modules push their entries into this database. 226 | 227 | Creates the following hashes: 228 | 229 | ```python 230 | UUID = {'ip': <ip>, 'source': <source>, 'datetime': <datetime>} 231 | ``` 232 | 233 | Creates a set `intake` for further processing containing all the UUIDs. 234 | 235 | 236 | ## Pre-Insert (redis, port 6580) 237 | 238 | 239 | *Usage*: Make sure the IPs are global and validate the input from the intake module. 240 | 241 | Pop UUIDs from `intake`, get the hashes with that key. 242 | 243 | Creates the following hashes: 244 | 245 | ```python 246 | UUID = {'ip': <ip>, 'source': <source>, 'datetime': <datetime>, 'date': <date>} 247 | ``` 248 | 249 | Creates a set `to_insert` for further processing containing all the UUIDs. 250 | 251 | Creates a set `for_ris_lookup` for lookups against the RIS database. Contains all the IPs. 252 | 253 | ## Routing Information Service cache (redis, port 6581) 254 | 255 | *Usage*: Look up IPs against RIPE's RIS database. 256 | 257 | Pop IPs from `for_ris_lookup`. 258 | 259 | Creates the following hashes: 260 | 261 | ```python 262 | IP = {'asn': <asn>, 'prefix': <prefix>, 'description': <description>} 263 | ``` 264 | 265 | ## Ranking Information cache (redis, port 6582) 266 | 267 | *Usage*: Store the current list of known ASNs at RIPE, and the prefixes originating from them. 268 | 269 | Creates the following sets: 270 | 271 | ```python 272 | asns = set([<asn>, ...]) 273 | <asn>|v4 = set([<prefix>, ...]) 274 | <asn>|v6 = set([<prefix>, ...]) 275 | ``` 276 | 277 | And the following keys: 278 | 279 | ```python 280 | <asn>|v4|ipcount = <number of IPs covered by the v4 prefixes> 281 | <asn>|v6|ipcount = <number of IPs covered by the v6 prefixes> 282 | ``` 283 | 284 | ## Long term storage (kvrocks, port 5188) 285 | 286 | *Usage*: Stores the IPs with the metadata required for ranking. 287 | 288 | Pop UUIDs from `to_insert`, get the hashes with that key. 289 | 290 | Use the IP from that hash to get the RIS information (see the illustrative sketch below).
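For illustration, here is a minimal, hypothetical sketch of that insert step, assuming plain redis-py clients on the ports listed above. The actual implementation lives in `bin/dbinsert.py` and uses the project's config files and Unix sockets; the connection setup and variable names below are assumptions, not the project's code.

```python
import redis

# Hypothetical sketch of the insert step (not the real bin/dbinsert.py code).
prepare = redis.Redis(port=6580, decode_responses=True)    # Pre-Insert database
ris_cache = redis.Redis(port=6581, decode_responses=True)  # RIS lookup cache
storage = redis.Redis(port=5188, decode_responses=True)    # long term storage (kvrocks)

uuid = prepare.spop('to_insert')
if uuid:
    entry = prepare.hgetall(uuid)          # {'ip': ..., 'source': ..., 'datetime': ..., 'date': ...}
    ris = ris_cache.hgetall(entry['ip'])   # {'asn': ..., 'prefix': ..., 'description': ...}
    if ris:
        date, source = entry['date'], entry['source']
        asn, prefix = ris['asn'], ris['prefix']
        pipe = storage.pipeline()
        # Build the per-day key hierarchy documented below.
        pipe.sadd(f'{date}|sources', source)
        pipe.sadd(f'{date}|{source}', asn)
        pipe.sadd(f'{date}|{source}|{asn}', prefix)
        pipe.sadd(f'{date}|{source}|{asn}|{prefix}', f"{entry['ip']}|{entry['datetime']}")
        pipe.execute()
```

The resulting key layout is the one documented below.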
291 | 292 | Creates the following sets: 293 | 294 | ```python 295 | # All the sources, by day 296 | <YYYY-MM-DD>|sources = set([<source>, ...]) 297 | # All the ASNs by source, by day 298 | <YYYY-MM-DD>|<source> -> set([<asn>, ...]) 299 | # All the prefixes, by ASN, by source, by day 300 | <YYYY-MM-DD>|<source>|<asn> -> set([<prefix>, ...]) 301 | # All the tuples (ip, datetime), by prefixes, by ASN, by source, by day 302 | <YYYY-MM-DD>|<source>|<asn>|<prefix> -> set([<ip>|<datetime>, ...]) 303 | ``` 304 | -------------------------------------------------------------------------------- /bgpranking/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D4-project/BGP-Ranking/5f4e051f940b1c79ec5c6b432db1c7b2bdcbc781/bgpranking/__init__.py -------------------------------------------------------------------------------- /bgpranking/bgpranking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import logging 5 | import re 6 | 7 | from redis import ConnectionPool, Redis 8 | from redis.connection import UnixDomainSocketConnection 9 | 10 | from .default import get_config, get_socket_path 11 | 12 | from typing import TypeVar, Union, Optional, Dict, Any, List, Tuple 13 | import datetime 14 | from datetime import timedelta 15 | from dateutil.parser import parse 16 | from collections import defaultdict 17 | 18 | import json 19 | 20 | from .default import InvalidDateFormat 21 | from .helpers import get_modules 22 | from .statsripe import StatsRIPE 23 | 24 | Dates = TypeVar('Dates', datetime.datetime, datetime.date, str) 25 | 26 | 27 | class BGPRanking(): 28 | 29 | def __init__(self) -> None: 30 | self.logger = logging.getLogger(f'{self.__class__.__name__}') 31 | self.logger.setLevel(get_config('generic', 'loglevel')) 32 | 33 | self.cache_pool: ConnectionPool = ConnectionPool(connection_class=UnixDomainSocketConnection, 34 | path=get_socket_path('cache'), decode_responses=True) 35 | 36 | self.storage = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port'), decode_responses=True) 37 | self.asn_meta = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port'), decode_responses=True) 38 | self.ranking = Redis(get_config('generic', 'ranking_db_hostname'), get_config('generic', 'ranking_db_port')) 39 | 40 | @property 41 | def cache(self): 42 | return Redis(connection_pool=self.cache_pool, db=1) 43 | 44 | def check_redis_up(self) -> bool: 45 | return self.cache.ping() 46 | 47 | def __normalize_date(self, date: Optional[Dates]) -> str: 48 | if not date: 49 | return datetime.date.today().isoformat() 50 | if isinstance(date, datetime.datetime): 51 | return date.date().isoformat() 52 | elif isinstance(date, datetime.date): 53 | return date.isoformat() 54 | elif isinstance(date, str): 55 | try: 56 | return parse(date).date().isoformat() 57 | except ValueError: 58 | raise InvalidDateFormat('Unable to parse the date.
Should be YYYY-MM-DD.') 59 | 60 | def _ranking_cache_wrapper(self, key): 61 | if not self.cache.exists(key): 62 | if self.ranking.exists(key): 63 | try: 64 | content: List[Tuple[bytes, float]] = self.ranking.zrangebyscore(key, '-Inf', '+Inf', withscores=True) 65 | # Cache for 10 hours 66 | self.cache.zadd(key, {value: rank for value, rank in content}) 67 | self.cache.expire(key, 36000) 68 | except Exception as e: 69 | self.logger.exception(f'Something went poorly when caching {key}.') 70 | raise e 71 | 72 | def asns_global_ranking(self, date: Optional[Dates]=None, source: Union[list, str]='', 73 | ipversion: str='v4', limit: int=100): 74 | '''Aggregated ranking of all the ASNs known in the system, weighted by source.''' 75 | to_return: Dict[str, Any] = { 76 | 'meta': {'ipversion': ipversion, 'limit': limit}, 77 | 'source': source, 78 | 'response': set() 79 | } 80 | d = self.__normalize_date(date) 81 | to_return['meta']['date'] = d 82 | if source: 83 | if isinstance(source, list): 84 | keys = [] 85 | for s in source: 86 | key = f'{d}|{s}|asns|{ipversion}' 87 | self._ranking_cache_wrapper(key) 88 | keys.append(key) 89 | # union the ranked sets 90 | key = '|'.join(sorted(source)) + f'|{d}|asns|{ipversion}' 91 | if not self.cache.exists(key): 92 | self.cache.zunionstore(key, keys) 93 | else: 94 | key = f'{d}|{source}|asns|{ipversion}' 95 | else: 96 | key = f'{d}|asns|{ipversion}' 97 | self._ranking_cache_wrapper(key) 98 | to_return['response'] = self.cache.zrevrange(key, start=0, end=limit, withscores=True) 99 | return to_return 100 | 101 | def asn_details(self, asn: int, date: Optional[Dates]=None, source: Union[list, str]='', 102 | ipversion: str='v4'): 103 | '''Aggregated ranking of all the prefixes anounced by the given ASN, weighted by source.''' 104 | to_return: Dict[str, Any] = { 105 | 'meta': {'asn': asn, 'ipversion': ipversion, 'source': source}, 106 | 'response': set() 107 | } 108 | 109 | d = self.__normalize_date(date) 110 | to_return['meta']['date'] = d 111 | if source: 112 | if isinstance(source, list): 113 | keys = [] 114 | for s in source: 115 | key = f'{d}|{s}|{asn}|{ipversion}|prefixes' 116 | self._ranking_cache_wrapper(key) 117 | keys.append(key) 118 | # union the ranked sets 119 | key = '|'.join(sorted(source)) + f'|{d}|{asn}|{ipversion}' 120 | if not self.cache.exists(key): 121 | self.cache.zunionstore(key, keys) 122 | else: 123 | key = f'{d}|{source}|{asn}|{ipversion}|prefixes' 124 | else: 125 | key = f'{d}|{asn}|{ipversion}' 126 | self._ranking_cache_wrapper(key) 127 | to_return['response'] = self.cache.zrevrange(key, start=0, end=-1, withscores=True) 128 | return to_return 129 | 130 | def asn_rank(self, asn: int, date: Optional[Dates]=None, source: Union[list, str]='', 131 | ipversion: str='v4', with_position: bool=False): 132 | '''Get the rank of a single ASN, weighted by source.''' 133 | to_return: Dict[str, Any] = { 134 | 'meta': {'asn': asn, 'ipversion': ipversion, 135 | 'source': source, 'with_position': with_position}, 136 | 'response': 0.0 137 | } 138 | 139 | d = self.__normalize_date(date) 140 | to_return['meta']['date'] = d 141 | if source: 142 | to_return['meta']['source'] = source 143 | if isinstance(source, list): 144 | keys = [] 145 | for s in source: 146 | key = f'{d}|{s}|{asn}|{ipversion}' 147 | self._ranking_cache_wrapper(key) 148 | keys.append(key) 149 | r = sum(float(self.cache.get(key)) for key in keys if self.cache.exists(key)) 150 | else: 151 | key = f'{d}|{source}|{asn}|{ipversion}' 152 | self._ranking_cache_wrapper(key) 153 | r = 
self.cache.get(key) 154 | else: 155 | key = f'{d}|asns|{ipversion}' 156 | self._ranking_cache_wrapper(key) 157 | r = self.cache.zscore(key, asn) 158 | if not r: 159 | r = 0 160 | if with_position and not source: 161 | position = self.cache.zrevrank(key, asn) 162 | if position is not None: 163 | position += 1 164 | to_return['response'] = {'rank': float(r), 'position': position, 165 | 'total_known_asns': self.cache.zcard(key)} 166 | else: 167 | to_return['response'] = float(r) 168 | return to_return 169 | 170 | def get_sources(self, date: Optional[Dates]=None): 171 | '''Get the sources availables for a specific day (default: today).''' 172 | to_return: Dict[str, Any] = {'meta': {}, 'response': set()} 173 | 174 | d = self.__normalize_date(date) 175 | to_return['meta']['date'] = d 176 | key = f'{d}|sources' 177 | to_return['response'] = self.storage.smembers(key) 178 | return to_return 179 | 180 | def get_asn_descriptions(self, asn: int, all_descriptions=False) -> Dict[str, Any]: 181 | to_return: Dict[str, Union[Dict, List, str]] = { 182 | 'meta': {'asn': asn, 'all_descriptions': all_descriptions}, 183 | 'response': [] 184 | } 185 | descriptions = self.asn_meta.hgetall(f'{asn}|descriptions') 186 | if all_descriptions or not descriptions: 187 | to_return['response'] = descriptions 188 | else: 189 | to_return['response'] = descriptions[sorted(descriptions.keys(), reverse=True)[0]] 190 | return to_return 191 | 192 | def get_prefix_ips(self, asn: int, prefix: str, date: Optional[Dates]=None, 193 | source: Union[list, str]='', ipversion: str='v4'): 194 | to_return: Dict[str, Any] = { 195 | 'meta': {'asn': asn, 'prefix': prefix, 'ipversion': ipversion, 196 | 'source': source}, 197 | 'response': defaultdict(list) 198 | } 199 | 200 | d = self.__normalize_date(date) 201 | to_return['meta']['date'] = d 202 | 203 | if source: 204 | to_return['meta']['source'] = source 205 | if isinstance(source, list): 206 | sources = source 207 | else: 208 | sources = [source] 209 | else: 210 | sources = self.get_sources(d)['response'] 211 | 212 | for source in sources: 213 | ips = set([ip_ts.split('|')[0] 214 | for ip_ts in self.storage.smembers(f'{d}|{source}|{asn}|{prefix}')]) 215 | [to_return['response'][ip].append(source) for ip in ips] 216 | return to_return 217 | 218 | def get_asn_history(self, asn: int, period: int=100, source: Union[list, str]='', 219 | ipversion: str='v4', date: Optional[Dates]=None): 220 | to_return: Dict[str, Any] = { 221 | 'meta': {'asn': asn, 'period': period, 'ipversion': ipversion, 'source': source}, 222 | 'response': [] 223 | } 224 | 225 | if date is None: 226 | python_date: datetime.date = datetime.date.today() 227 | elif isinstance(date, str): 228 | python_date = parse(date).date() 229 | elif isinstance(date, datetime.datetime): 230 | python_date = date.date() 231 | else: 232 | python_date = date 233 | 234 | to_return['meta']['date'] = python_date.isoformat() 235 | 236 | for i in range(period): 237 | d = python_date - timedelta(days=i) 238 | rank = self.asn_rank(asn, d, source, ipversion) 239 | if 'response' not in rank: 240 | rank = 0 241 | to_return['response'].insert(0, (d.isoformat(), rank['response'])) 242 | return to_return 243 | 244 | def country_rank(self, country: str, date: Optional[Dates]=None, source: Union[list, str]='', 245 | ipversion: str='v4'): 246 | to_return: Dict[str, Any] = { 247 | 'meta': {'country': country, 'ipversion': ipversion, 'source': source}, 248 | 'response': [] 249 | } 250 | 251 | d = self.__normalize_date(date) 252 | to_return['meta']['date'] = d 253 
| 254 | ripe = StatsRIPE() 255 | response = ripe.country_asns(country, query_time=d, details=1) 256 | if (not response.get('data') or not response['data'].get('countries') or not 257 | response['data']['countries'][0].get('routed')): 258 | logging.warning(f'Invalid response: {response}') 259 | return {'meta': to_return['meta'], 'response': [0, [(0, 0)]]} 260 | routed_asns = re.findall(r"AsnSingle\(([\d]*)\)", response['data']['countries'][0]['routed']) 261 | ranks = [self.asn_rank(asn, d, source, ipversion)['response'] for asn in routed_asns] 262 | to_return['response'] = [sum(ranks), zip(routed_asns, ranks)] 263 | return to_return 264 | 265 | def country_history(self, country: Union[list, str], period: int=30, source: Union[list, str]='', 266 | ipversion: str='v4', date: Optional[Dates]=None): 267 | to_return: Dict[str, Any] = { 268 | 'meta': {'country': country, 'ipversion': ipversion, 'source': source}, 269 | 'response': defaultdict(list) 270 | } 271 | if date is None: 272 | python_date: datetime.date = datetime.date.today() 273 | elif isinstance(date, str): 274 | python_date = parse(date).date() 275 | elif isinstance(date, datetime.datetime): 276 | python_date = date.date() 277 | else: 278 | python_date = date 279 | 280 | if isinstance(country, str): 281 | country = [country] 282 | for c in country: 283 | for i in range(period): 284 | d = python_date - timedelta(days=i) 285 | rank, details = self.country_rank(c, d, source, ipversion)['response'] 286 | if rank is None: 287 | rank = 0 288 | to_return['response'][c].insert(0, (d.isoformat(), rank, list(details))) 289 | return to_return 290 | 291 | def get_source_config(self): 292 | pass 293 | 294 | def get_sources_configs(self): 295 | loaded = [] 296 | for modulepath in get_modules(): 297 | with open(modulepath) as f: 298 | loaded.append(json.load(f)) 299 | return {'{}-{}'.format(config['vendor'], config['name']): config for config in loaded} 300 | -------------------------------------------------------------------------------- /bgpranking/default/__init__.py: -------------------------------------------------------------------------------- 1 | env_global_name: str = 'BGPRANKING_HOME' 2 | 3 | from .exceptions import (BGPRankingException, FetcherException, ArchiveException, # noqa 4 | CreateDirectoryException, MissingEnv, InvalidDateFormat, # noqa 5 | MissingConfigFile, MissingConfigEntry, ThirdPartyUnreachable) # noqa 6 | 7 | # NOTE: the imports below are there to avoid overly long import paths when using the 8 | # classes/methods in the rest of the project, while keeping all of that in a subdirectory 9 | # and allowing them to be updated easily. 10 | # You should not have to change anything in this file below this line.
11 | 12 | from .abstractmanager import AbstractManager # noqa 13 | 14 | from .exceptions import MissingEnv, CreateDirectoryException, ConfigError # noqa 15 | 16 | from .helpers import get_homedir, load_configs, get_config, safe_create_dir, get_socket_path, try_make_file # noqa 17 | -------------------------------------------------------------------------------- /bgpranking/default/abstractmanager.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import asyncio 5 | import logging 6 | import signal 7 | import time 8 | from abc import ABC 9 | from datetime import datetime, timedelta 10 | from subprocess import Popen 11 | from typing import List, Optional, Tuple 12 | 13 | from redis import Redis 14 | from redis.exceptions import ConnectionError 15 | 16 | from .helpers import get_socket_path 17 | 18 | 19 | class AbstractManager(ABC): 20 | 21 | script_name: str 22 | 23 | def __init__(self, loglevel: int=logging.DEBUG): 24 | self.loglevel = loglevel 25 | self.logger = logging.getLogger(f'{self.__class__.__name__}') 26 | self.logger.setLevel(loglevel) 27 | self.logger.info(f'Initializing {self.__class__.__name__}') 28 | self.process: Optional[Popen] = None 29 | self.__redis = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) 30 | 31 | @staticmethod 32 | def is_running() -> List[Tuple[str, float]]: 33 | try: 34 | r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) 35 | return r.zrangebyscore('running', '-inf', '+inf', withscores=True) 36 | except ConnectionError: 37 | print('Unable to connect to redis, the system is down.') 38 | return [] 39 | 40 | @staticmethod 41 | def force_shutdown(): 42 | try: 43 | r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) 44 | r.set('shutdown', 1) 45 | except ConnectionError: 46 | print('Unable to connect to redis, the system is down.') 47 | 48 | def set_running(self) -> None: 49 | self.__redis.zincrby('running', 1, self.script_name) 50 | 51 | def unset_running(self) -> None: 52 | current_running = self.__redis.zincrby('running', -1, self.script_name) 53 | if int(current_running) <= 0: 54 | self.__redis.zrem('running', self.script_name) 55 | 56 | def long_sleep(self, sleep_in_sec: int, shutdown_check: int=10) -> bool: 57 | if shutdown_check > sleep_in_sec: 58 | shutdown_check = sleep_in_sec 59 | sleep_until = datetime.now() + timedelta(seconds=sleep_in_sec) 60 | while sleep_until > datetime.now(): 61 | time.sleep(shutdown_check) 62 | if self.shutdown_requested(): 63 | return False 64 | return True 65 | 66 | async def long_sleep_async(self, sleep_in_sec: int, shutdown_check: int=10) -> bool: 67 | if shutdown_check > sleep_in_sec: 68 | shutdown_check = sleep_in_sec 69 | sleep_until = datetime.now() + timedelta(seconds=sleep_in_sec) 70 | while sleep_until > datetime.now(): 71 | await asyncio.sleep(shutdown_check) 72 | if self.shutdown_requested(): 73 | return False 74 | return True 75 | 76 | def shutdown_requested(self) -> bool: 77 | try: 78 | return True if self.__redis.exists('shutdown') else False 79 | except ConnectionRefusedError: 80 | return True 81 | except ConnectionError: 82 | return True 83 | 84 | def _to_run_forever(self) -> None: 85 | pass 86 | 87 | def run(self, sleep_in_sec: int) -> None: 88 | self.logger.info(f'Launching {self.__class__.__name__}') 89 | try: 90 | while True: 91 | if self.shutdown_requested(): 92 | break 93 | try: 94 | if self.process: 95 | if 
self.process.poll() is not None: 96 | self.logger.critical(f'Unable to start {self.script_name}.') 97 | break 98 | else: 99 | self.set_running() 100 | self._to_run_forever() 101 | except Exception: 102 | self.logger.exception(f'Something went terribly wrong in {self.__class__.__name__}.') 103 | finally: 104 | if not self.process: 105 | # self.process means we run an external script, all the time, 106 | # do not unset between sleep. 107 | self.unset_running() 108 | if not self.long_sleep(sleep_in_sec): 109 | break 110 | except KeyboardInterrupt: 111 | self.logger.warning(f'{self.script_name} killed by user.') 112 | finally: 113 | if self.process: 114 | try: 115 | # Killing everything if possible. 116 | self.process.send_signal(signal.SIGWINCH) 117 | self.process.send_signal(signal.SIGTERM) 118 | except Exception: 119 | pass 120 | try: 121 | self.unset_running() 122 | except Exception: 123 | # the services can already be down at that point. 124 | pass 125 | self.logger.info(f'Shutting down {self.__class__.__name__}') 126 | 127 | async def _to_run_forever_async(self) -> None: 128 | pass 129 | 130 | async def run_async(self, sleep_in_sec: int) -> None: 131 | self.logger.info(f'Launching {self.__class__.__name__}') 132 | try: 133 | while True: 134 | if self.shutdown_requested(): 135 | break 136 | try: 137 | if self.process: 138 | if self.process.poll() is not None: 139 | self.logger.critical(f'Unable to start {self.script_name}.') 140 | break 141 | else: 142 | self.set_running() 143 | await self._to_run_forever_async() 144 | except Exception: 145 | self.logger.exception(f'Something went terribly wrong in {self.__class__.__name__}.') 146 | finally: 147 | if not self.process: 148 | # self.process means we run an external script, all the time, 149 | # do not unset between sleep. 150 | self.unset_running() 151 | if not await self.long_sleep_async(sleep_in_sec): 152 | break 153 | except KeyboardInterrupt: 154 | self.logger.warning(f'{self.script_name} killed by user.') 155 | finally: 156 | if self.process: 157 | try: 158 | # Killing everything if possible. 159 | self.process.send_signal(signal.SIGWINCH) 160 | self.process.send_signal(signal.SIGTERM) 161 | except Exception: 162 | pass 163 | try: 164 | self.unset_running() 165 | except Exception: 166 | # the services can already be down at that point. 
167 | pass 168 | self.logger.info(f'Shutting down {self.__class__.__name__}') 169 | -------------------------------------------------------------------------------- /bgpranking/default/exceptions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | class BGPRankingException(Exception): 6 | pass 7 | 8 | 9 | class FetcherException(BGPRankingException): 10 | pass 11 | 12 | 13 | class ArchiveException(BGPRankingException): 14 | pass 15 | 16 | 17 | class CreateDirectoryException(BGPRankingException): 18 | pass 19 | 20 | 21 | class MissingEnv(BGPRankingException): 22 | pass 23 | 24 | 25 | class InvalidDateFormat(BGPRankingException): 26 | pass 27 | 28 | 29 | class MissingConfigFile(BGPRankingException): 30 | pass 31 | 32 | 33 | class MissingConfigEntry(BGPRankingException): 34 | pass 35 | 36 | 37 | class ThirdPartyUnreachable(BGPRankingException): 38 | pass 39 | 40 | 41 | class ConfigError(BGPRankingException): 42 | pass 43 | -------------------------------------------------------------------------------- /bgpranking/default/helpers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import json 4 | import logging 5 | import os 6 | from functools import lru_cache 7 | from pathlib import Path 8 | from typing import Any, Dict, Optional, Union 9 | 10 | from . import env_global_name 11 | from .exceptions import ConfigError, CreateDirectoryException, MissingEnv 12 | 13 | configs: Dict[str, Dict[str, Any]] = {} 14 | logger = logging.getLogger('Helpers') 15 | 16 | 17 | @lru_cache(64) 18 | def get_homedir() -> Path: 19 | if not os.environ.get(env_global_name): 20 | # Try to open a .env file in the home directory if it exists. 21 | if (Path(__file__).resolve().parent.parent.parent / '.env').exists(): 22 | with (Path(__file__).resolve().parent.parent.parent / '.env').open() as f: 23 | for line in f: 24 | key, value = line.strip().split('=', 1) 25 | if value[0] in ['"', "'"]: 26 | value = value[1:-1] 27 | os.environ[key] = value 28 | 29 | if not os.environ.get(env_global_name): 30 | guessed_home = Path(__file__).resolve().parent.parent.parent 31 | raise MissingEnv(f"{env_global_name} is missing. \ 32 | Run the following command (assuming you run the code from the cloned repository):\ 33 | export {env_global_name}='{guessed_home}'") 34 | return Path(os.environ[env_global_name]) 35 | 36 | 37 | @lru_cache(64) 38 | def load_configs(path_to_config_files: Optional[Union[str, Path]]=None): 39 | global configs 40 | if configs: 41 | return 42 | if path_to_config_files: 43 | if isinstance(path_to_config_files, str): 44 | config_path = Path(path_to_config_files) 45 | else: 46 | config_path = path_to_config_files 47 | else: 48 | config_path = get_homedir() / 'config' 49 | if not config_path.exists(): 50 | raise ConfigError(f'Configuration directory {config_path} does not exist.') 51 | elif not config_path.is_dir(): 52 | raise ConfigError(f'Configuration directory {config_path} is not a directory.') 53 | 54 | configs = {} 55 | for path in config_path.glob('*.json'): 56 | with path.open() as _c: 57 | configs[path.stem] = json.load(_c) 58 | 59 | 60 | @lru_cache(64) 61 | def get_config(config_type: str, entry: str, quiet: bool=False) -> Any: 62 | """Get an entry from the given config_type file. 
Automatic fallback to the sample file""" 63 | global configs 64 | if not configs: 65 | load_configs() 66 | if config_type in configs: 67 | if entry in configs[config_type]: 68 | return configs[config_type][entry] 69 | else: 70 | if not quiet: 71 | logger.warning(f'Unable to find {entry} in config file.') 72 | else: 73 | if not quiet: 74 | logger.warning(f'No {config_type} config file available.') 75 | if not quiet: 76 | logger.warning(f'Falling back on sample config, please initialize the {config_type} config file.') 77 | with (get_homedir() / 'config' / f'{config_type}.json.sample').open() as _c: 78 | sample_config = json.load(_c) 79 | return sample_config[entry] 80 | 81 | 82 | def safe_create_dir(to_create: Path) -> None: 83 | if to_create.exists() and not to_create.is_dir(): 84 | raise CreateDirectoryException(f'The path {to_create} already exists and is not a directory') 85 | to_create.mkdir(parents=True, exist_ok=True) 86 | 87 | 88 | def get_socket_path(name: str) -> str: 89 | mapping = { 90 | 'cache': Path('cache', 'cache.sock'), 91 | 'intake': Path('temp', 'intake.sock'), 92 | 'prepare': Path('temp', 'prepare.sock') 93 | } 94 | return str(get_homedir() / mapping[name]) 95 | 96 | 97 | def try_make_file(filename: Path): 98 | try: 99 | filename.touch(exist_ok=False) 100 | return True 101 | except FileExistsError: 102 | return False 103 | -------------------------------------------------------------------------------- /bgpranking/helpers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | from functools import lru_cache 6 | from pathlib import Path 7 | from typing import Dict, List 8 | 9 | import requests 10 | 11 | from pyipasnhistory import IPASNHistory 12 | 13 | from .default import get_homedir, get_config, ThirdPartyUnreachable, safe_create_dir 14 | 15 | 16 | @lru_cache(64) 17 | def get_data_dir() -> Path: 18 | capture_dir = get_homedir() / 'rawdata' 19 | safe_create_dir(capture_dir) 20 | return capture_dir 21 | 22 | 23 | @lru_cache(64) 24 | def get_modules_dir() -> Path: 25 | modules_dir = get_homedir() / 'config' / 'modules' 26 | safe_create_dir(modules_dir) 27 | return modules_dir 28 | 29 | 30 | @lru_cache(64) 31 | def get_modules() -> List[Path]: 32 | return [modulepath for modulepath in get_modules_dir().glob('*.json')] 33 | 34 | 35 | def load_all_modules_configs() -> Dict[str, Dict]: 36 | configs = {} 37 | for p in get_modules(): 38 | with p.open() as f: 39 | j = json.load(f) 40 | configs[f"{j['vendor']}-{j['name']}"] = j 41 | return configs 42 | 43 | 44 | def get_ipasn(): 45 | ipasnhistory_url = get_config('generic', 'ipasnhistory_url') 46 | ipasn = IPASNHistory(ipasnhistory_url) 47 | if not ipasn.is_up: 48 | raise ThirdPartyUnreachable(f"Unable to reach IPASNHistory on {ipasnhistory_url}") 49 | return ipasn 50 | 51 | 52 | def sanity_check_ipasn(ipasn): 53 | try: 54 | meta = ipasn.meta() 55 | except requests.exceptions.ConnectionError: 56 | return False, "IP ASN History is not reachable, try again later." 57 | 58 | if 'error' in meta: 59 | raise ThirdPartyUnreachable(f'IP ASN History has a problem: {meta["error"]}') 60 | 61 | v4_percent = meta['cached_dates']['caida']['v4']['percent'] 62 | v6_percent = meta['cached_dates']['caida']['v6']['percent'] 63 | if v4_percent < 90 or v6_percent < 90: # (this way it works if we only load 10 days) 64 | # Try again later. 
65 | return False, f"IP ASN History is not ready: v4 {v4_percent}% / v6 {v6_percent}% loaded" 66 | return True, f"IP ASN History is ready: v4 {v4_percent}% / v6 {v6_percent}% loaded" 67 | -------------------------------------------------------------------------------- /bgpranking/parsers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D4-project/BGP-Ranking/5f4e051f940b1c79ec5c6b432db1c7b2bdcbc781/bgpranking/parsers/__init__.py -------------------------------------------------------------------------------- /bgpranking/parsers/abusech.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from dateutil.parser import parse 5 | import re 6 | from io import BytesIO 7 | 8 | from typing import List 9 | 10 | 11 | def parse_raw_file(self, f: BytesIO) -> List[bytes]: 12 | self.datetime = parse(re.findall(b'# Last updated: (.*)#', f.getvalue())[0]) 13 | return self.extract_ipv4(f.getvalue()) 14 | -------------------------------------------------------------------------------- /bgpranking/parsers/abusech_feodo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from dateutil.parser import parse 5 | import re 6 | from io import BytesIO 7 | 8 | from typing import List 9 | 10 | 11 | def parse_raw_file(self, f: BytesIO) -> List[bytes]: 12 | self.datetime = parse(re.findall(b'# Last updated: (.*)#', f.getvalue())[0]) 13 | return self.extract_ipv4(f.getvalue()) 14 | -------------------------------------------------------------------------------- /bgpranking/parsers/abusech_threatfox.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | 6 | from datetime import datetime 7 | from io import BytesIO 8 | from typing import List 9 | 10 | 11 | def parse_raw_file(self, f: BytesIO) -> List[str]: 12 | self.datetime = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) 13 | to_return = [] 14 | for entry in json.loads(f.getvalue().decode()).values(): 15 | ip_port = entry[0]['ioc_value'] 16 | to_return.append(ip_port.split(':')[0]) 17 | return to_return 18 | -------------------------------------------------------------------------------- /bgpranking/parsers/dshield.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from dateutil.parser import parse 5 | import re 6 | from io import BytesIO 7 | from typing import List 8 | 9 | 10 | def parse_raw_file(self, f: BytesIO) -> List[bytes]: 11 | self.datetime = parse(re.findall(b'# updated (.*)\n', f.getvalue())[0]) 12 | iplist = self.extract_ipv4(f.getvalue()) 13 | # The IPs have leading 0s. Getting rid of them directly here. 
14 | return self.strip_leading_zeros(iplist) 15 | -------------------------------------------------------------------------------- /bgpranking/parsers/malc0de.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from dateutil.parser import parse 5 | import re 6 | from io import BytesIO 7 | 8 | from typing import List 9 | 10 | 11 | def parse_raw_file(self, f: BytesIO) -> List[bytes]: 12 | self.datetime = parse(re.findall(b'// Last updated (.*)\n', f.getvalue())[0]) 13 | return self.extract_ipv4(f.getvalue()) 14 | -------------------------------------------------------------------------------- /bgpranking/parsers/nothink.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from dateutil.parser import parse 5 | import re 6 | from io import BytesIO 7 | 8 | from typing import List 9 | 10 | def parse_raw_file(self, f: BytesIO) -> List[bytes]: 11 | self.datetime = parse(re.findall(b'# Generated (.*)\n', f.getvalue())[0]) 12 | return self.extract_ipv4(f.getvalue()) 13 | -------------------------------------------------------------------------------- /bgpranking/parsers/shadowserver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from dateutil.parser import parse 5 | from csv import DictReader 6 | from io import BytesIO, StringIO 7 | from typing import Tuple, Generator 8 | from datetime import datetime 9 | 10 | 11 | def parse_raw_file(self, f: BytesIO) -> Generator[Tuple[str, datetime], None, None]: 12 | default_ts = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) 13 | reader = DictReader(StringIO(f.getvalue().decode())) 14 | for row in reader: 15 | if 'timestamp' in row: 16 | ts = parse(row['timestamp']) 17 | else: 18 | ts = default_ts 19 | 20 | if 'ip' in row: 21 | ip = row['ip'] 22 | elif 'src_ip' in row: 23 | # For sinkhole6_http 24 | ip = row['src_ip'] 25 | else: 26 | self.logger.critical(f'No IPs in the list {self.source}.') 27 | break 28 | yield ip, ts 29 | -------------------------------------------------------------------------------- /bgpranking/statsripe.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import copy 5 | import json 6 | 7 | from datetime import datetime, timedelta 8 | from enum import Enum 9 | from ipaddress import IPv4Address, IPv6Address, IPv4Network, IPv6Network 10 | from typing import TypeVar, Optional, Dict, Any 11 | 12 | import requests 13 | 14 | from dateutil.parser import parse 15 | 16 | from .helpers import get_homedir, safe_create_dir 17 | 18 | IPTypes = TypeVar('IPTypes', IPv4Address, IPv6Address, 'str') 19 | PrefixTypes = TypeVar('PrefixTypes', IPv4Network, IPv6Network, 'str') 20 | TimeTypes = TypeVar('TimeTypes', datetime, 'str') 21 | 22 | 23 | class ASNsTypes(Enum): 24 | transiting = 't' 25 | originating = 'o' 26 | all_types = 't,o' 27 | undefined = '' 28 | 29 | 30 | class AddressFamilies(Enum): 31 | ipv4 = 'v4' 32 | ipv6 = 'v6' 33 | all_families = 'v4,v6' 34 | undefined = '' 35 | 36 | 37 | class Noise(Enum): 38 | keep = 'keep' 39 | remove = 'filter' 40 | 41 | 42 | class StatsRIPE(): 43 | 44 | def __init__(self, sourceapp='bgpranking-ng - CIRCL'): 45 | self.url = "https://stat.ripe.net/data/{method}/data.json?{parameters}" 46 | self.sourceapp = sourceapp 47 | 
self.cache_dir = get_homedir() / 'rawdata' / 'stats_ripe' 48 | 49 | def __time_to_text(self, query_time: TimeTypes) -> str: 50 | if isinstance(query_time, datetime): 51 | return query_time.isoformat() 52 | return query_time 53 | 54 | def _get_cache(self, method, parameters): 55 | '''The dataset is updated every 8 hours (midnight, 8, 16). 56 | If parameters has a key 'query_time' on any of these hours, try to get it. 57 | If not, try to get the closest one. 58 | If it has nothing, assume non and try to get the closest timestamp 59 | When caching, get query_time from response['data']['query_time'] 60 | ''' 61 | parameters = copy.copy(parameters) 62 | if not parameters.get('query_time'): 63 | # use timedelta because the generation of the new dataset takes a while. 64 | parameters['query_time'] = (datetime.now() - timedelta(hours=8)).isoformat() 65 | 66 | d = parse(parameters['query_time']) 67 | if d.hour == 8 and d.minute == 0 and d.second == 0: 68 | pass 69 | else: 70 | d = d.replace(hour=min([0, 8, 16], key=lambda x: abs(x - d.hour)), 71 | minute=0, second=0, microsecond=0) 72 | parameters['query_time'] = d.isoformat() 73 | cache_filename = '&'.join(['{}={}'.format(k, str(v).lower()) for k, v in parameters.items()]) 74 | c_path = self.cache_dir / method / cache_filename 75 | if c_path.exists(): 76 | with open(c_path, 'r') as f: 77 | return json.load(f) 78 | return False 79 | 80 | def _save_cache(self, method, parameters, response): 81 | parameters['query_time'] = response['data']['query_time'] 82 | cache_filename = '&'.join(['{}={}'.format(k, str(v).lower()) for k, v in parameters.items()]) 83 | safe_create_dir(self.cache_dir / method) 84 | c_path = self.cache_dir / method / cache_filename 85 | with open(c_path, 'w') as f: 86 | json.dump(response, f, indent=2) 87 | 88 | def _get(self, method: str, parameters: Dict) -> Dict: 89 | parameters['sourceapp'] = self.sourceapp 90 | cached = self._get_cache(method, parameters) 91 | if cached: 92 | return cached 93 | url = self.url.format(method=method, parameters='&'.join(['{}={}'.format(k, str(v).lower()) for k, v in parameters.items()])) 94 | response = requests.get(url) 95 | j_content = response.json() 96 | self._save_cache(method, parameters, j_content) 97 | return j_content 98 | 99 | def network_info(self, ip: IPTypes) -> dict: 100 | parameters = {'resource': ip} 101 | return self._get('network-info', parameters) 102 | 103 | def prefix_overview(self, prefix: PrefixTypes, min_peers_seeing: int= 0, 104 | max_related: int=0, query_time: Optional[TimeTypes]=None) -> dict: 105 | parameters: Dict[str, Any] = {'resource': prefix} 106 | if min_peers_seeing: 107 | parameters['min_peers_seeing'] = min_peers_seeing 108 | if max_related: 109 | parameters['max_related'] = max_related 110 | if query_time: 111 | parameters['query_time'] = self.__time_to_text(query_time) 112 | return self._get('prefix-overview', parameters) 113 | 114 | def ris_asns(self, query_time: Optional[TimeTypes]=None, list_asns: bool=False, asn_types: ASNsTypes=ASNsTypes.undefined): 115 | parameters: Dict[str, Any] = {} 116 | if list_asns: 117 | parameters['list_asns'] = list_asns 118 | if asn_types: 119 | parameters['asn_types'] = asn_types.value 120 | if query_time: 121 | parameters['query_time'] = self.__time_to_text(query_time) 122 | return self._get('ris-asns', parameters) 123 | 124 | def ris_prefixes(self, asn: int, query_time: Optional[TimeTypes]=None, 125 | list_prefixes: bool=False, types: ASNsTypes=ASNsTypes.undefined, 126 | af: AddressFamilies=AddressFamilies.undefined, 
noise: Noise=Noise.keep): 127 | parameters: Dict[str, Any] = {'resource': str(asn)} 128 | if query_time: 129 | parameters['query_time'] = self.__time_to_text(query_time) 130 | if list_prefixes: 131 | parameters['list_prefixes'] = list_prefixes 132 | if types: 133 | parameters['types'] = types.value 134 | if af: 135 | parameters['af'] = af.value 136 | if noise: 137 | parameters['noise'] = noise.value 138 | return self._get('ris-prefixes', parameters) 139 | 140 | def country_asns(self, country: str, details: int=0, query_time: Optional[TimeTypes]=None): 141 | parameters: Dict[str, Any] = {'resource': country} 142 | if details: 143 | parameters['lod'] = details 144 | if query_time: 145 | parameters['query_time'] = self.__time_to_text(query_time) 146 | return self._get('country-asns', parameters) 147 | -------------------------------------------------------------------------------- /bin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D4-project/BGP-Ranking/5f4e051f940b1c79ec5c6b432db1c7b2bdcbc781/bin/__init__.py -------------------------------------------------------------------------------- /bin/archiver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | import logging 6 | import zipfile 7 | 8 | from collections import defaultdict 9 | from datetime import date 10 | from logging import Logger 11 | from pathlib import Path 12 | 13 | from dateutil import parser 14 | from dateutil.relativedelta import relativedelta 15 | 16 | from bgpranking.default import safe_create_dir, AbstractManager 17 | from bgpranking.helpers import get_modules, get_data_dir 18 | 19 | 20 | logger = logging.getLogger('Archiver') 21 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 22 | level=logging.INFO) 23 | 24 | 25 | class DeepArchive(): 26 | 27 | def __init__(self, config_file: Path, logger: Logger): 28 | '''Archive everyfile older than 2 month.''' 29 | with config_file.open() as f: 30 | module_parameters = json.load(f) 31 | self.logger = logger 32 | self.vendor = module_parameters['vendor'] 33 | self.listname = module_parameters['name'] 34 | self.directory = get_data_dir() / self.vendor / self.listname / 'archive' 35 | safe_create_dir(self.directory) 36 | self.deep_archive = self.directory / 'deep' 37 | safe_create_dir(self.deep_archive) 38 | 39 | def archive(self): 40 | to_archive = defaultdict(list) 41 | today = date.today() 42 | last_day_to_keep = date(today.year, today.month, 1) - relativedelta(months=2) 43 | for p in self.directory.iterdir(): 44 | if not p.is_file(): 45 | continue 46 | filedate = parser.parse(p.name.split('.')[0]).date() 47 | if filedate >= last_day_to_keep: 48 | continue 49 | to_archive['{}.zip'.format(filedate.strftime('%Y%m'))].append(p) 50 | if to_archive: 51 | self.logger.info('Found old files. 
Archiving: {}'.format(', '.join(to_archive.keys()))) 52 | else: 53 | self.logger.debug('No old files.') 54 | for archivename, path_list in to_archive.items(): 55 | with zipfile.ZipFile(self.deep_archive / archivename, 'x', zipfile.ZIP_DEFLATED) as z: 56 | for f in path_list: 57 | z.write(f, f.name) 58 | # Delete all the files if the archiving worked out properly 59 | [f.unlink() for f in path_list] 60 | 61 | 62 | class ModulesArchiver(AbstractManager): 63 | 64 | def __init__(self, loglevel: int=logging.INFO): 65 | super().__init__(loglevel) 66 | self.script_name = 'archiver' 67 | self.modules = [DeepArchive(path, self.logger) for path in get_modules()] 68 | 69 | def _to_run_forever(self): 70 | [module.archive() for module in self.modules] 71 | 72 | 73 | def main(): 74 | archiver = ModulesArchiver() 75 | archiver.run(sleep_in_sec=360000) 76 | 77 | 78 | if __name__ == '__main__': 79 | main() 80 | -------------------------------------------------------------------------------- /bin/asn_descriptions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import logging 5 | import re 6 | import requests 7 | 8 | from dateutil.parser import parse 9 | from redis import Redis 10 | 11 | from bgpranking.default import get_socket_path, safe_create_dir, AbstractManager, get_config 12 | from bgpranking.helpers import get_data_dir 13 | 14 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 15 | level=logging.INFO) 16 | 17 | 18 | class ASNDescriptions(AbstractManager): 19 | 20 | def __init__(self, loglevel: int=logging.INFO): 21 | super().__init__(loglevel) 22 | self.script_name = 'asn_descr' 23 | self.asn_meta = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port'), db=2, decode_responses=True) 24 | self.logger.debug('Starting ASN History') 25 | self.directory = get_data_dir() / 'asn_descriptions' 26 | safe_create_dir(self.directory) 27 | self.archives = self.directory / 'archive' 28 | safe_create_dir(self.archives) 29 | self.url = 'https://www.cidr-report.org/as2.0/autnums.html' 30 | 31 | def __update_available(self): 32 | r = requests.head(self.url) 33 | print(r.headers) 34 | current_last_modified = parse(r.headers['Last-Modified']) 35 | if not self.asn_meta.exists('ans_description_last_update'): 36 | return True 37 | last_update = parse(self.asn_meta.get('ans_description_last_update')) # type: ignore 38 | if last_update < current_last_modified: 39 | return True 40 | return False 41 | 42 | def load_descriptions(self): 43 | if not self.__update_available(): 44 | self.logger.debug('No new file to import.') 45 | return 46 | self.logger.info('Importing new ASN descriptions.') 47 | r = requests.get(self.url) 48 | last_modified = parse(r.headers['Last-Modified']).isoformat() 49 | p = self.asn_meta.pipeline() 50 | new_asn = 0 51 | new_description = 0 52 | for asn, descr in re.findall('as=AS(.*)&.* (.*)\n', r.text): 53 | existing_descriptions = self.asn_meta.hgetall(f'{asn}|descriptions') 54 | if not existing_descriptions: 55 | self.logger.debug(f'New ASN: {asn} - {descr}') 56 | p.hset(f'{asn}|descriptions', last_modified, descr) 57 | new_asn += 1 58 | else: 59 | last_descr = sorted(existing_descriptions.keys(), reverse=True)[0] 60 | if descr != existing_descriptions[last_descr]: 61 | self.logger.debug(f'New description for {asn}: {existing_descriptions[last_descr]} -> {descr}') 62 | p.hset(f'{asn}|descriptions', last_modified, descr) 63 | new_description 
+= 1 64 | p.set('ans_description_last_update', last_modified) 65 | p.execute() 66 | self.logger.info(f'Done with import. New ASNs: {new_asn}, new descriptions: {new_description}') 67 | if new_asn or new_description: 68 | with open(self.archives / f'{last_modified}.html', 'w') as f: 69 | f.write(r.text) 70 | 71 | def _to_run_forever(self): 72 | self.load_descriptions() 73 | 74 | 75 | def main(): 76 | asnd_manager = ASNDescriptions() 77 | asnd_manager.run(sleep_in_sec=3600) 78 | 79 | 80 | if __name__ == '__main__': 81 | main() 82 | -------------------------------------------------------------------------------- /bin/dbinsert.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import logging 5 | import time 6 | 7 | from typing import List 8 | 9 | from redis import Redis 10 | 11 | from bgpranking.default import get_socket_path, AbstractManager, get_config 12 | from bgpranking.helpers import get_ipasn, sanity_check_ipasn 13 | 14 | 15 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 16 | level=logging.INFO) 17 | 18 | 19 | class DBInsertManager(AbstractManager): 20 | 21 | def __init__(self, loglevel: int=logging.INFO): 22 | super().__init__(loglevel) 23 | self.script_name = 'db_insert' 24 | self.kvrocks_storage = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port'), decode_responses=True) 25 | self.redis_sanitized = Redis(unix_socket_path=get_socket_path('prepare'), db=0, decode_responses=True) 26 | self.ipasn = get_ipasn() 27 | self.logger.debug('Starting import') 28 | 29 | def _to_run_forever(self): 30 | ready, message = sanity_check_ipasn(self.ipasn) 31 | if not ready: 32 | # Try again later. 33 | self.logger.warning(message) 34 | return 35 | self.logger.debug(message) 36 | 37 | while True: 38 | if self.shutdown_requested(): 39 | break 40 | try: 41 | if not self.ipasn.is_up: 42 | break 43 | except Exception: 44 | self.logger.warning('Unable to query ipasnhistory') 45 | time.sleep(10) 46 | continue 47 | uuids: List[str] = self.redis_sanitized.spop('to_insert', 100) # type: ignore 48 | if not uuids: 49 | break 50 | p = self.redis_sanitized.pipeline(transaction=False) 51 | [p.hgetall(uuid) for uuid in uuids] 52 | sanitized_data = p.execute() 53 | 54 | for_query = [] 55 | for i, uuid in enumerate(uuids): 56 | data = sanitized_data[i] 57 | if not data: 58 | self.logger.warning(f'No data for UUID {uuid}. This should not happen, but lets move on.') 59 | continue 60 | for_query.append({'ip': data['ip'], 'address_family': data['address_family'], 61 | 'date': data['datetime'], 'precision_delta': {'days': 3}}) 62 | try: 63 | responses = self.ipasn.mass_query(for_query) 64 | except Exception: 65 | self.logger.exception('Mass query in IPASN History failed, trying again later.') 66 | # Rollback the spop 67 | self.redis_sanitized.sadd('to_insert', *uuids) 68 | time.sleep(10) 69 | continue 70 | retry = [] 71 | done = [] 72 | ardb_pipeline = self.kvrocks_storage.pipeline(transaction=False) 73 | for i, uuid in enumerate(uuids): 74 | data = sanitized_data[i] 75 | if not data: 76 | self.logger.warning(f'No data for UUID {uuid}. 
This should not happen, but lets move on.') 77 | done.append(uuid) 78 | continue 79 | routing_info = responses['responses'][i]['response'] # our queries are on one single date, not a range 80 | # Data gathered from IPASN History: 81 | # * IP Block of the IP 82 | # * AS number 83 | if not routing_info: 84 | self.logger.warning(f"No response for {responses['responses'][i]}") 85 | done.append(uuid) 86 | continue 87 | if 'error' in routing_info: 88 | self.logger.warning(f"Unable to find routing information for {data['ip']} - {data['datetime']}: {routing_info['error']}") 89 | done.append(uuid) 90 | continue 91 | # Single date query, getting from the object 92 | datetime_routing = list(routing_info.keys())[0] 93 | entry = routing_info[datetime_routing] 94 | if not entry: 95 | # routing info is missing, need to try again later. 96 | retry.append(uuid) 97 | continue 98 | if 'asn' in entry and entry['asn'] in [None, '0']: 99 | self.logger.warning(f"Unable to find the AS number associated to {data['ip']} - {data['datetime']} (got {entry['asn']}) - {entry}.") 100 | done.append(uuid) 101 | continue 102 | if 'prefix' in entry and entry['prefix'] in [None, '0.0.0.0/0', '::/0']: 103 | self.logger.warning(f"Unable to find the prefix associated to {data['ip']} - {data['datetime']} (got {entry['prefix']}).") 104 | done.append(uuid) 105 | continue 106 | 107 | # Format: |sources -> set([, ...]) 108 | ardb_pipeline.sadd(f"{data['date']}|sources", data['source']) 109 | 110 | # Format: | -> set([, ...]) 111 | ardb_pipeline.sadd(f"{data['date']}|{data['source']}", entry['asn']) 112 | # Format: || -> set([, ...]) 113 | ardb_pipeline.sadd(f"{data['date']}|{data['source']}|{entry['asn']}", entry['prefix']) 114 | 115 | # Format: ||| -> set([|, ...]) 116 | ardb_pipeline.sadd(f"{data['date']}|{data['source']}|{entry['asn']}|{entry['prefix']}", 117 | f"{data['ip']}|{data['datetime']}") 118 | done.append(uuid) 119 | ardb_pipeline.execute() 120 | p = self.redis_sanitized.pipeline(transaction=False) 121 | if done: 122 | p.delete(*done) 123 | if retry: 124 | p.sadd('to_insert', *retry) 125 | p.execute() 126 | 127 | 128 | def main(): 129 | dbinsert = DBInsertManager() 130 | dbinsert.run(sleep_in_sec=120) 131 | 132 | 133 | if __name__ == '__main__': 134 | main() 135 | -------------------------------------------------------------------------------- /bin/fetcher.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import json 5 | import asyncio 6 | import logging 7 | 8 | from datetime import datetime, date 9 | from hashlib import sha512 # Faster than sha256 on 64b machines. 
10 | from logging import Logger 11 | from pathlib import Path 12 | 13 | import aiohttp 14 | from dateutil import parser 15 | from pid import PidFile, PidFileError  # type: ignore 16 | 17 | from bgpranking.default import AbstractManager, safe_create_dir 18 | from bgpranking.helpers import get_modules, get_data_dir, get_modules_dir 19 | 20 | 21 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 22 | level=logging.INFO) 23 | 24 | 25 | class Fetcher(): 26 | 27 | def __init__(self, config_file: Path, logger: Logger): 28 | '''Load `config_file`, and store the fetched data into `storage_directory`. 29 | Note: if the `config_file` does not provide a URL (the file is 30 | gathered by some other means), the fetcher is automatically stopped.''' 31 | with open(config_file, 'r') as f: 32 | module_parameters = json.load(f) 33 | self.vendor = module_parameters['vendor'] 34 | self.listname = module_parameters['name'] 35 | self.logger = logger 36 | self.fetcher = True 37 | if 'url' not in module_parameters: 38 | self.logger.info(f'{self.vendor}-{self.listname}: No URL to fetch, breaking.') 39 | self.fetcher = False 40 | return 41 | self.url = module_parameters['url'] 42 | self.logger.debug(f'{self.vendor}-{self.listname}: Starting fetcher on {self.url}') 43 | self.directory = get_data_dir() / self.vendor / self.listname 44 | safe_create_dir(self.directory) 45 | self.meta = self.directory / 'meta' 46 | safe_create_dir(self.meta) 47 | self.archive_dir = self.directory / 'archive' 48 | safe_create_dir(self.archive_dir) 49 | self.first_fetch = True 50 | 51 | async def __get_last_modified(self): 52 | async with aiohttp.ClientSession() as session: 53 | async with session.head(self.url) as r: 54 | headers = r.headers 55 | if 'Last-Modified' in headers: 56 | return parser.parse(headers['Last-Modified']) 57 | return None 58 | 59 | async def __newer(self): 60 | '''Check if the file available for download is newer than the one 61 | already downloaded by checking the `Last-Modified` header. 62 | Note: return False if the file containing the last header content 63 | does not exist, or the header doesn't have this key. 64 | ''' 65 | last_modified_path = self.meta / 'lastmodified' 66 | if not last_modified_path.exists(): 67 | # The file doesn't exist 68 | if not self.first_fetch: 69 | # The URL has no Last-Modified header, we cannot use it. 
70 | self.logger.debug(f'{self.vendor}-{self.listname}: No Last-Modified header available') 71 | return True 72 | self.first_fetch = False 73 | last_modified = await self.__get_last_modified() 74 | if last_modified: 75 | self.logger.debug(f'{self.vendor}-{self.listname}: Last-Modified header available') 76 | with last_modified_path.open('w') as f: 77 | f.write(last_modified.isoformat()) 78 | else: 79 | self.logger.debug(f'{self.vendor}-{self.listname}: No Last-Modified header available') 80 | return True 81 | with last_modified_path.open() as f: 82 | file_content = f.read() 83 | last_modified_file = parser.parse(file_content) 84 | last_modified = await self.__get_last_modified() 85 | if not last_modified: 86 | # No more Last-Modified header Oo 87 | self.logger.warning(f'{self.vendor}-{self.listname}: Last-Modified header was present, isn\'t anymore!') 88 | last_modified_path.unlink() 89 | return True 90 | if last_modified > last_modified_file: 91 | self.logger.info(f'{self.vendor}-{self.listname}: Got a new file.') 92 | with last_modified_path.open('w') as f: 93 | f.write(last_modified.isoformat()) 94 | return True 95 | return False 96 | 97 | def __same_as_last(self, downloaded): 98 | '''Figure out the last downloaded file, check if it is the same as the 99 | newly downloaded one. Returns true if both files have been downloaded the 100 | same day. 101 | Note: we check the new and the archive directory because we may have backlog 102 | and the newest file is always the first one we process 103 | ''' 104 | to_check = [] 105 | to_check_new = sorted([f for f in self.directory.iterdir() if f.is_file()]) 106 | if to_check_new: 107 | # we have files waiting to be processed 108 | self.logger.debug(f'{self.vendor}-{self.listname}: {len(to_check_new)} file(s) are waiting to be processed') 109 | to_check.append(to_check_new[-1]) 110 | to_check_archive = sorted([f for f in self.archive_dir.iterdir() if f.is_file()]) 111 | if to_check_archive: 112 | # we have files already processed, in the archive 113 | self.logger.debug(f'{self.vendor}-{self.listname}: {len(to_check_archive)} file(s) have been processed') 114 | to_check.append(to_check_archive[-1]) 115 | if not to_check: 116 | self.logger.debug(f'{self.vendor}-{self.listname}: New list, no hisorical files') 117 | # nothing has been downloaded ever, moving on 118 | return False 119 | dl_hash = sha512(downloaded) 120 | for last_file in to_check: 121 | with last_file.open('rb') as f: 122 | last_hash = sha512(f.read()) 123 | if (dl_hash.digest() == last_hash.digest() 124 | and parser.parse(last_file.name.split('.')[0]).date() == date.today()): 125 | self.logger.debug(f'{self.vendor}-{self.listname}: Same file already downloaded today.') 126 | return True 127 | return False 128 | 129 | async def fetch_list(self): 130 | '''Fetch & store the list''' 131 | if not self.fetcher: 132 | return 133 | try: 134 | with PidFile(f'{self.listname}.pid', piddir=self.meta): 135 | if not await self.__newer(): 136 | return 137 | async with aiohttp.ClientSession() as session: 138 | async with session.get(self.url) as r: 139 | content = await r.content.read() 140 | if self.__same_as_last(content): 141 | return 142 | self.logger.info(f'{self.vendor}-{self.listname}: Got a new file!') 143 | with (self.directory / '{}.txt'.format(datetime.now().isoformat())).open('wb') as f: 144 | f.write(content) 145 | except PidFileError: 146 | self.logger.info(f'{self.vendor}-{self.listname}: Fetcher already running') 147 | 148 | 149 | class ModulesManager(AbstractManager): 150 | 151 | def 
__init__(self, loglevel: int=logging.DEBUG): 152 | super().__init__(loglevel) 153 | self.script_name = 'modules_manager' 154 | self.modules_paths = get_modules() 155 | self.modules = [Fetcher(path, self.logger) for path in self.modules_paths] 156 | 157 | async def _to_run_forever_async(self): 158 | # Check if there are new config files 159 | new_modules_paths = [modulepath for modulepath in get_modules_dir().glob('*.json') if modulepath not in self.modules_paths] 160 | self.modules += [Fetcher(path, self.logger) for path in new_modules_paths] 161 | self.modules_paths += new_modules_paths 162 | 163 | if self.modules: 164 | for module in self.modules: 165 | if module.fetcher: 166 | await module.fetch_list() 167 | else: 168 | self.logger.info('No config files were found so there are no fetchers running yet. Will try again later.') 169 | 170 | 171 | def main(): 172 | m = ModulesManager() 173 | asyncio.run(m.run_async(sleep_in_sec=3600)) 174 | 175 | 176 | if __name__ == '__main__': 177 | main() 178 | -------------------------------------------------------------------------------- /bin/manual_ranking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import argparse 5 | import logging 6 | from dateutil.parser import parse 7 | from datetime import timedelta 8 | 9 | from bgpranking.helpers import load_all_modules_configs 10 | from .ranking import Ranking 11 | 12 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 13 | level=logging.INFO) 14 | 15 | 16 | if __name__ == '__main__': 17 | parser = argparse.ArgumentParser(description='Manually force the ranking of a day or a time interval.') 18 | group = parser.add_mutually_exclusive_group(required=True) 19 | group.add_argument('-d', '--day', type=str, help='Day to rank (Format: YYYY-MM-DD).') 20 | group.add_argument('-i', '--interval', type=str, nargs=2, help='Interval to rank, first to last (Format: YYYY-MM-DD YYYY-MM-DD).') 21 | args = parser.parse_args() 22 | 23 | ranking = Ranking(loglevel=logging.DEBUG) 24 | config_files = load_all_modules_configs() 25 | if args.day: 26 | day = parse(args.day).date().isoformat() 27 | ranking.rank_a_day(day) 28 | else: 29 | current = parse(args.interval[1]).date() 30 | stop_date = parse(args.interval[0]).date() 31 | while current >= stop_date: 32 | ranking.rank_a_day(current.isoformat()) 33 | current -= timedelta(days=1) 34 | -------------------------------------------------------------------------------- /bin/parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import importlib 5 | import json 6 | import logging 7 | import re 8 | import types 9 | 10 | from datetime import datetime 11 | from io import BytesIO 12 | from logging import Logger 13 | from pathlib import Path 14 | from typing import List, Union, Tuple 15 | from uuid import uuid4 16 | 17 | from redis import Redis 18 | 19 | from bgpranking.default import AbstractManager, safe_create_dir, get_socket_path 20 | from bgpranking.helpers import get_modules, get_data_dir, get_modules_dir 21 | 22 | 23 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 24 | level=logging.INFO) 25 | 26 | 27 | class RawFilesParser(): 28 | 29 | def __init__(self, config_file: Path, logger: Logger) -> None: 30 | self.logger = logger 31 | with open(config_file, 'r') as f: 32 | module_parameters = json.load(f) 33 | self.vendor = 
module_parameters['vendor'] 34 | self.listname = module_parameters['name'] 35 | if 'parser' in module_parameters: 36 | self.parse_raw_file = types.MethodType(importlib.import_module(module_parameters['parser'], 'bgpranking').parse_raw_file, self)  # type: ignore 37 | self.source = f'{self.vendor}-{self.listname}' 38 | self.directory = get_data_dir() / self.vendor / self.listname 39 | safe_create_dir(self.directory) 40 | self.unparsable_dir = self.directory / 'unparsable' 41 | safe_create_dir(self.unparsable_dir) 42 | self.redis_intake = Redis(unix_socket_path=get_socket_path('intake'), db=0) 43 | self.logger.debug(f'{self.source}: Starting intake.') 44 | 45 | @property 46 | def files_to_parse(self) -> List[Path]: 47 | return sorted([f for f in self.directory.iterdir() if f.is_file()], reverse=True) 48 | 49 | def extract_ipv4(self, bytestream: bytes) -> List[Union[bytes, Tuple[bytes, datetime]]]: 50 | return re.findall(rb'[0-9]+(?:\.[0-9]+){3}', bytestream) 51 | 52 | def strip_leading_zeros(self, ips: List[bytes]) -> List[bytes]: 53 | '''Helper to get rid of leading 0s in an IP list. 54 | Only run it when needed, it is nasty and slow''' 55 | return ['.'.join(str(int(part)) for part in ip.split(b'.')).encode() for ip in ips] 56 | 57 | def parse_raw_file(self, f: BytesIO) -> List[Union[bytes, Tuple[bytes, datetime]]]: 58 | # If the list doesn't provide a time, fall back to the current day, midnight 59 | self.datetime = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) 60 | return self.extract_ipv4(f.getvalue()) 61 | 62 | def parse_raw_files(self) -> None: 63 | nb_unparsable_files = len([f for f in self.unparsable_dir.iterdir() if f.is_file()]) 64 | if nb_unparsable_files: 65 | self.logger.warning(f'{self.source}: Was unable to parse {nb_unparsable_files} files.') 66 | try: 67 | for filepath in self.files_to_parse: 68 | self.logger.debug(f'{self.source}: Parsing {filepath}, {len(self.files_to_parse) - 1} to go.') 69 | with open(filepath, 'rb') as f: 70 | to_parse = BytesIO(f.read()) 71 | p = self.redis_intake.pipeline() 72 | for line in self.parse_raw_file(to_parse): 73 | if isinstance(line, tuple): 74 | ip, datetime = line 75 | else: 76 | ip = line 77 | datetime = self.datetime 78 | uuid = uuid4() 79 | p.hmset(str(uuid), {'ip': ip, 'source': self.source, 80 | 'datetime': datetime.isoformat()}) 81 | p.sadd('intake', str(uuid)) 82 | p.execute() 83 | self._archive(filepath) 84 | except Exception as e: 85 | self.logger.exception(f"{self.source}: That didn't go well: {e}") 86 | self._unparsable(filepath) 87 | 88 | def _archive(self, filepath: Path) -> None: 89 | '''After processing, move the file to the archive directory''' 90 | filepath.rename(self.directory / 'archive' / filepath.name) 91 | 92 | def _unparsable(self, filepath: Path) -> None: 93 | '''If parsing failed, move the file to the unparsable directory''' 94 | filepath.rename(self.unparsable_dir / filepath.name) 95 | 96 | 97 | class ParserManager(AbstractManager): 98 | 99 | def __init__(self, loglevel: int=logging.DEBUG): 100 | super().__init__(loglevel) 101 | self.script_name = 'parser' 102 | self.modules_paths = get_modules() 103 | self.modules = [RawFilesParser(path, self.logger) for path in self.modules_paths] 104 | 105 | def _to_run_forever(self): 106 | # Check if there are new config files 107 | new_modules_paths = [modulepath for modulepath in get_modules_dir().glob('*.json') if modulepath not in self.modules_paths] 108 | self.modules += [RawFilesParser(path, self.logger) for path in new_modules_paths] 109 | self.modules_paths += 
new_modules_paths 110 | 111 | if self.modules: 112 | for module in self.modules: 113 | module.parse_raw_files() 114 | else: 115 | self.logger.warning('No config files were found so there are no parsers running yet. Will try again later.') 116 | 117 | 118 | def main(): 119 | parser_manager = ParserManager() 120 | parser_manager.run(sleep_in_sec=120) 121 | 122 | 123 | if __name__ == '__main__': 124 | main() 125 | -------------------------------------------------------------------------------- /bin/ranking.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import logging 5 | 6 | from datetime import datetime, date, timedelta 7 | from ipaddress import ip_network 8 | from typing import Dict, Any 9 | 10 | from redis import Redis 11 | import requests 12 | 13 | from bgpranking.default import AbstractManager, get_config 14 | from bgpranking.helpers import get_ipasn, sanity_check_ipasn, load_all_modules_configs 15 | 16 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 17 | level=logging.INFO) 18 | 19 | 20 | class Ranking(AbstractManager): 21 | 22 | def __init__(self, loglevel: int=logging.INFO): 23 | super().__init__(loglevel) 24 | self.script_name = 'ranking' 25 | self.storage = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port'), decode_responses=True) 26 | self.ranking = Redis(get_config('generic', 'ranking_db_hostname'), get_config('generic', 'ranking_db_port'), decode_responses=True) 27 | self.ipasn = get_ipasn() 28 | 29 | def rank_a_day(self, day: str): 30 | asns_aggregation_key_v4 = f'{day}|asns|v4' 31 | asns_aggregation_key_v6 = f'{day}|asns|v6' 32 | to_delete = set([asns_aggregation_key_v4, asns_aggregation_key_v6]) 33 | r_pipeline = self.ranking.pipeline() 34 | cached_meta: Dict[str, Dict[str, Any]] = {} 35 | config_files = load_all_modules_configs() 36 | for source in self.storage.smembers(f'{day}|sources'): 37 | if source not in config_files: 38 | # get it again, just in case it is created after we open them 39 | config_files = load_all_modules_configs() 40 | self.logger.info(f'{day} - Ranking source: {source}') 41 | source_aggregation_key_v4 = f'{day}|{source}|asns|v4' 42 | source_aggregation_key_v6 = f'{day}|{source}|asns|v6' 43 | to_delete.update([source_aggregation_key_v4, source_aggregation_key_v6]) 44 | for asn in self.storage.smembers(f'{day}|{source}'): 45 | prefixes_aggregation_key_v4 = f'{day}|{asn}|v4' 46 | prefixes_aggregation_key_v6 = f'{day}|{asn}|v6' 47 | to_delete.update([prefixes_aggregation_key_v4, prefixes_aggregation_key_v6]) 48 | if asn == '0': 49 | # Default ASN when no matches. Probably spoofed. 50 | continue 51 | self.logger.debug(f'{day} - Ranking source: {source} / ASN: {asn}') 52 | asn_rank_v4 = 0.0 53 | asn_rank_v6 = 0.0 54 | for prefix in self.storage.smembers(f'{day}|{source}|{asn}'): 55 | if prefix == 'None': 56 | # This should not happen and requires a DB cleanup. 
57 | self.logger.critical(f'Fucked up prefix in "{day}|{source}|{asn}"') 58 | continue 59 | ips = set([ip_ts.split('|')[0] 60 | for ip_ts in self.storage.smembers(f'{day}|{source}|{asn}|{prefix}')]) 61 | py_prefix = ip_network(prefix) 62 | prefix_rank = float(len(ips)) / py_prefix.num_addresses 63 | r_pipeline.zadd(f'{day}|{source}|{asn}|v{py_prefix.version}|prefixes', {prefix: prefix_rank}) 64 | if py_prefix.version == 4: 65 | asn_rank_v4 += len(ips) * config_files[source]['impact'] 66 | r_pipeline.zincrby(prefixes_aggregation_key_v4, prefix_rank * config_files[source]['impact'], prefix) 67 | else: 68 | asn_rank_v6 += len(ips) * config_files[source]['impact'] 69 | r_pipeline.zincrby(prefixes_aggregation_key_v6, prefix_rank * config_files[source]['impact'], prefix) 70 | if asn in cached_meta: 71 | v4info = cached_meta[asn]['v4'] 72 | v6info = cached_meta[asn]['v6'] 73 | else: 74 | retry = 3 75 | while retry: 76 | try: 77 | v4info = self.ipasn.asn_meta(asn=asn, source='caida', address_family='v4', date=day) 78 | v6info = self.ipasn.asn_meta(asn=asn, source='caida', address_family='v6', date=day) 79 | break 80 | except requests.exceptions.ConnectionError: 81 | # Sometimes, ipasnhistory is unreachable try again a few times 82 | retry -= 1 83 | else: 84 | # if it keeps failing, the ASN will be ranked on next run. 85 | continue 86 | 87 | cached_meta[asn] = {'v4': v4info, 'v6': v6info} 88 | ipasnhistory_date_v4 = list(v4info['response'].keys())[0] 89 | v4count = v4info['response'][ipasnhistory_date_v4][asn]['ipcount'] 90 | ipasnhistory_date_v6 = list(v6info['response'].keys())[0] 91 | v6count = v6info['response'][ipasnhistory_date_v6][asn]['ipcount'] 92 | if v4count: 93 | asn_rank_v4 /= float(v4count) 94 | if asn_rank_v4: 95 | r_pipeline.set(f'{day}|{source}|{asn}|v4', asn_rank_v4) 96 | r_pipeline.zincrby(asns_aggregation_key_v4, asn_rank_v4, asn) 97 | r_pipeline.zadd(source_aggregation_key_v4, {asn: asn_rank_v4}) 98 | if v6count: 99 | asn_rank_v6 /= float(v6count) 100 | if asn_rank_v6: 101 | r_pipeline.set(f'{day}|{source}|{asn}|v6', asn_rank_v6) 102 | r_pipeline.zincrby(asns_aggregation_key_v6, asn_rank_v6, asn) 103 | r_pipeline.zadd(source_aggregation_key_v6, {asn: asn_rank_v6}) 104 | self.ranking.delete(*to_delete) 105 | r_pipeline.execute() 106 | 107 | def compute(self): 108 | ready, message = sanity_check_ipasn(self.ipasn) 109 | if not ready: 110 | # Try again later. 
111 | self.logger.warning(message) 112 | return 113 | self.logger.debug(message) 114 | 115 | self.logger.info('Start ranking') 116 | today = date.today() 117 | now = datetime.now() 118 | today12am = now.replace(hour=12, minute=0, second=0, microsecond=0) 119 | if now < today12am: 120 | # Compute yesterday and today's ranking (useful when we have lists generated only once a day) 121 | self.rank_a_day((today - timedelta(days=1)).isoformat()) 122 | self.rank_a_day(today.isoformat()) 123 | self.logger.info('Ranking done.') 124 | 125 | def _to_run_forever(self): 126 | self.compute() 127 | 128 | 129 | def main(): 130 | ranking = Ranking() 131 | ranking.run(sleep_in_sec=3600) 132 | 133 | 134 | if __name__ == '__main__': 135 | main() 136 | -------------------------------------------------------------------------------- /bin/run_backend.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import argparse 5 | import os 6 | import time 7 | from pathlib import Path 8 | from subprocess import Popen 9 | from typing import Optional, Dict 10 | 11 | from redis import Redis 12 | from redis.exceptions import ConnectionError 13 | 14 | from bgpranking.default import get_homedir, get_socket_path, get_config 15 | 16 | 17 | def check_running(name: str) -> bool: 18 | if name == "storage": 19 | r = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port')) 20 | elif name == "ranking": 21 | r = Redis(get_config('generic', 'ranking_db_hostname'), get_config('generic', 'ranking_db_port')) 22 | else: 23 | socket_path = get_socket_path(name) 24 | if not os.path.exists(socket_path): 25 | return False 26 | r = Redis(unix_socket_path=socket_path) 27 | try: 28 | return True if r.ping() else False 29 | except ConnectionError: 30 | return False 31 | 32 | 33 | def launch_cache(storage_directory: Optional[Path]=None): 34 | if not storage_directory: 35 | storage_directory = get_homedir() 36 | if not check_running('cache'): 37 | Popen(["./run_redis.sh"], cwd=(storage_directory / 'cache')) 38 | 39 | 40 | def shutdown_cache(storage_directory: Optional[Path]=None): 41 | if not storage_directory: 42 | storage_directory = get_homedir() 43 | r = Redis(unix_socket_path=get_socket_path('cache')) 44 | r.shutdown(save=True) 45 | print('Redis cache database shutdown.') 46 | 47 | 48 | def launch_temp(storage_directory: Optional[Path]=None): 49 | if not storage_directory: 50 | storage_directory = get_homedir() 51 | if not check_running('intake') and not check_running('prepare'): 52 | Popen(["./run_redis.sh"], cwd=(storage_directory / 'temp')) 53 | 54 | 55 | def shutdown_temp(storage_directory: Optional[Path]=None): 56 | if not storage_directory: 57 | storage_directory = get_homedir() 58 | r = Redis(unix_socket_path=get_socket_path('intake')) 59 | r.shutdown(save=True) 60 | print('Redis intake database shutdown.') 61 | r = Redis(unix_socket_path=get_socket_path('prepare')) 62 | r.shutdown(save=True) 63 | print('Redis prepare database shutdown.') 64 | 65 | 66 | def launch_storage(storage_directory: Optional[Path]=None): 67 | if not storage_directory: 68 | storage_directory = get_homedir() 69 | if not check_running('storage'): 70 | Popen(["./run_kvrocks.sh"], cwd=(storage_directory / 'storage')) 71 | 72 | 73 | def shutdown_storage(storage_directory: Optional[Path]=None): 74 | redis = Redis(get_config('generic', 'storage_db_hostname'), get_config('generic', 'storage_db_port')) 75 | redis.shutdown() 76 | 77 | 78 | def 
launch_ranking(storage_directory: Optional[Path]=None): 79 | if not storage_directory: 80 | storage_directory = get_homedir() 81 | if not check_running('ranking'): 82 | Popen(["./run_kvrocks.sh"], cwd=(storage_directory / 'ranking')) 83 | 84 | 85 | def shutdown_ranking(storage_directory: Optional[Path]=None): 86 | redis = Redis(get_config('generic', 'ranking_db_hostname'), get_config('generic', 'ranking_db_port')) 87 | redis.shutdown() 88 | 89 | 90 | def launch_all(): 91 | launch_cache() 92 | launch_temp() 93 | launch_storage() 94 | launch_ranking() 95 | 96 | 97 | def check_all(stop: bool=False): 98 | backends: Dict[str, bool] = {'cache': False, 'storage': False, 'ranking': False, 99 | 'intake': False, 'prepare': False} 100 | while True: 101 | for db_name in backends.keys(): 102 | print(backends[db_name]) 103 | try: 104 | backends[db_name] = check_running(db_name) 105 | except Exception: 106 | backends[db_name] = False 107 | if stop: 108 | if not any(running for running in backends.values()): 109 | break 110 | else: 111 | if all(running for running in backends.values()): 112 | break 113 | for db_name, running in backends.items(): 114 | if not stop and not running: 115 | print(f"Waiting on {db_name} to start") 116 | if stop and running: 117 | print(f"Waiting on {db_name} to stop") 118 | time.sleep(1) 119 | 120 | 121 | def stop_all(): 122 | shutdown_cache() 123 | shutdown_temp() 124 | shutdown_storage() 125 | shutdown_ranking() 126 | 127 | 128 | def main(): 129 | parser = argparse.ArgumentParser(description='Manage backend DBs.') 130 | parser.add_argument("--start", action='store_true', default=False, help="Start all") 131 | parser.add_argument("--stop", action='store_true', default=False, help="Stop all") 132 | parser.add_argument("--status", action='store_true', default=True, help="Show status") 133 | args = parser.parse_args() 134 | 135 | if args.start: 136 | launch_all() 137 | if args.stop: 138 | stop_all() 139 | if not args.stop and args.status: 140 | check_all() 141 | 142 | 143 | if __name__ == '__main__': 144 | main() 145 | -------------------------------------------------------------------------------- /bin/sanitizer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import ipaddress 5 | import logging 6 | import time 7 | 8 | from datetime import timezone 9 | from typing import Optional, List, Dict 10 | 11 | from dateutil import parser 12 | from redis import Redis 13 | import requests 14 | 15 | from bgpranking.default import AbstractManager, get_socket_path 16 | from bgpranking.helpers import get_ipasn, sanity_check_ipasn 17 | 18 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 19 | level=logging.INFO) 20 | 21 | 22 | class Sanitizer(AbstractManager): 23 | 24 | def __init__(self, loglevel: int=logging.INFO): 25 | super().__init__(loglevel) 26 | self.script_name = 'sanitizer' 27 | self.redis_intake = Redis(unix_socket_path=get_socket_path('intake'), db=0, decode_responses=True) 28 | self.redis_sanitized = Redis(unix_socket_path=get_socket_path('prepare'), db=0, decode_responses=True) 29 | self.ipasn = get_ipasn() 30 | self.logger.debug('Starting import') 31 | 32 | def _sanitize_ip(self, pipeline: Redis, uuid: str, data: Dict) -> Optional[Dict]: 33 | try: 34 | ip = ipaddress.ip_address(data['ip']) 35 | if isinstance(ip, ipaddress.IPv6Address): 36 | address_family = 'v6' 37 | else: 38 | address_family = 'v4' 39 | except ValueError: 40 | self.logger.info(f"Invalid 
IP address: {data['ip']}") 41 | return None 42 | except KeyError: 43 | self.logger.info(f"Invalid entry {data}") 44 | return None 45 | 46 | if not ip.is_global: 47 | self.logger.info(f"The IP address {data['ip']} is not global") 48 | return None 49 | 50 | datetime = parser.parse(data['datetime']) 51 | if datetime.tzinfo: 52 | # Make sure the datetime is in UTC and not TZ aware. 53 | datetime = datetime.astimezone(timezone.utc).replace(tzinfo=None) 54 | 55 | # Add to temporary DB for further processing 56 | pipeline.hmset(uuid, {'ip': str(ip), 'source': data['source'], 'address_family': address_family, 57 | 'date': datetime.date().isoformat(), 'datetime': datetime.isoformat()}) 58 | pipeline.sadd('to_insert', uuid) 59 | 60 | return {'ip': str(ip), 'address_family': address_family, 61 | 'date': datetime.isoformat(), 'precision_delta': {'days': 3}} 62 | 63 | def _sanitize_network(self, pipeline: Redis, uuid: str, data: Dict) -> List[Dict]: 64 | try: 65 | network = ipaddress.ip_network(data['ip']) 66 | if isinstance(network, ipaddress.IPv6Network): 67 | address_family = 'v6' 68 | else: 69 | address_family = 'v4' 70 | except ValueError: 71 | self.logger.info(f"Invalid IP network: {data['ip']}") 72 | return [] 73 | except KeyError: 74 | self.logger.info(f"Invalid entry {data}") 75 | return [] 76 | 77 | datetime = parser.parse(data['datetime']) 78 | if datetime.tzinfo: 79 | # Make sure the datetime is in UTC and not TZ aware. 80 | datetime = datetime.astimezone(timezone.utc).replace(tzinfo=None) 81 | 82 | for_cache = [] 83 | for ip in network.hosts(): 84 | if not ip.is_global: 85 | self.logger.info(f"The IP address {ip} is not global") 86 | continue 87 | 88 | # Add to temporary DB for further processing 89 | pipeline.hmset(uuid, {'ip': str(ip), 'source': data['source'], 'address_family': address_family, 90 | 'date': datetime.date().isoformat(), 'datetime': datetime.isoformat()}) 91 | pipeline.sadd('to_insert', uuid) 92 | 93 | for_cache.append({'ip': str(ip), 'address_family': address_family, 94 | 'date': datetime.isoformat(), 'precision_delta': {'days': 3}}) 95 | return for_cache 96 | 97 | def sanitize(self): 98 | ready, message = sanity_check_ipasn(self.ipasn) 99 | if not ready: 100 | # Try again later. 101 | self.logger.warning(message) 102 | return 103 | self.logger.debug(message) 104 | 105 | while True: 106 | try: 107 | if self.shutdown_requested() or not self.ipasn.is_up: 108 | break 109 | except requests.exceptions.ConnectionError: 110 | # Temporary issue with ipasnhistory 111 | self.logger.info('Temporary issue with ipasnhistory, trying again later.') 112 | time.sleep(10) 113 | continue 114 | uuids: Optional[List[str]] = self.redis_intake.spop('intake', 100) # type: ignore 115 | if not uuids: 116 | break 117 | for_cache = [] 118 | pipeline = self.redis_sanitized.pipeline(transaction=False) 119 | for uuid in uuids: 120 | data = self.redis_intake.hgetall(uuid) 121 | if not data: 122 | continue 123 | if '/' in data['ip']: 124 | entries_for_cache = self._sanitize_network(pipeline, uuid, data) 125 | if entries_for_cache: 126 | for_cache += entries_for_cache 127 | else: 128 | entry_for_cache = self._sanitize_ip(pipeline, uuid, data) 129 | if entry_for_cache: 130 | for_cache.append(entry_for_cache) 131 | 132 | pipeline.execute() 133 | self.redis_intake.delete(*uuids) 134 | 135 | try: 136 | # Just cache everything so the lookup scripts can do their thing.
137 | self.ipasn.mass_cache(for_cache) 138 | except Exception: 139 | self.logger.info('Mass cache in IPASN History failed, trying again later.') 140 | # Rollback the spop 141 | self.redis_intake.sadd('intake', *uuids) 142 | break 143 | 144 | def _to_run_forever(self): 145 | self.sanitize() 146 | 147 | 148 | def main(): 149 | sanitizer = Sanitizer() 150 | sanitizer.run(sleep_in_sec=120) 151 | 152 | 153 | if __name__ == '__main__': 154 | main() 155 | -------------------------------------------------------------------------------- /bin/shutdown.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import time 5 | 6 | from bgpranking.default import AbstractManager 7 | 8 | 9 | def main(): 10 | AbstractManager.force_shutdown() 11 | time.sleep(5) 12 | while True: 13 | try: 14 | running = AbstractManager.is_running() 15 | except FileNotFoundError: 16 | print('Redis is already down.') 17 | break 18 | if not running: 19 | break 20 | print(running) 21 | time.sleep(5) 22 | 23 | 24 | if __name__ == '__main__': 25 | main() 26 | -------------------------------------------------------------------------------- /bin/ssfetcher.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import logging 5 | from logging import Logger 6 | import json 7 | import asyncio 8 | 9 | from typing import Tuple, Dict, List, Optional, TypeVar, Any 10 | from datetime import datetime, date 11 | from pathlib import Path 12 | 13 | import aiohttp 14 | from bs4 import BeautifulSoup # type: ignore 15 | from dateutil.parser import parse 16 | 17 | from bgpranking.default import AbstractManager, get_homedir, safe_create_dir 18 | from bgpranking.helpers import get_data_dir, get_modules_dir 19 | 20 | 21 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 22 | level=logging.INFO) 23 | 24 | 25 | Dates = TypeVar('Dates', datetime, date, str) 26 | 27 | 28 | class ShadowServerFetcher(): 29 | 30 | def __init__(self, user, password, logger: Logger) -> None: 31 | self.logger = logger 32 | self.storage_directory = get_data_dir() 33 | self.config_path_modules = get_modules_dir() 34 | self.user = user 35 | self.password = password 36 | self.index_page = 'https://dl.shadowserver.org/reports/index.php' 37 | self.vendor = 'shadowserver' 38 | self.known_list_types = ('blacklist', 'blocklist', 'botnet', 'cc', 'cisco', 'cwsandbox', 39 | 'device', 'drone', 'event4', 'malware', 'scan6', 'event6', 'netis', 40 | 'microsoft', 'scan', 'sinkhole6', 'sinkhole', 'outdated', 41 | 'compromised', 'hp', 'darknet', 'ddos') 42 | self.first_available_day: Optional[date] = None 43 | self.last_available_day: date 44 | self.available_entries: Dict[str, List[Tuple[str, str]]] = {} 45 | 46 | async def __get_index(self): 47 | auth_details = {'user': self.user, 'password': self.password, 'login': 'Login'} 48 | async with aiohttp.ClientSession() as s: 49 | self.logger.debug('Fetching the index.') 50 | async with s.post(self.index_page, data=auth_details) as r: 51 | return await r.text() 52 | 53 | async def __build_daily_dict(self): 54 | html_index = await self.__get_index() 55 | soup = BeautifulSoup(html_index, 'html.parser') 56 | treeview = soup.find(id='treemenu1') 57 | for y in treeview.select(':scope > li'): 58 | year = y.contents[0] 59 | for m in y.contents[1].select(':scope > li'): 60 | month = m.contents[0] 61 | for d in m.contents[1].select(':scope > li'): 62 | 
day = d.contents[0] 63 | date = parse(f'{year} {month} {day}').date() 64 | self.available_entries[date.isoformat()] = [] 65 | for a in d.contents[1].find_all('a', href=True): 66 | if not self.first_available_day: 67 | self.first_available_day = date 68 | self.last_available_day = date 69 | self.available_entries[date.isoformat()].append((a['href'], a.string)) 70 | self.logger.debug('Dictionary created.') 71 | 72 | def __normalize_day(self, day: Optional[Dates]=None) -> str: 73 | if not day: 74 | if not self.last_available_day: 75 | raise Exception('Unable to figure out the last available day. You need to run build_daily_dict first') 76 | to_return = self.last_available_day 77 | else: 78 | if isinstance(day, str): 79 | to_return = parse(day).date() 80 | else: # day is already a datetime or a date 81 | to_return = day.date() if isinstance(day, datetime) else day 82 | return to_return.isoformat() 83 | 84 | def __split_name(self, name): 85 | type_content, country, list_type = name.split('-') 86 | if '_' in type_content: 87 | type_content, details_type = type_content.split('_', maxsplit=1) 88 | if '_' in details_type: 89 | details_type, sub = details_type.split('_', maxsplit=1) 90 | return list_type, country, (type_content, details_type, sub) 91 | return list_type, country, (type_content, details_type) 92 | return list_type, country, (type_content) 93 | 94 | def __check_config(self, filename: str) -> Optional[Path]: 95 | self.logger.debug(f'Working on config for {filename}.') 96 | config: Dict[str, Any] = {'vendor': 'shadowserver', 'parser': '.parsers.shadowserver'} 97 | type_content, _, type_details = self.__split_name(filename) 98 | prefix = type_content.split('.')[0] 99 | 100 | if isinstance(type_details, str): 101 | main_type = type_details 102 | config['name'] = '{}-{}'.format(prefix, type_details) 103 | else: 104 | main_type = type_details[0] 105 | config['name'] = '{}-{}'.format(prefix, '_'.join(type_details)) 106 | 107 | if main_type not in self.known_list_types: 108 | self.logger.warning(f'Unknown type: {main_type}.
Please update the config creator script.') 109 | return None 110 | 111 | if main_type == 'blacklist': 112 | config['impact'] = 5 113 | elif main_type == 'blocklist': 114 | config['impact'] = 5 115 | elif main_type == 'botnet': 116 | config['impact'] = 2 117 | elif main_type == 'malware': 118 | config['impact'] = 2 119 | elif main_type == 'cc': 120 | config['impact'] = 5 121 | elif main_type == 'cisco': 122 | config['impact'] = 3 123 | elif main_type == 'cwsandbox': 124 | config['impact'] = 5 125 | elif main_type == 'drone': 126 | config['impact'] = 2 127 | elif main_type == 'microsoft': 128 | config['impact'] = 3 129 | elif main_type == 'scan': 130 | config['impact'] = 1 131 | elif main_type == 'scan6': 132 | config['impact'] = 1 133 | elif main_type == 'sinkhole6': 134 | config['impact'] = 2 135 | elif main_type == 'sinkhole': 136 | config['impact'] = 2 137 | elif main_type == 'device': 138 | config['impact'] = 1 139 | elif main_type == 'event4': 140 | config['impact'] = 2 141 | elif main_type == 'event6': 142 | config['impact'] = 2 143 | elif main_type == 'netis': 144 | config['impact'] = 2 145 | else: 146 | config['impact'] = 1 147 | 148 | if not (self.config_path_modules / f"{config['vendor']}_{config['name']}.json").exists(): 149 | self.logger.debug(f'Creating config file for {filename}.') 150 | with open(self.config_path_modules / f"{config['vendor']}_{config['name']}.json", 'w') as f: 151 | json.dump(config, f, indent=2) 152 | else: 153 | with open(self.config_path_modules / f"{config['vendor']}_{config['name']}.json", 'r') as f: 154 | # Validate new config file with old 155 | config_current = json.load(f) 156 | if config_current != config: 157 | self.logger.warning('The config file created by this script is different from the one on disk: \n{}\n{}'.format(json.dumps(config), json.dumps(config_current))) 158 | # Init list directory 159 | directory = self.storage_directory / config['vendor'] / config['name'] 160 | safe_create_dir(directory) 161 | meta = directory / 'meta' 162 | safe_create_dir(meta) 163 | archive_dir = directory / 'archive' 164 | safe_create_dir(archive_dir) 165 | self.logger.debug(f'Done with config for {filename}.') 166 | return directory 167 | 168 | async def download_daily_entries(self, day: Optional[Dates]=None): 169 | await self.__build_daily_dict() 170 | for url, filename in self.available_entries[self.__normalize_day(day)]: 171 | storage_dir = self.__check_config(filename) 172 | if not storage_dir: 173 | continue 174 | # Check if the file we're trying to download has already been downloaded. Skip if True. 
175 | uuid = url.split('/')[-1] 176 | if (storage_dir / 'meta' / 'last_download').exists(): 177 | with open(storage_dir / 'meta' / 'last_download') as _fr: 178 | last_download_uuid = _fr.read() 179 | if last_download_uuid == uuid: 180 | self.logger.debug(f'Already downloaded: {url}.') 181 | continue 182 | async with aiohttp.ClientSession() as s: 183 | async with s.get(url) as r: 184 | self.logger.info(f'Downloading {url}.') 185 | content = await r.content.read() 186 | with (storage_dir / f'{datetime.now().isoformat()}.txt').open('wb') as _fw: 187 | _fw.write(content) 188 | with (storage_dir / 'meta' / 'last_download').open('w') as _fwt: 189 | _fwt.write(uuid) 190 | 191 | 192 | class ShadowServerManager(AbstractManager): 193 | 194 | def __init__(self, loglevel: int=logging.INFO): 195 | super().__init__(loglevel) 196 | self.script_name = 'shadowserver_fetcher' 197 | shadow_server_config_file = get_homedir() / 'config' / 'shadowserver.json' 198 | self.config = True 199 | if not shadow_server_config_file.exists(): 200 | self.config = False 201 | self.logger.warning(f'No config file available {shadow_server_config_file}, the shadow server module will not be launched.') 202 | return 203 | with shadow_server_config_file.open() as f: 204 | ss_config = json.load(f) 205 | self.fetcher = ShadowServerFetcher(ss_config['user'], ss_config['password'], self.logger) 206 | 207 | async def _to_run_forever_async(self): 208 | await self.fetcher.download_daily_entries() 209 | 210 | 211 | def main(): 212 | modules_manager = ShadowServerManager() 213 | if modules_manager.config: 214 | asyncio.run(modules_manager.run_async(sleep_in_sec=3600)) 215 | 216 | 217 | if __name__ == '__main__': 218 | main() 219 | -------------------------------------------------------------------------------- /bin/start.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from subprocess import Popen, run 5 | 6 | from bgpranking.default import get_homedir 7 | 8 | 9 | def main(): 10 | # Just fail if the env isn't set. 
11 | get_homedir() 12 | print('Start backend (redis)...') 13 | p = run(['run_backend', '--start']) 14 | p.check_returncode() 15 | print('done.') 16 | Popen(['fetcher']) 17 | # Popen(['ssfetcher']) 18 | Popen(['parser']) 19 | Popen(['sanitizer']) 20 | Popen(['dbinsert']) 21 | Popen(['ranking']) 22 | Popen(['asn_descriptions']) 23 | print('Start website...') 24 | Popen(['start_website']) 25 | print('done.') 26 | 27 | 28 | if __name__ == '__main__': 29 | main() 30 | -------------------------------------------------------------------------------- /bin/start_website.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import logging 5 | from subprocess import Popen 6 | 7 | from bgpranking.default import AbstractManager 8 | from bgpranking.default import get_config, get_homedir 9 | 10 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 11 | level=logging.INFO) 12 | 13 | 14 | class Website(AbstractManager): 15 | 16 | def __init__(self, loglevel: int=logging.INFO): 17 | super().__init__(loglevel) 18 | self.script_name = 'website' 19 | self.process = self._launch_website() 20 | self.set_running() 21 | 22 | def _launch_website(self): 23 | website_dir = get_homedir() / 'website' 24 | ip = get_config('generic', 'website_listen_ip') 25 | port = get_config('generic', 'website_listen_port') 26 | return Popen(['gunicorn', '-w', '10', 27 | '--graceful-timeout', '2', '--timeout', '300', 28 | '-b', f'{ip}:{port}', 29 | '--log-level', 'info', 30 | 'web:app'], 31 | cwd=website_dir) 32 | 33 | 34 | def main(): 35 | w = Website() 36 | w.run(sleep_in_sec=10) 37 | 38 | 39 | if __name__ == '__main__': 40 | main() 41 | -------------------------------------------------------------------------------- /bin/stop.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from subprocess import Popen, run 5 | 6 | from redis import Redis 7 | from redis.exceptions import ConnectionError 8 | 9 | from bgpranking.default import get_homedir, get_socket_path 10 | 11 | 12 | def main(): 13 | get_homedir() 14 | p = Popen(['shutdown']) 15 | p.wait() 16 | try: 17 | r = Redis(unix_socket_path=get_socket_path('cache'), db=1) 18 | r.delete('shutdown') 19 | print('Shutting down databases...') 20 | p_backend = run(['run_backend', '--stop']) 21 | p_backend.check_returncode() 22 | print('done.') 23 | except ConnectionError: 24 | # Already down, skip the stacktrace 25 | pass 26 | 27 | 28 | if __name__ == '__main__': 29 | main() 30 | -------------------------------------------------------------------------------- /bin/update.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import argparse 5 | import hashlib 6 | import logging 7 | import platform 8 | import shlex 9 | import subprocess 10 | import sys 11 | from pathlib import Path 12 | 13 | from bgpranking.default import get_homedir, get_config 14 | 15 | logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', 16 | level=logging.INFO) 17 | 18 | 19 | def compute_hash_self(): 20 | m = hashlib.sha256() 21 | with (get_homedir() / 'bin' / 'update.py').open('rb') as f: 22 | m.update(f.read()) 23 | return m.digest() 24 | 25 | 26 | def keep_going(ignore=False): 27 | if ignore: 28 | return 29 | keep_going = input('Continue? 
(y/N) ') 30 | if keep_going.lower() != 'y': 31 | print('Okay, quitting.') 32 | sys.exit() 33 | 34 | 35 | def run_command(command, expect_fail: bool=False, capture_output: bool=True): 36 | args = shlex.split(command) 37 | homedir = get_homedir() 38 | process = subprocess.run(args, cwd=homedir, capture_output=capture_output) 39 | if capture_output: 40 | print(process.stdout.decode()) 41 | if process.returncode and not expect_fail: 42 | print(process.stderr.decode()) 43 | sys.exit() 44 | 45 | 46 | def check_poetry_version(): 47 | args = shlex.split("poetry self -V") 48 | homedir = get_homedir() 49 | process = subprocess.run(args, cwd=homedir, capture_output=True) 50 | poetry_version_str = process.stdout.decode() 51 | version = poetry_version_str.split()[2] 52 | version = version.strip(')') 53 | version_details = tuple(int(i) for i in version.split('.')) 54 | if version_details < (1, 1, 0): 55 | print('The project requires poetry >= 1.1.0, please update.') 56 | print('If you installed with "pip install --user poetry", run "pip install --user -U poetry"') 57 | print('If you installed via the recommended method, use "poetry self update"') 58 | print('More details: https://github.com/python-poetry/poetry#updating-poetry') 59 | sys.exit() 60 | 61 | 62 | def main(): 63 | parser = argparse.ArgumentParser(description='Pull latest release, update dependencies, update and validate the config files, update 3rd deps for the website.') 64 | parser.add_argument('--yes', default=False, action='store_true', help='Run all commands without asking.') 65 | args = parser.parse_args() 66 | 67 | old_hash = compute_hash_self() 68 | 69 | print('* Update repository.') 70 | keep_going(args.yes) 71 | run_command('git pull') 72 | new_hash = compute_hash_self() 73 | if old_hash != new_hash: 74 | print('Update script changed, please do "poetry run update"') 75 | sys.exit() 76 | 77 | check_poetry_version() 78 | 79 | print('* Install/update dependencies.') 80 | keep_going(args.yes) 81 | run_command('poetry install') 82 | 83 | print('* Validate configuration files.') 84 | keep_going(args.yes) 85 | run_command(f'poetry run {(Path("tools") / "validate_config_files.py").as_posix()} --check') 86 | 87 | print('* Update configuration files.') 88 | keep_going(args.yes) 89 | run_command(f'poetry run {(Path("tools") / "validate_config_files.py").as_posix()} --update') 90 | 91 | print('* Restarting') 92 | keep_going(args.yes) 93 | if platform.system() == 'Windows': 94 | print('Restarting with poetry...') 95 | run_command('poetry run stop', expect_fail=True) 96 | run_command('poetry run start', capture_output=False) 97 | print('Started.') 98 | else: 99 | service = get_config('generic', 'systemd_service_name') 100 | p = subprocess.run(["systemctl", "is-active", "--quiet", service]) 101 | try: 102 | p.check_returncode() 103 | print('Restarting with systemd...') 104 | run_command(f'sudo service {service} restart') 105 | print('done.') 106 | except subprocess.CalledProcessError: 107 | print('Restarting with poetry...') 108 | run_command('poetry run stop', expect_fail=True) 109 | run_command('poetry run start', capture_output=False) 110 | print('Started.') 111 | 112 | 113 | if __name__ == '__main__': 114 | main() 115 | -------------------------------------------------------------------------------- /cache/run_redis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | if [ -f ../../valkey/src/valkey-server ]; then 7 | ../../valkey/src/valkey-server ./cache.conf 
8 | elif [ -f ../../redis/src/redis-server ]; then 9 | ../../redis/src/redis-server ./cache.conf 10 | else 11 | echo "Warning: using system redis-server. Valkey-server or redis-server from source is recommended." >&2 12 | /usr/bin/redis-server ./cache.conf 13 | fi 14 | -------------------------------------------------------------------------------- /cache/shutdown_redis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # set -e 4 | set -x 5 | 6 | ../../redis/src/redis-cli -s ./cache.sock shutdown 7 | -------------------------------------------------------------------------------- /config/generic.json.sample: -------------------------------------------------------------------------------- 1 | { 2 | "loglevel": "INFO", 3 | "website_listen_ip": "0.0.0.0", 4 | "website_listen_port": 5005, 5 | "systemd_service_name": "bgpranking", 6 | "storage_db_hostname": "127.0.0.1", 7 | "storage_db_port": 5188, 8 | "ranking_db_hostname": "127.0.0.1", 9 | "ranking_db_port": 5189, 10 | "ipasnhistory_url": "https://ipasnhistory.circl.lu/", 11 | "_notes": { 12 | "loglevel": "Can be one of the values listed here: https://docs.python.org/3/library/logging.html#levels", 13 | "website_listen_ip": "IP Flask will listen on. Defaults to 0.0.0.0, meaning all interfaces.", 14 | "website_listen_port": "Port Flask will listen on.", 15 | "systemd_service_name": "(Optional) Name of the systemd service if your project has one.", 16 | "storage_db_hostname": "Hostname of the storage database (kvrocks)", 17 | "storage_db_port": "Port of the storage database (kvrocks)", 18 | "ranking_db_hostname": "Hostname of the ranking database (kvrocks)", 19 | "ranking_db_port": "Port of the ranking database (kvrocks)", 20 | "ipasnhistory_url": "URL of the IP ASN History service, defaults to the public one."
21 | } 22 | } 23 | -------------------------------------------------------------------------------- /config/modules/Alienvault.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://reputation.alienvault.com/reputation.generic", 3 | "vendor": "alienvault", 4 | "name": "reputation.generic", 5 | "impact": 0.01 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/BlocklistDeApache.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.blocklist.de/lists/apache.txt", 3 | "vendor": "blocklist_de", 4 | "name": "apache", 5 | "impact": 0.1 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/BlocklistDeBots.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.blocklist.de/lists/bots.txt", 3 | "vendor": "blocklist_de", 4 | "name": "bots", 5 | "impact": 3 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/BlocklistDeFTP.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.blocklist.de/lists/ftp.txt", 3 | "vendor": "blocklist_de", 4 | "name": "ftp", 5 | "impact": 3 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/BlocklistDeIMAP.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.blocklist.de/lists/imap.txt", 3 | "vendor": "blocklist_de", 4 | "name": "imap", 5 | "impact": 3 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/BlocklistDeMail.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.blocklist.de/lists/mail.txt", 3 | "vendor": "blocklist_de", 4 | "name": "mail", 5 | "impact": 0.1 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/BlocklistDeSIP.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.blocklist.de/lists/sip.txt", 3 | "vendor": "blocklist_de", 4 | "name": "sip", 5 | "impact": 3 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/BlocklistDeSSH.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.blocklist.de/lists/ssh.txt", 3 | "vendor": "blocklist_de", 4 | "name": "ssh", 5 | "impact": 3 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/BlocklistDeStrong.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.blocklist.de/lists/strongips.txt", 3 | "vendor": "blocklist_de", 4 | "name": "strong", 5 | "impact": 6 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/CIArmy.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.ciarmy.com/list/ci-badguys.txt", 3 | "vendor": "ciarmy", 4 | "name": "ip", 5 | "impact": 5 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/CleanMXMalwares.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "vendor": "cleanmx", 3 | "name": "malwares", 4 | "impact": 5 5 | } 6 | -------------------------------------------------------------------------------- /config/modules/CleanMXPhishing.json: -------------------------------------------------------------------------------- 1 | { 2 | "vendor": "cleanmx", 3 | "name": "phishing", 4 | "impact": 5 5 | } 6 | -------------------------------------------------------------------------------- /config/modules/CleanMXPortals.json: -------------------------------------------------------------------------------- 1 | { 2 | "vendor": "cleanmx", 3 | "name": "portals", 4 | "impact": 5 5 | } 6 | -------------------------------------------------------------------------------- /config/modules/CoinBlockerLists.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://zerodot1.gitlab.io/CoinBlockerLists/MiningServerIPList.txt", 3 | "vendor": "ZeroDot1", 4 | "name": "CoinBlockerLists", 5 | "impact": 3 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/DshieldDaily.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.dshield.org/feeds/daily_sources", 3 | "vendor": "dshield", 4 | "name": "daily", 5 | "impact": 0.1, 6 | "parser": ".parsers.dshield" 7 | } 8 | -------------------------------------------------------------------------------- /config/modules/DshieldTopIPs.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.dshield.org/feeds/topips.txt", 3 | "vendor": "dshield", 4 | "name": "topips", 5 | "impact": 1 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/EmergingThreatsCompromized.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://rules.emergingthreats.net/blockrules/compromised-ips.txt", 3 | "vendor": "emergingthreats", 4 | "name": "compromized", 5 | "impact": 5 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/FeodotrackerIPBlockList.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://feodotracker.abuse.ch/downloads/ipblocklist.txt", 3 | "vendor": "feodotracker", 4 | "name": "ipblocklist", 5 | "impact": 5, 6 | "parser": ".parsers.abusech_feodo" 7 | } 8 | -------------------------------------------------------------------------------- /config/modules/Malc0de.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://malc0de.com/bl/IP_Blacklist.txt", 3 | "vendor": "malc0de", 4 | "name": "blocklist", 5 | "impact": 5, 6 | "parser": ".parsers.malc0de" 7 | } 8 | -------------------------------------------------------------------------------- /config/modules/MalwareDomainListIP.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "http://www.malwaredomainlist.com/hostslist/ip.txt", 3 | "vendor": "malwaredomainlist", 4 | "name": "ip", 5 | "impact": 5 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/SSLBlacklist.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": 
"https://sslbl.abuse.ch/blacklist/sslipblacklist.txt", 3 | "vendor": "abuse.ch", 4 | "name": "sslblacklist", 5 | "impact": 7, 6 | "parser": ".parsers.abusech" 7 | } 8 | -------------------------------------------------------------------------------- /config/modules/ThreatFoxIOC.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://threatfox.abuse.ch/export/json/ip-port/recent/", 3 | "vendor": "abuse.ch", 4 | "name": "threatfox", 5 | "impact": 5, 6 | "parser": ".parsers.abusech_threatfox" 7 | } 8 | -------------------------------------------------------------------------------- /config/modules/greensnow.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://blocklist.greensnow.co/greensnow.txt", 3 | "vendor": "greensnow", 4 | "name": "blocklist", 5 | "impact": 3 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/jq_all_the_things.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | # Seeds sponge, from moreutils 7 | 8 | for dir in ./*.json 9 | do 10 | cat ${dir} | jq . | sponge ${dir} 11 | done 12 | -------------------------------------------------------------------------------- /config/modules/module.schema: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/schema#", 3 | "title": "BGP Ranking NG module", 4 | "id": "https://www.github.com/CIRCL/bgpranking-ng/modules.json", 5 | "type": "object", 6 | "additionalProperties": false, 7 | "properties": { 8 | "url": { 9 | "type": "string" 10 | }, 11 | "vendor": { 12 | "type": "string" 13 | }, 14 | "name": { 15 | "type": "string" 16 | }, 17 | "impact": { 18 | "type": "number" 19 | }, 20 | "parser": { 21 | "type": "string" 22 | }, 23 | "tags": { 24 | "type": "array", 25 | "uniqueItems": true, 26 | "items": { 27 | "type": "string" 28 | } 29 | } 30 | }, 31 | "required": [ 32 | "name", 33 | "vendor", 34 | "impact" 35 | ] 36 | } 37 | -------------------------------------------------------------------------------- /config/modules/pop3gropers.json: -------------------------------------------------------------------------------- 1 | { 2 | "url": "https://home.nuug.no/~peter/pop3gropers.txt", 3 | "vendor": "bsdly", 4 | "name": "pop3gropers", 5 | "impact": 3 6 | } 7 | -------------------------------------------------------------------------------- /config/modules/shadowserver_only.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | find . -maxdepth 1 -type f -name "*.json" ! -iname "shadowserver*.json" -delete 7 | -------------------------------------------------------------------------------- /config/modules/validate_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | # remove the exec flag on the json files 7 | find -name "*.json" -exec chmod -x "{}" \; 8 | 9 | diffs=`git status --porcelain | wc -l` 10 | 11 | if ! [ $diffs -eq 0 ]; then 12 | echo "Please make sure you run remove the executable flag on the json files before commiting: find -name "*.json" -exec chmod -x \"{}\" \\;" 13 | # exit 1 14 | fi 15 | 16 | ./jq_all_the_things.sh 17 | 18 | diffs=`git status --porcelain | wc -l` 19 | 20 | if ! 
[ $diffs -eq 0 ]; then 21 | echo "Please make sure you run ./jq_all_the_things.sh before committing." 22 | # exit 1 23 | fi 24 | 25 | for dir in ./*.json 26 | do 27 | echo -n "${dir}: " 28 | jsonschema -i ${dir} module.schema 29 | echo '' 30 | done 31 | -------------------------------------------------------------------------------- /config/shadowserver.json.sample: -------------------------------------------------------------------------------- 1 | { 2 | "user": "[USERNAME]", 3 | "password": "[PASSWORD]" 4 | } 5 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "bgpranking" 3 | version = "2.0" 4 | description = "BGP Ranking is software to rank AS numbers based on their malicious activities." 5 | authors = ["Raphaël Vinot <raphael.vinot@circl.lu>"] 6 | license = "AGPLv3" 7 | 8 | [tool.poetry.scripts] 9 | start = "bin.start:main" 10 | stop = "bin.stop:main" 11 | update = "bin.update:main" 12 | shutdown = "bin.shutdown:main" 13 | run_backend = "bin.run_backend:main" 14 | start_website = "bin.start_website:main" 15 | 16 | archiver = "bin.archiver:main" 17 | asn_descriptions = "bin.asn_descriptions:main" 18 | dbinsert = "bin.dbinsert:main" 19 | fetcher = "bin.fetcher:main" 20 | parser = "bin.parser:main" 21 | ranking = "bin.ranking:main" 22 | sanitizer = "bin.sanitizer:main" 23 | ssfetcher = "bin.ssfetcher:main" 24 | 25 | 26 | [tool.poetry.dependencies] 27 | python = "^3.8.1" 28 | redis = {version = "^5.0.7", extras = ["hiredis"]} 29 | flask-restx = "^1.3.0" 30 | gunicorn = "^22.0.0" 31 | python-dateutil = "^2.9.0.post0" 32 | pyipasnhistory = "^2.1.2" 33 | pycountry = "^23.12.11" 34 | beautifulsoup4 = "^4.12.3" 35 | aiohttp = "^3.9.5" 36 | Bootstrap-Flask = "^2.4.0" 37 | pid = "^3.0.4" 38 | pybgpranking2 = "^2.0.1" 39 | 40 | [tool.poetry.dev-dependencies] 41 | ipython = [ 42 | {version = "<8.13.0", python = "<3.9"}, 43 | {version = "^8.18.0", python = ">=3.9"}, 44 | {version = "^8.24.0", python = ">=3.10"} 45 | ] 46 | mypy = "^1.10.1" 47 | types-setuptools = "^70.2.0.20240704" 48 | types-redis = "^4.6.0.20240425" 49 | types-requests = "^2.32.0.20240622" 50 | types-python-dateutil = "^2.9.0.20240316" 51 | 52 | [build-system] 53 | requires = ["poetry-core"] 54 | build-backend = "poetry.core.masonry.api" 55 | 56 | [tool.mypy] 57 | python_version = 3.8 58 | check_untyped_defs = true 59 | ignore_errors = false 60 | ignore_missing_imports = false 61 | strict_optional = true 62 | no_implicit_optional = true 63 | warn_unused_ignores = true 64 | warn_redundant_casts = true 65 | warn_unused_configs = true 66 | warn_unreachable = true 67 | 68 | show_error_context = true 69 | pretty = true 70 | -------------------------------------------------------------------------------- /ranking/run_kvrocks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | ../../kvrocks/build/kvrocks -c kvrocks.conf 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from setuptools import setup 4 | 5 | 6 | setup( 7 | name='bgpranking', 8 | version='0.1', 9 | author='Raphaël Vinot', 10 | author_email='raphael.vinot@circl.lu', 11 | maintainer='Raphaël Vinot', 12 | url='https://github.com/D4-project/BGP-Ranking', 13 | description='BGP
Ranking, the new one.', 14 | packages=['bgpranking'], 15 | scripts=['bin/archiver.py', 'bin/dbinsert.py', 'bin/fetcher.py', 'bin/parser.py', 16 | 'bin/sanitizer.py', 'bin/run_backend.py', 'bin/ssfetcher.py', 'bin/start_website.py', 17 | 'bin/monitor.py', 'bin/ranking.py', 'bin/asn_descriptions.py', 18 | 'bin/manual_ranking.py', 19 | 'bin/start.py', 'bin/stop.py', 'bin/shutdown.py'], 20 | classifiers=[ 21 | 'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)', 22 | 'Development Status :: 3 - Alpha', 23 | 'Environment :: Console', 24 | 'Operating System :: POSIX :: Linux', 25 | 'Intended Audience :: Science/Research', 26 | 'Intended Audience :: Telecommunications Industry', 27 | 'Intended Audience :: Information Technology', 28 | 'Programming Language :: Python :: 3', 29 | 'Topic :: Security', 30 | 'Topic :: Internet', 31 | ], 32 | include_package_data=True, 33 | package_data={'config': ['config/*/*.conf', 34 | 'config/modules/*.json']}, 35 | ) 36 | -------------------------------------------------------------------------------- /storage/run_kvrocks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | ../../kvrocks/build/kvrocks -c kvrocks.conf 7 | -------------------------------------------------------------------------------- /temp/run_redis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | set -x 5 | 6 | if [ -f ../../valkey/src/valkey-server ]; then 7 | ../../valkey/src/valkey-server ./intake.conf 8 | ../../valkey/src/valkey-server ./prepare.conf 9 | elif [ -f ../../redis/src/redis-server ]; then 10 | ../../redis/src/redis-server ./intake.conf 11 | ../../redis/src/redis-server ./prepare.conf 12 | else 13 | echo "Warning: using system redis-server. Valkey-server or redis-server from source is recommended." 
>&2 14 | /usr/bin/redis-server ./intake.conf 15 | /usr/bin/redis-server ./prepare.conf 16 | fi 17 | -------------------------------------------------------------------------------- /temp/shutdown_redis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # set -e 4 | set -x 5 | 6 | ../../redis/src/redis-cli -s ./intake.sock shutdown 7 | ../../redis/src/redis-cli -s ./prepare.sock shutdown 8 | -------------------------------------------------------------------------------- /tools/3rdparty.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import requests 5 | 6 | from bgpranking.default import get_homedir 7 | 8 | d3js_version = '7' 9 | bootstrap_select_version = "1.14.0-beta3" 10 | jquery_version = "3.7.1" 11 | 12 | if __name__ == '__main__': 13 | dest_dir = get_homedir() / 'website' / 'web' / 'static' 14 | 15 | d3 = requests.get(f'https://d3js.org/d3.v{d3js_version}.min.js') 16 | with (dest_dir / f'd3.v{d3js_version}.min.js').open('wb') as f: 17 | f.write(d3.content) 18 | print(f'Downloaded d3js v{d3js_version}.') 19 | 20 | bootstrap_select_js = requests.get(f'https://cdn.jsdelivr.net/npm/bootstrap-select@{bootstrap_select_version}/dist/js/bootstrap-select.min.js') 21 | with (dest_dir / 'bootstrap-select.min.js').open('wb') as f: 22 | f.write(bootstrap_select_js.content) 23 | print(f'Downloaded bootstrap_select js v{bootstrap_select_version}.') 24 | 25 | bootstrap_select_css = requests.get(f'https://cdn.jsdelivr.net/npm/bootstrap-select@{bootstrap_select_version}/dist/css/bootstrap-select.min.css') 26 | with (dest_dir / 'bootstrap-select.min.css').open('wb') as f: 27 | f.write(bootstrap_select_css.content) 28 | print(f'Downloaded bootstrap_select css v{bootstrap_select_version}.') 29 | 30 | jquery = requests.get(f'https://code.jquery.com/jquery-{jquery_version}.min.js') 31 | with (dest_dir / 'jquery.min.js').open('wb') as f: 32 | f.write(jquery.content) 33 | print(f'Downloaded jquery v{jquery_version}.') 34 | 35 | print('All 3rd party modules for the website were downloaded.') 36 | -------------------------------------------------------------------------------- /tools/clear_prepare_db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import uuid 4 | 5 | from redis import Redis 6 | from bgpranking.default import get_socket_path 7 | 8 | redis_sanitized = Redis(unix_socket_path=get_socket_path('prepare'), db=0, decode_responses=True) 9 | to_delete = [] 10 | for name in redis_sanitized.scan_iter(_type='HASH', count=100): 11 | try: 12 | uuid.UUID(name) 13 | except Exception as e: 14 | continue 15 | if not redis_sanitized.sismember('to_insert', name): 16 | to_delete.append(name) 17 | if len(to_delete) >= 100000: 18 | redis_sanitized.delete(*to_delete) 19 | to_delete = [] 20 | if to_delete: 21 | redis_sanitized.delete(*to_delete) 22 | -------------------------------------------------------------------------------- /tools/migrate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from datetime import datetime 5 | from typing import Set 6 | 7 | from redis import Redis 8 | 9 | redis_src = Redis(unix_socket_path='../storage/storage.sock', db=0) 10 | redis_dst = Redis('127.0.0.1', 5188) 11 | 12 | chunk_size = 100000 13 | 14 | 15 | def process_chunk(src: Redis, dst: Redis, keys: Set[str]): 
16 | src_pipeline = src.pipeline() 17 | [src_pipeline.type(key) for key in keys] 18 | to_process = {key: key_type for key, key_type in zip(keys, src_pipeline.execute())} 19 | 20 | src_pipeline = src.pipeline() 21 | for key, key_type in to_process.items(): 22 | if key_type == b"string": 23 | src_pipeline.get(key) 24 | elif key_type == b"list": 25 | raise Exception(f'Lists should not be used: {key}.') 26 | elif key_type == b"set": 27 | src_pipeline.smembers(key) 28 | elif key_type == b"zset": 29 | src_pipeline.zrangebyscore(key, '-Inf', '+Inf', withscores=True) 30 | elif key_type == b"hash": 31 | src_pipeline.hgetall(key) 32 | else: 33 | raise Exception(f'{key_type} not supported {key}.') 34 | 35 | dest_pipeline = dst.pipeline() 36 | for key, content in zip(to_process.keys(), src_pipeline.execute()): 37 | if to_process[key] == b"string": 38 | dest_pipeline.set(key, content) 39 | elif to_process[key] == b"set": 40 | dest_pipeline.sadd(key, *content) 41 | elif to_process[key] == b"zset": 42 | dest_pipeline.zadd(key, {value: rank for value, rank in content}) 43 | elif to_process[key] == b"hash": 44 | dest_pipeline.hmset(key, content) 45 | 46 | dest_pipeline.execute() 47 | 48 | 49 | def migrate(src: Redis, dst: Redis): 50 | keys = set() 51 | pos = 0 52 | for key in src.scan_iter(count=chunk_size, match='2017*'): 53 | keys.add(key) 54 | 55 | if len(keys) == chunk_size: 56 | process_chunk(src, dst, keys) 57 | pos += len(keys) 58 | print(f'{datetime.now()} - {pos} keys done.') 59 | keys = set() 60 | 61 | # migrate remaining keys 62 | process_chunk(src, dst, keys) 63 | pos += len(keys) 64 | print(f'{datetime.now()} - {pos} keys done.') 65 | 66 | 67 | if __name__ == '__main__': 68 | migrate(redis_src, redis_dst) 69 | -------------------------------------------------------------------------------- /tools/monitoring.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import json 4 | 5 | from redis import Redis 6 | from bgpranking.default import get_socket_path 7 | from bgpranking.helpers import get_ipasn 8 | 9 | 10 | class Monitor(): 11 | 12 | def __init__(self): 13 | self.intake = Redis(unix_socket_path=get_socket_path('intake'), db=0, decode_responses=True) 14 | self.sanitize = Redis(unix_socket_path=get_socket_path('prepare'), db=0, decode_responses=True) 15 | self.cache = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) 16 | self.ipasn = get_ipasn() 17 | 18 | def get_values(self): 19 | ips_in_intake = self.intake.scard('intake') 20 | ready_to_insert = self.sanitize.scard('to_insert') 21 | ipasn_meta = self.ipasn.meta() 22 | if len(ipasn_meta['cached_dates']['caida']['v4']['cached']) > 15: 23 | ipasn_meta['cached_dates']['caida']['v4']['cached'] = 'Too many entries' 24 | if len(ipasn_meta['cached_dates']['caida']['v6']['cached']) > 15: 25 | ipasn_meta['cached_dates']['caida']['v6']['cached'] = 'Too many entries' 26 | return json.dumps({'Non-parsed IPs': ips_in_intake, 'Parsed IPs': ready_to_insert, 27 | 'running': self.cache.zrangebyscore('running', '-inf', '+inf', withscores=True), 28 | 'IPASN History': ipasn_meta}, 29 | indent=2) 30 | 31 | 32 | if __name__ == '__main__': 33 | m = Monitor() 34 | print(m.get_values()) 35 | -------------------------------------------------------------------------------- /tools/validate_config_files.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | 
import json 5 | import logging 6 | import argparse 7 | 8 | from bgpranking.default import get_homedir 9 | 10 | 11 | def validate_generic_config_file(): 12 | user_config = get_homedir() / 'config' / 'generic.json' 13 | with user_config.open() as f: 14 | generic_config = json.load(f) 15 | with (get_homedir() / 'config' / 'generic.json.sample').open() as f: 16 | generic_config_sample = json.load(f) 17 | # Check documentation 18 | for key in generic_config_sample.keys(): 19 | if key == '_notes': 20 | continue 21 | if key not in generic_config_sample['_notes']: 22 | raise Exception(f'###### - Documentation missing for {key}') 23 | 24 | # Check all entries in the sample files are in the user file, and they have the same type 25 | for key in generic_config_sample.keys(): 26 | if key == '_notes': 27 | continue 28 | if generic_config.get(key) is None: 29 | logger.warning(f'Entry missing in user config file: {key}. Will default to: {generic_config_sample[key]}') 30 | continue 31 | if not isinstance(generic_config[key], type(generic_config_sample[key])): 32 | raise Exception(f'Invalid type for {key}. Got: {type(generic_config[key])} ({generic_config[key]}), expected: {type(generic_config_sample[key])} ({generic_config_sample[key]})') 33 | 34 | if isinstance(generic_config[key], dict): 35 | # Check entries 36 | for sub_key in generic_config_sample[key].keys(): 37 | if sub_key not in generic_config[key]: 38 | raise Exception(f'{sub_key} is missing in generic_config[key]. Default from sample file: {generic_config_sample[key][sub_key]}') 39 | if not isinstance(generic_config[key][sub_key], type(generic_config_sample[key][sub_key])): 40 | raise Exception(f'Invalid type for {sub_key} in {key}. Got: {type(generic_config[key][sub_key])} ({generic_config[key][sub_key]}), expected: {type(generic_config_sample[key][sub_key])} ({generic_config_sample[key][sub_key]})') 41 | 42 | # Make sure the user config file doesn't have entries missing in the sample config 43 | for key in generic_config.keys(): 44 | if key not in generic_config_sample: 45 | raise Exception(f'{key} is missing in the sample config file. 
You need to compare {user_config} with {user_config}.sample.') 46 | 47 | return True 48 | 49 | 50 | def update_user_configs(): 51 | for file_name in ['generic']: 52 | with (get_homedir() / 'config' / f'{file_name}.json').open() as f: 53 | try: 54 | generic_config = json.load(f) 55 | except Exception: 56 | generic_config = {} 57 | with (get_homedir() / 'config' / f'{file_name}.json.sample').open() as f: 58 | generic_config_sample = json.load(f) 59 | 60 | has_new_entry = False 61 | for key in generic_config_sample.keys(): 62 | if key == '_notes': 63 | continue 64 | if generic_config.get(key) is None: 65 | print(f'{key} was missing in {file_name}, adding it.') 66 | print(f"Description: {generic_config_sample['_notes'][key]}") 67 | generic_config[key] = generic_config_sample[key] 68 | has_new_entry = True 69 | elif isinstance(generic_config[key], dict): 70 | for sub_key in generic_config_sample[key].keys(): 71 | if sub_key not in generic_config[key]: 72 | print(f'{sub_key} was missing in {key} from {file_name}, adding it.') 73 | generic_config[key][sub_key] = generic_config_sample[key][sub_key] 74 | has_new_entry = True 75 | if has_new_entry: 76 | with (get_homedir() / 'config' / f'{file_name}.json').open('w') as fw: 77 | json.dump(generic_config, fw, indent=2, sort_keys=True) 78 | return has_new_entry 79 | 80 | 81 | if __name__ == '__main__': 82 | logger = logging.getLogger('Config validator') 83 | parser = argparse.ArgumentParser(description='Check the config files.') 84 | parser.add_argument('--check', default=False, action='store_true', help='Check if the sample config and the user config are in-line') 85 | parser.add_argument('--update', default=False, action='store_true', help='Update the user config with the entries from the sample config if entries are missing') 86 | args = parser.parse_args() 87 | 88 | if args.check: 89 | if validate_generic_config_file(): 90 | print(f"The entries in {get_homedir() / 'config' / 'generic.json'} are valid.") 91 | 92 | if args.update: 93 | if not update_user_configs(): 94 | print(f"No updates needed in {get_homedir() / 'config' / 'generic.json'}.") 95 | -------------------------------------------------------------------------------- /website/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/D4-project/BGP-Ranking/5f4e051f940b1c79ec5c6b432db1c7b2bdcbc781/website/__init__.py -------------------------------------------------------------------------------- /website/readme.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | Run: 4 | 5 | ```bash 6 | start_website.py 7 | ``` 8 | 9 | In debug mode: 10 | 11 | ```bash 12 | export FLASK_APP=${BGPRANKING_HOME}/website/web/__init__.py 13 | flask run -h 0.0.0.0 -p 5005 14 | ``` 15 | 16 | -------------------------------------------------------------------------------- /website/web/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import pkg_resources 5 | 6 | from collections import defaultdict 7 | from datetime import date, timedelta 8 | from typing import Dict, Any, Tuple, List, Optional 9 | 10 | from flask import Flask, render_template, request, session, redirect, url_for 11 | from flask_bootstrap import Bootstrap5 # type: ignore 12 | from flask_restx import Api # type: ignore 13 | 14 | from bgpranking.bgpranking import BGPRanking 15 | from bgpranking.helpers import get_ipasn 16 | 17
from .genericapi import api as generic_api
from .helpers import get_secret_key, load_session, get_country_codes
from .proxied import ReverseProxied

app = Flask(__name__)

app.wsgi_app = ReverseProxied(app.wsgi_app)  # type: ignore

app.config['SECRET_KEY'] = get_secret_key()

Bootstrap5(app)
app.config['BOOTSTRAP_SERVE_LOCAL'] = True

bgpranking = BGPRanking()


# ############# Web UI #############

@app.route('/', methods=['GET', 'POST'])
def index():
    if request.method == 'HEAD':
        # Just returns ack if the webserver is running
        return 'Ack'
    load_session()
    sources = bgpranking.get_sources(date=session['date'])['response']
    session.pop('asn', None)
    session.pop('country', None)
    ranks = bgpranking.asns_global_ranking(limit=100, **session)['response']
    r = [(asn, rank, bgpranking.get_asn_descriptions(int(asn))['response']) for asn, rank in ranks]
    return render_template('index.html', ranks=r, sources=sources, countries=get_country_codes(), **session)


@app.route('/asn', methods=['GET', 'POST'])
def asn_details():
    load_session()
    if 'asn' not in session:
        return redirect(url_for('index'))
    asn_descriptions = bgpranking.get_asn_descriptions(asn=session['asn'], all_descriptions=True)['response']
    sources = bgpranking.get_sources(date=session['date'])['response']
    prefix = session.pop('prefix', None)
    ranks = bgpranking.asn_details(**session)['response']
    if prefix:
        prefix_ips = bgpranking.get_prefix_ips(prefix=prefix, **session)['response']
        prefix_ips = [(ip, sorted(sources)) for ip, sources in prefix_ips.items()]
        prefix_ips.sort(key=lambda entry: len(entry[1]), reverse=True)
    else:
        prefix_ips = []
    return render_template('asn.html', sources=sources, ranks=ranks,
                           prefix_ips=prefix_ips, asn_descriptions=asn_descriptions, **session)


@app.route('/country', methods=['GET', 'POST'])
def country():
    load_session()
    sources = bgpranking.get_sources(date=session['date'])['response']
    return render_template('country.html', sources=sources, countries=get_country_codes(), **session)


@app.route('/country_history_callback', methods=['GET', 'POST'])
def country_history_callback():
    history_data: Dict[str, Tuple[str, str, List[Any]]]
    history_data = request.get_json(force=True)
    to_display = []
    mapping: Dict[str, Any] = defaultdict(dict)
    dates = []
    all_asns = set([])
    for country, foo in history_data.items():
        for d, r_sum, details in foo:
            dates.append(d)
            for detail in details:
                asn, r = detail
                all_asns.add(asn)
                mapping[asn][d] = r

        to_display_temp = [[country] + dates]
        for a in sorted(list(all_asns), key=int):
            line = [a]
            for d in dates:
                if mapping[a].get(d) is not None:
                    line.append(round(mapping[a].get(d), 3))
                else:
                    line.append('N/A')
            to_display_temp.append(line)
        to_display.append(to_display_temp)
    return render_template('country_asn_map.html', to_display=to_display)


@app.route('/ipasn', methods=['GET', 'POST'])
def ipasn():
    d: Optional[Dict] = None
    if request.method == 'POST':
        d = request.form
    elif request.method == 'GET':
        d = request.args

    if not d or 'ip' not in d:
        return render_template('ipasn.html')
    else:
        if isinstance(d['ip'], list):
            ip = d['ip'][0]
        else:
            ip = d['ip']
    ipasn = get_ipasn()
    response = ipasn.query(first=(date.today() - timedelta(days=60)).isoformat(),
                           aggregate=True, ip=ip)
    for r in response['response']:
        r['asn_descriptions'] = []
        asn_descriptions = bgpranking.get_asn_descriptions(asn=r['asn'], all_descriptions=True)['response']
        for timestamp in sorted(asn_descriptions.keys()):
            if r['first_seen'] <= timestamp <= r['last_seen']:
                r['asn_descriptions'].append(asn_descriptions[timestamp])

        if not r['asn_descriptions'] and timestamp <= r['last_seen']:
            r['asn_descriptions'].append(asn_descriptions[timestamp])

    return render_template('ipasn.html', ipasn_details=response['response'],
                           **response['meta'])


# ############# Web UI #############

# Query API

api = Api(app, title='BGP Ranking API',
          description='API to query BGP Ranking.',
          doc='/doc/',
          version=pkg_resources.get_distribution('bgpranking').version)

api.add_namespace(generic_api)
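The module above builds the Flask application: it wraps the WSGI app in ReverseProxied, loads the secret key, enables Bootstrap, and registers both the UI routes and the flask-restx API under /doc/. A minimal sketch for serving it locally with Flask's development server follows; it assumes the bgpranking package is installed and its backend stores are already running, and the host, port and debug flag are illustrative choices rather than the project's own launcher (bin/start_website.py in this tree presumably covers that).

# Illustrative local run only, assuming the bgpranking backend (Redis/kvrocks)
# and configuration are in place; host and port are arbitrary.
from website.web import app

if __name__ == '__main__':
    app.run(host='127.0.0.1', port=5005, debug=True)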
--------------------------------------------------------------------------------
/website/web/genericapi.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from typing import Dict, Any, Union
from urllib.parse import urljoin

import requests

from flask import request, session
from flask_restx import Namespace, Resource, fields  # type: ignore

from bgpranking.default import get_config
from bgpranking.bgpranking import BGPRanking

from .helpers import load_session

api = Namespace('BGP Ranking API', description='API to query BGP Ranking.', path='/')

bgpranking: BGPRanking = BGPRanking()


@api.route('/redis_up')
@api.doc(description='Check if redis is up and running')
class RedisUp(Resource):

    def get(self):
        return bgpranking.check_redis_up()


# Transparent proxy to the IPASN History instance configured as
# 'ipasnhistory_url' in the generic config.
@api.route('/ipasn_history/')
@api.route('/ipasn_history/<path:path>')
class IPASNProxy(Resource):

    def _proxy_url(self):
        if request.full_path[-1] == '?':
            full_path = request.full_path[:-1]
        else:
            full_path = request.full_path
        path_for_ipasnhistory = full_path.replace('/ipasn_history/', '')
        if path_for_ipasnhistory.startswith('?'):
            path_for_ipasnhistory = path_for_ipasnhistory.replace('?', 'ip?')
        if not path_for_ipasnhistory:
            path_for_ipasnhistory = 'ip'
        return urljoin(get_config('generic', 'ipasnhistory_url'), path_for_ipasnhistory)

    def get(self, path=''):
        url = self._proxy_url()
        return requests.get(url).json()

    def post(self, path=''):
        url = self._proxy_url()
        return requests.post(url, data=request.data).json()


# TODO: Add other parameters for asn_rank
asn_query_fields = api.model('ASNQueryFields', {
    'asn': fields.String(description='The Autonomous System Number to search', required=True)
})


@api.route('/json/asn')
class ASNRank(Resource):

    @api.doc(body=asn_query_fields)
    def post(self):
        # TODO
        # * Filter on date => if only returning one descr, return the description at that date
        query: Dict[str, Any] = request.get_json(force=True)
        to_return: Dict[str, Union[str, Dict[str, Any]]] = {'meta': query, 'response': {}}
        if 'asn' not in query:
            to_return['error'] = f'You need to pass an asn - {query}'
            return to_return

        asn_description_query = {'asn': query['asn']}
        responses = bgpranking.get_asn_descriptions(**asn_description_query)['response']
        to_return['response']['asn_description'] = responses  # type: ignore

        asn_rank_query = {'asn': query['asn']}
        if 'date' in query:
            asn_rank_query['date'] = query['date']
        if 'source' in query:
            asn_rank_query['source'] = query['source']
        else:
            asn_rank_query['with_position'] = True
        if 'ipversion' in query:
            asn_rank_query['ipversion'] = query['ipversion']

        to_return['response']['ranking'] = bgpranking.asn_rank(**asn_rank_query)['response']  # type: ignore
        return to_return


asn_descr_fields = api.model('ASNDescriptionsFields', {
    'asn': fields.String(description='The Autonomous System Number to search', required=True),
    'all_descriptions': fields.Boolean(description='If true, returns all the descriptions instead of only the last one', default=False)
})


@api.route('/json/asn_descriptions')
class ASNDescription(Resource):

    @api.doc(body=asn_descr_fields)
    def post(self):
        query: Dict = request.get_json(force=True)
        to_return: Dict[str, Union[str, Dict[str, Any]]] = {'meta': query, 'response': {}}
        if 'asn' not in query:
            to_return['error'] = f'You need to pass an asn - {query}'
            return to_return

        to_return['response']['asn_descriptions'] = bgpranking.get_asn_descriptions(**query)['response']  # type: ignore
        return to_return


# TODO: Add other parameters for get_asn_history
asn_history_fields = api.model('ASNHistoryFields', {
    'asn': fields.String(description='The Autonomous System Number to search', required=True)
})


@api.route('/json/asn_history')
class ASNHistory(Resource):

    def get(self):
        load_session()
        if 'asn' in session:
            return bgpranking.get_asn_history(**session)

    @api.doc(body=asn_history_fields)
    def post(self):
        query: Dict = request.get_json(force=True)
        to_return: Dict[str, Union[str, Dict[str, Any]]] = {'meta': query, 'response': {}}
        if 'asn' not in query:
            to_return['error'] = f'You need to pass an asn - {query}'
            return to_return

        to_return['response']['asn_history'] = bgpranking.get_asn_history(**query)['response']  # type: ignore
        return to_return


# TODO: Add other parameters for country_history
country_history_fields = api.model('CountryHistoryFields', {
    'country': fields.String(description='The Country Code', required=True)
})


@api.route('/json/country_history')
class CountryHistory(Resource):

    def get(self):
        load_session()
        return bgpranking.country_history(**session)

    @api.doc(body=country_history_fields)
    def post(self):
        query: Dict = request.get_json(force=True)
        to_return: Dict[str, Union[str, Dict[str, Any]]] = {'meta': query, 'response': {}}
        to_return['response']['country_history'] = bgpranking.country_history(**query)['response']  # type: ignore
        return to_return


# TODO: Add other parameters for asns_global_ranking
asns_global_ranking_fields = api.model('ASNsGlobalRankingFields', {
    'date': fields.String(description='The date')
})


@api.route('/json/asns_global_ranking')
class ASNsGlobalRanking(Resource):

    @api.doc(body=asns_global_ranking_fields)
    def post(self):
        query: Dict = request.get_json(force=True)
        to_return: Dict[str, Union[str, Dict[str, Any]]] = {'meta': query, 'response': {}}
        to_return['response'] = bgpranking.asns_global_ranking(**query)['response']
        return to_return
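The namespace above exposes the JSON endpoints (/json/asn, /json/asn_descriptions, /json/asn_history, /json/country_history, /json/asns_global_ranking) and proxies /ipasn_history/ to the configured IPASN History instance. A hedged client-side sketch querying /json/asn with requests follows; the base URL, ASN and date are placeholders, while the accepted fields (asn, date, source, ipversion) mirror ASNRank.post() above.

# Placeholder base URL, ASN and date; adjust to a running instance.
import requests

reply = requests.post('http://127.0.0.1:5005/json/asn',
                      json={'asn': '65536', 'date': '2022-03-01'}).json()
print(reply['response']['asn_description'])
print(reply['response']['ranking'])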
--------------------------------------------------------------------------------
/website/web/helpers.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os

from datetime import date, timedelta
from functools import lru_cache
from pathlib import Path

import pycountry

from flask import request, session

from bgpranking.default import get_homedir


def src_request_ip(request) -> str:
    # NOTE: X-Real-IP is the IP passed by the reverse proxy in the headers.
    real_ip = request.headers.get('X-Real-IP')
    if not real_ip:
        real_ip = request.remote_addr
    return real_ip


@lru_cache(64)
def get_secret_key() -> bytes:
    secret_file_path: Path = get_homedir() / 'secret_key'
    if not secret_file_path.exists() or secret_file_path.stat().st_size < 64:
        if not secret_file_path.exists() or secret_file_path.stat().st_size < 64:
            with secret_file_path.open('wb') as f:
                f.write(os.urandom(64))
    with secret_file_path.open('rb') as f:
        return f.read()


def load_session():
    if request.method == 'POST':
        d = request.form
    elif request.method == 'GET':
        d = request.args  # type: ignore

    for key in d:
        if '_all' in d.getlist(key):
            session.pop(key, None)
        else:
            values = [v for v in d.getlist(key) if v]
            if values:
                if len(values) == 1:
                    session[key] = values[0]
                else:
                    session[key] = values

    # Edge cases
    if 'asn' in session:
        session.pop('country', None)
    elif 'country' in session:
        session.pop('asn', None)
    if 'date' not in session:
        session['date'] = (date.today() - timedelta(days=1)).isoformat()


def get_country_codes():
    for c in pycountry.countries:
        yield c.alpha_2, c.name
--------------------------------------------------------------------------------
/website/web/proxied.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from typing import Any, MutableMapping


class ReverseProxied():
    def __init__(self, app: Any) -> None:
        self.app = app

    def __call__(self, environ: MutableMapping[str, Any], start_response: Any) -> Any:
        scheme = environ.get('HTTP_X_FORWARDED_PROTO')
        if not scheme:
            scheme = environ.get('HTTP_X_SCHEME')

        if scheme:
            environ['wsgi.url_scheme'] = scheme
        return self.app(environ, start_response)
--------------------------------------------------------------------------------
/website/web/static/forkme_right_darkblue_121621.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/D4-project/BGP-Ranking/5f4e051f940b1c79ec5c6b432db1c7b2bdcbc781/website/web/static/forkme_right_darkblue_121621.png
--------------------------------------------------------------------------------
/website/web/static/linegraph.css:
--------------------------------------------------------------------------------
1 | 2 | body { font: 12px Arial;} 3 |
4 | path { 5 | stroke: steelblue; 6 | stroke-width: 2; 7 | fill: none; 8 | } 9 | 10 | .axis path, 11 | .axis line { 12 | fill: none; 13 | stroke: grey; 14 | stroke-width: 1; 15 | shape-rendering: crispEdges; 16 | } 17 | -------------------------------------------------------------------------------- /website/web/static/linegraph.js: -------------------------------------------------------------------------------- 1 | function linegraph(call_path) { 2 | var canvas = document.querySelector("canvas"), 3 | context = canvas.getContext("2d"); 4 | 5 | // set the dimensions and margins of the graph 6 | var margin = {top: 20, right: 20, bottom: 30, left: 50}, 7 | width = canvas.width - margin.left - margin.right, 8 | height = canvas.height - margin.top - margin.bottom; 9 | 10 | // parse the date / time 11 | var parseTime = d3.timeParse("%Y-%m-%d"); 12 | 13 | // set the ranges 14 | var x = d3.scaleTime().range([0, width]); 15 | var y = d3.scaleLinear().range([height, 0]); 16 | 17 | // define the line 18 | var line = d3.line() 19 | .x(function(d) { return x(parseTime(d[0])); }) 20 | .y(function(d) { return y(d[1]); }) 21 | .curve(d3.curveStep) 22 | .context(context); 23 | 24 | context.translate(margin.left, margin.top); 25 | 26 | // Get the data 27 | d3.json(call_path, {credentials: 'same-origin'}).then(function(data) { 28 | x.domain(d3.extent(data.response, function(d) { return parseTime(d[0]); })); 29 | y.domain(d3.extent(data.response, function(d) { return d[1]; })); 30 | 31 | xAxis(); 32 | yAxis(); 33 | 34 | context.beginPath(); 35 | line(data.response); 36 | context.lineWidth = 1.5; 37 | context.strokeStyle = "steelblue"; 38 | context.stroke(); 39 | }); 40 | 41 | function xAxis() { 42 | var tickCount = 10, 43 | tickSize = .1, 44 | ticks = x.ticks(tickCount), 45 | tickFormat = x.tickFormat(); 46 | 47 | context.beginPath(); 48 | ticks.forEach(function(d) { 49 | context.moveTo(x(d), height); 50 | context.lineTo(x(d), height + tickSize); 51 | }); 52 | context.strokeStyle = "black"; 53 | context.stroke(); 54 | 55 | context.textAlign = "center"; 56 | context.textBaseline = "top"; 57 | ticks.forEach(function(d) { 58 | context.fillText(tickFormat(d), x(d), height + tickSize); 59 | }); 60 | } 61 | 62 | function yAxis() { 63 | var tickCount = 20, 64 | tickSize = 1, 65 | tickPadding = 1, 66 | ticks = y.ticks(tickCount), 67 | tickFormat = y.tickFormat(tickCount); 68 | 69 | context.beginPath(); 70 | ticks.forEach(function(d) { 71 | context.moveTo(0, y(d)); 72 | context.lineTo(-6, y(d)); 73 | }); 74 | context.strokeStyle = "black"; 75 | context.stroke(); 76 | 77 | context.beginPath(); 78 | context.moveTo(-tickSize, 0); 79 | context.lineTo(0.5, 0); 80 | context.lineTo(0.5, height); 81 | context.lineTo(-tickSize, height); 82 | context.strokeStyle = "black"; 83 | context.stroke(); 84 | 85 | context.textAlign = "right"; 86 | context.textBaseline = "middle"; 87 | ticks.forEach(function(d) { 88 | context.fillText(tickFormat(d), -tickSize - tickPadding, y(d)); 89 | }); 90 | 91 | context.save(); 92 | context.rotate(-Math.PI / 2); 93 | context.textAlign = "right"; 94 | context.textBaseline = "top"; 95 | context.font = "bold 10px sans-serif"; 96 | context.fillText("Rank", -10, 10); 97 | context.restore(); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /website/web/static/linegraph_country.css: -------------------------------------------------------------------------------- 1 | .axis--x path { 2 | display: none; 3 | } 4 | 5 | .line { 6 | fill: none; 7 | stroke: 
steelblue; 8 | stroke-width: 1.5px; 9 | } 10 | -------------------------------------------------------------------------------- /website/web/static/linegraph_country.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | function linegraph(call_path) { 4 | var svg = d3.select("svg"), 5 | margin = {top: 20, right: 80, bottom: 30, left: 50}, 6 | width = svg.attr("width") - margin.left - margin.right, 7 | height = svg.attr("height") - margin.top - margin.bottom, 8 | g = svg.append("g").attr("transform", "translate(" + margin.left + "," + margin.top + ")"); 9 | 10 | var parseTime = d3.timeParse("%Y-%m-%d"); 11 | 12 | var x = d3.scaleTime().range([0, width]), 13 | y = d3.scaleLinear().range([height, 0]), 14 | z = d3.scaleOrdinal(d3.schemeCategory10); 15 | 16 | var line = d3.line() 17 | .curve(d3.curveLinear) 18 | .x(function(d) { return x(d.date); }) 19 | .y(function(d) { return y(d.rank); }); 20 | 21 | d3.json(call_path, {credentials: 'same-origin'}).then(data => { 22 | var country_ranks = $.map(data.response, function(value, key) { 23 | return { 24 | country: key, 25 | values: $.map(value, function(d) { 26 | return {date: parseTime(d[0]), rank: d[1]}; 27 | }) 28 | }; 29 | }); 30 | 31 | x.domain(d3.extent(country_ranks[0].values, function(d) { return d.date; })); 32 | y.domain([ 33 | d3.min(country_ranks, function(c) { return d3.min(c.values, function(d) { return d.rank; }); }), 34 | d3.max(country_ranks, function(c) { return d3.max(c.values, function(d) { return d.rank; }); }) 35 | ]); 36 | 37 | z.domain(country_ranks.map(function(c) { return c.country; })); 38 | 39 | g.append("g") 40 | .attr("class", "axis axis--x") 41 | .attr("transform", "translate(0," + height + ")") 42 | .call(d3.axisBottom(x)); 43 | 44 | g.append("g") 45 | .attr("class", "axis axis--y") 46 | .call(d3.axisLeft(y)) 47 | .append("text") 48 | .attr("transform", "rotate(-90)") 49 | .attr("y", 6) 50 | .attr("dy", "0.71em") 51 | .attr("fill", "#000") 52 | .text("Rank"); 53 | 54 | var country = g.selectAll(".country") 55 | .data(country_ranks) 56 | .enter().append("g") 57 | .attr("class", "country"); 58 | 59 | country.append("path") 60 | .attr("class", "line") 61 | .attr("d", function(d) { return line(d.values); }) 62 | .style("stroke", function(d) { return z(d.country); }); 63 | 64 | country.append("text") 65 | .datum(function(d) { return {id: d.country, value: d.values[d.values.length - 1]}; }) 66 | .attr("transform", function(d) { return "translate(" + x(d.value.date) + "," + y(d.value.rank) + ")"; }) 67 | .attr("x", 3) 68 | .attr("dy", "0.35em") 69 | .style("font", "10px sans-serif") 70 | .text(function(d) { return d.id; }); 71 | 72 | d3.text('/country_history_callback', 73 | {credentials: 'same-origin', 74 | method: 'POST', 75 | body: JSON.stringify(data.response), 76 | }) 77 | .then(function(data) { 78 | d3.select('#asn_details').html(data); 79 | }); 80 | }); 81 | }; 82 | -------------------------------------------------------------------------------- /website/web/templates/asn.html: -------------------------------------------------------------------------------- 1 | {% extends "main.html" %} 2 | 3 | {% block head %} 4 | {{ super() }} 5 | {% endblock %} 6 | 7 | 8 | {% block title %} 9 | Ranking - {{ asn }} 10 | {% endblock %} 11 | 12 | {% block scripts %} 13 | {{ super() }} 14 | 15 | 16 | {% endblock %} 17 | 18 | {% block content %} 19 |
20 |

Ranking - {{ asn }}



21 | 26 |
27 | {% include ['top_forms.html'] %} 28 | 29 | 30 | 31 | 32 | 33 | {% for timestamp in asn_descriptions.keys()|sort %} 34 | 35 | 36 | 37 | 38 | {% endfor %} 39 |
TimestampASN Description
{{ timestamp }}{{ asn_descriptions[timestamp] }}
40 | 41 | 42 | 43 | 44 | 45 | {% for prefix, rank in ranks %} 46 | 47 | 48 | 49 | 50 | {% endfor %} 51 |
PrefixRank
{{ prefix }}{{ rank }}
52 | 53 | {% if prefix_ips %} 54 | 55 | 56 | 57 | 58 | 59 | {% for ip, sources in prefix_ips %} 60 | 61 | 62 | 63 | 64 | {% endfor %} 65 |
IPSource(s)
{{ ip }}{{ ', '.join(sources) }}
66 | {% endif %} 67 | {% endblock %} 68 | -------------------------------------------------------------------------------- /website/web/templates/country.html: -------------------------------------------------------------------------------- 1 | {% extends "main.html" %} 2 | 3 | {% block head %} 4 | {{ super() }} 5 | 6 | {% endblock %} 7 | 8 | 9 | {% block title %}Ranking - {{ ' '.join(country) }}{% endblock %} 10 | 11 | {% block scripts %} 12 | {{ super() }} 13 | 14 | 15 | {% endblock %} 16 | 17 | {% block content %} 18 |
19 |

Ranking - {{ ' '.join(country) }}



20 |
21 | {% include ['top_forms.html'] %} 22 | 23 |
24 | {% endblock %} 25 | -------------------------------------------------------------------------------- /website/web/templates/country_asn_map.html: -------------------------------------------------------------------------------- 1 | {% for to_display_country in to_display%} 2 | 3 | 4 | {% for date in to_display_country[0] %} 5 | 6 | {% endfor %} 7 | 8 | {% for line in to_display_country[1:] %} 9 | 10 | 11 | {% for rank in line[1:] %} 12 | 13 | {% endfor %} 14 | 15 | {% endfor %} 16 |
{{ date }}
{{ line[0] }}{{ rank }}
17 | {% endfor %} 18 | -------------------------------------------------------------------------------- /website/web/templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "main.html" %} 2 | 3 | {% block title %}BGP Ranking{% endblock %} 4 | 5 | {% block content %} 6 | 7 | Fork me on GitHub 8 | 9 |
10 |

BGP Ranking



11 | 16 |
17 | {% include ['top_forms.html'] %} 18 | 19 | 20 | 21 | 22 | 23 | 24 | {% for asn, rank, description in ranks %} 25 | 26 | 27 | 28 | 29 | 30 | {% endfor %} 31 |
ASNRankDescription
{{ asn }}{{ rank }}{{ description }}
32 | {% endblock %} 33 | -------------------------------------------------------------------------------- /website/web/templates/ipasn.html: -------------------------------------------------------------------------------- 1 | {% extends "main.html" %} 2 | 3 | {% block head %} 4 | {{ super() }} 5 | {% endblock %} 6 | 7 | 8 | {% block title %} IP-ASN History {% endblock %} 9 | 10 | {% block scripts %} 11 | {{ super() }} 12 | {% endblock %} 13 | 14 | {% block content %} 15 |
16 |

IP-ASN History

17 |
18 |

19 |

20 | 21 | 22 | 23 |
24 |

25 | {% if ipasn_details %} 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | {% for entry in ipasn_details %} 35 | 36 | 37 | 38 | 39 | 44 | 45 | 46 | {% endfor %} 47 |
First SeenLast SeenASNASN DescriptionPrefix
{{ entry['first_seen'] }}{{ entry['last_seen'] }}{{ entry['asn'] }} 40 | {% for asn_description in entry['asn_descriptions'] %} 41 | {{ asn_description }}
42 | {% endfor %} 43 |
{{ entry['prefix'] }}
48 | {% endif %} 49 | {% endblock %} 50 | -------------------------------------------------------------------------------- /website/web/templates/main.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | {% block head %} 5 | 6 | 7 | {% block styles %} 8 | {{ bootstrap.load_css() }} 9 | 10 | {% endblock %} 11 | {% endblock %} 12 | 13 | 14 |
15 | {% block content %}{% endblock%} 16 |
17 | {% block scripts %} 18 | {{ bootstrap.load_js() }} 19 | 20 | 21 | 22 | 27 | {% endblock %} 28 | 29 | 30 | -------------------------------------------------------------------------------- /website/web/templates/top_forms.html: -------------------------------------------------------------------------------- 1 |

2 |

3 | 4 | 5 |
6 |
7 | 11 | 12 |
13 |
14 | 20 | 21 |
22 |
23 | 24 | 25 |
26 |
27 | 33 | 34 |
35 |
36 | 37 | 38 |
39 |

40 |
41 | --------------------------------------------------------------------------------