├── .github
│   ├── dependabot.yml
│   └── workflows
│       ├── pip-audit.yml
│       ├── pythonpublish.yml
│       └── tests.yml
├── LICENSE
├── README.md
├── fluxture
│   ├── __init__.py
│   ├── __main__.py
│   ├── async_utils.py
│   ├── bitcoin.py
│   ├── blockchain.py
│   ├── crawl_schema.py
│   ├── crawler.py
│   ├── db.py
│   ├── fluxture.py
│   ├── geolocation.py
│   ├── kml.py
│   ├── messaging.py
│   ├── serialization.py
│   ├── shodan.py
│   ├── statistics.py
│   ├── structures.py
│   └── topology.py
├── setup.py
└── test
    ├── __init__.py
    ├── test_async_utils.py
    ├── test_bitcoin.py
    ├── test_db.py
    ├── test_statistics.py
    └── test_types.py

--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | 
3 | updates:
4 |   - package-ecosystem: github-actions
5 |     directory: /
6 |     schedule:
7 |       interval: daily

--------------------------------------------------------------------------------
/.github/workflows/pip-audit.yml:
--------------------------------------------------------------------------------
1 | name: Scan dependencies for vulnerabilities with pip-audit
2 | 
3 | on:
4 |   push:
5 |     branches: [ "master" ]
6 |   pull_request:
7 |     branches: [ "master" ]
8 |   schedule:
9 |     - cron: "0 12 * * *"
10 | 
11 | jobs:
12 |   pip-audit:
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |       - name: Checkout repository
17 |         uses: actions/checkout@v3
18 | 
19 |       - name: Install Python
20 |         uses: actions/setup-python@v4
21 |         with:
22 |           python-version: "3.x"
23 | 
24 |       - name: Install project
25 |         run: |
26 |           python -m venv --upgrade-deps /tmp/pip-audit-env
27 |           source /tmp/pip-audit-env/bin/activate
28 | 
29 |           python -m pip install --upgrade wheel
30 |           python -m pip install .[dev]
31 | 
32 | 
33 |       - name: Run pip-audit
34 |         uses: pypa/gh-action-pip-audit@v1.0.8
35 |         with:
36 |           virtual-environment: /tmp/pip-audit-env
37 | 
38 | 

--------------------------------------------------------------------------------
/.github/workflows/pythonpublish.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python Package using Twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 | 
4 | name: Upload Python Package
5 | 
6 | on:
7 |   release:
8 |     types: [published]
9 | 
10 | jobs:
11 |   deploy:
12 | 
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |     - uses: actions/checkout@v3
17 |     - name: Set up Python
18 |       uses: actions/setup-python@v4
19 |       with:
20 |         python-version: '3.x'
21 |     - name: Install dependencies
22 |       run: |
23 |         python -m pip install --upgrade pip
24 |         pip install setuptools wheel twine
25 |     - name: Build and publish
26 |       env:
27 |         TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
28 |         TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
29 |       run: |
30 |         python setup.py sdist bdist_wheel
31 |         twine upload dist/*

--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 | 
4 | name: Tests
5 | 
6 | on:
7 |   push:
8 |     branches: [ master ]
9 |   pull_request:
10 |     branches: [ master ]
11 | 
12 | jobs:
13 |   build:
14 | 
15 |     runs-on: ubuntu-latest
16 |     strategy:
17 |       matrix:
18 |         python-version: ["3.7", 
"3.8", "3.9", "3.10"] 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v4 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install setuptools wheel 30 | pip install .[dev] 31 | - name: Lint with flake8 32 | run: | 33 | pip install flake8 34 | # stop the build if there are Python syntax errors or undefined names 35 | flake8 fluxture test --count --select=E9,F63,F7,F82 --show-source --statistics 36 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 37 | flake8 fluxture test --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 38 | - name: Test with pytest 39 | run: | 40 | pip install pytest 41 | pytest 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2019 Trail of Bits 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Fluxture
2 | 
3 | [![PyPI version](https://badge.fury.io/py/fluxture.svg)](https://badge.fury.io/py/fluxture)
4 | [![Tests](https://github.com/trailofbits/fluxture/workflows/Tests/badge.svg)](https://github.com/trailofbits/fluxture/actions)
5 | [![Slack Status](https://slack.empirehacking.nyc/badge.svg)](https://slack.empirehacking.nyc)
6 | 
7 | Fluxture is a lightweight crawler for peer-to-peer networks like blockchains. It currently supports the latest version
8 | of the Bitcoin protocol: 70015. It implements the minimum subset of the Bitcoin protocol necessary to collect geographic
9 | and topological information.
10 | 
11 | ## Quickstart
12 | 
13 | ```commandline
14 | pip3 install fluxture
15 | ```
16 | 
17 | Or, to install from source (_e.g._, for development):
18 | 
19 | ```commandline
20 | $ git clone https://github.com/trailofbits/fluxture
21 | $ cd fluxture
22 | $ pip3 install -e '.[dev]'
23 | ```
24 | 
25 | ## Usage
26 | 
27 | To crawl the Bitcoin network, run:
28 | 
29 | ```commandline
30 | fluxture crawl bitcoin --database crawl.db
31 | ```
32 | 
33 | The crawl database is a SQLite database that can be reused between crawls.
34 | 
35 | ## Geolocation
36 | 
37 | Fluxture uses the MaxMind GeoLite2 City database for geolocating nodes based upon their IP address. Various Fluxture
38 | commands will either require a path to the database, or a MaxMind license key (which will be used to automatically
39 | download the database). You can sign up for a free MaxMind license key
40 | [here](https://www.maxmind.com/en/geolite2/signup).
41 | 
42 | A KML file (which can be imported to Google Maps or Google Earth) can be generated from a crawl using:
43 | 
44 | ```commandline
45 | fluxture kml --group-by ip crawl.db output.kml
46 | ```
47 | 
48 | The geolocation database can be updated from MaxMind by running:
49 | 
50 | ```commandline
51 | fluxture update-geo-db
52 | ```
53 | 
54 | An existing crawl database can be re-analyzed for missing or updated geolocations (_e.g._, from an updated MaxMind database) by running:
55 | 
56 | ```commandline
57 | fluxture geolocate crawl.db
58 | ```
59 | 
60 | ## Topological Analysis
61 | 
62 | Fluxture can calculate topological statistics about the centrality of a crawled network by running:
63 | 
64 | ```commandline
65 | fluxture topology crawl.db
66 | ```
67 | 
68 | ## Programmatic Interface
69 | 
70 | ```python
71 | from fluxture.crawl_schema import CrawlDatabase
72 | 
73 | with CrawlDatabase("crawl.db") as db:
74 |     for node in db.nodes:
75 |         print(f"Node {node.ip}:{node.port} {node.state!s}")
76 |         location = node.get_location()
77 |         if location is not None:
78 |             print(f"\tLocation:\t{location.continent_code}\t{location.country_code}\t{location.city}")
79 |         else:
80 |             print("\tLocation:\t?")
81 |         version = node.get_version()
82 |         if version is not None:
83 |             print(f"\tVersion:\t{version.version!s}")
84 |         else:
85 |             print("\tVersion:\t?")
86 |         print(f"\tOut-Edges:\t{', '.join(str(neighbor.ip) for neighbor in node.get_latest_edges())}")
87 | ```
88 | 
89 | ## License and Acknowledgements
90 | 
91 | This research was developed by [Trail of Bits](https://www.trailofbits.com/) based upon work supported by DARPA under
92 | Contract No. HR001120C0084. 
Any opinions, findings and conclusions or recommendations expressed in this material are 93 | those of the authors and do not necessarily reflect the views of the United States Government or DARPA. 94 | It is licensed under the [Apache 2.0 license](LICENSE). © 2020–2021, Trail of Bits. 95 | -------------------------------------------------------------------------------- /fluxture/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib import import_module 2 | from inspect import isclass 3 | from pathlib import Path 4 | from pkgutil import iter_modules 5 | 6 | # Automatically load all modules in the `fluxture` package, 7 | # so all Fluxture plugins will auto-register themselves: 8 | package_dir = Path(__file__).resolve().parent 9 | for (_, module_name, _) in iter_modules([str(package_dir)]): 10 | # import the module and iterate through its attributes 11 | module = import_module(f"{__name__}.{module_name}") 12 | for attribute_name in dir(module): 13 | attribute = getattr(module, attribute_name) 14 | 15 | if isclass(attribute): 16 | # Add the class to this package's variables 17 | globals()[attribute_name] = attribute 18 | -------------------------------------------------------------------------------- /fluxture/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | from typing import Union 4 | 5 | from .fluxture import add_command_subparsers 6 | 7 | 8 | def get_root_logger() -> logging.Logger: 9 | l = logging.getLogger(__name__) 10 | while l.parent: 11 | l = l.parent 12 | return l 13 | 14 | 15 | def setLevel(level: Union[int, str]): 16 | get_root_logger().setLevel(level) 17 | 18 | 19 | def main(): 20 | parser = argparse.ArgumentParser( 21 | description="Fluxture: a peer-to-peer network crawler" 22 | ) 23 | parser.add_argument( 24 | "--debug", action="store_true", help="set the log level to debug" 25 | ) 26 | 27 | add_command_subparsers(parser) 28 | 29 | args = parser.parse_args() 30 | 31 | if args.debug: 32 | setLevel(logging.DEBUG) 33 | else: 34 | setLevel(logging.INFO) 35 | 36 | return args.func(args) 37 | 38 | 39 | if __name__ == "__main__": 40 | main() 41 | -------------------------------------------------------------------------------- /fluxture/async_utils.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from collections import deque 3 | from functools import partial, wraps 4 | from threading import Condition, Lock, Thread 5 | from typing import (Any, AsyncIterator, Callable, Coroutine, Deque, Dict, 6 | Generic, Iterable, Iterator, Optional, Tuple, TypeVar) 7 | 8 | T = TypeVar("T") 9 | 10 | 11 | class SyncIteratorWrapper(Generic[T]): 12 | def __init__( 13 | self, 14 | to_wrap: Callable[..., Iterator[T]], 15 | args: Iterable[Any] = (), 16 | kwargs: Dict[str, Any] = {}, 17 | poll_interval: float = 0.5, 18 | ): 19 | self.wrapped: Callable[..., Iterator[T]] = to_wrap 20 | self.args: Tuple[Any, ...] 
= tuple(args) 21 | self.kwargs: Dict[str, Any] = kwargs 22 | self.thread: Optional[Thread] = None 23 | self.condition: Optional[Condition] = None 24 | self.result_queue: Deque[T] = deque() 25 | self.poll_interval: float = poll_interval 26 | 27 | def __getattr__(self, item): 28 | return getattr(self.wrapped, item) 29 | 30 | def __iter__(self): 31 | return self.wrapped(*self.args, **self.kwargs) 32 | 33 | def _run(self): 34 | for result in self.wrapped(*self.args, **self.kwargs): 35 | with self.condition: 36 | self.result_queue.append(result) 37 | 38 | def __aiter__(self): 39 | if self.thread is None: 40 | self.condition: Condition = Condition(Lock()) 41 | self.thread = Thread(target=self._run) 42 | self.thread.start() 43 | return self 44 | 45 | async def __anext__(self): 46 | while True: 47 | with self.condition: 48 | if self.result_queue: 49 | return self.result_queue.popleft() 50 | elif self.thread is None or not self.thread.is_alive(): 51 | # The thread finished and there are no more results 52 | self.thread = None 53 | raise StopAsyncIteration() 54 | await asyncio.sleep(self.poll_interval) 55 | 56 | 57 | def iterator_to_async( 58 | to_wrap: Optional[Callable[..., Iterator[T]]] = None, *, poll_interval: float = 0.5 59 | ): 60 | """Decorator to automatically convert a synchronous function that returns an iterator to be asynchronous""" 61 | if to_wrap is None: 62 | # this will happen if the user optionally passes a `poll_interval` argument 63 | return partial(iterator_to_async, poll_interval=poll_interval) 64 | 65 | @wraps(to_wrap) 66 | def wrapper(*args, **kwargs): 67 | return SyncIteratorWrapper(to_wrap, args, kwargs, poll_interval=poll_interval) 68 | 69 | return wrapper 70 | 71 | 72 | def sync_to_async( 73 | to_wrap: Optional[Callable[..., T]] = None, *, poll_interval: float = 0.5 74 | ) -> Callable[..., Coroutine[Any, Any, T]]: 75 | """Decorator to automatically convert a synchronous function to be asynchronous""" 76 | if to_wrap is None: 77 | # this will happen if the user optionally passes a `poll_interval` argument 78 | return partial(sync_to_async, poll_interval=poll_interval) 79 | 80 | class WrapperThread(Thread): 81 | def __init__(self, *args, **kwargs): 82 | super().__init__() 83 | self.args = args 84 | self.kwargs = kwargs 85 | self.result: Optional[T] = None 86 | 87 | def run(self): 88 | self.result = to_wrap(*self.args, **self.kwargs) 89 | 90 | @wraps(to_wrap) 91 | async def wrapper(*args, **kwargs) -> T: 92 | thread = WrapperThread(*args, **kwargs) 93 | thread.start() 94 | while thread.is_alive(): 95 | await asyncio.sleep(poll_interval) 96 | return thread.result 97 | 98 | return wrapper 99 | -------------------------------------------------------------------------------- /fluxture/bitcoin.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import socket 3 | import sys 4 | from abc import ABC 5 | from hashlib import sha256 6 | from ipaddress import IPv4Address, IPv6Address 7 | from logging import getLogger 8 | from time import time as current_time 9 | from typing import (AsyncIterator, Dict, FrozenSet, Generic, KeysView, List, 10 | Optional, Set, Tuple, Type, TypeVar, Union) 11 | 12 | import fluxture.structures 13 | 14 | from . 
import serialization
15 | from .blockchain import (Blockchain, BlockchainError, Miner, Node, Version,
16 |                          get_public_ip)
17 | from .messaging import BinaryMessage
18 | from .serialization import ByteOrder, P, UnpackError
19 | from .shodan import SearchQuery, ShodanResult, get_api
20 | 
21 | log = getLogger(__file__)
22 | 
23 | BITCOIN_MAINNET_MAGIC = b"\xf9\xbe\xb4\xd9"
24 | 
25 | 
26 | NODE_QUERY = SearchQuery.register(
27 |     name="BitcoinNode", query='port:8333 product:"/Satoshi:*/"'
28 | )
29 | MINER_QUERY = SearchQuery.register(name="BitcoinMiner", query="antminer")
30 | 
31 | 
32 | B = TypeVar("B", bound="BitcoinMessage")
33 | 
34 | 
35 | class BitcoinError(BlockchainError):
36 |     pass
37 | 
38 | 
39 | class BitcoinMessageHeader(BinaryMessage):
40 |     non_serialized = "byte_order"
41 |     byte_order = ByteOrder.LITTLE
42 | 
43 |     magic: serialization.SizedByteArray[4]
44 |     command: serialization.SizedByteArray[12]
45 |     length: serialization.UInt32
46 |     checksum: serialization.SizedByteArray[4]
47 | 
48 |     @property
49 |     def decoded_command(self) -> str:
50 |         decoded = self.command.decode("utf-8")
51 |         first_null_byte = decoded.find("\0")
52 |         if any(c != "\0" for c in decoded[first_null_byte:]):
53 |             raise ValueError(
54 |                 f"Command name {self.command!r} includes bytes after the null terminator!"
55 |             )
56 |         return decoded[:first_null_byte]
57 | 
58 |     @classmethod
59 |     async def next_message(
60 |         cls, reader: asyncio.StreamReader
61 |     ) -> Optional["BitcoinMessageHeader"]:
62 |         data = await reader.read(4 + 12 + serialization.UInt32.BYTES + 4)
63 |         if not data:
64 |             return None
65 |         return cls.deserialize(data)
66 | 
67 |     def __repr__(self):
68 |         return (
69 |             f"{self.__class__.__name__}(magic={self.magic!r}, command={self.decoded_command!r}, "
70 |             f"length={self.length!r}, checksum={self.checksum!r})"
71 |         )
72 | 
73 | 
74 | MESSAGES_BY_COMMAND: Dict[str, Type["BitcoinMessage"]] = {}
75 | 
76 | 
77 | def bitcoin_checksum(payload: bytes) -> bytes:
78 |     return sha256(sha256(payload).digest()).digest()[:4]
79 | 
80 | 
81 | class BitcoinMessage(BinaryMessage, ABC):
82 |     non_serialized = "byte_order", "command"
83 |     byte_order = ByteOrder.LITTLE
84 |     command: Optional[str] = None
85 | 
86 |     def __init_subclass__(cls, **kwargs):
87 |         if cls.command is None:
88 |             raise TypeError(
89 |                 f"{cls.__name__} extends BitcoinMessage but does not specify a command string!"
90 |             )
91 |         elif cls.command in MESSAGES_BY_COMMAND:
92 |             raise TypeError(
93 |                 f"The command {cls.command} is already registered to message class "
94 |                 f"{MESSAGES_BY_COMMAND[cls.command]}"
95 |             )
96 |         MESSAGES_BY_COMMAND[cls.command] = cls
97 | 
98 |     def serialize(self) -> bytes:
99 |         payload = super().serialize()
100 |         return (
101 |             BitcoinMessageHeader(
102 |                 magic=BITCOIN_MAINNET_MAGIC,
103 |                 command=self.command.encode("utf-8"),
104 |                 length=len(payload),
105 |                 checksum=bitcoin_checksum(payload),
106 |             ).serialize()
107 |             + payload
108 |         )
109 | 
110 |     @classmethod
111 |     def deserialize_partial(
112 |         cls, data: bytes, header: Optional[BitcoinMessageHeader] = None
113 |     ) -> Tuple["BitcoinMessage", bytes]:
114 |         if header is None:
115 |             header, payload = BitcoinMessageHeader.unpack_partial(
116 |                 data, byte_order=BitcoinMessageHeader.byte_order
117 |             )
118 |         else:
119 |             payload = data
120 |         if header.magic != BITCOIN_MAINNET_MAGIC:
121 |             raise ValueError(
122 |                 f"Message header magic was {header.magic}, but expected {BITCOIN_MAINNET_MAGIC!r} "
123 |                 "for Bitcoin mainnet!"
124 |             )
125 |         elif header.length > len(payload):
126 |             raise ValueError(
127 |                 f"Invalid payload length of {len(payload)}; expected at least {header.length} bytes"
128 |             )
129 |         elif header.decoded_command not in MESSAGES_BY_COMMAND:
130 |             raise NotImplementedError(
131 |                 f'TODO: Implement Bitcoin command "{header.command}"'
132 |             )
133 |         payload, remainder = payload[: header.length], payload[header.length :]
134 |         decoded_command = header.decoded_command
135 |         expected_checksum = bitcoin_checksum(payload)
136 |         if header.checksum != expected_checksum:
137 |             raise ValueError(
138 |                 f"Invalid message checksum; got {header.checksum!r} but expected {expected_checksum!r}"
139 |             )
140 |         return (
141 |             MESSAGES_BY_COMMAND[decoded_command].unpack(
142 |                 payload, MESSAGES_BY_COMMAND[decoded_command].byte_order
143 |             ),
144 |             remainder,
145 |         )
146 | 
147 |     @classmethod
148 |     def deserialize(cls, data: bytes) -> "BitcoinMessage":
149 |         message, remainder = cls.deserialize_partial(data)
150 |         if remainder:
151 |             raise ValueError(f"Unexpected bytes trailing message: {remainder!r}")
152 |         return message
153 | 
154 |     @classmethod
155 |     async def next_message(
156 |         cls, reader: asyncio.StreamReader
157 |     ) -> Optional["BitcoinMessage"]:
158 |         header = await BitcoinMessageHeader.next_message(reader)
159 |         if header is None:
160 |             return None
161 |         try:
162 |             payload = await reader.readexactly(header.length)
163 |         except asyncio.IncompleteReadError:
164 |             raise ValueError(
165 |                 f"Expected {header.length} bytes when reading the message with header {header!r}"
166 |             )
167 |         msg, remainder = cls.deserialize_partial(payload, header=header)
168 |         assert len(remainder) == 0
169 |         return msg
170 | 
171 | 
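# Editor's note (added example; not part of the original module): VarInt below
# implements Bitcoin's variable-length "CompactSize" integer encoding, in which
# a one-byte tag selects the payload width. Assuming the little-endian default
# used throughout this module, a few round-trip values illustrate the format:
#
#     VarInt(0xFC).pack()   == b"\xfc"                  # values < 0xFD fit in the tag byte
#     VarInt(0xFFFF).pack() == b"\xfd\xff\xff"          # 0xFD tag + uint16 payload
#     VarInt(70015).pack()  == b"\xfe\x7f\x11\x01\x00"  # 0xFE tag + uint32 payload
#
#     VarInt.unpack_partial(b"\xfd\xff\xff" + b"rest")  == (VarInt(0xFFFF), b"rest")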
172 | class VarInt(int, serialization.AbstractPackable):
173 |     def __new__(cls, value: int):
174 |         return int.__new__(cls, value)
175 | 
176 |     def pack(
177 |         self, byte_order: serialization.ByteOrder = serialization.ByteOrder.LITTLE
178 |     ) -> bytes:
179 |         value = int(self)
180 |         if value < 0xFD:
181 |             return bytes([value])
182 |         elif value <= 0xFFFF:
183 |             return b"\xFD" + serialization.UInt16(value).pack(byte_order)
184 |         elif value <= 0xFFFFFFFF:
185 |             return b"\xFE" + serialization.UInt32(value).pack(byte_order)
186 |         elif value <= serialization.UInt64.MAX_VALUE:
187 |             return b"\xFF" + serialization.UInt64(value).pack(byte_order)
188 |         else:
189 |             raise ValueError(
190 |                 f"Value {value} must be less than {serialization.UInt64.MAX_VALUE}"
191 |             )
192 | 
193 |     @classmethod
194 |     def unpack_partial(
195 |         cls: Type[P], data: bytes, byte_order: ByteOrder = ByteOrder.LITTLE
196 |     ) -> Tuple[P, bytes]:
197 |         if data[0] < 0xFD:
198 |             return cls(data[0]), data[1:]
199 |         elif data[0] == 0xFD:
200 |             return (
201 |                 cls(serialization.UInt16.unpack(data[1:3], byte_order=byte_order)),
202 |                 data[3:],
203 |             )
204 |         elif data[0] == 0xFE:
205 |             return (
206 |                 cls(serialization.UInt32.unpack(data[1:5], byte_order=byte_order)),
207 |                 data[5:],
208 |             )
209 |         elif data[0] == 0xFF:
210 |             return (
211 |                 cls(serialization.UInt64.unpack(data[1:9], byte_order=byte_order)),
212 |                 data[9:],
213 |             )
214 |         else:
215 |             raise UnpackError(f"Unexpected data: {data!r}")
216 | 
217 |     @classmethod
218 |     async def read(
219 |         cls: Type[P],
220 |         reader: asyncio.StreamReader,
221 |         byte_order: ByteOrder = ByteOrder.NETWORK,
222 |     ) -> P:
223 |         first_byte = await reader.read(1)
224 |         if len(first_byte) < 1:
225 |             raise BitcoinError()
226 |         elif first_byte[0] < 0xFD:
227 |             return cls(first_byte[0])
228 |         elif first_byte[0] == 0xFD:
229 |             data_type = 
serialization.UInt16 230 | elif first_byte[0] == 0xFE: 231 | data_type = serialization.UInt32 232 | elif first_byte[0] == 0xFF: 233 | data_type = serialization.UInt64 234 | else: 235 | raise BitcoinError() 236 | return cls(await data_type.read(reader, byte_order=byte_order)) 237 | 238 | 239 | class VarStr(bytes, serialization.AbstractPackable): 240 | def __new__(cls, value: bytes): 241 | return bytes.__new__(cls, value) 242 | 243 | def pack( 244 | self, byte_order: serialization.ByteOrder = serialization.ByteOrder.LITTLE 245 | ) -> bytes: 246 | return VarInt(len(self)).pack(byte_order) + self 247 | 248 | @classmethod 249 | def unpack_partial( 250 | cls: Type[P], data: bytes, byte_order: ByteOrder = ByteOrder.LITTLE 251 | ) -> Tuple[P, bytes]: 252 | length, remainder = VarInt.unpack_partial(data, byte_order=byte_order) 253 | if len(remainder) < length: 254 | raise UnpackError( 255 | f"Expected a byte sequence of length {length} but instead got {remainder!r}" 256 | ) 257 | return remainder[:length], remainder[length:] 258 | 259 | @classmethod 260 | async def read( 261 | cls: Type[P], 262 | reader: asyncio.StreamReader, 263 | byte_order: ByteOrder = ByteOrder.NETWORK, 264 | ) -> P: 265 | length = await VarInt.read(reader, byte_order=byte_order) 266 | string = await reader.read(length) 267 | if len(string) < length: 268 | raise UnpackError( 269 | f"Expected a byte sequence of length {length} but instead got {string!r}" 270 | ) 271 | return string 272 | 273 | 274 | class NetAddr(fluxture.structures.PackableStruct): 275 | services: serialization.UInt64 276 | ip: serialization.BigEndian[serialization.IPv6Address] 277 | port: serialization.BigEndian[serialization.UInt16] 278 | 279 | def __init__( 280 | self, 281 | services: int = 0, 282 | ip: Optional[Union[serialization.IPv6Address, str, bytes]] = None, 283 | port: int = 8333, 284 | ): 285 | if ip is None: 286 | ip = get_public_ip() 287 | if not isinstance(ip, serialization.IPv6Address): 288 | # IP is big-endian in Bitcoin 289 | ip = serialization.IPv6Address(ip) 290 | super().__init__(services=services, ip=ip, port=port) 291 | 292 | 293 | class NetIP(fluxture.structures.PackableStruct): 294 | time: serialization.UInt32 295 | addr: NetAddr 296 | 297 | def __init__(self, time: Optional[int] = None, addr: Optional[NetAddr] = None): 298 | if time is None: 299 | time = int(current_time()) 300 | if addr is None: 301 | addr = NetAddr() 302 | super().__init__(time=time, addr=addr) 303 | 304 | 305 | class VerackMessage(BitcoinMessage): 306 | command = "verack" 307 | 308 | 309 | class SendHeaders(BitcoinMessage): 310 | command = "sendheaders" 311 | 312 | 313 | class SendCmpct(BitcoinMessage): 314 | command = "sendcmpct" 315 | 316 | announce: serialization.Bool 317 | version: serialization.UInt64 318 | 319 | 320 | class Ping(BitcoinMessage): 321 | command = "ping" 322 | 323 | nonce: serialization.UInt64 324 | 325 | 326 | class Pong(BitcoinMessage): 327 | command = "pong" 328 | 329 | nonce: serialization.UInt64 330 | 331 | 332 | class VersionMessage(BitcoinMessage): 333 | command = "version" 334 | 335 | version: serialization.Int32 336 | services: serialization.UInt64 337 | timestamp: serialization.Int64 338 | addr_recv: NetAddr 339 | addr_from: NetAddr 340 | nonce: serialization.UInt64 341 | user_agent: VarStr 342 | start_height: serialization.Int32 343 | relay: serialization.Bool 344 | 345 | def __str__(self): 346 | try: 347 | s = self.user_agent.decode("utf-8") 348 | except UnicodeDecodeError: 349 | s = repr(self.user_agent) 350 | return 
f"{int(self.version)} {s}" 351 | 352 | 353 | class FeeFilter(BitcoinMessage): 354 | command = "feefilter" 355 | 356 | feerate: serialization.UInt64 357 | 358 | 359 | class GetAddrMessage(BitcoinMessage): 360 | command = "getaddr" 361 | 362 | 363 | class AbstractList(list, Generic[P], List[P], serialization.AbstractPackable, ABC): 364 | ELEMENT_TYPE: Type[P] 365 | 366 | def __new__(cls, *args, **kwargs): 367 | return list.__new__(cls, *args, **kwargs) 368 | 369 | def pack(self, byte_order: ByteOrder = ByteOrder.NETWORK) -> bytes: 370 | return VarInt(len(self)).pack(byte_order) + b"".join( 371 | element.pack(byte_order) for element in self 372 | ) 373 | 374 | @classmethod 375 | def unpack_partial( 376 | cls: Type[P], data: bytes, byte_order: ByteOrder = ByteOrder.NETWORK 377 | ) -> Tuple[P, bytes]: 378 | length, remainder = VarInt.unpack_partial(data, byte_order) 379 | num_bytes = length * cls.ELEMENT_TYPE.num_bytes 380 | if num_bytes > len(remainder): 381 | raise UnpackError(f"Expected {num_bytes} bytes, but got {remainder!r}") 382 | iters = [iter(remainder[:num_bytes])] * cls.ELEMENT_TYPE.num_bytes 383 | return ( 384 | cls( 385 | cls.ELEMENT_TYPE.unpack(bytes(data), byte_order=byte_order) 386 | for data in zip(*iters) 387 | ), 388 | remainder[num_bytes:], 389 | ) 390 | 391 | @classmethod 392 | async def read( 393 | cls: Type[P], 394 | reader: asyncio.StreamReader, 395 | byte_order: ByteOrder = ByteOrder.NETWORK, 396 | ) -> P: 397 | length = await VarInt.read(reader, byte_order) 398 | return cls( 399 | cls.ELEMENT_TYPE.read(reader, byte_order=byte_order) for _ in range(length) 400 | ) 401 | 402 | 403 | class AddressList(AbstractList[NetIP]): 404 | ELEMENT_TYPE = NetIP 405 | 406 | 407 | class Identifier(serialization.UInt32): 408 | MSG_TX = serialization.UInt32(1) 409 | MSG_BLOCK = serialization.UInt32(2) 410 | MSG_FILTERED_BLOCK = serialization.UInt32(3) 411 | MSG_CMPCT_BLOCK = serialization.UInt32(4) 412 | MSG_WITNESS_TX = serialization.UInt32((1 << 30) | 1) 413 | MSG_WITNESS_BLOCK = serialization.UInt32((1 << 30) | 2) 414 | MSG_FILTERED_WITNESS_BLOCK = serialization.UInt32((1 << 30) | 3) 415 | 416 | 417 | class Inventory(fluxture.structures.PackableStruct): 418 | identifier: Identifier 419 | hash: serialization.SizedByteArray[32] 420 | 421 | 422 | class Inventories(AbstractList[Inventory]): 423 | ELEMENT_TYPE = Inventory 424 | 425 | 426 | class InvMessage(BitcoinMessage): 427 | command = "inv" 428 | 429 | inventories: Inventories 430 | 431 | 432 | class AddrMessage(BitcoinMessage): 433 | command = "addr" 434 | 435 | addresses: AddressList 436 | 437 | 438 | class BitcoinNode(Node): 439 | def __init__( 440 | self, 441 | address: Union[str, IPv4Address, IPv6Address], 442 | port: int = 8333, 443 | source: str = "peer", 444 | ): 445 | super().__init__(address, port, source) 446 | self.connected: bool = False 447 | self.connecting: bool = False 448 | self.version: Optional[VersionMessage] = None 449 | 450 | async def receive_message(self) -> Optional["BitcoinMessage"]: 451 | return await BitcoinMessage.next_message(await self.reader) 452 | 453 | async def connect(self): 454 | if self.connected or self.connecting: 455 | return 456 | await super().connect() 457 | self.connecting = True 458 | t = int(current_time()) 459 | await self.send_message( 460 | VersionMessage( 461 | version=70015, 462 | services=0, 463 | timestamp=t, 464 | addr_recv=NetAddr(ip=self.address, port=self.port), 465 | addr_from=NetAddr(ip="::ffff:127.0.0.1", port=8333), 466 | nonce=0, 467 | user_agent=b"fluxture", 468 | 
start_height=0, 469 | relay=True, 470 | ) 471 | ) 472 | async for reply in self.run(): 473 | if isinstance(reply, VerackMessage): 474 | self.connected = True 475 | break 476 | if not self.connected: 477 | raise BitcoinError( 478 | f"Did not receive a Verack message from client {self.address}:{self.port}" 479 | ) 480 | self.connecting = False 481 | 482 | async def get_neighbors(self) -> AddrMessage: 483 | async with self: 484 | await self.send_message(GetAddrMessage()) 485 | async for msg in self.run(): 486 | if isinstance(msg, AddrMessage): 487 | return msg 488 | raise BitcoinError( 489 | f"Node {self.address}:{self.port} closed the connection before replying to our " 490 | "GetAddr message" 491 | ) 492 | 493 | async def get_version(self) -> VersionMessage: 494 | if self.version is not None: 495 | return self.version 496 | async with self: 497 | async for _ in self.run(): 498 | if self.version is not None: 499 | return self.version 500 | raise BitcoinError( 501 | f"Node {self.address}:{self.port} closed the connection before sending us a VersionMessage" 502 | ) 503 | 504 | async def run(self) -> AsyncIterator["BitcoinMessage"]: 505 | async with self: 506 | await self.connect() 507 | while True: 508 | done, pending = await asyncio.wait( 509 | [self.join(), self.receive_message()], 510 | return_when=asyncio.FIRST_COMPLETED, 511 | ) 512 | gather = asyncio.gather(*pending) 513 | gather.cancel() 514 | try: 515 | await gather 516 | except asyncio.CancelledError: 517 | pass 518 | got_message = False 519 | for result in done: 520 | try: 521 | message = result.result() 522 | except (NotImplementedError, ValueError, UnpackError) as e: 523 | sys.stderr.write(f"Warning: {e!s}") 524 | continue 525 | if not isinstance(message, BitcoinMessage): 526 | continue 527 | got_message = True 528 | if self.is_running: 529 | # print(f"{self.address}:{self.port} {message}") 530 | if isinstance(message, VersionMessage): 531 | self.version = message 532 | await self.send_message(VerackMessage()) 533 | elif isinstance(message, Ping): 534 | await self.send_message(Pong(nonce=message.nonce)) 535 | yield message 536 | if not got_message: 537 | break 538 | 539 | 540 | async def collect_addresses(url: str, port: int = 8333) -> Tuple[BitcoinNode, ...]: 541 | return tuple( 542 | BitcoinNode(addr[4][0], source="seed") 543 | for addr in await asyncio.get_running_loop().getaddrinfo( 544 | url, port, proto=socket.IPPROTO_TCP 545 | ) 546 | ) 547 | 548 | 549 | async def collect_defaults( 550 | *args: Union[Tuple[str], Tuple[str, int]], use_shodan: bool = True 551 | ) -> AsyncIterator[BitcoinNode]: 552 | yielded: Set[IPv6Address] = set() 553 | futures = [asyncio.ensure_future(collect_addresses(*arg)) for arg in args] 554 | if use_shodan: 555 | shodan_iterator: Optional[AsyncIterator[ShodanResult]] = NODE_QUERY.run_async( 556 | get_api() 557 | ).__aiter__() 558 | futures.append(asyncio.ensure_future(shodan_iterator.__anext__())) 559 | else: 560 | shodan_iterator = None 561 | shodan_results = 0 562 | bitcoin_seeds = 0 563 | while futures: 564 | done, pending = await asyncio.wait(futures, return_when=asyncio.FIRST_COMPLETED) 565 | futures = list(pending) 566 | for result in await asyncio.gather(*done, return_exceptions=True): 567 | if isinstance(result, StopAsyncIteration): 568 | shodan_iterator = None 569 | continue 570 | elif isinstance(result, ShodanResult) and shodan_iterator is not None: 571 | if result.ip not in yielded: 572 | yield BitcoinNode(result.ip, source="shodan") 573 | yielded.add(result.ip) 574 | shodan_results += 1 
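                # Editor's note (added comment): re-arm the Shodan iterator by
                # scheduling its next __anext__() so that results keep streaming
                # in concurrently with the DNS-seed lookups.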
575 | futures.append(asyncio.ensure_future(shodan_iterator.__anext__())) 576 | elif isinstance(result, Exception): 577 | sys.stderr.write(f"{result!s}\n") 578 | continue 579 | else: 580 | # this should be an iterable of BitcoinNodes 581 | for node in result: # type: ignore 582 | assert isinstance(node, BitcoinNode) 583 | bitcoin_seeds += 1 584 | if node.address not in yielded: 585 | yield node 586 | yielded.add(node.address) 587 | sys.stderr.write("Got ") 588 | if use_shodan: 589 | sys.stderr.write(f"{shodan_results} seed nodes from Shodan and ") 590 | sys.stderr.write(f"{bitcoin_seeds} official Bitcoin seed nodes\n") 591 | 592 | 593 | class Bitcoin(Blockchain[BitcoinNode]): 594 | name = "bitcoin" 595 | node_type = BitcoinNode 596 | _miner_query_lock: Optional[asyncio.Lock] = None 597 | _miners: Optional[Dict[IPv6Address, ShodanResult]] = None 598 | _finished_miners_query: bool = False 599 | 600 | @classmethod 601 | async def default_seeds(cls) -> AsyncIterator[BitcoinNode]: 602 | return collect_defaults( 603 | ("dnsseed.bitcoin.dashjr.org",), 604 | ("dnsseed.bluematt.me",), 605 | ("seed.bitcoin.jonasschnelli.ch",), 606 | ("seed.bitcoin.sipa.be",), 607 | ("seed.bitcoinstats.com",), 608 | ("seed.btc.petertodd.org",), 609 | ) 610 | 611 | async def get_version(self, node: BitcoinNode) -> Optional[Version]: 612 | try: 613 | version = await node.get_version() 614 | return Version(str(version), version.timestamp) 615 | except BitcoinError: 616 | return None 617 | 618 | async def get_neighbors(self, node: BitcoinNode) -> FrozenSet[BitcoinNode]: 619 | assert node.is_running 620 | neighbor_addrs = await node.get_neighbors() 621 | return frozenset( 622 | BitcoinNode(addr.addr.ip, addr.addr.port) 623 | for addr in neighbor_addrs.addresses 624 | if addr.addr.ip != node.address or addr.addr.port != node.port 625 | ) 626 | 627 | async def get_miner_ips(self) -> KeysView[IPv6Address]: 628 | if self._miner_query_lock is None: 629 | self._miner_query_lock = asyncio.Lock() 630 | await self._miner_query_lock.acquire() 631 | if self._miners is None: 632 | self._miners = {} 633 | self._miner_query_lock.release() 634 | async for miner in MINER_QUERY.run_async(get_api()): 635 | async with self._miner_query_lock: 636 | self._miners[miner.ip] = miner 637 | async with self._miner_query_lock: 638 | self._finished_miners_query = True 639 | else: 640 | self._miner_query_lock.release() 641 | return self._miners.keys() 642 | 643 | async def get_miners(self) -> FrozenSet[BitcoinNode]: 644 | return frozenset(BitcoinNode(ip) for ip in await self.get_miner_ips()) 645 | 646 | async def is_miner(self, node: BitcoinNode) -> Miner: 647 | if self._miner_query_lock is None: 648 | self._miner_query_lock = asyncio.Lock() 649 | async with self._miner_query_lock: 650 | is_miner = ( 651 | self._miners is not None 652 | and self._finished_miners_query 653 | and node.address in self._miners 654 | ) 655 | if is_miner or node.address in await self.get_miner_ips(): 656 | return Miner.MINER 657 | else: 658 | return Miner.UNKNOWN 659 | -------------------------------------------------------------------------------- /fluxture/blockchain.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import socket 3 | from abc import ABCMeta, abstractmethod 4 | from ipaddress import IPv4Address, IPv6Address, ip_address 5 | from typing import (AsyncIterator, Dict, FrozenSet, Generic, Optional, Tuple, 6 | Type, TypeVar, Union) 7 | 8 | from . 
import serialization 9 | from .messaging import Message 10 | 11 | 12 | class BlockchainError(RuntimeError): 13 | pass 14 | 15 | 16 | class Miner(serialization.IntEnum): 17 | UNKNOWN = 0 18 | MINER = 1 19 | NOT_MINER = 2 20 | 21 | 22 | def get_public_ip() -> Union[IPv4Address, IPv6Address]: 23 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 24 | s.connect(("8.8.8.8", 80)) 25 | try: 26 | return ip_address(s.getsockname()[0]) 27 | finally: 28 | s.close() 29 | 30 | 31 | class Version: 32 | def __init__(self, version: str, timestamp: int): 33 | self.version: str = version 34 | self.timestamp: int = timestamp 35 | 36 | 37 | class Node(metaclass=ABCMeta): 38 | def __init__( 39 | self, 40 | address: Union[str, bytes, IPv4Address, IPv6Address], 41 | port: int, 42 | source: str = "peer", 43 | ): 44 | if not isinstance(address, IPv6Address): 45 | self.address: IPv6Address = serialization.IPv6Address(address) 46 | else: 47 | self.address = address 48 | self.port: int = port 49 | self.source: str = source 50 | self._reader: Optional[asyncio.StreamReader] = None 51 | self._writer: Optional[asyncio.StreamWriter] = None 52 | self._entries: int = 0 53 | self._stop: Optional[asyncio.Event] = None 54 | 55 | @property 56 | def is_running(self) -> bool: 57 | return ( 58 | self._reader is not None 59 | and self._stop is not None 60 | and not self._stop.is_set() 61 | ) 62 | 63 | def terminate(self): 64 | if self._stop is not None: 65 | self._stop.set() 66 | 67 | async def join(self): 68 | if self._stop is not None: 69 | await self._stop.wait() 70 | 71 | @property 72 | async def reader(self) -> asyncio.StreamReader: 73 | if self._reader is None: 74 | await self.connect() 75 | return self._reader 76 | 77 | @property 78 | async def writer(self) -> asyncio.StreamWriter: 79 | if self._writer is None: 80 | await self.connect() 81 | return self._writer 82 | 83 | async def connect(self): 84 | if self._reader is None: 85 | self._reader, self._writer = await asyncio.open_connection( 86 | str(self.address), 87 | self.port, 88 | happy_eyeballs_delay=0.25, # this causes IPv4 and IPv6 attempts to be interleaved 89 | ) 90 | if self._stop is None: 91 | self._stop = asyncio.Event() 92 | elif self._stop.is_set(): 93 | self._stop.clear() 94 | 95 | async def close(self): 96 | if self._writer is not None: 97 | self._writer.close() 98 | try: 99 | await self._writer.wait_closed() 100 | except BrokenPipeError: 101 | # this is expected 102 | pass 103 | self._reader = None 104 | self._writer = None 105 | if not self._stop.is_set(): 106 | self._stop.set() 107 | 108 | async def __aenter__(self): 109 | self._entries += 1 110 | if self._entries == 1 and self._reader is None: 111 | await self.connect() 112 | 113 | async def __aexit__(self, exc_type, exc_val, exc_tb): 114 | self._entries -= 1 115 | if self._entries == 0 and self._reader is not None: 116 | await self.close() 117 | 118 | async def send_message(self, message: Message): 119 | writer = await self.writer 120 | writer.write(message.serialize()) 121 | await writer.drain() 122 | 123 | def __hash__(self): 124 | return hash((self.address, self.port)) 125 | 126 | def __eq__(self, other): 127 | return ( 128 | isinstance(other, Node) 129 | and other.address == self.address 130 | and other.port == self.port 131 | ) 132 | 133 | def __repr__(self): 134 | return ( 135 | f"{self.__class__.__name__}(address={self.address!r}, port={self.port!r})" 136 | ) 137 | 138 | @abstractmethod 139 | async def run(self) -> AsyncIterator[Message]: 140 | raise NotImplementedError() 141 | 142 | 143 | N = 
TypeVar("N", bound=Node) 144 | 145 | 146 | BLOCKCHAINS: Dict[str, Type["Blockchain[Node]"]] = {} 147 | 148 | 149 | class Blockchain(Generic[N], metaclass=ABCMeta): 150 | DEFAULT_SEEDS: Tuple[N, ...] = () 151 | name: str 152 | node_type: Type[N] 153 | 154 | def __init_subclass__(cls, **kwargs): 155 | if not hasattr(cls, "name") or cls.name is None: 156 | raise TypeError("Subclasses of `Blockchain` must define a `name`") 157 | if not hasattr(cls, "node_type") or cls.node_type is None: 158 | raise TypeError("Subclasses of `Blockchain` must define a `node_type`") 159 | BLOCKCHAINS[cls.name] = cls 160 | 161 | @classmethod 162 | @abstractmethod 163 | async def default_seeds(cls) -> AsyncIterator[N]: 164 | raise NotImplementedError() 165 | 166 | @abstractmethod 167 | async def get_neighbors(self, node: N) -> FrozenSet[N]: 168 | raise NotImplementedError() 169 | 170 | @abstractmethod 171 | async def get_version(self, node: N) -> Optional[Version]: 172 | raise NotImplementedError() 173 | 174 | @abstractmethod 175 | async def is_miner(self, node: N) -> Miner: 176 | raise NotImplementedError() 177 | 178 | @abstractmethod 179 | async def get_miners(self) -> FrozenSet[N]: 180 | raise NotImplementedError() 181 | -------------------------------------------------------------------------------- /fluxture/crawl_schema.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | from abc import abstractmethod 3 | from ipaddress import IPv4Address 4 | from ipaddress import IPv6Address as IPv6AddressPython 5 | from typing import (Callable, FrozenSet, Generic, Optional, Set, Sized, 6 | TypeVar, Union) 7 | 8 | from .blockchain import Miner, Node, Version 9 | from .db import Cursor, Database, ForeignKey, Model, Table 10 | from .geolocation import Geolocation 11 | from .serialization import DateTime, IntFlag, IPv6Address 12 | 13 | N = TypeVar("N", bound=Node) 14 | 15 | 16 | class HostInfo(Model): 17 | ip: IPv6Address 18 | isp: str 19 | os: str 20 | timestamp: DateTime 21 | 22 | def __hash__(self): 23 | return hash(self.ip) 24 | 25 | 26 | class CrawlState(IntFlag): 27 | UNKNOWN = 0 28 | DISCOVERED = 1 29 | GEOLOCATED = 2 30 | ATTEMPTED_CONNECTION = DISCOVERED | 4 31 | CONNECTION_FAILED = ATTEMPTED_CONNECTION | 8 32 | CONNECTED = ATTEMPTED_CONNECTION | 16 33 | CONNECTION_RESET = CONNECTED | 32 34 | REQUESTED_NEIGHBORS = CONNECTED | 64 35 | GOT_NEIGHBORS = REQUESTED_NEIGHBORS | 128 36 | REQUESTED_VERSION = CONNECTED | 256 37 | GOT_VERSION = REQUESTED_VERSION | 512 38 | 39 | 40 | class CrawledNode(Model["CrawlDatabase"]): 41 | ip: IPv6Address 42 | port: int 43 | is_miner: Miner 44 | state: CrawlState 45 | source: str 46 | 47 | def __hash__(self): 48 | return hash((self.ip, self.port)) 49 | 50 | def get_events(self) -> Cursor["CrawlEvent"]: 51 | return self.db.events.select( 52 | node=self.rowid, order_by="timestamp", order_direction="DESC" 53 | ) 54 | 55 | def get_version(self) -> Optional[Version]: 56 | for version_event in self.db.events.select( 57 | node=self.rowid, 58 | order_by="timestamp", 59 | order_direction="DESC", 60 | limit=1, 61 | event="version", 62 | ): 63 | return Version(version_event.description, version_event.timestamp) 64 | return None 65 | 66 | def get_location(self) -> Optional[Geolocation]: 67 | return self.db.locations.select( 68 | ip=self.ip, order_by="timestamp DESC", limit=1 69 | ).fetchone() 70 | 71 | def last_crawled(self) -> Optional[DateTime]: 72 | max_edge = Cursor( 73 | self.db.edges, 74 | "SELECT a.* FROM edges a LEFT OUTER JOIN edges b ON 
a.from_node = b.from_node AND a.timestamp < b.timestamp "
75 |             "WHERE b.rowid is NULL AND a.from_node = ? LIMIT 1",
76 |             (self.rowid,),
77 |         ).fetchone()
78 |         if max_edge is None:
79 |             return None
80 |         return max_edge.timestamp
81 | 
82 |     def get_latest_edges(self) -> Set["CrawledNode"]:
83 |         return {
84 |             edge.to_node.row
85 |             for edge in Cursor(
86 |                 self.db.edges,
87 |                 "SELECT a.* FROM edges a LEFT OUTER JOIN edges b ON a.from_node = b.from_node AND a.timestamp < b.timestamp "
88 |                 "WHERE b.rowid is NULL AND a.from_node = ?",
89 |                 (self.rowid,),
90 |             )
91 |         }
92 | 
93 |     def out_degree(self) -> int:
94 |         cur = self.db.con.cursor()
95 |         try:
96 |             result = cur.execute(
97 |                 "SELECT count(*) FROM edges a "
98 |                 "LEFT OUTER JOIN edges b ON a.from_node = b.from_node AND a.timestamp < b.timestamp "
99 |                 "WHERE b.rowid is NULL AND a.from_node = ?",
100 |                 (self.rowid,),
101 |             )
102 |             return result.fetchone()[0]
103 |         finally:
104 |             cur.close()
105 | 
106 | 
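# Editor's note (added commentary; not part of the original module): the three
# queries above all use SQL's "greatest-per-group" anti-join idiom: each edge
# row `a` is left-joined to any row `b` for the same from_node with a strictly
# later timestamp, so the rows for which no such `b` exists (b.rowid IS NULL)
# are exactly the edges recorded by the most recent crawl of that node. As a
# standalone sketch:
#
#     SELECT a.* FROM edges a
#     LEFT OUTER JOIN edges b
#         ON a.from_node = b.from_node AND a.timestamp < b.timestamp
#     WHERE b.rowid IS NULL AND a.from_node = ?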
Callable[ 202 | [Union[str, IPv4Address, IPv6AddressPython], int], N 203 | ] = constructor 204 | self.db: CrawlDatabase = db 205 | 206 | def __contains__(self, node: N) -> bool: 207 | return self.db.nodes.select(ip=node.ip, port=node.port).fetchone() is not None 208 | 209 | def __getitem__(self, node: N) -> CrawledNode: 210 | try: 211 | return next(iter(self.db.nodes.select(ip=node.address, port=node.port))) 212 | except StopIteration: 213 | pass 214 | raise KeyError(node) 215 | 216 | def commit(self): 217 | self.db.con.commit() 218 | 219 | def get_node(self, node: N) -> CrawledNode: 220 | try: 221 | return self[node] 222 | except KeyError: 223 | # this is a new node 224 | pass 225 | ret = CrawledNode(ip=node.address, port=node.port, source=node.source) 226 | self.db.nodes.append(ret) 227 | return ret 228 | 229 | def update_node(self, node: CrawledNode): 230 | with self.db: 231 | self.db.nodes.update(node) 232 | 233 | def add_event( 234 | self, 235 | node: CrawledNode, 236 | event: str, 237 | description: str, 238 | timestamp: Optional[DateTime] = None, 239 | ): 240 | with self.db: 241 | if timestamp is None: 242 | timestamp = DateTime() 243 | self.db.events.append( 244 | CrawlEvent( 245 | node=node.rowid, 246 | event=event, 247 | description=description, 248 | timestamp=timestamp, 249 | ) 250 | ) 251 | 252 | def get_neighbors(self, node: N) -> FrozenSet[N]: 253 | return frozenset( 254 | { 255 | self.constructor(neighbor.ip, neighbor.port) 256 | for neighbor in self.get_node(node).get_latest_edges() 257 | } 258 | ) 259 | 260 | def set_neighbors(self, node: N, neighbors: FrozenSet[N]): 261 | with self.db: 262 | crawled_node = self.get_node(node) 263 | timestamp = DateTime() 264 | self.db.edges.extend( 265 | [ 266 | Edge( 267 | from_node=crawled_node, 268 | to_node=self.get_node(neighbor), 269 | timestamp=timestamp, 270 | ) 271 | for neighbor in neighbors 272 | ] 273 | ) 274 | self.add_state(node, CrawlState.GOT_NEIGHBORS) 275 | for neighbor in neighbors: 276 | # Make sure we record that we discovered the neighbor 277 | _ = self.get_node(neighbor) 278 | # (simply getting the node for the neighbor will ensure that its state's "discovered" flag is set) 279 | 280 | def set_location(self, ip: IPv6Address, location: Geolocation): 281 | with self.db: 282 | self.db.locations.append(location) 283 | 284 | def set_miner(self, node: N, miner: Miner): 285 | with self.db: 286 | crawled_node = self.get_node(node) 287 | crawled_node.is_miner = miner 288 | self.db.nodes.update(crawled_node) 289 | 290 | def set_host_info(self, host_info: HostInfo): 291 | with self.db: 292 | self.db.hosts.append(host_info) 293 | 294 | def add_state(self, node: Union[N, CrawledNode], state: CrawlState): 295 | with self.db: 296 | if isinstance(node, CrawledNode): 297 | crawled_node = node 298 | else: 299 | crawled_node = self.get_node(node) 300 | if crawled_node.state & state != state: 301 | crawled_node.state = crawled_node.state | state 302 | self.db.nodes.update(crawled_node) 303 | 304 | def __len__(self) -> int: 305 | return len(self.db.nodes) 306 | -------------------------------------------------------------------------------- /fluxture/crawler.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import errno 3 | import resource 4 | import sys 5 | import traceback 6 | from abc import ABCMeta 7 | from argparse import ArgumentParser, Namespace 8 | from asyncio import Future, ensure_future 9 | from collections import deque 10 | from inspect import isabstract 11 | from 
typing import (Any, AsyncIterator, Coroutine, Deque, Dict, FrozenSet, 12 | Generic, Iterable, List, Optional, Union) 13 | 14 | from geoip2.errors import AddressNotFoundError 15 | from tqdm import tqdm 16 | 17 | from .blockchain import BLOCKCHAINS, Blockchain, BlockchainError, Miner, Node 18 | from .crawl_schema import (Crawl, CrawlDatabase, CrawlState, DatabaseCrawl, 19 | DateTime, N) 20 | from .fluxture import Command 21 | from .geolocation import (GeoIP2Error, GeoIP2Locator, Geolocator, 22 | download_maxmind_db) 23 | 24 | CRAWL_LISTENERS: List["CrawlListener"] = [] 25 | 26 | 27 | class CrawlListener: 28 | has_on_crawl_node: bool = False 29 | has_on_miner: bool = False 30 | has_on_complete: bool = False 31 | 32 | async def on_crawl_node(self, crawler: "Crawler", node: Node): 33 | pass 34 | 35 | async def on_miner(self, crawler: "Crawler", node: Node, miner: Miner): 36 | pass 37 | 38 | async def on_complete(self, crawler: "Crawler"): 39 | pass 40 | 41 | def __init_subclass__(cls, **kwargs): 42 | if not isabstract(cls): 43 | for func in dir(cls): 44 | if func.startswith("on_") and hasattr(CrawlListener, func): 45 | setattr( 46 | cls, 47 | f"has_{func}", 48 | getattr(cls, func) != getattr(CrawlListener, func), 49 | ) 50 | CRAWL_LISTENERS.append(cls()) 51 | 52 | 53 | class MinerTask(CrawlListener): 54 | async def on_crawl_node(self, crawler: "Crawler", node: Node): 55 | is_miner = await crawler.blockchain.is_miner(node) 56 | crawler.crawl.set_miner(node, is_miner) 57 | crawler.add_tasks( 58 | *( 59 | listener.on_miner(crawler, node, is_miner) 60 | for listener in CRAWL_LISTENERS 61 | if listener.has_on_miner 62 | ) 63 | ) 64 | if is_miner == Miner.MINER: 65 | print(f"Node {node} is a miner") 66 | elif is_miner == Miner.NOT_MINER: 67 | print(f"Node {node} is not a miner") 68 | 69 | 70 | class Crawler(Generic[N], metaclass=ABCMeta): 71 | def __init__( 72 | self, 73 | blockchain: Blockchain[N], 74 | crawl: Crawl[N], 75 | geolocator: Optional[Geolocator] = None, 76 | max_connections: Optional[int] = None, 77 | ): 78 | self.blockchain: Blockchain[N] = blockchain 79 | self.crawl: Crawl[N] = crawl 80 | self.geolocator: Optional[Geolocator] = geolocator 81 | self.nodes: Dict[N, N] = {} 82 | if max_connections is None: 83 | max_connections = resource.getrlimit(resource.RLIMIT_NOFILE)[0] // 3 * 2 84 | max_connections = max(max_connections, 1) 85 | self.max_connections: int = max_connections 86 | self.listener_tasks: List[Future] = [] 87 | 88 | async def _crawl_node(self, node: N) -> FrozenSet[N]: 89 | crawled_node = self.crawl.get_node(node) 90 | if ( 91 | self.geolocator is not None 92 | and crawled_node.state & CrawlState.GEOLOCATED != CrawlState.GEOLOCATED 93 | ): 94 | try: 95 | self.crawl.set_location( 96 | node.address, self.geolocator.locate(node.address) 97 | ) 98 | self.crawl.add_state(crawled_node, CrawlState.GEOLOCATED) 99 | except AddressNotFoundError: 100 | pass 101 | if ( 102 | crawled_node.state & CrawlState.ATTEMPTED_CONNECTION 103 | == CrawlState.ATTEMPTED_CONNECTION 104 | ): 105 | raise ValueError(f"Node {node} was already crawled!") 106 | self.crawl.add_state(crawled_node, CrawlState.ATTEMPTED_CONNECTION) 107 | try: 108 | async with node: 109 | self.crawl.add_state(crawled_node, CrawlState.CONNECTED) 110 | neighbors = [] 111 | new_neighbors = set() 112 | self.crawl.add_state(crawled_node, CrawlState.REQUESTED_NEIGHBORS) 113 | for neighbor in await self.blockchain.get_neighbors(node): 114 | if neighbor in self.nodes: 115 | # we have already seen this node 116 | 
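# (`self.nodes` maps every node to its canonical instance: a peer reported
# by several different neighbors is represented by a single object, and the
# lookup below swaps the freshly parsed neighbor for the instance that is
# already being tracked)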
neighbors.append(self.nodes[neighbor]) 117 | else: 118 | self.nodes[neighbor] = neighbor 119 | neighbors.append(neighbor) 120 | new_neighbors.add(neighbor) 121 | self.crawl.set_neighbors(node, frozenset(neighbors)) 122 | self.crawl.add_state( 123 | crawled_node, 124 | CrawlState.GOT_NEIGHBORS | CrawlState.REQUESTED_VERSION, 125 | ) 126 | version = await self.blockchain.get_version(node) 127 | if version is not None: 128 | self.crawl.add_state(crawled_node, CrawlState.GOT_VERSION) 129 | crawled_node = self.crawl.get_node(node) 130 | self.crawl.add_event( 131 | crawled_node, 132 | event="version", 133 | description=version.version, 134 | timestamp=DateTime(version.timestamp), 135 | ) 136 | return frozenset(new_neighbors) 137 | except BrokenPipeError: 138 | self.crawl.add_state(crawled_node, CrawlState.CONNECTION_RESET) 139 | raise 140 | except OSError as e: 141 | if e.errno in ( 142 | errno.ETIMEDOUT, 143 | errno.ECONNREFUSED, 144 | errno.EHOSTDOWN, 145 | errno.EHOSTUNREACH, 146 | ): 147 | # Connection failed 148 | self.crawl.add_state(crawled_node, CrawlState.CONNECTION_FAILED) 149 | else: 150 | # Something happened after we connected (e.g., connection reset by peer) 151 | self.crawl.add_state(crawled_node, CrawlState.CONNECTION_RESET) 152 | raise 153 | finally: 154 | await node.close() 155 | 156 | def add_tasks(self, *tasks: Union[Future, Coroutine[Any, Any, None]]): 157 | for task in tasks: 158 | if isinstance(task, Coroutine): 159 | self.listener_tasks.append(ensure_future(task)) 160 | else: 161 | self.listener_tasks.append(task) 162 | 163 | async def _check_miner(self, node: N): 164 | is_miner = await self.blockchain.is_miner(node) 165 | self.crawl.set_miner(node, is_miner) 166 | return node, is_miner 167 | 168 | async def _crawl(self, seeds: Optional[Iterable[N]] = None): 169 | if seeds is None: 170 | seed_iter: Optional[ 171 | AsyncIterator[N] 172 | ] = await self.blockchain.default_seeds() 173 | queue: Deque[N] = deque() 174 | futures: List[Future] = [ensure_future(seed_iter.__anext__())] 175 | num_seeds = 0 176 | else: 177 | seed_iter = None 178 | queue = deque(seeds) 179 | futures: List[Future] = [] 180 | num_seeds = len(seeds) 181 | num_connected_to = 0 182 | while futures or queue or self.listener_tasks: 183 | print( 184 | f"Discovered {len(self.nodes)} nodes ({num_seeds} seeds); crawled {num_connected_to}; " 185 | f"crawling {len(futures)}; waiting to crawl {len(queue)}..." 
186 | ) 187 | if futures: 188 | waiting_on = futures 189 | done, pending = await asyncio.wait( 190 | waiting_on, return_when=asyncio.FIRST_COMPLETED 191 | ) 192 | futures = list(pending) 193 | for result in await asyncio.gather(*done, return_exceptions=True): 194 | # iterate over all of the new neighbors of the node 195 | if isinstance(result, StopAsyncIteration) and seed_iter is not None: 196 | seed_iter = None 197 | elif isinstance(result, Exception): 198 | # TODO: Save the exception to the database 199 | # self.crawl.add_event(node, event="Exception", description=str(result)) 200 | if isinstance( 201 | result, 202 | ( 203 | ConnectionError, 204 | OSError, 205 | BrokenPipeError, 206 | BlockchainError, 207 | ), 208 | ): 209 | print(str(result)) 210 | else: 211 | traceback.print_tb(result.__traceback__) 212 | print(result) 213 | elif seed_iter is not None and isinstance(result, Node): 214 | # This is a seed 215 | crawled_node = self.crawl.get_node(result) 216 | if crawled_node.source != result.source: 217 | # this means we already organically encountered this node from another peer 218 | # so update its source to be the seed 219 | crawled_node.source = result.source 220 | self.crawl.update_node(crawled_node) 221 | self.crawl.add_state(crawled_node, CrawlState.DISCOVERED) 222 | # Check if we have already encountered this node 223 | queue.append(result) 224 | num_seeds += 1 225 | futures.append(ensure_future(seed_iter.__anext__())) 226 | else: 227 | num_connected_to += 1 228 | queue.extend(result) 229 | if self.listener_tasks: 230 | waiting_on = self.listener_tasks 231 | done, pending = await asyncio.wait( 232 | waiting_on, return_when=asyncio.FIRST_COMPLETED, timeout=0.5 233 | ) 234 | for result in await asyncio.gather(*done, return_exceptions=True): 235 | if isinstance(result, Exception): 236 | # TODO: Save the exception to the database 237 | # self.crawl.add_event(node, event="Exception", description=str(result)) 238 | traceback.print_tb(result.__traceback__) 239 | print(result) 240 | self.listener_tasks = list(pending) 241 | new_nodes_to_crawl = min(self.max_connections - len(futures), len(queue)) 242 | if new_nodes_to_crawl: 243 | nodes_to_crawl = [] 244 | for i in range(new_nodes_to_crawl): 245 | node = queue.popleft() 246 | if node in self.nodes: 247 | nodes_to_crawl.append(self.nodes[node]) 248 | else: 249 | nodes_to_crawl.append(node) 250 | self.nodes[node] = node 251 | futures.extend( 252 | ensure_future(self._crawl_node(node)) for node in nodes_to_crawl 253 | ) 254 | self.add_tasks( 255 | *( 256 | listener.on_crawl_node(crawler=self, node=node) 257 | for node in nodes_to_crawl 258 | for listener in CRAWL_LISTENERS 259 | if listener.has_on_crawl_node 260 | ) 261 | ) 262 | self.crawl.commit() 263 | 264 | for miner in await self.blockchain.get_miners(): 265 | self.crawl.set_miner(miner, Miner.MINER) 266 | 267 | for node in self.nodes.values(): 268 | if node.is_running: 269 | node.terminate() 270 | await node.join() 271 | 272 | self.add_tasks( 273 | *( 274 | listener.on_complete(crawler=self) 275 | for listener in CRAWL_LISTENERS 276 | if listener.has_on_complete 277 | ) 278 | ) 279 | 280 | # wait for the on_complete tasks to finish: 281 | while self.listener_tasks: 282 | waiting_on = self.listener_tasks 283 | done, pending = await asyncio.wait( 284 | waiting_on, return_when=asyncio.FIRST_COMPLETED, timeout=0.5 285 | ) 286 | for result in await asyncio.gather(*done, return_exceptions=True): 287 | if isinstance(result, Exception): 288 | # TODO: Save the exception to the database 289 | 
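# (one possible approach, sketched rather than implemented here: persist the
# failure as a crawl event so it can be queried later, along the lines of the
# commented-out call below; doing so would require resolving which node
# produced the failed future, which is not currently threaded through)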
# self.crawl.add_event(node, event="Exception", description=str(result)) 290 | traceback.print_tb(result.__traceback__) 291 | print(result) 292 | self.listener_tasks = list(pending) 293 | 294 | def do_crawl(self, seeds: Optional[Iterable[N]] = None): 295 | asyncio.run(self._crawl(seeds)) 296 | 297 | def crawl_node(self, node: N) -> FrozenSet[N]: 298 | """Return the neighbors for a single node""" 299 | return asyncio.run(self._crawl_node(node)) 300 | 301 | 302 | CITY_DB_PARSER: ArgumentParser = ArgumentParser(add_help=False) 303 | 304 | CITY_DB_PARSER.add_argument( 305 | "--city-db-path", 306 | "-c", 307 | type=str, 308 | default=None, 309 | help="path to a MaxMind GeoLite2 City database (default is " 310 | "`~/.config/fluxture/geolite2/GeoLite2-City.mmdb`); " 311 | "if omitted and `--maxmind-license-key` is provided, the latest database will be " 312 | "downloaded and saved to the default location; " 313 | "if both options are omitted, then geolocation will not be performed", 314 | ) 315 | CITY_DB_PARSER.add_argument( 316 | "--maxmind-license-key", 317 | type=str, 318 | default=None, 319 | help="License key for automatically downloading a GeoLite2 City database; you can generate " 320 | "a free license key by registering at https://www.maxmind.com/en/geolite2/signup", 321 | ) 322 | 323 | 324 | class UpdateMaxmindDBCommand(Command): 325 | name = "update-geo-db" 326 | help = "download the latest MaxMind GeoLite2 database" 327 | parent_parsers = (CITY_DB_PARSER,) 328 | 329 | def run(self, args: Namespace): 330 | if args.maxmind_license_key is None: 331 | sys.stderr.write("Error: --maxmind-license-key must be provided\n\n") 332 | sys.exit(1) 333 | save_path = download_maxmind_db(args.maxmind_license_key, args.city_db_path) 334 | print(f"Geolocation database saved to {save_path}") 335 | 336 | 337 | class NodeCommand(Command): 338 | name = "node" 339 | help = "connect to and interrogate a specific node" 340 | 341 | def __init_arguments__(self, parser: ArgumentParser): 342 | parser.add_argument( 343 | "BLOCKCHAIN_NAME", 344 | type=str, 345 | help="the name of the blockchain to crawl", 346 | choices=BLOCKCHAINS.keys(), 347 | ) 348 | parser.add_argument( 349 | "IP_ADDRESS", type=str, help="IP address of the node to interrogate" 350 | ) 351 | 352 | def run(self, args: Namespace): 353 | blockchain_type = BLOCKCHAINS[args.BLOCKCHAIN_NAME] 354 | with CrawlDatabase() as db: 355 | for neighbor in sorted( 356 | str(n.address) 357 | for n in Crawler( 358 | blockchain=blockchain_type(), 359 | crawl=DatabaseCrawl(blockchain_type.node_type, db), 360 | ).crawl_node(blockchain_type.node_type(args.IP_ADDRESS)) 361 | ): 362 | print(neighbor) 363 | 364 | 365 | class GeolocateCommand(Command): 366 | name = "geolocate" 367 | help = "re-run geolocation for already crawled nodes (e.g., after a call to the `update-geo-db` command)" 368 | parent_parsers = (CITY_DB_PARSER,) 369 | 370 | def __init_arguments__(self, parser: ArgumentParser): 371 | parser.add_argument( 372 | "CRAWL_DATABASE", type=str, help="path to the crawl database to update" 373 | ) 374 | parser.add_argument( 375 | "--process-all", 376 | "-a", 377 | action="store_true", 378 | help="by default, this command only geolocates " 379 | "nodes that do not already have a " 380 | "location; this option will re-process " 381 | "all nodes", 382 | ) 383 | 384 | def run(self, args: Namespace): 385 | geo = GeoIP2Locator(args.city_db_path, args.maxmind_license_key) 386 | 387 | with CrawlDatabase(args.CRAWL_DATABASE) as db: 388 | added = 0 389 | updated = 0 390 | with 
tqdm(db.nodes, leave=False, desc="geolocating", unit=" nodes") as t: 391 | for node in t: 392 | old_location = node.get_location() 393 | was_none = old_location is None 394 | if not args.process_all and not was_none: 395 | continue 396 | try: 397 | new_location = geo.locate(node.ip) 398 | except AddressNotFoundError: 399 | continue 400 | if new_location is not None: 401 | if was_none: 402 | db.locations.append(new_location) 403 | added += 1 404 | elif any( 405 | a != b 406 | for (field_name_a, a), (field_name_b, b) in zip( 407 | new_location.items(), old_location.items() 408 | ) 409 | if ( 410 | field_name_a != "rowid" 411 | and field_name_b != "rowid" 412 | and field_name_a != "timestamp" 413 | and field_name_b != "timestamp" 414 | ) 415 | ): 416 | # the location was updated 417 | new_location.rowid = old_location.rowid 418 | new_location.db = db 419 | db.locations.update(new_location) 420 | updated += 1 421 | else: 422 | continue 423 | t.desc = f"geolocating ({added} added, {updated} updated)" 424 | print(f"Added {added} new locations and updated {updated} existing ones") 425 | 426 | 427 | class CrawlCommand(Command): 428 | name = "crawl" 429 | help = "crawl a blockchain" 430 | parent_parsers = (CITY_DB_PARSER,) 431 | 432 | def __init_arguments__(self, parser: ArgumentParser): 433 | parser.add_argument( 434 | "--database", 435 | "-db", 436 | type=str, 437 | default=":memory:", 438 | help="path to the crawl database (default is to run in memory)", 439 | ) 440 | max_file_descriptors, _ = resource.getrlimit(resource.RLIMIT_NOFILE) 441 | parser.add_argument( 442 | "--max-connections", 443 | "-m", 444 | type=int, 445 | default=None, 446 | help="the maximum number of connections to open at once during the crawl, capped at " 447 | f"⅔ of `ulimit -n` = {max(max_file_descriptors // 3 * 2, 1)} (default is to use the " 448 | "maximum possible)", 449 | ) 450 | parser.add_argument( 451 | "BLOCKCHAIN_NAME", 452 | type=str, 453 | help="the name of the blockchain to crawl", 454 | choices=BLOCKCHAINS.keys(), 455 | ) 456 | 457 | def run(self, args: Namespace): 458 | try: 459 | geo = GeoIP2Locator(args.city_db_path, args.maxmind_license_key) 460 | except GeoIP2Error as e: 461 | sys.stderr.write(f"Warning: {e}\nCrawl IPs will not be geolocated!\n") 462 | geo = None 463 | 464 | if args.database == ":memory:": 465 | sys.stderr.write( 466 | "Warning: Using an in-memory crawl database. Results will not be saved!\n" 467 | "Run with `--database` to set a path for the database to be saved.\n" 468 | ) 469 | 470 | blockchain_type = BLOCKCHAINS[args.BLOCKCHAIN_NAME] 471 | 472 | if args.max_connections is None: 473 | max_file_handles, _ = resource.getrlimit(resource.RLIMIT_NOFILE) 474 | if sys.stderr.isatty() and sys.stdin.isatty(): 475 | if max_file_handles < 1024: 476 | while True: 477 | sys.stderr.write( 478 | f"`ulimit -n` is {max_file_handles}, which is low and will cause the crawl to " 479 | "be very slow.\nWould you like to increase this value to 32768? 
[Yn] " 480 | ) 481 | choice = input("") 482 | if choice.lower() == "y" or len(choice.strip()) == 0: 483 | resource.setrlimit( 484 | resource.RLIMIT_NOFILE, (32768, resource.RLIM_INFINITY) 485 | ) 486 | max_file_handles, _ = resource.getrlimit( 487 | resource.RLIMIT_NOFILE 488 | ) 489 | break 490 | elif choice.lower() == "n": 491 | break 492 | max_connections = max(max_file_handles // 3 * 2, 1) 493 | else: 494 | max_connections = args.max_connections 495 | 496 | def crawl(): 497 | with CrawlDatabase(args.database) as db: 498 | Crawler( 499 | blockchain=blockchain_type(), 500 | crawl=DatabaseCrawl(blockchain_type.node_type, db), 501 | geolocator=geo, 502 | max_connections=max_connections, 503 | ).do_crawl() 504 | 505 | if geo is None: 506 | crawl() 507 | else: 508 | with geo: 509 | crawl() 510 | -------------------------------------------------------------------------------- /fluxture/db.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | from typing import (Any, Dict, Generic, Iterable, Iterator, List, Optional, 3 | OrderedDict, Tuple, Type, TypeVar, Union, cast) 4 | 5 | from .serialization import Packable 6 | from .structures import Struct, StructMeta 7 | 8 | FieldType = Union[bool, int, float, str, bytes, Packable, "ForeignKey"] 9 | 10 | T = TypeVar("T", bound=FieldType) 11 | D = TypeVar("D") 12 | 13 | 14 | class AutoIncrement(int): 15 | initialized: bool = False 16 | 17 | def __new__(cls, *args): 18 | if args and ( 19 | len(args) > 1 20 | or not isinstance(args[0], AutoIncrement) 21 | or args[0].initialized 22 | ): 23 | retval = int.__new__(cls, *args) 24 | setattr(retval, "initialized", True) 25 | else: 26 | retval = int.__new__(cls, 0) 27 | setattr(retval, "initialized", False) 28 | return retval 29 | 30 | def __repr__(self): 31 | if self.initialized: 32 | return f"{self.__class__.__name__}({int(self)})" 33 | else: 34 | return f"{self.__class__.__name__}()" 35 | 36 | 37 | class RowId(int): 38 | initialized: bool = False 39 | 40 | def __new__(cls, *args): 41 | if args and ( 42 | len(args) > 1 or not isinstance(args[0], RowId) or args[0].initialized 43 | ): 44 | retval = int.__new__(cls, *args) 45 | setattr(retval, "initialized", True) 46 | else: 47 | retval = int.__new__(cls, 0) 48 | setattr(retval, "initialized", False) 49 | return retval 50 | 51 | def __repr__(self): 52 | if self.initialized: 53 | return f"{self.__class__.__name__}({int(self)})" 54 | else: 55 | return f"{self.__class__.__name__}()" 56 | 57 | def __eq__(self, other): 58 | return isinstance(other, RowId) and ( 59 | not self.initialized or not other.initialized or int(self) == int(other) 60 | ) 61 | 62 | 63 | class ColumnOptions: 64 | def __init__( 65 | self, 66 | primary_key: Optional[bool] = None, 67 | unique: Optional[bool] = None, 68 | not_null: Optional[bool] = None, 69 | default: Optional[FieldType] = None, 70 | auto_increment: Optional[bool] = None, 71 | ): 72 | self.primary_key: Optional[bool] = primary_key 73 | self.unique: Optional[bool] = unique 74 | self.not_null: Optional[bool] = not_null 75 | self.default: Optional[FieldType] = default 76 | self.auto_increment: Optional[bool] = auto_increment 77 | if self.auto_increment and not self.default: 78 | self.default = AutoIncrement() 79 | 80 | def is_set(self, option_name: str): 81 | a = getattr(self, option_name) 82 | return a is not None and not callable(a) 83 | 84 | def set_options(self) -> Iterator[str]: 85 | return iter( 86 | key_name 87 | for key_name in dir(self) 88 | if not key_name.startswith("_") 
and self.is_set(key_name) 89 | ) 90 | 91 | def items(self) -> Iterator[Tuple[str, Any]]: 92 | return iter( 93 | (key_name, getattr(self, key_name)) for key_name in self.set_options() 94 | ) 95 | 96 | def __or__(self, other: "ColumnOptions") -> "ColumnOptions": 97 | new_options = ColumnOptions(**dict(other.items())) 98 | for key_name, value in self.items(): 99 | if not other.is_set(key_name): 100 | setattr(new_options, key_name, value) 101 | return new_options 102 | 103 | def __sub__(self, other: "ColumnOptions") -> "ColumnOptions": 104 | return ColumnOptions( 105 | **{ 106 | key_name: value 107 | for key_name, value in self.items() 108 | if not other.is_set(key_name) 109 | } 110 | ) 111 | 112 | def type_suffix(self) -> str: 113 | return "".join( 114 | [ 115 | f"{''.join(key.capitalize() for key in key_name.split('_'))}" 116 | f"{''.join(val.capitalize() for val in str(value).replace('(', '').replace(')', '').split(' '))}" 117 | for key_name, value in self.items() 118 | ] 119 | ) 120 | 121 | def sql_modifiers(self) -> str: 122 | modifiers = [] 123 | if self.primary_key: 124 | modifiers.append("PRIMARY KEY") 125 | if self.auto_increment: 126 | modifiers.append("AUTOINCREMENT") 127 | if self.unique: 128 | modifiers.append("UNIQUE") 129 | if self.not_null: 130 | modifiers.append("NOT NULL") 131 | if self.default is not None and not isinstance(self.default, AutoIncrement): 132 | modifiers.append(f"DEFAULT {self.default}") 133 | return " ".join(modifiers) 134 | 135 | def __repr__(self): 136 | args = [f"{key}={value!r}" for key, value in self.items()] 137 | return f"{self.__class__.__name__}({', '.join(args)})" 138 | 139 | 140 | def column_options(ty: Type[T], options: ColumnOptions) -> Type[T]: 141 | if hasattr(ty, "column_options") and ty.column_options is not None: 142 | options = ty.column_options | options 143 | type_suffix = (options - ty.column_options).type_suffix() 144 | else: 145 | type_suffix = options.type_suffix() 146 | return cast( 147 | Type[T], type(f"{ty.__name__}{type_suffix}", (ty,), {"column_options": options}) 148 | ) 149 | 150 | 151 | def primary_key(ty: Type[T]) -> Type[T]: 152 | return column_options(ty, ColumnOptions(primary_key=True)) 153 | 154 | 155 | def unique(ty: Type[T]) -> Type[T]: 156 | return column_options(ty, ColumnOptions(unique=True)) 157 | 158 | 159 | def not_null(ty: Type[T]) -> Type[T]: 160 | return column_options(ty, ColumnOptions(not_null=True)) 161 | 162 | 163 | def default(ty: Type[T], default_value: FieldType) -> Type[T]: 164 | return column_options(ty, ColumnOptions(default=default_value)) 165 | 166 | 167 | COLUMN_TYPES: List[Type[Any]] = [int, str, bytes, float, Packable] 168 | 169 | 170 | S = TypeVar("S") 171 | D = TypeVar("D", bound="Database") 172 | 173 | 174 | class Model(Struct[FieldType], Generic[D]): 175 | non_serialized = "primary_key_name", "_db", "rowid" 176 | primary_key_name: str = "rowid" 177 | _db: Optional[D] = None 178 | rowid: RowId 179 | 180 | @staticmethod 181 | def is_primary_key(cls) -> bool: 182 | return ( 183 | hasattr(cls, "column_options") 184 | and cls.column_options is not None 185 | and cls.column_options.primary_key 186 | ) 187 | 188 | @classmethod 189 | def validate_fields(cls, fields: OrderedDict[str, Type[FieldType]]): 190 | primary_name = None 191 | for field_name, field_type in fields.items(): 192 | if not issubclass(field_type, ForeignKey): 193 | for valid_type in COLUMN_TYPES: 194 | if issubclass(field_type, valid_type): 195 | break 196 | else: 197 | raise TypeError( 198 | f"Database field {field_name} of 
{cls.__name__} is type {field_type!r}, but " 199 | f"must be one of {COLUMN_TYPES!r}" 200 | ) 201 | if Model.is_primary_key(field_type): 202 | if primary_name is not None: 203 | raise TypeError( 204 | f"A model can have at most one primary key, but both {primary_name} and " 205 | f"{field_name} were specified in {cls.__name__}" 206 | ) 207 | primary_name = field_name 208 | if primary_name is not None: 209 | cls.primary_key_name = primary_name 210 | if "rowid" not in fields: 211 | fields["rowid"] = default(RowId, RowId()) 212 | 213 | @property 214 | def in_db(self) -> bool: 215 | return self.rowid.initialized and self._db is not None 216 | 217 | def uninitialized_auto_increments(self) -> Iterator[Tuple[str, AutoIncrement]]: 218 | for key in self.keys(): 219 | value = getattr(self, key) 220 | if isinstance(value, AutoIncrement) and not value.initialized: 221 | yield key, value 222 | 223 | @property 224 | def db(self) -> D: 225 | if self._db is None: 226 | raise ValueError(f"Model {self!r} has not yet been added to a database") 227 | return self._db 228 | 229 | @db.setter 230 | def db(self, db: D): 231 | if self._db is not None: 232 | if self._db != db: 233 | raise ValueError( 234 | f"Model {self!r} is already associated with a different database: {db!r}" 235 | ) 236 | else: 237 | self._db = db 238 | 239 | def to_row(self) -> Iterator[FieldType]: 240 | for key in self.keys(): 241 | value = getattr(self, key) 242 | if ( 243 | isinstance(value, AutoIncrement) or isinstance(value, RowId) 244 | ) and not value.initialized: 245 | yield None 246 | else: 247 | yield getattr(self, key) 248 | 249 | 250 | M = TypeVar("M", bound=Model) 251 | 252 | 253 | def sql_format( 254 | param: Optional[FieldType], expected_type: Optional[Type[FieldType]] = None 255 | ) -> Optional[Union[str, bytes, int, float]]: 256 | if param is None: 257 | if ( 258 | expected_type is not None 259 | and hasattr(expected_type, "column_options") 260 | and expected_type.column_options is not None 261 | and expected_type.column_options.not_null 262 | ): 263 | raise ValueError(f"Field {expected_type!r} cannot be NULL") 264 | return None 265 | elif isinstance(param, Model) and expected_type is not None: 266 | if not issubclass(expected_type, ForeignKey): 267 | raise ValueError( 268 | f"Model {param!r} was expected to be of type {expected_type!r}" 269 | ) 270 | return getattr(param, expected_type.key) 271 | elif isinstance(param, int): 272 | return int(param) 273 | elif isinstance(param, float): 274 | return float(param) 275 | elif isinstance(param, str): 276 | return str(param) 277 | elif isinstance(param, bytes): 278 | return bytes(param) 279 | elif isinstance(param, ForeignKey): 280 | return sql_format(param.key, expected_type) 281 | elif isinstance(param, Packable): 282 | return param.pack() 283 | else: 284 | raise ValueError(f"Unsupported parameter type: {param!r}") 285 | 286 | 287 | class DatabaseConnection(sqlite3.Connection): 288 | def __init__(self, *args, rollback_on_exception: bool = False, **kwargs): 289 | super().__init__(*args, **kwargs) 290 | self.rollback_on_exception: bool = rollback_on_exception 291 | 292 | def execute(self, sql: str, *parameters: COLUMN_TYPES) -> sqlite3.Cursor: 293 | params = [sql_format(p) for p in parameters] 294 | try: 295 | return super().execute(sql, params) 296 | except sqlite3.Error as e: 297 | raise ValueError( 298 | f"Error executing SQL {sql!r} with parameters {params!r}: {e!r}" 299 | ) 300 | 301 | def __enter__(self) -> "DatabaseConnection": 302 | return self 303 | 304 | def __exit__(self, 
exc_type, exc_val, exc_tb): 305 | if exc_type is None or not self.rollback_on_exception: 306 | # no exception occurred 307 | self.commit() 308 | else: 309 | # an exception occurred 310 | self.rollback() 311 | 312 | 313 | class Cursor(Generic[M]): 314 | def __init__( 315 | self, 316 | table: "Table[M]", 317 | sql: str, 318 | params: Iterable[Union[str, int, float, bytes, Packable]] = (), 319 | ): 320 | self.table: Table[M] = table 321 | self.sql: str = sql 322 | self.params: List[Union[str, int, float, bytes]] = [] 323 | for i, p in enumerate(params): 324 | if ( 325 | isinstance(p, str) 326 | or isinstance(p, int) 327 | or isinstance(p, float) 328 | or isinstance(p, bytes) 329 | ): 330 | self.params.append(p) 331 | elif isinstance(p, Packable): 332 | self.params.append(p.pack()) 333 | else: 334 | raise ValueError( 335 | f"Unsupported SQL parameter #{i+1}, {p!r}, when running {sql!r}" 336 | ) 337 | self._item_iter: Optional[Iterator[M]] = None 338 | 339 | def __iter__(self) -> Iterator[M]: 340 | if self._item_iter is None: 341 | self._item_iter = self._iter() 342 | yield from self._item_iter 343 | 344 | def _iter(self) -> Iterator[M]: 345 | with self.table.db: 346 | cur = self.table.db.con.cursor() 347 | try: 348 | for row in cur.execute(self.sql, self.params): 349 | r = self.table.model_type(*row) 350 | r.db = self.table.db 351 | for field_name, field_type in self.table.model_type.FIELDS.items(): 352 | if issubclass(field_type, ForeignKey): 353 | getattr(r, field_name).table = self.table 354 | yield r 355 | finally: 356 | cur.close() 357 | 358 | def fetchone(self) -> Optional[M]: 359 | try: 360 | return next(iter(self)) 361 | except StopIteration: 362 | return None 363 | 364 | def fetchall(self) -> Iterator[M]: 365 | yield from iter(self) 366 | 367 | 368 | class Table(Generic[M]): 369 | model_type: Optional[Type[M]] = None 370 | 371 | def __init__(self, db: "Database", name: str): 372 | if self.model_type is None: 373 | raise TypeError( 374 | f"A Table must be instantiated by subclassing it with a model: `Table[ModelType]`" 375 | ) 376 | self.db: Database = db 377 | self.model_type: Type[M] = self.model_type 378 | self.name: str = name 379 | for field_type in self.model_type.FIELDS.values(): 380 | if isinstance(field_type, ForeignKey): 381 | field_type.table = self 382 | 383 | def __class_getitem__(cls, model_type: Type[M]) -> Type["Table[M]"]: 384 | if isinstance(model_type, TypeVar) or isinstance(model_type, str): 385 | return cls 386 | return cast( 387 | Type[Table[M]], 388 | type( 389 | f"{cls.__name__}{model_type.__name__}", 390 | (cls,), 391 | {"model_type": model_type}, 392 | ), 393 | ) 394 | 395 | def __iter__(self) -> Iterator[M]: 396 | yield from iter(self.select()) 397 | 398 | def select( 399 | self, 400 | distinct: bool = False, 401 | limit: Optional[int] = None, 402 | order_by: Optional[str] = None, 403 | order_direction: str = "ASC", 404 | **kwargs, 405 | ) -> Cursor[M]: 406 | params = [] 407 | where_clauses = [] 408 | for col_name, value in kwargs.items(): 409 | if not isinstance(value, AutoIncrement) or value.initialized: 410 | where_clauses.append(f"{col_name}=?") 411 | params.append(value) 412 | if where_clauses: 413 | clauses = [f"WHERE {' AND '.join(where_clauses)}"] 414 | else: 415 | clauses = [] 416 | if order_by is not None: 417 | clauses.append(f" ORDER BY ? 
{order_direction}") 418 | params.append(order_by) 419 | if limit is not None: 420 | clauses.append(" LIMIT ?") 421 | params.append(limit) 422 | clauses = "".join(clauses) 423 | if clauses: 424 | clauses = " " + clauses 425 | if distinct: 426 | distinct_clause = " DISTINCT" 427 | else: 428 | distinct_clause = "" 429 | return Cursor( 430 | self, f"SELECT{distinct_clause} *, rowid from {self.name}{clauses}", params 431 | ) 432 | 433 | def __len__(self): 434 | with self.db: 435 | cur = self.db.con.cursor() 436 | try: 437 | result = cur.execute(f"SELECT COUNT(*) from {self.name}") 438 | return result.fetchone()[0] 439 | finally: 440 | cur.close() 441 | 442 | def _finalize_added_row(self, row: M): 443 | to_update = [key for key, _ in row.uninitialized_auto_increments()] 444 | if to_update: 445 | try: 446 | obj_in_db = next(iter(self.select(**{"rowid": row.rowid}))) 447 | except StopIteration: 448 | raise ValueError( 449 | f"Row {row} was expected to be in the database in table {self.name}, but was not" 450 | ) 451 | for key in to_update: 452 | setattr(row, key, getattr(obj_in_db, key)) 453 | row.db = self.db 454 | 455 | def append(self, row: M): 456 | self.extend((row,)) 457 | 458 | def extend(self, rows: Iterable[M]): 459 | with self.db: 460 | rows = list(rows) 461 | if not rows: 462 | return 463 | cur = self.db.con.cursor() 464 | try: 465 | # we have to add each row individually so we can set its rowid 466 | for row in rows: 467 | if row.in_db: 468 | raise ValueError(f"Row {row!r} is already in the database!") 469 | result = cur.execute( 470 | f"INSERT INTO {self.name} ({','.join(self.model_type.FIELDS.keys())}) " 471 | f"VALUES ({','.join(['?']*len(self.model_type.FIELDS))})", 472 | tuple( 473 | sql_format(param, expected_type) 474 | for param, expected_type in zip( 475 | row.to_row(), self.model_type.FIELDS.values() 476 | ) 477 | ), 478 | ) 479 | setattr(row, "rowid", RowId(result.lastrowid)) 480 | self._finalize_added_row(row) 481 | finally: 482 | cur.close() 483 | 484 | def update(self, row: M): 485 | if not row.in_db: 486 | raise ValueError(f"Row {row!r} is not yet in the database!") 487 | with self.db: 488 | set_argument = ",".join( 489 | [ 490 | f"{field_name} = ?" 491 | for field_name in self.model_type.FIELDS.keys() 492 | if field_name != "rowid" 493 | ] 494 | ) 495 | new_values = tuple( 496 | sql_format(param, expected_type) 497 | for param, (field_name, expected_type) in zip( 498 | row.to_row(), self.model_type.FIELDS.items() 499 | ) 500 | if field_name != "rowid" 501 | ) 502 | cur = self.db.con.cursor() 503 | try: 504 | cur.execute( 505 | f"UPDATE {self.name} SET {set_argument} WHERE rowid=?", 506 | new_values + (int(row.rowid),), 507 | ) 508 | finally: 509 | cur.close() 510 | 511 | 512 | class ForeignKey(Generic[M]): 513 | row_type: Type[M] 514 | foreign_table_name: str 515 | foreign_col_name: str 516 | table: Optional[Table[M]] = None 517 | 518 | def __init__( 519 | self, key: Union[int, str, bytes, float, M], table: Optional[Table[M]] = None 520 | ): 521 | self._row: Optional[M] = None 522 | if table is not None: 523 | self.table = table 524 | if isinstance(key, Model): 525 | if not hasattr(self, "foreign_col_name"): 526 | raise ValueError( 527 | f"Foreign key {self!r} has not yet been assigned to a table!" 
528 | ) 529 | elif not isinstance(key, self.row_type): 530 | raise ValueError( 531 | f"Foreign key {self!r} was expected to be passed a value of type {self.row_type!r} " 532 | f"but was instead passed {key!r}" 533 | ) 534 | self.key: Union[int, str, bytes, float] = getattr( 535 | key, self.foreign_col_name 536 | ) 537 | else: 538 | self.key = key 539 | 540 | def __class_getitem__( 541 | cls, 542 | arguments: Union[ 543 | TypeVar, Tuple[str, Type[M]], Tuple[str, Type[M], str], Table[M] 544 | ], 545 | ) -> "ForeignKey[M]": 546 | if isinstance(arguments, TypeVar): 547 | return cls 548 | elif isinstance(arguments, Table): 549 | if not hasattr(cls, "foreign_table_name") or not cls.foreign_col_name: 550 | raise ValueError( 551 | "A table can only be passed to a ForeignKey that already has a `foreign_table_name`" 552 | ) 553 | return cast( 554 | ForeignKey[M], 555 | type( 556 | f"{cls.__name__}{arguments.model_type.__name__.capitalize()}" 557 | f"{cls.foreign_col_name.replace('_', '').capitalize()}", 558 | (cls,), 559 | {"table": arguments}, 560 | ), 561 | ) 562 | else: 563 | if ( 564 | not isinstance(arguments, tuple) 565 | or not (2 <= len(arguments) <= 3) 566 | or not isinstance(arguments[0], str) 567 | or not issubclass(arguments[1], Model) 568 | or (len(arguments) == 3 and not isinstance(arguments[2], str)) 569 | ): 570 | raise TypeError( 571 | f"Invalid ForeignKey arguments: {list(arguments)!r}. Expected either two or three " 572 | "arguments: (1) a string for the foreign table name; (2) the `Model` type for that " 573 | "table; and, optionally, (3) the foreign column name. If (3) is omitted, the primary " 574 | "key for the foreign table is used." 575 | ) 576 | if len(arguments) == 3: 577 | table_name, model, row_name = arguments 578 | else: 579 | table_name, model = arguments 580 | row_name = model.primary_key_name 581 | return cast( 582 | ForeignKey[M], 583 | type( 584 | f"{cls.__name__}{model.__name__.capitalize()}{row_name.replace('_', '').capitalize()}", 585 | (cls,), 586 | { 587 | "row_type": model, 588 | "foreign_col_name": row_name, 589 | "foreign_table_name": table_name, 590 | }, 591 | ), 592 | ) 593 | 594 | @classmethod 595 | def key_type(cls) -> Type[Union[int, float, str, bytes, Packable]]: 596 | foreign_type = cls.row_type.FIELDS[cls.foreign_col_name] 597 | if hasattr(cls, "column_options"): 598 | options = {"column_options": cls.column_options} 599 | else: 600 | options = {} 601 | return cast( 602 | Type[Union[int, float, str, bytes, Packable]], 603 | type(f"{foreign_type.__name__}ForeignKey", (foreign_type,), options), 604 | ) 605 | 606 | @property 607 | def row(self) -> M: 608 | if self._row is None: 609 | if self.table is None: 610 | raise ValueError(f"{self.__class__.__name__} must have a `table` set") 611 | foreign_table = getattr(self.table.db, self.foreign_table_name) 612 | self._row = next( 613 | iter(foreign_table.select(**{self.foreign_col_name: self.key})) 614 | ) 615 | return self._row 616 | 617 | def __getattr__(self, item): 618 | return getattr(self.row, item) 619 | 620 | def __eq__(self, other): 621 | if isinstance(other, ForeignKey): 622 | return self.key == other.key 623 | else: 624 | return self.row == other 625 | 626 | def __ne__(self, other): 627 | return not (self == other) 628 | 629 | def __lt__(self, other): 630 | if isinstance(other, ForeignKey): 631 | return self.key < other.key 632 | else: 633 | return self.row < other 634 | 635 | def __hash__(self): 636 | return hash(self.key) 637 | 638 | def __repr__(self): 639 | return 
f"{self.__class__.__name__}(key={self.key!r})" 640 | 641 | 642 | class Database(metaclass=StructMeta[Model]): 643 | def __init__(self, path: str = ":memory:", rollback_on_exception: bool = False): 644 | self.path: str = path 645 | self.con = DatabaseConnection( 646 | self.path, rollback_on_exception=rollback_on_exception 647 | ) 648 | self.tables: Dict[str, Table] = {} 649 | if self.FIELDS: 650 | with self: 651 | for table_name, table_type in self.FIELDS.items(): 652 | setattr(self, table_name, self.create_table(table_name, table_type)) 653 | 654 | @classmethod 655 | def validate_fields(cls, fields: OrderedDict[str, Type[FieldType]]): 656 | for field_name, field_type in fields.items(): 657 | if not issubclass(field_type, Table): 658 | raise TypeError( 659 | f"Database {cls!r} table `{field_name}` was expected to be of type `Table` but " 660 | f"was instead {field_type!r}" 661 | ) 662 | 663 | def __enter__(self: D) -> D: 664 | # self.con.__enter__() 665 | return self 666 | 667 | def __exit__(self, exc_type, exc_val, exc_tb): 668 | # self.con.__exit__(exc_type, exc_val, exc_tb) 669 | self.con.commit() 670 | 671 | def create_table(self, table_name: str, table_type: Type[Table[M]]) -> Table[M]: 672 | columns = [] 673 | table = table_type(self, table_name) 674 | model_type = table_type.model_type 675 | for field_name, field_type in model_type.FIELDS.items(): 676 | if issubclass(field_type, RowId): 677 | continue 678 | elif issubclass(field_type, ForeignKey): 679 | old_field_type = field_type 680 | field_type = field_type.key_type() 681 | if hasattr(old_field_type, "column_options"): 682 | setattr(field_type, "column_options", old_field_type.column_options) 683 | else: 684 | setattr(field_type, "column_options", None) 685 | if issubclass(field_type, int): 686 | data_type = "INTEGER" 687 | elif issubclass(field_type, float): 688 | data_type = "REAL" 689 | elif issubclass(field_type, str): 690 | data_type = "TEXT" 691 | elif issubclass(field_type, bytes) or isinstance(field_type, Packable): 692 | data_type = "BLOB" 693 | else: 694 | raise TypeError( 695 | f"Column {field_name} is of unsupported type {field_type!r}; it must be one of " 696 | f"{COLUMN_TYPES}" 697 | ) 698 | if ( 699 | hasattr(field_type, "column_options") 700 | and field_type.column_options is not None 701 | ): 702 | modifiers = field_type.column_options.sql_modifiers() 703 | if modifiers: 704 | modifiers = f" {modifiers}" 705 | else: 706 | modifiers = "" 707 | columns.append(f"{field_name} {data_type}{modifiers}") 708 | column_constraints = ",\n ".join(columns) 709 | if len(columns) > 1: 710 | column_constraints = "\n " + column_constraints + "\n" 711 | with self: 712 | self.con.execute( 713 | f"CREATE TABLE IF NOT EXISTS {table.name} ({column_constraints});" 714 | ) 715 | self.tables[table_name] = table 716 | return table 717 | -------------------------------------------------------------------------------- /fluxture/fluxture.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, ABCMeta, abstractmethod 2 | from argparse import ArgumentParser, Namespace 3 | from inspect import isabstract 4 | from typing import Dict, Tuple, Type 5 | 6 | PLUGINS: Dict[str, Type["Plugin"]] = {} 7 | COMMANDS: Dict[str, Type["Command"]] = {} 8 | 9 | 10 | class PluginMeta(ABCMeta): 11 | def __init__(cls, name, bases, clsdict): 12 | super().__init__(name, bases, clsdict) 13 | if not isabstract(cls) and name not in ("Plugin", "Command"): 14 | if "name" not in clsdict: 15 | raise TypeError(f"Fluxture 
plugin {name} does not define a name") 16 | elif clsdict["name"] in PLUGINS: 17 | raise TypeError( 18 | f"Cannot instaitiate class {cls.__name__} because a plugin named {name} already exists," 19 | f" implemented by class {PLUGINS[clsdict['name']]}" 20 | ) 21 | PLUGINS[clsdict["name"]] = cls 22 | if issubclass(cls, Command): 23 | if "help" not in clsdict: 24 | raise TypeError( 25 | f"Fluxture command {name} does not define a help string" 26 | ) 27 | COMMANDS[clsdict["name"]] = cls 28 | 29 | 30 | class Plugin(ABC, metaclass=PluginMeta): 31 | name: str 32 | 33 | 34 | class Command(Plugin): 35 | help: str 36 | parent_parsers: Tuple[ArgumentParser, ...] = () 37 | 38 | def __init__(self, argument_parser: ArgumentParser): 39 | self.__init_arguments__(argument_parser) 40 | 41 | def __init_arguments__(self, parser: ArgumentParser): 42 | pass 43 | 44 | @abstractmethod 45 | def run(self, args: Namespace): 46 | raise NotImplementedError() 47 | 48 | 49 | def add_command_subparsers(parser: ArgumentParser): 50 | subparsers = parser.add_subparsers( 51 | title="command", 52 | description="valid fluxture commands", 53 | help="run `fluxture command --help` for help on a specific command", 54 | ) 55 | for name, command_type in COMMANDS.items(): 56 | p = subparsers.add_parser( 57 | name, parents=command_type.parent_parsers, help=command_type.help 58 | ) 59 | p.set_defaults(func=command_type(p).run) 60 | -------------------------------------------------------------------------------- /fluxture/geolocation.py: -------------------------------------------------------------------------------- 1 | import tarfile 2 | import urllib.request 3 | from ipaddress import IPv4Address as PythonIPv4 4 | from ipaddress import IPv6Address as PythonIPv6 5 | from pathlib import Path 6 | from tempfile import NamedTemporaryFile 7 | from typing import Iterator, Optional, Tuple, Union 8 | 9 | try: 10 | from typing import Protocol 11 | except ImportError: 12 | from typing_extensions import Protocol 13 | 14 | import geoip2.database 15 | import great_circle_calculator.great_circle_calculator as gcc 16 | from geoip2.errors import AddressNotFoundError 17 | 18 | from .db import Model 19 | from .serialization import DateTime, IPv6Address 20 | 21 | 22 | class Location: 23 | lat: float 24 | lon: float 25 | 26 | def path_to( 27 | self, destination: "Location", intermediate_points: int = 20 28 | ) -> Iterator[Tuple[int, int]]: 29 | p1, p2 = (self.lon, self.lat), (destination.lon, destination.lat) 30 | yield self.lon, self.lat 31 | for i in range(intermediate_points): 32 | try: 33 | yield gcc.intermediate_point( 34 | p1, p2, (i + 1) / (intermediate_points + 2) 35 | ) 36 | except ZeroDivisionError: 37 | # this probably means p1 == p2 38 | yield self.lon, self.lat 39 | yield destination.lon, destination.lat 40 | 41 | def distance_to(self, destination: "Location", unit: str = "meters"): 42 | return gcc.distance_between_points( 43 | (self.lon, self.lat), (destination.lon, destination.lat), unit=unit 44 | ) 45 | 46 | 47 | class Geolocation(Model, Location): 48 | ip: IPv6Address 49 | city: str 50 | country_code: str 51 | continent_code: str 52 | lat: float 53 | lon: float 54 | timestamp: DateTime 55 | 56 | def __hash__(self): 57 | return hash(self.ip) 58 | 59 | 60 | class Geolocator(Protocol): 61 | def locate( 62 | self, ip: Union[IPv6Address, str, PythonIPv4, PythonIPv6] 63 | ) -> Geolocation: 64 | ... 
65 | 66 | 67 | class GeoIP2Error(RuntimeError): 68 | pass 69 | 70 | 71 | def download_maxmind_db( 72 | maxmind_license_key: str, city_db_path: Optional[str] = None, overwrite: bool = True 73 | ) -> str: 74 | """ 75 | Downloads the latest MaxMind GeoLite2 database returning the path to which it was saved. 76 | 77 | If the path is omitted, the default path is used and returned. 78 | 79 | """ 80 | if city_db_path is None: 81 | city_db_path = ( 82 | Path.home() / ".config" / "fluxture" / "geolite2" / "GeoLite2-City.mmdb" 83 | ) 84 | else: 85 | city_db_path = Path(city_db_path) 86 | if overwrite or not city_db_path.exists(): 87 | if maxmind_license_key is None: 88 | raise GeoIP2Error( 89 | "No MaxMind GeoLite2 database provided; need a `maxmind_license_key` to download it. " 90 | "Sign up for GeoLite2 for free here: https://www.maxmind.com/en/geolite2/signup " 91 | "then, after logging in, generate a license key under the Services menu." 92 | ) 93 | db_dir = city_db_path.parent 94 | if not db_dir.exists(): 95 | db_dir.mkdir(parents=True) 96 | tmpfile = NamedTemporaryFile(mode="wb", delete=False) 97 | try: 98 | with urllib.request.urlopen( 99 | r"https://download.maxmind.com/app/geoip_download?edition_id=GeoLite2-City&" 100 | f"license_key={maxmind_license_key}&suffix=tar.gz" 101 | ) as response: 102 | # We have to write this to a temp file because the gzip decompression requires seekability 103 | while True: 104 | chunk = response.read(1024**2) 105 | if not chunk: 106 | break 107 | tmpfile.write(chunk) 108 | tmpfile.close() 109 | with tarfile.open(tmpfile.name, mode="r:gz") as tar: 110 | geolite_dir = None 111 | for tarinfo in tar: 112 | if tarinfo.isdir(): 113 | geolite_dir = tarinfo.name 114 | if geolite_dir is None: 115 | raise GeoIP2Error("Unexpected GeoLite2 database format") 116 | tar.extractall(str(city_db_path.parent)) 117 | latest_dir = db_dir / "GeoLite2-City_latest" 118 | latest_dir.unlink(missing_ok=True) 119 | latest_dir.symlink_to(db_dir / geolite_dir) 120 | finally: 121 | Path(tmpfile.name).unlink(missing_ok=True) 122 | city_db_path.unlink(missing_ok=True) 123 | city_db_path.symlink_to(latest_dir / "GeoLite2-City.mmdb") 124 | return str(city_db_path) 125 | 126 | 127 | class GeoIP2Locator: 128 | def __init__( 129 | self, 130 | city_db_path: Optional[str] = None, 131 | maxmind_license_key: Optional[str] = None, 132 | ): 133 | self.city_db_path: Path = download_maxmind_db( 134 | maxmind_license_key, city_db_path, overwrite=False 135 | ) # type: ignore 136 | self._geoip: Optional[geoip2.database.Reader] = None 137 | self._entries: int = 0 138 | 139 | def __enter__(self): 140 | if self._entries == 0: 141 | assert self._geoip is None 142 | self._geoip = geoip2.database.Reader(str(self.city_db_path)).__enter__() 143 | self._entries += 1 144 | return self 145 | 146 | def __exit__(self, exc_type, exc_val, exc_tb): 147 | if self._entries == 1: 148 | assert self._geoip is not None 149 | self._geoip.__exit__(exc_type, exc_val, exc_tb) 150 | self._geoip = None 151 | self._entries = max(0, self._entries - 1) 152 | 153 | def locate( 154 | self, ip: Union[IPv6Address, str, PythonIPv4, PythonIPv6] 155 | ) -> Geolocation: 156 | with self: 157 | ipv6 = IPv6Address(ip) 158 | city = self._geoip.city(str(ipv6)) 159 | if city.location.latitude is None or city.location.longitude is None: 160 | raise AddressNotFoundError(str(ip)) 161 | return Geolocation( 162 | ip=ipv6, 163 | city=city.city.name, 164 | country_code=city.country.iso_code, 165 | continent_code=city.continent.code, 166 | 
lat=city.location.latitude, 167 | lon=city.location.longitude, 168 | timestamp=DateTime(), 169 | ) 170 | -------------------------------------------------------------------------------- /fluxture/kml.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from argparse import ArgumentParser, FileType 3 | from collections import defaultdict 4 | from math import pi, sin 5 | from typing import Dict, Iterable, List, Optional, OrderedDict, Set 6 | 7 | from fastkml import IconStyle, LineStyle, Placemark, Style, kml 8 | from fastkml.geometry import Geometry, LineString, Point 9 | from shapely.geometry import MultiPoint 10 | from tqdm import tqdm 11 | 12 | from .blockchain import Miner 13 | from .crawl_schema import CrawledNode 14 | from .crawler import CrawlDatabase 15 | from .fluxture import Command 16 | from .geolocation import Geolocation, Location 17 | from .topology import CrawlGraph, ProbabilisticWeightedCrawlGraph 18 | 19 | KML_NS = "{http://www.opengis.net/kml/2.2}" 20 | EARTH_CIRCUMFERENCE = 40075000.0 21 | EARTH_DIAMETER = 12742000.0 22 | 23 | 24 | class KMLGraphNode(ABC, Location): 25 | @abstractmethod 26 | def neighbors(self) -> Iterable["KMLGraphNode"]: 27 | raise NotImplementedError() 28 | 29 | @abstractmethod 30 | def to_placemark(self, style: Optional[Style] = None) -> Placemark: 31 | raise NotImplementedError() 32 | 33 | @abstractmethod 34 | def description(self) -> str: 35 | raise NotImplementedError() 36 | 37 | @abstractmethod 38 | def uid(self) -> str: 39 | raise NotImplementedError() 40 | 41 | def __eq__(self, other): 42 | return isinstance(other, KMLGraphNode) and self.uid() == other.uid() 43 | 44 | def __ne__(self, other): 45 | return not (self == other) 46 | 47 | def __hash__(self): 48 | return hash(self.uid()) 49 | 50 | def __str__(self): 51 | return self.uid() 52 | 53 | 54 | class KMLGeolocation(KMLGraphNode): 55 | def __init__( 56 | self, location: Geolocation, db: CrawlDatabase, is_miner: bool = False 57 | ): 58 | self.location: Geolocation = location 59 | self.db: CrawlDatabase = db 60 | self.lat = location.lat 61 | self.lon = location.lon 62 | self.is_miner: bool = is_miner 63 | 64 | def __eq__(self, other): 65 | return ( 66 | isinstance(other, KMLGeolocation) and other.location.ip == self.location.ip 67 | ) or super().__eq__(other) 68 | 69 | def uid(self) -> str: 70 | return str(self.location.ip) 71 | 72 | def neighbors(self) -> Iterable["KMLGeolocation"]: 73 | possible_nodes_by_port: Dict[int, CrawledNode] = {} 74 | for possible_node in self.db.nodes.select(ip=self.location.ip): 75 | port = possible_node.port 76 | if port in possible_nodes_by_port: 77 | # choose the version that was crawled most recently 78 | if ( 79 | possible_nodes_by_port[port].last_crawled() 80 | >= possible_node.last_crawled() 81 | ): 82 | continue 83 | possible_nodes_by_port[port] = possible_node 84 | locations: Dict[Geolocation, Set[CrawledNode]] = defaultdict(set) 85 | for node in possible_nodes_by_port.values(): 86 | for neighbor in node.get_latest_edges(): 87 | neighbor_location = self.db.locations.select( 88 | ip=neighbor.ip, 89 | limit=1, 90 | order_by="timestamp", 91 | order_direction="DESC", 92 | ).fetchone() 93 | if neighbor_location is None: 94 | continue 95 | locations[neighbor_location].add(neighbor) 96 | return ( 97 | KMLGeolocation( 98 | loc, self.db, is_miner=any(n.is_miner == Miner.MINER for n in nodes) 99 | ) 100 | for loc, nodes in locations.items() 101 | if loc is not None 102 | ) 103 | 104 | @property 
105 | def ip_str(self) -> str: 106 | if self.location.ip.ipv4_mapped is not None: 107 | return str(self.location.ip.ipv4_mapped) 108 | else: 109 | return str(self.location.ip) 110 | 111 | def description(self) -> str: 112 | if self.is_miner: 113 | miner_str = f"Likely a Miner " 114 | else: 115 | miner_str = "" 116 | return ( 117 | f"{miner_str}{self.ip_str}: {self.location.city} ({self.lat}°N, {self.lon}°E) @ " 118 | f"{self.location.timestamp!s}" 119 | ) 120 | 121 | def to_placemark(self, style: Optional[Style] = None) -> kml.Placemark: 122 | if style is None: 123 | style = Style(KML_NS, styles=[IconStyle(KML_NS, id="ip")]) 124 | p = kml.Placemark(KML_NS, self.uid(), self.ip_str, self.description()) 125 | p.append_style(style) 126 | p.geometry = Point(self.lon, self.lat) 127 | return p 128 | 129 | 130 | class ScaledKMLGraphNode(KMLGraphNode): 131 | def __init__(self, wrapped: KMLGraphNode, scale: float): 132 | self.node: KMLGraphNode = wrapped 133 | self.scale: float = scale 134 | self.lat = wrapped.lat 135 | self.lon = wrapped.lon 136 | 137 | def neighbors(self) -> Iterable["KMLGraphNode"]: 138 | return self.node.neighbors() 139 | 140 | def to_placemark(self, style: Optional[Style] = None) -> Placemark: 141 | scaled_style = Style( 142 | KML_NS, styles=[IconStyle(KML_NS, id="ip", scale=self.scale)] 143 | ) 144 | if style is not None: 145 | scaled_style.append_style(style) 146 | p = self.node.to_placemark(style=scaled_style) 147 | return p 148 | 149 | def description(self) -> str: 150 | return self.node.description() 151 | 152 | def uid(self) -> str: 153 | return self.node.uid() 154 | 155 | 156 | class KMLGraphNodeCollection(KMLGraphNode): 157 | def __init__( 158 | self, 159 | name: str, 160 | subnodes: Iterable[KMLGraphNode] = (), 161 | neighbors: Iterable[KMLGraphNode] = (), 162 | ): 163 | self.name: str = name 164 | self._neighbors: List[KMLGraphNode] = list(neighbors) 165 | self._subnodes: List[KMLGraphNode] = [] 166 | self.subnodes = subnodes 167 | 168 | def set_neighbors(self, neighbors: Iterable[KMLGraphNode]): 169 | self._neighbors = list(neighbors) 170 | assert self not in self._neighbors 171 | 172 | def uid(self) -> str: 173 | return self.name 174 | 175 | @property 176 | def subnodes(self) -> List[KMLGraphNode]: 177 | return self._subnodes 178 | 179 | @subnodes.setter 180 | def subnodes(self, nodes: Iterable[KMLGraphNode]): 181 | self._subnodes = list(nodes) 182 | if self._subnodes: 183 | points = MultiPoint([(node.lon, node.lat) for node in self.subnodes]) 184 | centroid = points.convex_hull.centroid 185 | self.lon = centroid.x 186 | self.lat = centroid.y 187 | 188 | def neighbors(self) -> Iterable["KMLGraphNode"]: 189 | return self._neighbors 190 | 191 | def description(self) -> str: 192 | return "\n".join(n.description() for n in self.subnodes) 193 | 194 | def to_placemark(self, style: Optional[Style] = None) -> kml.Placemark: 195 | if style is None: 196 | style = Style(KML_NS, styles=[IconStyle(KML_NS, id="ip")]) 197 | p = kml.Placemark(KML_NS, self.uid(), self.name, self.description()) 198 | p.append_style(style) 199 | p.geometry = Point(self.lon, self.lat) 200 | return p 201 | 202 | 203 | def to_kml( 204 | locations: Iterable[KMLGraphNode], 205 | doc_id: str, 206 | doc_name: str, 207 | doc_description: str, 208 | max_altitude: float = EARTH_DIAMETER / 4.0, 209 | ) -> kml.KML: 210 | k = kml.KML() 211 | d = kml.Document(KML_NS, doc_id, doc_name, doc_description) 212 | k.append(d) 213 | f = kml.Folder(KML_NS, "ips", "IPs", "Geolocalized IPs") 214 | d.append(f) 215 | 
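# (the generated document contains two folders: "IPs", with one placemark per
# geolocated address, and "Topology", with one great-circle arc per edge; the
# arc's peak altitude is scaled by the distance between its endpoints, so
# longer links rise higher off the globe)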
edge_folder = kml.Folder( 216 | KML_NS, "topology", "Topology", "The network topology discovered in the crawl" 217 | ) 218 | d.append(edge_folder) 219 | edge_color = (0, 255, 0) 220 | edge_hex_color = "7f%02x%02x%02x" % tuple(reversed(edge_color)) 221 | edge_style = Style( 222 | KML_NS, styles=[LineStyle(KML_NS, id="edge", color=edge_hex_color, width=3)] 223 | ) 224 | for geolocation in tqdm( 225 | locations, leave=False, desc="Generating KML", unit=" locations" 226 | ): 227 | f.append(geolocation.to_placemark()) 228 | for neighbor in geolocation.neighbors(): 229 | if neighbor is None or geolocation == neighbor: 230 | continue 231 | p = kml.Placemark( 232 | KML_NS, 233 | f"{geolocation!s}->{neighbor!s}", 234 | f"{geolocation!s}->{neighbor!s}", 235 | f"Edge between {geolocation!s} and {neighbor!s}", 236 | ) 237 | p.append_style(edge_style) 238 | num_segments = 20 239 | distance = geolocation.distance_to(neighbor) 240 | peak_altitude = max_altitude * distance / (EARTH_CIRCUMFERENCE / 2.0) 241 | p.geometry = Geometry( 242 | geometry=LineString( 243 | [ 244 | (lon, lat, sin(i / (num_segments - 1) * pi) * peak_altitude) 245 | for i, (lon, lat) in enumerate( 246 | geolocation.path_to( 247 | neighbor, intermediate_points=num_segments - 2 248 | ) 249 | ) 250 | ] 251 | ), 252 | tessellate=False, 253 | extrude=False, 254 | altitude_mode="relativeToGround", 255 | ) 256 | edge_folder.append(p) 257 | return k 258 | 259 | 260 | def calculate_rank( 261 | loc: KMLGraphNode, pr: OrderedDict[CrawledNode, float], db: CrawlDatabase 262 | ) -> float: 263 | if isinstance(loc, KMLGeolocation): 264 | nodes = db.nodes.select(ip=loc.location.ip) 265 | elif isinstance(loc, KMLGraphNodeCollection): 266 | return sum(calculate_rank(subnode, pr, db) for subnode in loc.subnodes) 267 | else: 268 | raise NotImplementedError(f"Add support for locations of type {type(loc)}") 269 | return sum(pr[node] for node in nodes if node in pr) 270 | 271 | 272 | class ToKML(Command): 273 | name = "kml" 274 | help = "export a KML file visualizing the crawled data" 275 | 276 | def __init_arguments__(self, parser: ArgumentParser): 277 | parser.add_argument( 278 | "CRAWL_DB_FILE", type=str, help="path to the crawl database" 279 | ) 280 | parser.add_argument( 281 | "KML_FILE", 282 | type=FileType("w"), 283 | help="path to which to save the KML, or '-' for STDOUT (the default)", 284 | ) 285 | parser.add_argument( 286 | "--no-pagerank", 287 | action="store_true", 288 | help="do not scale the placemarks by their pagerank in the network topology", 289 | ) 290 | parser.add_argument( 291 | "--group-by", 292 | "-g", 293 | default="ip", 294 | choices=["ip", "city", "country", "continent"], 295 | help="grouping of pins (default: %(default)s)", 296 | ) 297 | 298 | def run(self, args): 299 | with CrawlDatabase(args.CRAWL_DB_FILE) as db: 300 | locations = (KMLGeolocation(loc, db) for loc in db.locations) 301 | if args.group_by != "ip": 302 | if args.group_by == "city": 303 | 304 | def grouper(loc: KMLGeolocation) -> str: 305 | return loc.location.city 306 | 307 | elif args.group_by == "country": 308 | 309 | def grouper(loc: KMLGeolocation) -> str: 310 | return loc.location.country_code 311 | 312 | elif args.group_by == "continent": 313 | 314 | def grouper(loc: KMLGeolocation) -> str: 315 | return loc.location.continent_code 316 | 317 | else: 318 | raise NotImplementedError( 319 | f"TODO: Implement support for --group-by={args.group_by}" 320 | ) 321 | groups: Dict[str, List[KMLGeolocation]] = defaultdict(list) 322 | for loc in locations: 323 | 
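# bucket each geolocation under its group key (city, country, or continent):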
groups[grouper(loc)].append(loc) 324 | collections: Dict[str, KMLGraphNodeCollection] = { 325 | group: KMLGraphNodeCollection(group, subnodes=subnodes) 326 | for group, subnodes in groups.items() 327 | } 328 | for group, c in collections.items(): 329 | all_neighbors = set() 330 | for member in groups[group]: 331 | all_neighbors |= { 332 | grouper(neighbor) for neighbor in member.neighbors() 333 | } 334 | all_neighbors -= {group} 335 | c.set_neighbors( 336 | ( 337 | collections[neighbor_group] 338 | for neighbor_group in all_neighbors 339 | ) 340 | ) 341 | locations = collections.values() 342 | if not args.no_pagerank: 343 | graph = CrawlGraph.load(db) 344 | graph.prune() 345 | pr = ProbabilisticWeightedCrawlGraph(graph).pagerank() 346 | max_rank = max(pr.values()) 347 | if max_rank == 0.0: 348 | max_rank = 1.0 349 | new_locations = [] 350 | for loc in locations: 351 | scale = 1.0 + calculate_rank(loc, pr, db) / max_rank * 4.0 352 | new_locations.append(ScaledKMLGraphNode(loc, scale)) 353 | locations = new_locations 354 | args.KML_FILE.write( 355 | to_kml( 356 | locations=locations, 357 | doc_id=f"{args.CRAWL_DB_FILE}_IPs", 358 | doc_name=f"{args.CRAWL_DB_FILE} IPs", 359 | doc_description=f"Geolocalized IPs from crawl {args.CRAWL_DB_FILE}", 360 | ).to_string(prettyprint=True) 361 | ) 362 | -------------------------------------------------------------------------------- /fluxture/messaging.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from abc import ABC, abstractmethod 3 | from typing import Optional, Type, TypeVar 4 | 5 | from fluxture.structures import PackableStruct 6 | 7 | from .serialization import ByteOrder 8 | 9 | M = TypeVar("M", bound="Message") 10 | B = TypeVar("B", bound="BinaryMessage") 11 | 12 | 13 | class Message(ABC): 14 | @abstractmethod 15 | def serialize(self) -> bytes: 16 | raise NotImplementedError() 17 | 18 | @classmethod 19 | @abstractmethod 20 | def deserialize(cls: Type[M], data: bytes) -> M: 21 | raise NotImplementedError() 22 | 23 | @classmethod 24 | @abstractmethod 25 | async def next_message(cls: Type[M], reader: asyncio.StreamReader) -> Optional[M]: 26 | raise NotImplementedError() 27 | 28 | 29 | class BinaryMessage(PackableStruct, Message): 30 | non_serialized = ("byte_order",) 31 | byte_order: ByteOrder = ByteOrder.NETWORK 32 | 33 | def serialize(self) -> bytes: 34 | return self.pack(self.byte_order) 35 | 36 | @classmethod 37 | def deserialize(cls: Type[B], data: bytes) -> B: 38 | return cls.unpack(data, cls.byte_order) 39 | 40 | @classmethod 41 | async def next_message(cls: Type[B], reader: asyncio.StreamReader) -> Optional[B]: 42 | return await cls.read(reader, cls.byte_order) # read() is a coroutine, so it must be awaited 43 | -------------------------------------------------------------------------------- /fluxture/serialization.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import datetime 3 | import ipaddress 4 | import struct 5 | import time 6 | from abc import ABC, ABCMeta, abstractmethod 7 | from collections import OrderedDict 8 | from enum import Enum as PythonEnum 9 | from inspect import isabstract 10 | from typing import Dict, Generic, Iterator 11 | from typing import OrderedDict as OrderedDictType 12 | from typing import Tuple, Type, TypeVar, Union 13 | 14 | try: 15 | from typing import Protocol, runtime_checkable 16 | except ImportError: 17 | from typing_extensions import Protocol, runtime_checkable 18 | 19 | 20 | P = TypeVar("P") 21 | 22 | 23 | class ByteOrder(PythonEnum): 24 | NATIVE = "@" 25 | LITTLE = 
"<" 26 | BIG = ">" 27 | NETWORK = "!" 28 | 29 | 30 | class UnpackError(RuntimeError): 31 | pass 32 | 33 | 34 | @runtime_checkable 35 | class Packable(Protocol): 36 | def pack(self, byte_order: ByteOrder = ByteOrder.NETWORK) -> bytes: 37 | ... 38 | 39 | @classmethod 40 | def unpack( 41 | cls: Type[P], data: bytes, byte_order: ByteOrder = ByteOrder.NETWORK 42 | ) -> P: 43 | ... 44 | 45 | @classmethod 46 | def unpack_partial( 47 | cls: Type[P], data: bytes, byte_order: ByteOrder = ByteOrder.NETWORK 48 | ) -> Tuple[P, bytes]: 49 | ... 50 | 51 | @classmethod 52 | async def read( 53 | cls: Type[P], 54 | reader: asyncio.StreamReader, 55 | byte_order: ByteOrder = ByteOrder.NETWORK, 56 | ) -> P: 57 | ... 58 | 59 | 60 | class BigEndian: 61 | def __class_getitem__(cls, item: Type[Packable]): 62 | def big_endian_pack(self, byte_order: ByteOrder = ByteOrder.BIG) -> bytes: 63 | return item.pack(self, byte_order=ByteOrder.BIG) 64 | 65 | def big_endian_unpack(data: bytes, byte_order: ByteOrder = ByteOrder.BIG): 66 | return item.unpack(data, byte_order=ByteOrder.BIG) 67 | 68 | def big_endian_unpack_partial( 69 | data: bytes, byte_order: ByteOrder = ByteOrder.BIG 70 | ): 71 | return item.unpack_partial(data, byte_order=ByteOrder.BIG) 72 | 73 | async def big_endian_read( 74 | reader: asyncio.StreamReader, byte_order: ByteOrder = ByteOrder.BIG 75 | ): 76 | return item.read(reader, byte_order=ByteOrder.BIG) 77 | 78 | return type( 79 | f"{item.__name__}BigEndian", 80 | (item,), 81 | { 82 | "pack": big_endian_pack, 83 | "unpack": big_endian_unpack, 84 | "unpack_partial": big_endian_unpack_partial, 85 | "read": big_endian_read, 86 | }, 87 | ) 88 | 89 | 90 | class LittleEndian: 91 | def __class_getitem__(cls, item: Type[Packable]): 92 | def little_endian_pack(self, byte_order: ByteOrder = ByteOrder.LITTLE) -> bytes: 93 | return item.pack(self, byte_order=ByteOrder.LITTLE) 94 | 95 | def little_endian_unpack(data: bytes, byte_order: ByteOrder = ByteOrder.LITTLE): 96 | return item.unpack(data, byte_order=ByteOrder.LITTLE) 97 | 98 | def little_endian_unpack_partial( 99 | data: bytes, byte_order: ByteOrder = ByteOrder.LITTLE 100 | ): 101 | return item.unpack_partial(data, byte_order=ByteOrder.LITTLE) 102 | 103 | async def little_endian_read( 104 | reader: asyncio.StreamReader, byte_order: ByteOrder = ByteOrder.LITTLE 105 | ): 106 | return item.read(reader, byte_order=ByteOrder.LITTLE) 107 | 108 | return type( 109 | f"{item.__name__}LittleEndian", 110 | (item,), 111 | { 112 | "pack": little_endian_pack, 113 | "unpack": little_endian_unpack, 114 | "unpack_partial": little_endian_unpack_partial, 115 | "read": little_endian_read, 116 | }, 117 | ) 118 | 119 | 120 | class AbstractPackable(ABC): 121 | @abstractmethod 122 | def pack(self, byte_order: ByteOrder = ByteOrder.NETWORK) -> bytes: 123 | raise NotImplementedError() 124 | 125 | @classmethod 126 | @abstractmethod 127 | def unpack_partial( 128 | cls: Type[P], data: bytes, byte_order: ByteOrder = ByteOrder.NETWORK 129 | ) -> Tuple[P, bytes]: 130 | raise NotImplementedError() 131 | 132 | @classmethod 133 | def unpack( 134 | cls: Type[P], data: bytes, byte_order: ByteOrder = ByteOrder.NETWORK 135 | ) -> P: 136 | ret, remaining = cls.unpack_partial(data, byte_order) 137 | if remaining: 138 | raise ValueError(f"Unexpected trailing bytes: {remaining!r}") 139 | return ret 140 | 141 | @classmethod 142 | @abstractmethod 143 | async def read( 144 | cls: Type[P], 145 | reader: asyncio.StreamReader, 146 | byte_order: ByteOrder = ByteOrder.NETWORK, 147 | ) -> P: 148 | raise 
NotImplementedError() 149 | 150 | 151 | @runtime_checkable 152 | class FixedSize(Protocol): 153 | num_bytes: int 154 | 155 | 156 | E = TypeVar("E") 157 | 158 | 159 | class IntEnumMeta(ABCMeta, Generic[E]): 160 | __members__: OrderedDictType[str, E] 161 | min_value: int 162 | max_value: int 163 | 164 | def __init__(cls, name, bases, clsdict): 165 | super().__init__(name, bases, clsdict) 166 | cls.__members__ = OrderedDict() 167 | if ( 168 | not isabstract(cls) 169 | and name != "IntEnum" 170 | and name != "IntFlag" 171 | and name != "AbstractIntEnum" 172 | ): 173 | values: Dict[int, str] = {} 174 | for v_name, value in clsdict.items(): 175 | if v_name.startswith("_") or v_name == "DEFAULT": 176 | continue 177 | elif not isinstance(value, int): 178 | raise TypeError( 179 | f"{name}.{v_name} must be of type `int`, not {type(value)}" 180 | ) 181 | elif value in values: 182 | raise TypeError( 183 | f"{name}.{v_name} has the same value as {name}.{values[value]}" 184 | ) 185 | if not values: 186 | # this is the first value 187 | cls.min_value = value 188 | cls.max_value = value 189 | else: 190 | cls.min_value = min(cls.min_value, value) 191 | cls.max_value = max(cls.max_value, value) 192 | values[value] = v_name 193 | int_enum = cls(value, name=v_name) 194 | cls.__members__[v_name] = int_enum 195 | setattr(int_enum, "name", v_name) 196 | setattr(cls, v_name, int_enum) 197 | 198 | if "DEFAULT" in clsdict: 199 | if clsdict["DEFAULT"] not in cls.__members__: 200 | raise TypeError( 201 | f"Invalid default value {name}.DEFAULT = {clsdict['DEFAULT']!r}" 202 | ) 203 | setattr(cls, "DEFAULT", cls.__members__[clsdict["DEFAULT"]]) 204 | else: 205 | setattr(cls, "DEFAULT", next(iter(cls.__members__.values()))) 206 | # call get_type() to ensure that all of the values are within range 207 | getattr(cls, "DEFAULT").get_type() 208 | 209 | def __iter__(cls) -> Iterator[E]: 210 | return iter(cls.__members__.values()) 211 | 212 | def get(cls, name: str) -> E: 213 | return cls.__members__[name] 214 | 215 | 216 | class AbstractIntEnum(int, AbstractPackable, Generic[E], metaclass=IntEnumMeta[E]): 217 | name: str 218 | DEFAULT: E 219 | 220 | def value(self) -> int: 221 | return int(self) 222 | 223 | def __str__(self): 224 | return f"{self.__class__.__name__}.{self.name}" 225 | 226 | def __repr__(self): 227 | return f"<{self!s}: {self.value()}>" 228 | 229 | @classmethod 230 | def get_type(cls: IntEnumMeta[E]) -> "Type[SizedInteger]": 231 | for int_type in (UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64): 232 | if ( 233 | cls.min_value >= int_type.MIN_VALUE 234 | and cls.max_value <= int_type.MAX_VALUE 235 | ): 236 | return int_type 237 | raise TypeError( 238 | "There is no SizedInteger type that can represent enum " 239 | f"{cls.__name__} on the range [{cls.min_value}, {cls.max_value}]" 240 | ) 241 | 242 | def pack(self, byte_order: ByteOrder = ByteOrder.NETWORK) -> bytes: 243 | int_type = self.get_type()(self.value()) 244 | return int_type.pack(byte_order) 245 | 246 | @classmethod 247 | def unpack_partial( 248 | cls: Type[P], data: bytes, byte_order: ByteOrder = ByteOrder.NETWORK 249 | ) -> Tuple[P, bytes]: 250 | int_type, remainder = cls.get_type().unpack_partial(data, byte_order) 251 | return cls(int(int_type)), remainder 252 | 253 | @classmethod 254 | async def read( 255 | cls: Type[P], 256 | reader: asyncio.StreamReader, 257 | byte_order: ByteOrder = ByteOrder.NETWORK, 258 | ) -> P: 259 | int_type = await cls.get_type().read(reader, byte_order) 260 | return cls(int_type) 261 | 262 | def __or__(self, other) -> 
E: 263 | return self.__class__(int(self) | int(other)) 264 | 265 | __ror__ = __or__ 266 | 267 | def __and__(self, other) -> E: 268 | return self.__class__(int(self) & int(other)) 269 | 270 | __rand__ = __and__ 271 | 272 | def __neg__(self) -> E: 273 | return self.__class__( 274 | int(self) ^ ((1 << self.__class__.max_value.bit_length()) - 1) 275 | ) 276 | 277 | def __xor__(self, other) -> E: 278 | return self.__class__(int(self) ^ int(other)) 279 | 280 | __rxor__ = __xor__ 281 | 282 | 283 | class IntFlag(AbstractIntEnum["IntFlag"]): 284 | def __new__(cls, *args, **kwargs): 285 | if "name" in kwargs: 286 | name = kwargs["name"] 287 | del kwargs["name"] 288 | if args: 289 | value = args[0] 290 | args = args[1:] 291 | result = int.__new__(cls, value, *args, **kwargs) 292 | elif name in cls.__members__: 293 | return cls.__members__[name] 294 | else: 295 | raise ValueError( 296 | f"Invalid enum name {name!r}; possibilities are {list(cls.__members__.keys())!r}" 297 | ) 298 | elif not args: 299 | return cls.DEFAULT 300 | else: 301 | result = int.__new__(cls, *args, **kwargs) 302 | setattr(result, "name", None) 303 | return result 304 | 305 | @property 306 | def names(self) -> Iterator[str]: 307 | if self.name is not None: 308 | yield self.name 309 | else: 310 | yielded = False 311 | zero_value = "" 312 | int_value = int(self) 313 | for member_name, value in self.__class__.__members__.items(): 314 | if int(value) & int_value == int(value): 315 | yield member_name 316 | yielded = True 317 | elif value == 0: 318 | zero_value = member_name 319 | if not yielded and zero_value: 320 | yield zero_value 321 | 322 | def __str__(self): 323 | return f"{self.__class__.__name__}.{'|'.join(self.names)}" 324 | 325 | 326 | class IntEnum(AbstractIntEnum["IntEnum"]): 327 | def __new__(cls, *args, **kwargs): 328 | if "name" in kwargs: 329 | name = kwargs["name"] 330 | del kwargs["name"] 331 | if args: 332 | value = args[0] 333 | args = args[1:] 334 | elif name in cls.__members__: 335 | return cls.__members__[name] 336 | else: 337 | raise ValueError( 338 | f"Invalid enum name {name!r}; possibilities are {list(cls.__members__.keys())!r}" 339 | ) 340 | elif not args: 341 | return cls.DEFAULT 342 | else: 343 | for member_name, value in cls.__members__.items(): 344 | if value == args[0]: 345 | return value 346 | raise ValueError( 347 | f'Invalid enum value "{args[0]}"; possibilities are ' 348 | f"{list(cls.__members__.values())!r}" 349 | ) 350 | result = int.__new__(cls, value, *args, **kwargs) 351 | setattr(result, "name", name) 352 | return result 353 | 354 | 355 | class IPv6Address(ipaddress.IPv6Address, AbstractPackable): 356 | num_bytes: int = 16 357 | 358 | def __init__( 359 | self, 360 | address: Union[str, bytes, int, ipaddress.IPv6Address, ipaddress.IPv4Address], 361 | ): 362 | if ( 363 | isinstance(address, str) 364 | or isinstance(address, bytes) 365 | or isinstance(address, int) 366 | ): 367 | address = ipaddress.ip_address(address) 368 | if isinstance(address, ipaddress.IPv4Address): 369 | # convert ip4 to rfc 3056 IPv6 6to4 address 370 | # http://tools.ietf.org/html/rfc3056#section-2 371 | prefix6to4 = int(ipaddress.IPv6Address("2002::")) 372 | ipv4 = address 373 | address = ipaddress.IPv6Address(prefix6to4 | (int(ipv4) << 80)) 374 | assert address.sixtofour == ipv4 375 | super().__init__(address.packed) 376 | 377 | def pack(self, byte_order: ByteOrder = ByteOrder.BIG) -> bytes: 378 | if byte_order == ByteOrder.BIG: 379 | return self.packed 380 | else: 381 | return bytes(reversed(self.packed)) 382 | 383 | 
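# A round-trip sketch (illustrative values; any address behaves the same):
#     addr = IPv6Address("10.0.0.1")   # stored as an RFC 3056 6to4 address
#     data = addr.pack()               # 16 bytes, big-endian by default
#     same, rest = IPv6Address.unpack_partial(data)
#     assert same == addr and rest == b""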
@classmethod 384 | def unpack_partial( 385 | cls: Type[P], data: bytes, byte_order: ByteOrder = ByteOrder.NETWORK 386 | ) -> Tuple[P, bytes]: 387 | if byte_order == ByteOrder.BIG: 388 | return cls(data[:16]), data[16:] 389 | else: 390 | return cls(bytes(reversed(data[:16]))), data[16:] 391 | 392 | @classmethod 393 | async def read( 394 | cls: Type[P], 395 | reader: asyncio.StreamReader, 396 | byte_order: ByteOrder = ByteOrder.NETWORK, 397 | ) -> P: 398 | return cls.unpack( 399 | await reader.readexactly(cls.num_bytes), byte_order=byte_order 400 | ) 401 | 402 | def __str__(self): 403 | if self.sixtofour is not None: 404 | return str(self.sixtofour) 405 | elif self.ipv4_mapped is not None: 406 | return str(self.ipv4_mapped) 407 | else: 408 | return super().__str__() 409 | 410 | 411 | class SizeMeta(type): 412 | num_bytes_is_defined: bool = False 413 | dependent_type_is_defined: bool = False 414 | 415 | @property 416 | def num_bytes(cls) -> int: 417 | if not cls.num_bytes_is_defined: 418 | raise TypeError( 419 | f"{cls.__name__} must be subscripted with its size when used in a Struct! " 420 | f"(E.g., {cls.__name__}[1024] will specify that it is 1024 bytes.)" 421 | ) 422 | return cls._num_bytes 423 | 424 | @property 425 | def size_field_name(cls) -> str: 426 | if not cls.dependent_type_is_defined: 427 | raise TypeError( 428 | f"{cls.__name__} must be subscripted with the name of its size field when used in a Struct!" 429 | f' (E.g., {cls.__name__}["length"] will specify that its length is specified by the ' 430 | "`length` field.)" 431 | ) 432 | return cls._size_field_name 433 | 434 | def __getitem__(self, item): 435 | if isinstance(item, int): 436 | if item < 0: 437 | raise ValueError(f"Fixed size {item} must be non-negative") 438 | typename = f"{self.__name__}{item}" 439 | return type( 440 | typename, (self,), {"_num_bytes": item, "num_bytes_is_defined": True} 441 | ) 442 | elif isinstance(item, str): 443 | typename = f"{self.__name__}{item}" 444 | return type( 445 | typename, 446 | (self,), 447 | {"_size_field_name": item, "dependent_type_is_defined": True}, 448 | ) 449 | else: 450 | raise KeyError(item) 451 | 452 | 453 | class Sized(metaclass=SizeMeta): 454 | num_bytes_is_defined: bool = False 455 | dependent_type_is_defined: bool = False 456 | 457 | @property 458 | def num_bytes(self) -> int: 459 | if self.num_bytes_is_defined: 460 | return super().num_bytes 461 | elif self.dependent_type_is_defined: 462 | raise NotImplementedError() 463 | else: 464 | raise ValueError(f"{self} does not have its size set!") 465 | 466 | @property 467 | def has_size(self) -> bool: 468 | return self.num_bytes_is_defined or self.dependent_type_is_defined 469 | 470 | 471 | class SizedByteArray(bytes, Sized): 472 | @property 473 | def num_bytes(self) -> int: 474 | if not self.has_size: 475 | return len(self) 476 | else: 477 | return super().num_bytes 478 | 479 | def __new__(cls, value: bytes, pad: bool = True): 480 | if cls.num_bytes_is_defined and cls.num_bytes < len(value): 481 | raise ValueError( 482 | f"{cls.__name__} can hold at most {cls.num_bytes} bytes, but {value!r} is longer!" 
483 | ) 484 | elif cls.num_bytes_is_defined and cls.num_bytes > len(value): 485 | value = value + b"\0" * (cls.num_bytes - len(value)) 486 | return bytes.__new__(cls, value) 487 | 488 | def pack(self, byte_order: ByteOrder = ByteOrder.NETWORK) -> bytes: 489 | return self 490 | 491 | @classmethod 492 | def unpack( 493 | cls: Type[P], data: bytes, byte_order: ByteOrder = ByteOrder.NETWORK 494 | ) -> P: 495 | return cls(data) 496 | 497 | @classmethod 498 | def unpack_partial( 499 | cls: Type[P], data: bytes, byte_order: ByteOrder = ByteOrder.NETWORK 500 | ) -> Tuple[P, bytes]: 501 | return cls(data[: cls.num_bytes]), data[cls.num_bytes :] 502 | 503 | @classmethod 504 | async def read( 505 | cls: Type[P], 506 | reader: asyncio.StreamReader, 507 | byte_order: ByteOrder = ByteOrder.NETWORK, 508 | ) -> P: 509 | data = await reader.read(cls.num_bytes) 510 | return cls.unpack(data, byte_order) 511 | 512 | 513 | class SizedIntegerMeta(ABCMeta): 514 | FORMAT: str 515 | BITS: int 516 | BYTES: int 517 | SIGNED: bool 518 | MAX_VALUE: int 519 | MIN_VALUE: int 520 | 521 | def __init__(cls, name, bases, clsdict): 522 | if ( 523 | name != "SizedInteger" 524 | and "FORMAT" not in clsdict 525 | and (not isinstance(cls.FORMAT, str) or not cls.FORMAT) 526 | ): 527 | raise ValueError( 528 | f"{name} subclasses `SizedInteger` but does not define a `FORMAT` class member" 529 | ) 530 | super().__init__(name, bases, clsdict) 531 | if name != "SizedInteger": 532 | setattr( 533 | cls, "BYTES", struct.calcsize(f"{ByteOrder.NETWORK.value}{cls.FORMAT}") 534 | ) 535 | setattr(cls, "BITS", cls.BYTES * 8) 536 | setattr(cls, "SIGNED", cls.FORMAT.islower()) 537 | setattr(cls, "MAX_VALUE", 2 ** (cls.BITS - [0, 1][cls.SIGNED]) - 1) 538 | setattr(cls, "MIN_VALUE", [0, -(2 ** (cls.BITS - 1))][cls.SIGNED]) 539 | 540 | @property 541 | def num_bytes(cls) -> int: 542 | return cls.BYTES 543 | 544 | @property 545 | def c_type(cls) -> str: 546 | return f"{['u',''][cls.SIGNED]}int{cls.BITS}_t" 547 | 548 | 549 | class SizedInteger(int, metaclass=SizedIntegerMeta): 550 | def __new__(cls: SizedIntegerMeta, value: int): 551 | retval: SizedInteger = int.__new__(cls, value) 552 | if not (cls.MIN_VALUE <= retval <= cls.MAX_VALUE): 553 | raise ValueError( 554 | f"{retval} is not in the range [{cls.MIN_VALUE}, {cls.MAX_VALUE}]" 555 | ) 556 | return retval 557 | 558 | def pack(self, byte_order: ByteOrder = ByteOrder.NETWORK) -> bytes: 559 | return struct.pack(f"{byte_order.value}{self.__class__.FORMAT}", self) 560 | 561 | @classmethod 562 | def unpack( 563 | cls, data: bytes, byte_order: ByteOrder = ByteOrder.NETWORK 564 | ) -> "SizedInteger": 565 | return cls(struct.unpack(f"{byte_order.value}{cls.FORMAT}", data)[0]) 566 | 567 | @classmethod 568 | def unpack_partial( 569 | cls: Type[P], data: bytes, byte_order: ByteOrder = ByteOrder.NETWORK 570 | ) -> Tuple[P, bytes]: 571 | try: 572 | return ( 573 | cls( 574 | struct.unpack(f"{byte_order.value}{cls.FORMAT}", data[: cls.BYTES])[ 575 | 0 576 | ] 577 | ), 578 | data[cls.BYTES :], 579 | ) 580 | except struct.error: 581 | pass 582 | raise UnpackError(f"Error unpacking {cls.__name__} from the front of {data!r}") 583 | 584 | @classmethod 585 | async def read( 586 | cls: Type[P], 587 | reader: asyncio.StreamReader, 588 | byte_order: ByteOrder = ByteOrder.NETWORK, 589 | ) -> P: 590 | data = await reader.read(cls.num_bytes) 591 | return cls.unpack(data, byte_order) 592 | 593 | def __str__(self): 594 | return f"{self.__class__.c_type}({super().__str__()})" 595 | 596 | 597 | class Char(SizedInteger): 598 | 
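# Each concrete integer type below just names a Python `struct` format
# character; SizedIntegerMeta derives BYTES, BITS, SIGNED, MIN_VALUE, and
# MAX_VALUE from it ("b" is a signed 8-bit char, "B" its unsigned twin):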
FORMAT = "b" 599 | 600 | 601 | class UnsignedChar(SizedInteger): 602 | FORMAT = "B" 603 | 604 | 605 | class Short(SizedInteger): 606 | FORMAT = "h" 607 | 608 | 609 | class UnsignedShort(SizedInteger): 610 | FORMAT = "H" 611 | 612 | 613 | class Int(SizedInteger): 614 | FORMAT = "i" 615 | 616 | 617 | class UnsignedInt(SizedInteger): 618 | FORMAT = "I" 619 | 620 | 621 | class Long(SizedInteger): 622 | FORMAT = "l" 623 | 624 | 625 | class UnsignedLong(SizedInteger): 626 | FORMAT = "L" 627 | 628 | 629 | class LongLong(SizedInteger): 630 | FORMAT = "q" 631 | 632 | 633 | class UnsignedLongLong(SizedInteger): 634 | FORMAT = "Q" 635 | 636 | 637 | Int8 = Char 638 | UInt8 = UnsignedChar 639 | Bool = UInt8 640 | Int16 = Short 641 | UInt16 = UnsignedShort 642 | Int32 = Long 643 | UInt32 = UnsignedLong 644 | Int64 = LongLong 645 | UInt64 = UnsignedLongLong 646 | 647 | 648 | class DateTime(UInt64): 649 | def __new__(cls, *args): 650 | if not args: 651 | return UInt64.__new__(cls, int(time.time())) 652 | else: 653 | return UInt64.__new__(cls, *args) 654 | 655 | @staticmethod 656 | def fromisoformat(timestamp: str) -> "DateTime": 657 | return DateTime( 658 | int(datetime.datetime.fromisoformat(timestamp).timestamp() + 0.5) 659 | ) 660 | 661 | @property 662 | def date(self) -> datetime.datetime: 663 | return datetime.datetime.fromtimestamp(float(self)) 664 | 665 | def __repr__(self): 666 | return f"{self.__class__.__name__}({int(self)})" 667 | 668 | def __str__(self): 669 | return self.date.isoformat() 670 | -------------------------------------------------------------------------------- /fluxture/shodan.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from abc import ABC 3 | from argparse import ArgumentParser, Namespace 4 | from getpass import getpass 5 | from time import sleep 6 | from typing import (Any, AsyncIterator, Callable, Dict, Iterable, Iterator, 7 | Optional, Tuple) 8 | 9 | import keyring 10 | from shodan import APIError, Shodan 11 | 12 | from .async_utils import iterator_to_async, sync_to_async 13 | from .bitcoin import Node 14 | from .crawl_schema import HostInfo 15 | from .crawler import Crawler, CrawlListener 16 | from .fluxture import Command 17 | from .serialization import DateTime, IPv6Address 18 | 19 | KEYRING_NAME: str = "fluxture" 20 | 21 | 22 | def prompt( 23 | message: str, 24 | yes_options: Tuple[str, ...] = ("y", "yes"), 25 | no_options: Tuple[str, ...] 
= ("n", "no"), 26 | default: bool = True, 27 | ) -> bool: 28 | if default: 29 | yes_options = yes_options + ("",) 30 | else: 31 | no_options = no_options + ("",) 32 | while True: 33 | ret = input(message).strip().lower() 34 | if ret in yes_options: 35 | return True 36 | elif ret in no_options: 37 | return False 38 | 39 | 40 | class ShodanResult(HostInfo): 41 | def __init__(self, **kwargs): 42 | if "timestamp" in kwargs: 43 | timestamp: DateTime = DateTime.fromisoformat(kwargs["timestamp"]) 44 | else: 45 | timestamp = DateTime() 46 | if "ip" in kwargs: 47 | ip: IPv6Address = IPv6Address(kwargs["ip"]) 48 | else: 49 | raise ValueError( 50 | f"Shodan Result does not contain an IP address: {kwargs!r}" 51 | ) 52 | if "isp" in kwargs: 53 | isp: Optional[str] = kwargs["isp"] 54 | else: 55 | isp = None 56 | if "ip_str" in kwargs: 57 | self.ip_str: Optional[str] = kwargs["ip_str"] 58 | else: 59 | self.ip_str = None 60 | if "os" in kwargs: 61 | os: Optional[str] = kwargs["os"] 62 | else: 63 | os = None 64 | self.result: Dict[str, Any] = kwargs 65 | super().__init__(ip=ip, isp=isp, os=os, timestamp=timestamp) 66 | 67 | def __getattr__(self, item): 68 | if item in self.result: 69 | return self.result[item] 70 | 71 | def __str__(self): 72 | if self.ip_str is not None: 73 | ip = self.ip_str 74 | else: 75 | ip = str(self.ip) 76 | if self.isp is not None: 77 | isp = f" on {self.isp}" 78 | else: 79 | isp = "" 80 | if self.os is not None: 81 | os = f" running {self.os}" 82 | else: 83 | os = "" 84 | return f"{ip}{isp}{os}" 85 | 86 | def __repr__(self): 87 | kwargs = "".join( 88 | [ 89 | f", {argname!s}={argvalue!r}" 90 | for argname, argvalue in self.result.items() 91 | if argname != "ip" and argname != "isp" 92 | ] 93 | ) 94 | return f"{self.__class__.__name__}(ip={self.ip!r}, isp={self.isp!r}{kwargs})" 95 | 96 | 97 | class Query(ABC): 98 | def __init__(self, name: str, callback: Optional[Callable[["Query"], Any]] = None): 99 | self.name: str = name 100 | self.callback: Optional[Callable[[Query], Any]] = callback 101 | 102 | 103 | SEARCH_QUERIES: Dict[str, "SearchQuery"] = {} 104 | 105 | 106 | class SearchQuery(Query): 107 | def __init__( 108 | self, name: str, query: str, callback: Optional[Callable[["Query"], Any]] = None 109 | ): 110 | super().__init__(name=name, callback=callback) 111 | self.query: str = query 112 | 113 | @staticmethod 114 | def register( 115 | name: str, query: str, callback: Optional[Callable[["Query"], Any]] = None 116 | ) -> "SearchQuery": 117 | sq = SearchQuery(name=name, query=query, callback=callback) 118 | if name in SEARCH_QUERIES: 119 | raise KeyError( 120 | f'A search query of name "{name}" is already registered: {SEARCH_QUERIES[name]!r}' 121 | ) 122 | SEARCH_QUERIES[name] = sq 123 | return sq 124 | 125 | def run(self, api: Shodan) -> Iterator[ShodanResult]: 126 | for result in api.search_cursor(self.query): 127 | yield ShodanResult(**result) 128 | 129 | @iterator_to_async(poll_interval=0.5) 130 | def run_async(self, api: Shodan) -> AsyncIterator[ShodanResult]: 131 | return self.run(api) # type: ignore 132 | 133 | def __repr__(self): 134 | return f"{self.__class__.__name__}(name={self.name!r}, query={self.query!r}, callback={self.callback!r})" 135 | 136 | 137 | def get_keychain_api_key() -> Optional[str]: 138 | return keyring.get_password(KEYRING_NAME, "shodan_api_key") 139 | 140 | 141 | def save_keychain_api_key(api_key: str): 142 | keyring.set_password(KEYRING_NAME, "shodan_api_key", api_key) 143 | 144 | 145 | def get_api(api_key: Optional[str] = None) -> Shodan: 146 | 
keychain_key = get_keychain_api_key() 147 | if api_key is None: 148 | api_key = keychain_key 149 | if api_key is None: 150 | api_key = getpass("Shodan API key: ") 151 | if prompt( 152 | "Would you like to save this API key to the system keychain for future use? [Yn] " 153 | ): 154 | save_keychain_api_key(api_key) 155 | elif keychain_key is None: 156 | if prompt( 157 | "Would you like to save this API key to the system keychain for future use? [Yn] " 158 | ): 159 | save_keychain_api_key(api_key) 160 | elif api_key != keychain_key: 161 | print("This is a different API key than what is stored in the system keychain.") 162 | if prompt("Would you like to update the API key in the system keychain? [Yn] "): 163 | save_keychain_api_key(api_key) 164 | return Shodan(api_key) 165 | 166 | 167 | class ShodanCommand: 168 | def __init_arguments__(self, parser: ArgumentParser): 169 | parser.add_argument( 170 | "--api-key", 171 | "-k", 172 | type=str, 173 | default=None, 174 | help="Shodan API key. If omitted, a saved " 175 | "API key in the system keychain will be used, if one exists. Otherwise the user will be " 176 | "prompted to enter an API key.", 177 | ) 178 | 179 | 180 | class ActiveNodes(ShodanCommand, Command): 181 | name = "active" 182 | help = "enumerate active nodes from Shodan" 183 | 184 | def __init_arguments__(self, parser: ArgumentParser): 185 | super().__init_arguments__(parser) 186 | parser.add_argument("QUERY", choices=SEARCH_QUERIES.keys()) 187 | 188 | def run(self, args: Namespace): 189 | api = get_api(args.api_key) 190 | for result in SEARCH_QUERIES[args.QUERY].run(api): 191 | print(str(result)) 192 | 193 | 194 | class HostInfoCommand(ShodanCommand, Command): 195 | name = "hostinfo" 196 | help = "get information about IP addresses from Shodan" 197 | 198 | def __init_arguments__(self, parser: ArgumentParser): 199 | super().__init_arguments__(parser) 200 | parser.add_argument("IP", nargs="+", type=str) 201 | 202 | def run(self, args: Namespace): 203 | api = get_api(args.api_key) 204 | for ip in args.IP: 205 | try: 206 | info = api.host(ip) 207 | for key, value in info.items(): 208 | print(f"{key!s}:\t{value!r}") 209 | except APIError as e: 210 | sys.stdout.flush() 211 | sys.stderr.write(str(e)) 212 | sys.stderr.write("\n") 213 | sys.stderr.flush() 214 | 215 | 216 | class HostInfoFetcher(CrawlListener): 217 | def __init__(self): 218 | self.batch_size: int = 100 219 | self.node_queue: list[Node] = [] 220 | 221 | @staticmethod 222 | @sync_to_async(poll_interval=0.5) 223 | def get_host_info(ips: Iterable[IPv6Address]) -> Iterable[HostInfo]: 224 | ips = list(ips) # materialize the iterable so that retries see every IP 225 | max_delay = 10.0 226 | next_delay = 0.5 227 | while True: 228 | try: 229 | # build the results inside the try block so that rate-limit errors surface here, not in the caller 230 | return [ShodanResult(**get_api().host(str(ip))) for ip in ips] 231 | except APIError as e: 232 | if "rate limit reached" not in str(e): 233 | raise 234 | # rate limited: back off exponentially before retrying 235 | sleep(next_delay) 236 | next_delay = min(max_delay, next_delay * 1.5) 237 | 238 | async def process_nodes(self, crawler: Crawler, finalize: bool = False): 239 | if finalize: 240 | batch_size = len(self.node_queue) 241 | else: 242 | batch_size = self.batch_size 243 | while len(self.node_queue) >= batch_size > 0: 244 | to_process = self.node_queue[:batch_size] 245 | self.node_queue = self.node_queue[batch_size:] 246 | for info in await HostInfoFetcher.get_host_info( 247 | (p.address for p in to_process) 248 | ): 249 | crawler.crawl.set_host_info(info) 250 | 251 | # async def on_crawl_node(self, crawler: Crawler, node: 
Node): 252 | # self.node_queue.append(node) 253 | # await self.process_nodes(crawler) 254 | # 255 | # info = await HostInfoFetcher.get_host_info(node.address) 256 | # print(info) 257 | # 258 | # async def on_miner(self, crawler: Crawler, node: Node, miner): 259 | # self.node_queue.append(node) 260 | # await self.process_nodes(crawler) 261 | # 262 | # async def on_complete(self, crawler: Crawler): 263 | # await self.process_nodes(crawler, finalize=True) 264 | -------------------------------------------------------------------------------- /fluxture/statistics.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from math import sqrt 3 | from typing import Iterable, Iterator, List, Optional, Tuple, Union 4 | 5 | Numeric = Union[int, float] 6 | 7 | 8 | def memoize(func): 9 | member_name = f"_{func.__name__}_memoized" 10 | 11 | @wraps(func) 12 | def wrapper(self, *args, **kwargs): 13 | if hasattr(self, member_name): 14 | return getattr(self, member_name) 15 | result = func(self, *args, **kwargs) 16 | setattr(self, member_name, result) 17 | return result 18 | 19 | return wrapper 20 | 21 | 22 | class Statistics: 23 | def __init__(self, iterable: Iterable[Numeric]): 24 | self._iter: Optional[Iterator[Numeric]] = iter(iterable) 25 | self._data: List[Numeric] = [] 26 | 27 | def __getitem__(self, index: int) -> Numeric: 28 | while self._iter is not None and index >= len(self._data): 29 | try: 30 | self._data.append(next(self._iter)) 31 | except StopIteration: 32 | self._iter = None 33 | return self._data[index] 34 | 35 | def __iter__(self) -> Iterator[Numeric]: 36 | if self._iter is None: 37 | yield from self._data 38 | return 39 | i = 0 40 | while True: 41 | try: 42 | yield self[i] 43 | i += 1 44 | except IndexError: 45 | break 46 | 47 | def __len__(self): 48 | while self._iter is not None: 49 | try: 50 | _ = self[len(self._data)] 51 | except IndexError: 52 | break 53 | return len(self._data) 54 | 55 | def __bool__(self): 56 | if self._data: 57 | return True 58 | try: 59 | _ = next(iter(self)) 60 | return True 61 | except StopIteration: 62 | return False 63 | 64 | @property 65 | @memoize 66 | def average(self) -> float: 67 | if not self: 68 | return 0.0 69 | return sum(self) / len(self) 70 | 71 | @property 72 | @memoize 73 | def std_dev(self) -> float: 74 | if not self: 75 | return 0.0 76 | avg = self.average 77 | return sqrt(sum((x - avg) ** 2.0 for x in self) / len(self)) 78 | 79 | @property 80 | @memoize 81 | def ordered(self) -> Tuple[Numeric, ...]: 82 | return tuple(sorted(self)) 83 | 84 | @property 85 | @memoize 86 | def median(self) -> Numeric: 87 | n = len(self) 88 | ordered = self.ordered 89 | if n % 2 == 0: 90 | return (ordered[(n - 1) // 2] + ordered[(n + 1) // 2]) / 2.0 91 | else: 92 | return ordered[n // 2] 93 | 94 | def __str__(self): 95 | return f"μ {self.average} σ {self.std_dev} Med {self.median}" 96 | -------------------------------------------------------------------------------- /fluxture/structures.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import itertools 3 | from abc import ABCMeta 4 | from collections import OrderedDict 5 | from typing import Generic, Iterator, KeysView 6 | from typing import OrderedDict as OrderedDictType 7 | from typing import Tuple, Type, TypeVar 8 | from typing import ValuesView 9 | from typing import ValuesView as ValuesViewType 10 | 11 | from fluxture.serialization import (AbstractIntEnum, ByteOrder, FixedSize, P, 12 | 
Packable, UnpackError) 13 | 14 | F = TypeVar("F") 15 | 16 | 17 | class StructMeta(ABCMeta, Generic[F]): 18 | FIELDS: OrderedDictType[str, Type[F]] 19 | 20 | def __init__(cls, name, bases, clsdict): 21 | fields = OrderedDict() 22 | field_sources = {} 23 | for base in bases: 24 | if isinstance(base, StructMeta): 25 | # this will happen if a Struct is extending another Struct 26 | # so inherit all of the superclass's fields 27 | for field_name, field_type in base.FIELDS.items(): 28 | if field_name in fields: 29 | raise TypeError( 30 | f"{name} inherits field {field_name} from both {base.__name__} and " 31 | f"{field_sources[field_name]}" 32 | ) 33 | elif hasattr(base, "non_serialized") and field_name not in getattr( 34 | base, "non_serialized" 35 | ): 36 | field_sources[field_name] = base 37 | fields[field_name] = field_type 38 | if "non_serialized" in clsdict: 39 | non_serialized = set(clsdict["non_serialized"]) 40 | else: 41 | non_serialized = set() 42 | non_serialized |= {"FIELDS", "non_serialized"} 43 | if "__annotations__" in clsdict: 44 | for field_name, field_type in clsdict["__annotations__"].items(): 45 | if field_name in field_sources: 46 | raise TypeError( 47 | f"{name} cannot redefine field {field_name} from {field_sources[field_name]}" 48 | ) 49 | elif field_name not in non_serialized: 50 | fields[field_name] = field_type 51 | super().__init__(name, bases, clsdict) 52 | cls.validate_fields(fields) 53 | setattr(cls, "FIELDS", fields) 54 | # are all fields fixed size? if so, we are fixed size, too! 55 | if all(hasattr(field, "num_bytes") for field in fields.values()): 56 | cls.num_bytes = sum(field.num_bytes for field in fields.values()) # type: ignore 57 | assert isinstance(cls, FixedSize) 58 | 59 | def validate_fields(cls, fields: OrderedDictType[str, Type[F]]): 60 | pass 61 | 62 | 63 | class Struct(Generic[F], metaclass=StructMeta[F]): 64 | def __init__(self, *args, **kwargs): 65 | unsatisfied_fields = [ 66 | name for name in self.__class__.FIELDS.keys() if name not in kwargs 67 | ] 68 | if len(args) > len(unsatisfied_fields): 69 | raise ValueError( 70 | f"Unexpected positional argument: {args[len(unsatisfied_fields)]}" 71 | ) 72 | elif len(args) < len(unsatisfied_fields): 73 | # see if any of the unsatisfied fields have defaults: 74 | for name in unsatisfied_fields[len(args) :]: 75 | field_type = self.__class__.FIELDS[name] 76 | if ( 77 | hasattr(field_type, "column_options") 78 | and field_type.column_options.default is not None 79 | ): 80 | kwargs[name] = field_type.column_options.default 81 | elif issubclass(field_type, AbstractIntEnum): 82 | kwargs[name] = field_type.DEFAULT 83 | else: 84 | raise ValueError(f"Missing argument for {name} in {self.__class__}") 85 | unsatisfied_fields = unsatisfied_fields[: len(args)] 86 | for name, value in itertools.chain( 87 | kwargs.items(), zip(unsatisfied_fields, args) 88 | ): 89 | if name not in self.__class__.FIELDS: 90 | raise TypeError( 91 | f"{self.__class__.__name__}.__init__() got an unexpected keyword argument '{name}'. 
" 92 | f"Valid arguments are: {', '.join(self.__class__.FIELDS.keys())}" 93 | ) 94 | elif isinstance(value, self.__class__.FIELDS[name]): 95 | # the value was already passed as the correct type 96 | setattr(self, name, value) 97 | else: 98 | # we need to construct the correct type 99 | setattr(self, name, self.__class__.FIELDS[name](value)) 100 | super().__init__() 101 | 102 | def __contains__(self, field_name: str): 103 | return field_name in self.__class__.FIELDS 104 | 105 | def __getitem__(self, field_name: str) -> Type[F]: 106 | if field_name not in self: 107 | raise KeyError(field_name) 108 | return getattr(self, field_name) 109 | 110 | def __len__(self) -> int: 111 | return len(self.__class__.FIELDS) 112 | 113 | def __iter__(self) -> Iterator[str]: 114 | return iter(self.__class__.FIELDS.keys()) 115 | 116 | def items(self) -> Iterator[Tuple[str, Type[F]]]: 117 | for field_name in self: 118 | yield field_name, getattr(self, field_name) 119 | 120 | def keys(self) -> KeysView[str]: 121 | return self.__class__.FIELDS.keys() 122 | 123 | def values(self) -> ValuesViewType[Type[F]]: 124 | return ValuesView(self) 125 | 126 | def __eq__(self, other): 127 | return ( 128 | isinstance(other, Struct) 129 | and len(self) == len(other) 130 | and all(a == b for (_, a), (_, b) in zip(self.items(), other.items())) 131 | ) 132 | 133 | def __ne__(self, other): 134 | return not (self == other) 135 | 136 | def __str__(self): 137 | types = "".join( 138 | f" {field_name} = {field_value!s};\n" 139 | for field_name, field_value in self.items() 140 | ) 141 | newline = "\n" 142 | return f"typedef struct {{{['', newline][len(types) > 0]}{types}}} {self.__class__.__name__}" 143 | 144 | def __repr__(self): 145 | args = [ 146 | f"{name}={getattr(self, name)!r}" for name in self.__class__.FIELDS.keys() 147 | ] 148 | return f"{self.__class__.__name__}({', '.join(args)})" 149 | 150 | 151 | class PackableStruct(Generic[P], Struct[P]): 152 | def pack(self, byte_order: ByteOrder = ByteOrder.NETWORK) -> bytes: 153 | # TODO: Combine the formats and use a single struct.pack instead 154 | return b"".join( 155 | getattr(self, field_name).pack(byte_order) 156 | for field_name in self.__class__.FIELDS.keys() 157 | ) 158 | 159 | @classmethod 160 | def validate_fields(cls, fields: OrderedDictType[str, Type[F]]): 161 | for field_name, field_type in fields.items(): 162 | if not isinstance(field_type, Packable): 163 | raise TypeError( 164 | f"Field {field_name} of {cls.__name__} must be Packable, not {field_type}" 165 | ) 166 | 167 | @classmethod 168 | def unpack( 169 | cls: Type[P], data: bytes, byte_order: ByteOrder = ByteOrder.NETWORK 170 | ) -> P: 171 | ret, remaining = cls.unpack_partial(data, byte_order) 172 | if remaining: 173 | raise ValueError(f"Unexpected trailing bytes: {remaining!r}") 174 | return ret 175 | 176 | @classmethod 177 | def unpack_partial( 178 | cls: Type[P], data: bytes, byte_order: ByteOrder = ByteOrder.NETWORK 179 | ) -> Tuple[P, bytes]: 180 | remaining_data = data 181 | args = [] 182 | for field_name, field_type in cls.FIELDS.items(): 183 | try: 184 | field, remaining_data = field_type.unpack_partial( 185 | remaining_data, byte_order 186 | ) 187 | errored = False 188 | except UnpackError: 189 | errored = True 190 | if errored: 191 | parsed_fields = [ 192 | f"{field_name} = {arg!r}" 193 | for field_name, arg in zip(cls.FIELDS.keys(), args) 194 | ] 195 | parsed_fields = ", ".join(parsed_fields) 196 | raise UnpackError( 197 | f"Error parsing field {cls.__name__}.{field_name} (field {len(args)+1}) of type " 
198 | f"{field_type.__name__} from bytes {remaining_data!r}. Prior parsed field values: " 199 | f"{parsed_fields}" 200 | ) 201 | args.append(field) 202 | return cls(*args), remaining_data 203 | 204 | @classmethod 205 | async def read( 206 | cls: Type[P], 207 | reader: asyncio.StreamReader, 208 | byte_order: ByteOrder = ByteOrder.NETWORK, 209 | ) -> P: 210 | if hasattr(cls, "num_bytes"): 211 | data = await reader.read(cls.num_bytes) 212 | return cls.unpack(data, byte_order) 213 | # we need to read it one field at a time 214 | args = [] 215 | for field_name, field_type in cls.FIELDS.items(): 216 | try: 217 | field = field_type.read(reader, byte_order) 218 | errored = False 219 | except UnpackError: 220 | errored = True 221 | if errored: 222 | parsed_fields = [ 223 | f"{field_name} = {arg!r}" 224 | for field_name, arg in zip(cls.FIELDS.keys(), args) 225 | ] 226 | parsed_fields = ", ".join(parsed_fields) 227 | raise UnpackError( 228 | f"Error parsing field {cls.__name__}.{field_name} (field {len(args) + 1}) of type " 229 | f"{field_type.__name__}. Prior parsed field values: {parsed_fields}" 230 | ) 231 | args.append(field) 232 | return cls(*args) 233 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | 4 | setup( 5 | name="fluxture", 6 | description="A crawling framework for blockchains and peer-to-peer systems", 7 | url="https://github.com/trailofbits/fluxture", 8 | author="Trail of Bits", 9 | version="0.0.1", 10 | packages=find_packages(exclude=["test"]), 11 | python_requires=">=3.7", 12 | install_requires=[ 13 | "fastkml~=0.11", 14 | "geoip2~=4.1.0", 15 | "graphviz~=0.14.1", 16 | "great-circle-calculator~=1.1.0", 17 | "keyring~=21.8.0", 18 | "lxml~=4.9.1", 19 | "networkx~=2.4", 20 | "numpy>=1.19.4", 21 | "shapely~=1.8.0", 22 | "shodan~=1.24.0", 23 | "six>=1.5", 24 | "tqdm>=4.48.0", 25 | "typing_extensions~=4.2.0 ; python_version < '3.8'", 26 | ], 27 | extras_require={"dev": ["flake8", "pytest", "twine"]}, 28 | entry_points={"console_scripts": ["fluxture = fluxture.__main__:main"]}, 29 | classifiers=[ 30 | "Development Status :: 4 - Beta", 31 | "Environment :: Console", 32 | "Intended Audience :: Science/Research", 33 | "License :: OSI Approved :: Apache Software License", 34 | "Programming Language :: Python :: 3 :: Only", 35 | "Topic :: Utilities", 36 | ], 37 | ) 38 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crytic/fluxture/882c44627a1ea677c5d44f5d6463fee03ebad3b6/test/__init__.py -------------------------------------------------------------------------------- /test/test_async_utils.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from time import sleep 3 | from typing import List, Tuple 4 | from unittest import TestCase 5 | 6 | from fluxture.async_utils import iterator_to_async, sync_to_async 7 | 8 | 9 | @iterator_to_async(poll_interval=0.24) 10 | def slow_iterator(n: int): 11 | for i in range(n): 12 | sleep(0.5) 13 | yield i 14 | 15 | 16 | async def slow_iterator_async(n: int) -> List[Tuple[int, float]]: 17 | loop = asyncio.get_running_loop() 18 | results = [] 19 | async for i in slow_iterator(n): 20 | results.append((i, loop.time())) 21 | return results 22 | 23 | 24 | async def 
sleep_and_return_time(duration: float) -> float: 25 | loop = asyncio.get_running_loop() 26 | await asyncio.sleep(duration) 27 | return loop.time() 28 | 29 | 30 | async def slow_iterator_test(test: TestCase, n: int): 31 | slow_iterator_results, sleep_time = await asyncio.gather(slow_iterator_async(n), sleep_and_return_time(n / 2.0)) 32 | expected = 0 33 | has_time_before = False 34 | has_time_after = False 35 | for i, end_time in slow_iterator_results: 36 | test.assertEqual(i, expected) 37 | expected += 1 38 | has_time_before = has_time_before or end_time < sleep_time 39 | has_time_after = has_time_after or end_time > sleep_time 40 | # ensure that asyncio actually scheduled `sleep_and_return` interleaved between `slow_iterator_async` iterations: 41 | test.assertTrue(has_time_before) 42 | test.assertTrue(has_time_after) 43 | 44 | 45 | @sync_to_async(poll_interval=0.25) 46 | def slow_function(): 47 | sleep(2.0) 48 | 49 | 50 | async def time_slow_function() -> float: 51 | loop = asyncio.get_running_loop() 52 | await slow_function() 53 | return loop.time() 54 | 55 | 56 | async def slow_function_test(test: TestCase): 57 | slow_func_end_time, sleep_time = await asyncio.gather(time_slow_function(), sleep_and_return_time(1.0)) 58 | # ensure that asyncio actually scheduled `sleep_and_return` before `time_slow_function`: 59 | test.assertLessEqual(sleep_time, slow_func_end_time) 60 | 61 | 62 | class TestAsyncUtils(TestCase): 63 | def test_iterator_to_async(self): 64 | asyncio.run(slow_iterator_test(self, 10)) 65 | 66 | def test_sync_to_async(self): 67 | asyncio.run(slow_function_test(self)) 68 | -------------------------------------------------------------------------------- /test/test_bitcoin.py: -------------------------------------------------------------------------------- 1 | import time 2 | from ipaddress import ip_address 3 | from unittest import TestCase 4 | 5 | from fluxture.bitcoin import BitcoinMessage, NetAddr, VersionMessage 6 | from fluxture.serialization import ByteOrder 7 | 8 | EXAMPLE_VERSION_MESSAGE = b"".join([ 9 | b"\x72\x11\x01\x00", # Protocol version: 70002 10 | b"\x01\x00\x00\x00\x00\x00\x00\x00", # Services: NODE_NETWORK 11 | b"\xbc\x8f\x5e\x54\x00\x00\x00\x00", # Unix epoch time: 1415483324 12 | b"\x01\x00\x00\x00\x00\x00\x00\x00", # Receiving node's services 13 | b"\x00\x00\x00\x00\x00\x00\x00\x00", 14 | b"\x00\x00\xff\xff\xc6\x1b\x64\x09", # Receiving node's IPv6 address 15 | b"\x20\x8d", # Receiving node's port number 16 | b"\x01\x00\x00\x00\x00\x00\x00\x00", # Transmitting node's services 17 | b"\x00\x00\x00\x00\x00\x00\x00\x00", 18 | b"\x00\x00\xff\xff\xcb\x00\x71\xc0", # Transmitting node's IPv6 address 19 | b"\x20\x8d", # Transmitting node's port number 20 | b"\x12\x80\x35\xcb\xc9\x79\x53\xf8", # Nonce 21 | b"\x0F" # Bytes in user agent string: 15 22 | b"\x2f\x53\x61\x74\x6f\x73\x68\x69", 23 | b"\x3a\x30\x2e\x39\x2e\x33\x2f", # User agent: /Satoshi:0.9.3/ 24 | b"\xcf\x05\x05\x00", # Start height: 329167 25 | b"\x01", # Relay flag: true 26 | ]) 27 | 28 | 29 | class TestBitcoin(TestCase): 30 | def test_version_message(self): 31 | msg = VersionMessage( 32 | version=70015, 33 | services=0, 34 | timestamp=int(time.time()), 35 | addr_recv=NetAddr(), 36 | addr_from=NetAddr(), 37 | nonce=0, 38 | user_agent=b"BlockScraper", 39 | start_height=123, 40 | relay=True 41 | ) 42 | self.assertEqual(msg, BitcoinMessage.deserialize(msg.serialize())) 43 | msg = VersionMessage.unpack(EXAMPLE_VERSION_MESSAGE, byte_order=ByteOrder.LITTLE) 44 | self.assertIsInstance(msg, 
VersionMessage) 45 | self.assertEqual(msg.version, 70002) 46 | self.assertEqual(msg.timestamp, 1415483324) 47 | self.assertEqual(msg.addr_recv.port, 8333) 48 | self.assertEqual(msg.addr_from.port, 8333) 49 | self.assertEqual(msg.addr_recv.ip, ip_address("::ffff:c61b:6409")) 50 | self.assertEqual(msg.addr_from.ip, ip_address("::ffff:cb00:71c0")) 51 | -------------------------------------------------------------------------------- /test/test_db.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | 4 | from fluxture.db import ( 5 | AutoIncrement, column_options, ColumnOptions, Database, default, ForeignKey, Model, primary_key, Table 6 | ) 7 | 8 | 9 | class Person(Model): 10 | name: primary_key(str) 11 | age: int 12 | 13 | 14 | class TestDatabase(TestCase): 15 | def test_create_table(self): 16 | db = Database() 17 | table = db.create_table("people", Table[Person]) 18 | self.assertEqual(len(table), 0) 19 | person = Person(name="Foo", age=1337) 20 | table.append(person) 21 | self.assertEqual(len(table), 1) 22 | retrieved_person = next(iter(table)) 23 | self.assertIsInstance(retrieved_person, Person) 24 | self.assertEqual(retrieved_person, person) 25 | self.assertEqual(next(iter(table.select(age=1337))), person) 26 | self.assertCountEqual(table.select(age=0), ()) 27 | 28 | def test_define_db(self): 29 | class TestDB(Database): 30 | people: Table[Person] 31 | 32 | db = TestDB() 33 | self.assertEqual(len(db.people), 0) 34 | 35 | def test_primary_key(self): 36 | self.assertEqual(Person.primary_key_name, "name") 37 | 38 | class NoPrimaryKey(Model): 39 | not_primary_key: int 40 | not_primary_key_either: float 41 | 42 | self.assertEqual(NoPrimaryKey.primary_key_name, "rowid") 43 | 44 | def test_default(self): 45 | class Number(Model): 46 | n: default(primary_key(int), 1) 47 | 48 | class TestDB(Database): 49 | numbers: Table[Number] 50 | 51 | db = TestDB() 52 | db.numbers.append(Number()) 53 | self.assertEqual(next(iter(db.numbers)), Number(1)) 54 | 55 | def test_foreign_key(self): 56 | class Height(Model): 57 | person: primary_key(ForeignKey["people", Person]) # noqa: F821 58 | height: int 59 | 60 | class TestDB(Database): 61 | people: Table[Person] 62 | heights: Table[Height] 63 | 64 | db = TestDB() 65 | person = Person(name="Foo", age=1337) 66 | db.people.append(person) 67 | db.heights.append(Height(person="Foo", height=80)) 68 | h = next(iter(db.heights)) 69 | self.assertEqual(h.person, person) 70 | 71 | def test_auto_increment(self): 72 | class Counter(Model): 73 | id: column_options(AutoIncrement, ColumnOptions(primary_key=True, auto_increment=True)) 74 | 75 | class TestDB(Database): 76 | counters: Table[Counter] 77 | 78 | db = TestDB() 79 | counter = Counter() 80 | self.assertIsInstance(counter.id, AutoIncrement) 81 | self.assertEqual(counter.id.initialized, False) 82 | self.assertTrue(any(key == "id" for key, _ in counter.uninitialized_auto_increments())) 83 | db.counters.append(counter) 84 | self.assertEqual(counter.id.initialized, True) 85 | -------------------------------------------------------------------------------- /test/test_statistics.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from fluxture.statistics import Statistics 4 | 5 | 6 | class StatisticsTest(TestCase): 7 | def test_memoization(self): 8 | stats = Statistics((1, 2, 3, 4, 5)) 9 | self.assertEqual(stats.average, sum((1, 2, 3, 4, 5)) / 5.0) 10 | self.assertEqual(stats.std_dev, 
stats.std_dev) 11 | 12 | def test_median(self): 13 | stats = Statistics((1, 2, 3, 4, 5)) 14 | self.assertEqual(stats.median, 3) 15 | stats = Statistics((1, 2, 3, 4, 5, 6)) 16 | self.assertEqual(stats.median, (3 + 4) / 2.0) 17 | -------------------------------------------------------------------------------- /test/test_types.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import random 3 | 4 | from fluxture.structures import PackableStruct 5 | from tqdm import tqdm, trange 6 | from typing import List 7 | from unittest import TestCase 8 | 9 | from fluxture.serialization import * 10 | 11 | 12 | class TestTypes(TestCase): 13 | @classmethod 14 | def setUpClass(cls): 15 | cls.packable_types: List[Type[Packable]] = [ 16 | t for t in globals().values() 17 | if inspect.isclass(t) and issubclass(t, Packable) and ( 18 | not hasattr(t, "__abstractmethods__") or not t.__abstractmethods__ 19 | ) 20 | ] 21 | cls.sized_integer_types: List[Type[SizedInteger]] = [ 22 | ty for ty in cls.packable_types if issubclass(ty, SizedInteger) and ty is not SizedInteger 23 | ] 24 | 25 | def test_int_enum(self): 26 | class EnumTest(IntEnum): 27 | FOO = 0 28 | BAR = 10 29 | 30 | self.assertEqual(EnumTest.FOO, 0) 31 | self.assertEqual(EnumTest.BAR, 10) 32 | self.assertIsInstance(EnumTest.FOO, EnumTest) 33 | self.assertIsInstance(EnumTest.BAR, EnumTest) 34 | self.assertEqual(EnumTest.get("FOO"), EnumTest.FOO) 35 | self.assertEqual(EnumTest.get("BAR"), EnumTest.BAR) 36 | self.assertIs(EnumTest.get_type(), UInt8) 37 | self.assertEqual(EnumTest.DEFAULT, EnumTest.FOO) 38 | 39 | class SignedEnum(IntEnum): 40 | FOO = -1 41 | BAR = 100 42 | 43 | self.assertIs(SignedEnum.get_type(), Int8) 44 | 45 | class LargeEnum(IntEnum): 46 | FOO = -500 47 | BAR = 35000 48 | 49 | self.assertIs(LargeEnum.get_type(), Int32) 50 | 51 | def test_int_flags(self): 52 | class Flags(IntFlag): 53 | A = 0 54 | B = 1 55 | C = 2 56 | D = 4 57 | 58 | self.assertEqual(Flags.A, 0) 59 | self.assertEqual(Flags.B, 1) 60 | self.assertEqual(Flags.C, 2) 61 | self.assertEqual(Flags.D, 4) 62 | self.assertEqual(Flags.B | Flags.C, 3) 63 | 64 | def test_bad_int_enum(self): 65 | def make_bad_enum(): 66 | class BadEnum(IntEnum): 67 | NOT_INT = "foo" 68 | 69 | self.assertRaises(TypeError, make_bad_enum) 70 | 71 | def make_oversized_enum(): 72 | class OversizedEnum(IntEnum): 73 | FOO = 99999999999999999999999 74 | 75 | self.assertRaises(TypeError, make_oversized_enum) 76 | 77 | def test_sized_integers(self): 78 | for int_type in tqdm(self.sized_integer_types, desc="testing sized integers", unit=" types", leave=False): 79 | for _ in trange(1000, desc=f"testing {int_type.__name__}", unit=" tests", leave=False): 80 | value = random.randint(int_type.MIN_VALUE, int_type.MAX_VALUE) 81 | packed = int_type(value).pack() 82 | self.assertEqual(int_type.unpack(packed), value) 83 | 84 | def test_empty_struct(self): 85 | class EmptyStruct(PackableStruct): 86 | pass 87 | s = EmptyStruct() 88 | self.assertEqual(s, EmptyStruct.unpack(s.pack())) 89 | 90 | def test_struct_comparison(self): 91 | class S1(PackableStruct): 92 | a: Int32 93 | b: UnsignedChar 94 | c: UInt64 95 | 96 | class S2(PackableStruct): 97 | a: Int32 98 | b: UnsignedChar 99 | c: UInt64 100 | 101 | self.assertRaises(ValueError, S1, (0, 1)) 102 | self.assertRaises(ValueError, S1, (0, 1, 2, 3)) 103 | self.assertEqual(S1(0, 1, 2), S2(0, 1, 2)) 104 | self.assertNotEqual(S1(0, 1, 2), S2(0, 1, 3)) 105 | 106 | def test_struct_packing(self): 107 | class S3(PackableStruct): 
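# every field is fixed-size, so S3 is too: 4 (Int32) + 8 (UInt64) + 2 (Int16) = 14 packed bytes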
108 | a: Int32 109 | b: UInt64 110 | c: Int16 111 | 112 | s3 = S3(0, 1, 2) 113 | self.assertEqual(S3.unpack(s3.pack()), s3) 114 | 115 | def test_byte_arrays(self): 116 | class HasArrays(PackableStruct): 117 | a: SizedByteArray[1024] 118 | b: SizedByteArray[0] 119 | c: SizedByteArray[10] 120 | 121 | self.assertRaises(ValueError, HasArrays, b"abcd", b"defg", b"hijk") # b"defg" cannot fit in the zero-length field b 122 | has_arrays = HasArrays(b"abcd", b"", b"hijk") 123 | self.assertEqual(HasArrays.unpack(has_arrays.pack()), has_arrays) 124 | --------------------------------------------------------------------------------
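
A minimal end-to-end sketch of the serialization layer above, in the same spirit as the tests: the Peer struct is hypothetical, invented purely for illustration, but every imported name is defined in fluxture/serialization.py and fluxture/structures.py:

from fluxture.serialization import ByteOrder, IPv6Address, UInt16, UInt64
from fluxture.structures import PackableStruct


class Peer(PackableStruct):
    # fields pack in declaration order: 8 + 16 + 2 = 26 bytes
    services: UInt64
    ip: IPv6Address
    port: UInt16


peer = Peer(services=1, ip=IPv6Address("127.0.0.1"), port=8333)
data = peer.pack(ByteOrder.NETWORK)
assert len(data) == 26
assert Peer.unpack(data, ByteOrder.NETWORK) == peer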