├── .gitignore ├── LICENSE ├── README.md ├── chart_bar_company.py ├── chart_map_china.py ├── chart_map_country.py ├── chart_pie_operator.py ├── chart_pie_ports.py ├── font └── Muyao-Softbrush.ttf ├── geo ├── GeoLite2-City.mmdb ├── GeoLite2-Country.mmdb └── ip2region.db ├── ip2Region.py ├── ipgeo.py ├── trim_data.py └── visual.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | 
# Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | .idea 107 | data 108 | .DS_Store 109 | render.html 110 | output 111 | .vscode 112 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. 
Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 
62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 
102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 
133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. 
You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 
196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 
229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 675 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 监视我的手机:数据都去哪儿了? 2 | 3 | > _“现在的人几乎是全部透明的。我心里就想,马化腾肯定天天在看我们的微信,因为他都可以看的,随便看,这些问题非常大。” —— 李书福_ 4 | 5 |
6 | 7 | **日常使用的手机可能比想象的更加活跃**,当微信聊天、淘宝购物、抖音看视频甚至是喵的手机待机啥也不干,某些 App 都会悄悄地与服务器交换着数据。这些数据包括微信聊天记录、地理位置、通讯录、通话记录、QQ消息,甚至短信 8 | 内容... 9 | 10 | 我一直想知道**我的数据都去了哪里**?**哪些 App 在源源不断上传数据**?**数据被哪些公司搜集了**? 11 | 12 | 前一段时间,浏览过一篇国外的博客《[Tracking my phone's silent connections](https://kushaldas.in/posts/tracking-my-phone-s-silent-connections.html)》,文中作者 Kushal 使用 WireGuard 代理的方式,监控自己的手机 1 个星期,截获手机与服务器之间的所有请求,最后统计了手机到底悄悄地在和哪些公司的服务器进行连接。 13 | 14 | 受到 Kushal 的启发,我决定使用部署 ss 的方式截获我个人的手机数据。 15 | 16 | ## 监控方案 17 | 18 |
19 | 20 | ### 实验设备 21 | 22 | - 日常使用的安卓手机 `x1` 23 | - 国内某云服务器 `x1` 24 | 25 | ### 代理方案 26 | 27 | 手机的数据都是与不同的服务器进行着连接,如何获取所有的连接?首先我想到的是手机要通过 Wi-Fi 路由器上网,那么如果在路由器端截取数据包,会比较容易。但是无法获取手机的移动基站流量。 28 | 29 | 于是在 1 台云服务器上搭建了个代理服务,手机客户端设置为全局代理连接 VPN 服务器,就可以在服务器端获取所有的数据请求。 30 | 31 | ### 部署服务 32 | 33 | 为了保证上网访问速度,提升网络体验,推荐选择国内的服务器,代理服务器首先安装 Docker 34 | 35 | ```shell 36 | $ sudo apt-get -y install docker.io 37 | ``` 38 | 39 | **启动 ss Docker 容器** 40 | 41 | 通过阅读 ss 的文档,可知在启动 ss 时只需要加上 `-v` 参数(Verbose mode)即可输出详细 Log。同时使用 `tmux` 让服务在后台运行,将输出以追加的方式(`>>`)重定向到 `logs.txt` 文件。 42 | 43 | ```shell 44 | $ tmux 45 | $ sudo docker run -t --name ss -p 9000:9000 mritd/shadowsocks -s "-s 0.0.0.0 -p 9000 -m aes-256-cfb -k yourpassword --fast-open -v" >> logs.txt 46 | ``` 47 | 48 | **手机客户端** 49 | 50 | 在手机端安装 ss 或者酸酸乳客户端,配置代理服务器地址、端口、密码与加密方式,代理模式设置为全局代理。 51 | 52 | 然后在服务器端,使用 `tail` 命令从指定点开始将文件写到标准输出,显示实时 Log,服务搭建成功 53 | 54 | ```shell 55 | $ tail -f logs.txt 56 | ``` 57 | 58 | 当手机使用微信时,记录的 Log 日志如下 59 | 60 |
61 | 62 | 63 | ### 数据处理 64 | 65 | **DNS 域名解析** 66 | 67 | DNS(Domain Name System),翻译过来就是域名系统,是互联网上作为域名和 IP 地址相互映射的一个分布式数据库。获取到的记录大多数是域名,需要先解析成 IP 地址 68 | 69 | ```python 70 | import socket 71 | def domain_to_ip(domain): 72 | return socket.gethostbyname(domain) 73 | ``` 74 | 例如,解析 `www.baidu.com` 的 IP 地址 75 | ```Python 76 | domain_to_ip('www.baidu.com') 77 | '14.215.177.38' 78 | ``` 79 | 80 | **IP 地理数据库** 81 | 82 | 推荐使用 [ip2region](https://github.com/lionsoul2014/ip2region),一个开源的 IP 到地区的映射库,具有 99.9% 准确率,提供 Binary,B 树和纯内存三种查询快速搜索算法。 83 | 84 | ```python 85 | >> result = ipgeo.find('www.baidu.com') 86 | >> print(result) 87 | {'ip': '14.215.177.38', 'city_id': 2140, 'country': '中国', 'province': '广东省', 'city': '广州市', 'operator': '电信'} 88 | ``` 89 | 90 | **保存数据** 91 | 92 | ```python 93 | df.to_csv(out_csv, index=False) 94 | print('saved to {}'.format(out_csv)) 95 | ``` 96 | 97 | ### 数据可视化 98 | 99 | 经过十多天的记录,俺一共记录了 `280059` 条记录 100 | 101 |
102 | 103 | 接下来使用 Pyecharts 对数据进行可视化。Echarts 是百度开源的一个数据可视化 JS 库,而 Pyecharts 是一个用于生成 Echarts 图表的 Python 库。 104 | 105 | #### 主要的互联网公司 106 | 107 |
108 | 109 |
110 | 111 | 从上图可以看出,俺的安卓手机(安装了谷歌服务),在国内的网络环境,请求次数最多还是 Google。 112 | 113 | 然后就是日常使用的微信和 QQ 了。由于平时会看 B 站视频,所以 Bilibili 排名第三 orz... 114 | 115 | 我手机安装的是 QQ 输入法,但是去往 `sogou.com` 的请求居然有 `1952` 条,查看了用户协议才发现 `“QQ输入法”是经腾讯公司认可,由搜狗公司发布的客户端软件。` 116 | 117 | 还有像美团、高德地图这样的软件,平时并不怎么频繁使用,网络请求却异常地活跃,不知道偷偷摸摸干着啥。 118 | 119 | #### 夜间活动排行 120 | 121 | 过滤出凌晨 00:00 ~ 06:00 时间段的活动,可以发现去往 `*.qq.com` 的连接始终是最多的。 122 | 123 |
124 | 125 | #### 全球分布 126 | 127 |
128 | 129 |
130 | 131 | #### 国内各省份分布 132 | 133 |
134 | 135 | 可以看到俺的流量大多去往了广东、上海和北京这样的地方,台湾这么高的原因是谷歌的服务器在那边,DNS 解析谷歌的域名都指向了台湾。 136 | 137 | #### 电信运营商 138 | 139 |
140 | 141 | #### 服务器端口统计 142 | 143 |
144 | 145 | #### 其他 146 | 147 | 在一加手机的网络请求中,发现了一些发往 oppo 服务器的请求,看来不光硬件由 oppo 代工,连软件也是。 148 | 149 | ```python 150 | [('epoch.cdo.oppomobile.com', 208), 151 | ('gslb.cdo.oppomobile.com', 38), 152 | ('istore.oppomobile.com', 38), 153 | ('opsapi.store.oppomobile.com', 34), 154 | ('api.cdo.oppomobile.com', 22), 155 | ('message.pull.oppomobile.com', 21), 156 | ('st.pull.oppomobile.com', 13), 157 | ('cdopic0.oppomobile.com', 9), 158 | ('newds01.myoppo.com', 9), 159 | ('httpdns.push.oppomobile.com', 4), 160 | ('conn1.oppomobile.com', 1), 161 | ('iopen.cdo.oppomobile.com', 1)] 162 | ``` 163 | 164 | ### 最后 165 | 166 | > _吉利控股集团创始人、董事长李书福曾说 “现在的人几乎是全部透明的。我心里就想,马化腾肯定天天在看我们的微信,因为他都可以看的,随便看,这些问题非常大。”_ 167 | 168 | ### 完整代码 169 | 170 | [https://github.com/wangshub/tracking-my-phone](https://github.com/wangshub/tracking-my-phone) 171 | 172 | - 如果需要更为详细的数据,可以考虑使用 [mitmproxy](https://mitmproxy.org/) 代理,能够抓取 HTTPS 数据,并提供 Python API。 173 | 174 | ### 参考链接 175 | 176 | - [Tracking my phone's silent connections](https://kushaldas.in/posts/tracking-my-phone-s-silent-connections.html) 177 | - [ip2region: Ip2region is a offline IP location library](https://github.com/lionsoul2014/ip2region) 178 | - [Python Data Analysis Library](https://pandas.pydata.org/) 179 | - [Pyecharts: A Python Echarts Plotting Library.](https://pyecharts.org) 180 | -------------------------------------------------------------------------------- /chart_bar_company.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from pyecharts import options as opts 5 | from pyecharts.charts import Geo, Page, Map, Pie, Bar, WordCloud 6 | from pyecharts.globals import ChartType, SymbolType 7 | 8 | 9 | class Collector: 10 | charts = [] 11 | 12 | @staticmethod 13 | def funcs(fn): 14 | Collector.charts.append((fn, fn.__name__)) 15 | 16 | 17 | C = Collector() 18 | 19 | data = [('googleapis.com', 79979), 20 | ('qq.com', 25716), 21 | ('bilibili.com',
11771), 22 | ('qpic.cn', 5735), 23 | ('google.com', 5207), 24 | ('amap.com', 3749), 25 | ('hdslb.com', 3693), 26 | ('zhihu.com', 2994), 27 | ('163.com', 2042), 28 | ('sogou.com', 1952), 29 | ('alipay.com', 1364), 30 | ('snssdk.com', 1317), 31 | ('meituan.com', 1298), 32 | ('p2cdn.com', 1287), 33 | ('baidu.com', 1274), 34 | ('other', 25598)] 35 | 36 | data.reverse() 37 | 38 | data_night = [('qq.com', 588), 39 | ('zhihu.com', 191), 40 | ('qpic.cn', 131), 41 | ('google.com', 120), 42 | ('sogou.com', 96), 43 | ('xiaojukeji.com', 75), 44 | ('amap.com', 59), 45 | ('qunar.com', 52), 46 | ('baidu.com', 47), 47 | ('com.cn', 36), 48 | ('qlogo.cn', 36), 49 | ('zhimg.com', 31), 50 | ('163.com', 30), 51 | ('didistatic.com', 16), 52 | ('vzuu.com', 16), 53 | ('jpush.cn', 15), 54 | ('crashlytics.com', 15), 55 | ('xdrig.com', 13), 56 | ('v2ex.com', 12), 57 | ('oneplus.cn', 11), 58 | ('bdstatic.com', 11), 59 | ('aliyuncs.com', 11), 60 | ('gstatic.com', 10), 61 | ('sinaimg.cn', 10), 62 | ('shuzilm.cn', 10), 63 | ('dida365.com', 9), 64 | ('biliapi.com', 8), 65 | ('imgur.com', 8), 66 | ('udache.com', 7), 67 | ('bilibili.com', 7)] 68 | data_night.reverse() 69 | 70 | data_cloud = [('googleapis', 79979), 71 | ('qq', 25716), 72 | ('bilibili', 11771), 73 | ('qpic', 5735), 74 | ('google', 5207), 75 | ('amap', 3749), 76 | ('hdslb', 3693), 77 | ('zhihu', 2994), 78 | ('163', 2042), 79 | ('sogou', 1952), 80 | ('alipay', 1364), 81 | ('snssdk', 1317), 82 | ('meituan', 1298), 83 | ('p2cdn', 1287), 84 | ('baidu', 1274), 85 | ('iqiyi', 1131), 86 | ('mobike', 1059), 87 | ('qlogo', 1016), 88 | ('biliapi', 955), 89 | ('acgvideo', 838), 90 | ('gstatic', 811), 91 | ('biliapi.net', 808), 92 | ('alicdn', 678), 93 | ('qy.net', 666), 94 | ('juejin.im', 650), 95 | ('com', 640), 96 | ('taobao', 610), 97 | ('huodongxing', 605), 98 | ('xycdn', 588), 99 | ('qunar', 583), 100 | ('jpush', 574), 101 | ('byteimg', 549), 102 | ('126.net', 515), 103 | ('bdurl.net', 482), 104 | ('xiaojukeji', 466), 105 | ('meituan.net', 
462), 106 | ('dianping', 448), 107 | ('netease', 447), 108 | ('oppomobile', 389), 109 | ('10010', 379), 110 | ('alipayobjects', 358), 111 | ('amemv', 331), 112 | ('umeng', 318), 113 | ('dida365', 280), 114 | ('xdrig', 248), 115 | ('zhimg', 243), 116 | ('csg', 238), 117 | ('oneplus', 235), 118 | ('bytecdn', 233), 119 | ('xitu.io', 221), 120 | ('pstatp', 178), 121 | ('thinkhard.tech', 174), 122 | ('bdstatic', 166), 123 | ('weathercn', 164), 124 | ('aliyuncs', 160), 125 | ('teddymobile', 157), 126 | ('iqiyipic', 144), 127 | ('didistatic', 133), 128 | ('gvt2', 131), 129 | ('coolapk', 128), 130 | ('qchannel03', 124), 131 | ('nocode', 119), 132 | ('biligame', 117), 133 | ('crashlytics', 115), 134 | ('doubleclick.net', 113), 135 | ('google-analytics', 110), 136 | ('imgur', 110), 137 | ('ele.me', 105), 138 | ('aocde', 92), 139 | ('jd', 89), 140 | ('v2ex', 89), 141 | ('app-measurement', 88), 142 | ('sogoucdn', 87), 143 | ('tencent', 82), 144 | ('uc', 76), 145 | ('qinengkeji', 73), 146 | ('com.hk', 69), 147 | ('dpfile', 68), 148 | ('getui', 67), 149 | ('udache', 67), 150 | ('just4fun.site', 65), 151 | ('qunarzz', 64), 152 | ('leancloud', 58), 153 | ('lehuipay', 58), 154 | ('sinaimg', 57), 155 | ('bcebos', 54), 156 | ('githubusercontent', 53), 157 | ('irs01', 52), 158 | ('gtimg', 52), 159 | ('vzuu', 52), 160 | ('cnzz', 50), 161 | ('aliapp.org', 50), 162 | ('meijuniao.net', 49), 163 | ('shuzilm', 46), 164 | ('shouqianba', 45), 165 | ('unpkg', 45), 166 | ('growingio', 44), 167 | ('cmpassport', 42), 168 | ('h2os', 42), 169 | ('qiyi', 41), 170 | ('gtimg', 41), 171 | ('bdimg', 40), 172 | ('tmall', 40), 173 | ('autonavi', 39), 174 | ('mob', 39), 175 | ('avoscloud', 36), 176 | ('sf-express', 35), 177 | ('mmstat', 35), 178 | ('googlesyndication', 34), 179 | ('qqmail', 33), 180 | ('douban', 33), 181 | ('huoxing24', 33), 182 | ('ez4q2', 32), 183 | ('bsgslb', 32), 184 | ('51y5.net', 32), 185 | ('12306', 31), 186 | ('appsflyer', 31), 187 | ('ifengimg', 31), 188 | ('no4book', 31), 189 | 
('googletagmanager', 29), 190 | ('jianshu', 28), 191 | ('shumafen', 28), 192 | ('facebook', 27), 193 | ('mydrivers', 27), 194 | ('xiaomi.net', 26), 195 | ('geekbang.org', 26), 196 | ('umengcloud', 25), 197 | ('myqcloud', 25), 198 | ('smzdm', 25), 199 | ('wjx.top', 24), 200 | ('github', 24), 201 | ('wm-motor', 24), 202 | ('didialift', 23), 203 | ('36kr', 23), 204 | ('codelab.club', 22), 205 | ('wezhuiyi', 22), 206 | ('ucweb', 22), 207 | ('toutiao', 22), 208 | ('cmfspay', 20), 209 | ('bootcss', 20), 210 | ('meizu', 19), 211 | ('toutiaocdn', 18), 212 | ('lmgtfy', 18), 213 | ('edu', 18), 214 | ('neixin', 17), 215 | ('xuebuyuan', 17), 216 | ('umsns', 17), 217 | ('elemecdn', 17), 218 | ('360buyimg', 17), 219 | ('feedblitz', 16), 220 | ('beijing-time.org', 15), 221 | ('servicewechat', 14), 222 | ('163yun', 14), 223 | ('cnblogs', 14), 224 | ('optimix.asia', 14), 225 | ('flyhand', 13), 226 | ('meiguanjia.net', 13), 227 | ('71edge', 13), 228 | ('luckincoffee', 13), 229 | ('xiumi.us', 12), 230 | ('githubassets', 12), 231 | ('qbox.me', 11), 232 | ('lejuhub', 11), 233 | ('csdnimg', 11), 234 | ('jomodns', 11), 235 | ('aliyun', 11), 236 | ('izatcloud.net', 11), 237 | ('qmlog', 11), 238 | ('myapp', 11), 239 | ('appinn.net', 11), 240 | ('weixinbridge', 10), 241 | ('1plus.io', 10), 242 | ('csdn.net', 10), 243 | ('qchannel01', 9), 244 | ('miaozhen', 9), 245 | ('weibo', 9), 246 | ('wangbase', 9), 247 | ('clmbtech', 9), 248 | ('youzan', 9), 249 | ('myoppo', 9), 250 | ('wojiazongguan', 9), 251 | ('360', 9), 252 | ('ifeng', 9), 253 | ('eqxiu', 8), 254 | ('openspeech', 8), 255 | ('127.net', 7), 256 | ('shimo.im', 7), 257 | ('typekit.net', 7), 258 | ('tanx', 7), 259 | ('url', 7), 260 | ('wm-imotor', 7), 261 | ('sharesmile', 7), 262 | ('leanplum', 7), 263 | ('idqqimg', 7), 264 | ('baidustatic', 7), 265 | ('iteye', 7), 266 | ('alibaba', 7), 267 | ('lkme.cc', 7), 268 | ('loli.net', 7), 269 | ('appjiagu', 7), 270 | ('chaihuo.org', 6), 271 | ('iprchn', 6), 272 | ('36krcnd', 6), 273 | ('ctrip', 
6), 274 | ('qunarcdn', 6), 275 | ('disqus', 5), 276 | ('cytcm', 5), 277 | ('kuyun88', 5), 278 | ('mozilla', 5), 279 | ('3', 5), 280 | ('googletagservices', 5), 281 | ('googleadsserving', 5), 282 | ('ruanyifeng', 5), 283 | ('smartont.net', 4), 284 | ('gosmarthome', 4), 285 | ('bshare', 4), 286 | ('qiniucdn', 4), 287 | ('1688', 4), 288 | ('mediav', 4), 289 | ('mxplay', 4), 290 | ('ixiguavideo', 4), 291 | ('t', 4), 292 | ('189', 4), 293 | ('openinstall.io', 4), 294 | ('lyhanda', 4), 295 | ('optimix', 4), 296 | ('j2inter', 3), 297 | ('tv1box', 3), 298 | ('utteranc.es', 3), 299 | ('sankuai', 3), 300 | ('hdxu', 3), 301 | ('tddmp', 3), 302 | ('miaomiaoz', 3), 303 | ('appinn', 3), 304 | ('zdmimg', 3), 305 | ('inmobi', 3), 306 | ('youtube', 3), 307 | ('qcloud', 3), 308 | ('wm-motor', 3), 309 | ('mathjax.org', 3), 310 | ('taboola', 3), 311 | ('smcdn', 3), 312 | ('xiaomi', 3), 313 | ('youzanyun', 3), 314 | ('igexin', 3), 315 | ('qcloudimg', 3), 316 | ('gvt1', 2), 317 | ('weiyun', 2), 318 | ('tuyacn', 2), 319 | ('gemius.pl', 2), 320 | ('geektutu', 2), 321 | ('fugetech', 2), 322 | ('fastapi.net', 2), 323 | ('v2ex.co', 2), 324 | ('david-smith.org', 2), 325 | ('wjx', 2), 326 | ('chengzijianzhan', 2), 327 | ('wosaimg', 2), 328 | ('wzrkt', 2), 329 | ('xf-yun', 2), 330 | ('youku', 2), 331 | ('yunfengdie', 2), 332 | ('baifubao', 2), 333 | ('sohu', 2), 334 | ('adnxs', 2), 335 | ('52ecy', 2), 336 | ('rokid', 2), 337 | ('miaomiaozhe', 2), 338 | ('mzstatic', 2), 339 | ('adsrvr.org', 2), 340 | ('oneplusmobile', 2), 341 | ('adget', 2), 342 | ('lncld.net', 2), 343 | ('qipus', 2), 344 | ('jinshuju.net', 2), 345 | ('reachmax', 2), 346 | ('ming.today', 2), 347 | ('sensorsdata', 1), 348 | ('boip.net', 1), 349 | ('pinduoduo', 1), 350 | ('wosai', 1), 351 | ('inmobi', 1), 352 | ('mathtag', 1), 353 | ('highcharts', 1), 354 | ('advertising', 1), 355 | ('impact-ad.jp', 1), 356 | ('netease.im', 1), 357 | ('callget', 1), 358 | ('bttrack', 1), 359 | ('adjust', 1), 360 | ('mfadsrvr', 1), 361 | ('lnk0', 
1), 362 | ('yahoo', 1), 363 | ('ydstatic', 1), 364 | ('snowballtech', 1), 365 | ('mh163k', 1), 366 | ('bidswitch.net', 1), 367 | ('svend.cc', 1), 368 | ('yqwyx.xyz', 1), 369 | ('mlinks.cc', 1), 370 | ('mozilla.org', 1), 371 | ('zenmxapps', 1), 372 | ('mozilla.net', 1), 373 | ('placeholder', 1), 374 | ('cloudflare', 1), 375 | ('scorecardresearch', 1), 376 | ('extraimage.net', 1), 377 | ('sc.gg', 1), 378 | ('ros.org', 1), 379 | ('guazi', 1), 380 | ('gio.ren', 1), 381 | ('testin', 1), 382 | ('geetest', 1), 383 | ('jiguang', 1), 384 | ('ugdtimg', 1), 385 | ('futunn', 1), 386 | ('jaeapp', 1), 387 | ('36krcdn', 1), 388 | ('duxiaoman', 1), 389 | ('shimodev', 1), 390 | ('akamaized.net', 1), 391 | ('jsdelivr.net', 1), 392 | ('dripemail2', 1), 393 | ('qnssl', 1), 394 | ('dingtalk', 1), 395 | ('aflink', 1), 396 | ('storygize.net', 1), 397 | ('kuaidadi', 1), 398 | ('shadowsocks.org', 1), 399 | ('qhres', 1), 400 | ('qhimg', 1), 401 | ('ictr', 1)] 402 | 403 | 404 | @C.funcs 405 | def pie_base() -> Pie: 406 | c = ( 407 | Pie() 408 | .add("", data) 409 | .set_global_opts(title_opts=opts.TitleOpts(title="")) 410 | .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}", font_size=18)) 411 | ) 412 | return c 413 | 414 | 415 | @C.funcs 416 | def bar_base() -> Bar: 417 | c = ( 418 | Bar(init_opts=opts.InitOpts(width="100%", height="800%")) 419 | .add_xaxis(xaxis_data=[x[0] for x in data]) 420 | .add_yaxis(series_name="所有公司排行", 421 | color='#59a2a7', 422 | yaxis_data=[x[1] for x in data]) 423 | .reversal_axis() 424 | .set_series_opts(label_opts=opts.LabelOpts(position="right")) 425 | 426 | ) 427 | return c 428 | 429 | 430 | @C.funcs 431 | def bar_base() -> Bar: 432 | c = ( 433 | Bar(init_opts=opts.InitOpts(width="100%", height="800%")) 434 | .add_xaxis(xaxis_data=[x[0] for x in data_night]) 435 | .add_yaxis(series_name="夜间活动排行", 436 | color='black', 437 | yaxis_data=[x[1] for x in data_night]) 438 | .reversal_axis() 439 | 
.set_series_opts(label_opts=opts.LabelOpts(position="right")) 440 | 441 | ) 442 | return c 443 | 444 | 445 | @C.funcs 446 | def wordcloud_base() -> WordCloud: 447 | c = ( 448 | WordCloud() 449 | .add("", data_cloud[1:], word_size_range=[12, 100], shape=SymbolType.DIAMOND) 450 | .set_global_opts(title_opts=opts.TitleOpts(title="词云")) 451 | ) 452 | return c 453 | 454 | 455 | data_oppo = [('epoch.cdo.oppomobile.com', 208), 456 | ('gslb.cdo.oppomobile.com', 38), 457 | ('istore.oppomobile.com', 38), 458 | ('opsapi.store.oppomobile.com', 34), 459 | ('api.cdo.oppomobile.com', 22), 460 | ('message.pull.oppomobile.com', 21), 461 | ('st.pull.oppomobile.com', 13), 462 | ('cdopic0.oppomobile.com', 9), 463 | ('newds01.myoppo.com', 9), 464 | ('httpdns.push.oppomobile.com', 4), 465 | ('conn1.oppomobile.com', 1), 466 | ('iopen.cdo.oppomobile.com', 1)] 467 | data_oppo.reverse() 468 | 469 | @C.funcs 470 | def bar_base() -> Bar: 471 | c = ( 472 | Bar(init_opts=opts.InitOpts(width="100%", height="800%")) 473 | .add_xaxis(xaxis_data=[x[0] for x in data_oppo]) 474 | .add_yaxis(series_name="oneplus and oppo", 475 | color='#009363', 476 | yaxis_data=[x[1] for x in data_oppo]) 477 | .reversal_axis() 478 | .set_series_opts(label_opts=opts.LabelOpts(position="right")) 479 | 480 | ) 481 | return c 482 | 483 | 484 | Page().add(*[fn() for fn, _ in C.charts]).render('./output/map_bar_company.html') 485 | -------------------------------------------------------------------------------- /chart_map_china.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from pyecharts import options as opts 5 | from pyecharts.charts import Geo, Page, Map, Pie 6 | from pyecharts.globals import ChartType, SymbolType 7 | 8 | 9 | data = [('广东', 54558), 10 | ('台湾', 49564), 11 | ('上海', 37677), 12 | ('北京', 14690), 13 | ('浙江', 9062), 14 | ('江苏', 4226), 15 | ('河北', 4216), 16 | ('天津', 4017), 17 | ('福建', 2358), 18 | ('香港', 1667), 19 | 
('山东', 1427), 20 | ('江西', 1119), 21 | ('湖南', 863), 22 | ('河南', 847), 23 | ('湖北', 377), 24 | ('黑龙江', 341), 25 | ('海南', 226), 26 | ('四川', 101), 27 | ('山西', 38), 28 | ('安徽', 36), 29 | ('辽宁', 34), 30 | ('陕西', 32), 31 | ('贵州', 28), 32 | ('吉林', 27), 33 | ('重庆', 1)] 34 | 35 | 36 | class Collector: 37 | charts = [] 38 | 39 | @staticmethod 40 | def funcs(fn): 41 | Collector.charts.append((fn, fn.__name__)) 42 | 43 | 44 | C = Collector() 45 | 46 | 47 | @C.funcs 48 | def map_world() -> Map: 49 | c = ( 50 | Map() 51 | .add("中国省份分布", data, "china") 52 | .set_global_opts( 53 | title_opts=opts.TitleOpts(title="Map-VisualMap(连续型)"), 54 | visualmap_opts=opts.VisualMapOpts(max_=54558), 55 | ) 56 | ) 57 | return c 58 | 59 | 60 | Page().add(*[fn() for fn, _ in C.charts]).render('./output/map_china_province.html') 61 | 62 | -------------------------------------------------------------------------------- /chart_map_country.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from pyecharts import options as opts 5 | from pyecharts.charts import Geo, Page, Map, Pie 6 | 7 | data = [('China', 188080), 8 | ('United States', 86471), 9 | ('Others', 4041), 10 | ('Ireland', 366), 11 | ('Sweden', 262), 12 | ('Chile', 253), 13 | ('Belgium', 243), 14 | ('Singapore', 101), 15 | ('Canada', 72), 16 | ('Germany', 68), 17 | ('India', 54), 18 | ('Japan', 44), 19 | ('New Zealand', 2), 20 | ('Poland', 2)] 21 | 22 | data_zh = [('中国', 188080), 23 | ('美国', 86471), 24 | ('未知', 4041), 25 | ('爱尔兰', 366), 26 | ('瑞典', 262), 27 | ('智利', 253), 28 | ('比利时', 243), 29 | ('新加坡', 101), 30 | ('加拿大', 72), 31 | ('德国', 68), 32 | ('印度', 54), 33 | ('日本', 44), 34 | ('新西兰', 2), 35 | ('波兰', 2)] 36 | 37 | 38 | class Collector: 39 | charts = [] 40 | 41 | @staticmethod 42 | def funcs(fn): 43 | Collector.charts.append((fn, fn.__name__)) 44 | 45 | 46 | C = Collector() 47 | 48 | 49 | @C.funcs 50 | def map_world() -> Map: 51 | c = ( 52 | Map() 53 | 
.add("全球分布", data, "world") 54 | .set_series_opts(label_opts=opts.LabelOpts(is_show=False)) 55 | .set_global_opts( 56 | title_opts=opts.TitleOpts(title="Map-世界地图"), 57 | visualmap_opts=opts.VisualMapOpts(max_=200000), 58 | ) 59 | ) 60 | return c 61 | 62 | 63 | @C.funcs 64 | def bar_base() -> Pie: 65 | c = ( 66 | Pie(init_opts=opts.InitOpts(width="1000px", height="900px")) 67 | .add( 68 | series_name="国家分布", 69 | data_pair=data_zh, 70 | radius=["50%", "70%"], 71 | label_opts=opts.LabelOpts(is_show=True, position="layoutCenter"), 72 | ) 73 | .set_global_opts(legend_opts=opts.LegendOpts(pos_left="legft", orient="vertical")) 74 | .set_series_opts( 75 | tooltip_opts=opts.TooltipOpts( 76 | trigger="item", formatter="{a}
{b}: {c} ({d}%)" 77 | ), 78 | label_opts=opts.LabelOpts(formatter="{b}: {c}", font_size=18) 79 | ) 80 | ) 81 | 82 | return c 83 | 84 | 85 | Page().add(*[fn() for fn, _ in C.charts]).render('./output/global_country.html') 86 | -------------------------------------------------------------------------------- /chart_pie_operator.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from pyecharts import options as opts 5 | from pyecharts.charts import Geo, Page, Map, Pie, Bar 6 | from pyecharts.globals import ChartType, SymbolType 7 | 8 | 9 | class Collector: 10 | charts = [] 11 | 12 | @staticmethod 13 | def funcs(fn): 14 | Collector.charts.append((fn, fn.__name__)) 15 | 16 | 17 | C = Collector() 18 | 19 | data = [('谷歌', 84301), 20 | ('电信', 76497), 21 | ('阿里巴巴', 6031), 22 | ('联通', 4318), 23 | ('未知', 1408), 24 | ('阿里云', 1003), 25 | ('移动', 606), 26 | ('脸书', 323), 27 | ('亚马逊', 222), 28 | ('内网IP', 210), 29 | ('阿卡迈', 22), 30 | ('教育网', 18), 31 | ('层峰网络', 7), 32 | ('香港宽频', 7), 33 | ('沃达丰', 2), 34 | ('Hurricane-Electric', 1)] 35 | 36 | 37 | @C.funcs 38 | def pie_base() -> Pie: 39 | c = ( 40 | Pie(init_opts=opts.InitOpts(width="53%", height="1100%")) 41 | .add("", data) 42 | .set_global_opts(title_opts=opts.TitleOpts(title="")) 43 | .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%", font_size=16)) 44 | ) 45 | return c 46 | 47 | 48 | Page().add(*[fn() for fn, _ in C.charts]).render('./output/map_pie_operator.html') 49 | -------------------------------------------------------------------------------- /chart_pie_ports.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from pyecharts import options as opts 5 | from pyecharts.charts import Geo, Page, Map, Pie, Bar 6 | from pyecharts.globals import ChartType, SymbolType 7 | 8 | 9 | class Collector: 10 | charts = [] 11 | 12 | @staticmethod 13 | def 
funcs(fn): 14 | Collector.charts.append((fn, fn.__name__)) 15 | 16 | 17 | C = Collector() 18 | 19 | data = [(443, 145842), 20 | (53, 37725), 21 | (80, 47873), 22 | (53, 12649), 23 | (8080, 11255), 24 | (5228, 5581), 25 | (8081, 1881), 26 | (9900, 1576), 27 | (39620, 1321), 28 | (9800, 1068), 29 | (7006, 959), 30 | ('other', 12329)] 31 | 32 | 33 | @C.funcs 34 | def pie_base() -> Pie: 35 | c = ( 36 | Pie() 37 | .add("", data) 38 | .set_global_opts(title_opts=opts.TitleOpts(title="")) 39 | .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%", font_size=16)) 40 | ) 41 | return c 42 | 43 | 44 | Page().add(*[fn() for fn, _ in C.charts]).render('./output/map_pie_ports.html') 45 | -------------------------------------------------------------------------------- /font/Muyao-Softbrush.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshub/tracking-my-phone/4d8e3d69416372e202601a4fd68026d4f7eb7074/font/Muyao-Softbrush.ttf -------------------------------------------------------------------------------- /geo/GeoLite2-City.mmdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshub/tracking-my-phone/4d8e3d69416372e202601a4fd68026d4f7eb7074/geo/GeoLite2-City.mmdb -------------------------------------------------------------------------------- /geo/GeoLite2-Country.mmdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshub/tracking-my-phone/4d8e3d69416372e202601a4fd68026d4f7eb7074/geo/GeoLite2-Country.mmdb -------------------------------------------------------------------------------- /geo/ip2region.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangshub/tracking-my-phone/4d8e3d69416372e202601a4fd68026d4f7eb7074/geo/ip2region.db 
# --------------------------------------------------------------------------------
# /ip2Region.py:
# --------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
"""
ip2region python searcher client module.

Author: koma
Date  : 2015-11-06
"""
import io
import socket
import struct
import sys


class Ip2Region(object):
    """Look up region records for IPv4 addresses in an ip2region ``.db`` file.

    Three search strategies are offered (``memorySearch``, ``binarySearch``
    and ``btreeSearch``); each returns the dict built by ``returnData``.
    Call ``close`` when the searcher is no longer needed.
    """

    # Size in bytes of one index entry: start ip, end ip, data pointer.
    __INDEX_BLOCK_LENGTH = 12
    # Size in bytes of the header area that follows the 8-byte super block.
    __TOTAL_HEADER_LENGTH = 8192

    def __init__(self, dbfile):
        """Open ``dbfile`` and prepare empty per-instance caches.

        The caches live on the instance (not the class) so that two
        searchers opened on different files never share header data.
        """
        self.__f = None
        self.__headerSip = []
        self.__headerPtr = []
        self.__headerLen = 0
        self.__indexSPtr = 0
        self.__indexLPtr = 0
        self.__indexCount = 0
        self.__dbBinStr = b''
        self.initDatabase(dbfile)

    def _toLong(self, ip):
        """Normalize ``ip`` (int, decimal string or dotted quad) to an int."""
        if isinstance(ip, int):
            return ip
        if ip.isdigit():
            # A purely numeric string is already the integer form of the
            # address; convert it so it can be compared with index values.
            return int(ip)
        return self.ip2long(ip)

    def _searchBlocks(self, ip, buf, base, high):
        """Binary-search index entries held in ``buf`` for ``ip``.

        Entries are ``__INDEX_BLOCK_LENGTH`` bytes each, starting at byte
        offset ``base``; ``high`` is the largest entry number to probe.
        Returns the matching entry's data pointer, or 0 when no entry's
        [start, end] range contains ``ip``.
        """
        low = 0
        while low <= high:
            mid = (low + high) >> 1
            pos = base + mid * self.__INDEX_BLOCK_LENGTH
            if ip < self.getLong(buf, pos):
                high = mid - 1
            elif ip > self.getLong(buf, pos + 4):
                low = mid + 1
            else:
                return self.getLong(buf, pos + 8)
        return 0

    def memorySearch(self, ip):
        """Search with the whole database cached in memory.

        param: ip -- dotted-quad string, decimal string or int
        Raises ``Exception`` when no index entry covers ``ip``.
        """
        ip = self._toLong(ip)

        if not self.__dbBinStr:
            # Rewind first: another search method may have moved the file
            # offset, and read() only returns what follows that offset.
            self.__f.seek(0)
            self.__dbBinStr = self.__f.read()
            self.__indexSPtr = self.getLong(self.__dbBinStr, 0)
            self.__indexLPtr = self.getLong(self.__dbBinStr, 4)
            self.__indexCount = (
                (self.__indexLPtr - self.__indexSPtr)
                // self.__INDEX_BLOCK_LENGTH + 1
            )

        dataPtr = self._searchBlocks(
            ip, self.__dbBinStr, self.__indexSPtr, self.__indexCount
        )
        if dataPtr == 0:
            raise Exception("Data pointer not found")

        return self.returnData(dataPtr)

    def binarySearch(self, ip):
        """Binary-search the on-disk index, reading one entry per probe.

        param: ip -- dotted-quad string, decimal string or int
        Raises ``Exception`` when no index entry covers ``ip``.
        """
        ip = self._toLong(ip)

        if self.__indexCount == 0:
            # The super block holds the first and last index pointers.
            self.__f.seek(0)
            superBlock = self.__f.read(8)
            self.__indexSPtr = self.getLong(superBlock, 0)
            self.__indexLPtr = self.getLong(superBlock, 4)
            self.__indexCount = (
                (self.__indexLPtr - self.__indexSPtr)
                // self.__INDEX_BLOCK_LENGTH + 1
            )

        l, h, dataPtr = (0, self.__indexCount, 0)
        while l <= h:
            m = (l + h) >> 1
            p = m * self.__INDEX_BLOCK_LENGTH

            self.__f.seek(self.__indexSPtr + p)
            buffer = self.__f.read(self.__INDEX_BLOCK_LENGTH)
            sip = self.getLong(buffer, 0)
            if ip < sip:
                h = m - 1
            else:
                eip = self.getLong(buffer, 4)
                if ip > eip:
                    l = m + 1
                else:
                    dataPtr = self.getLong(buffer, 8)
                    break

        if dataPtr == 0:
            raise Exception("Data pointer not found")

        return self.returnData(dataPtr)

    def btreeSearch(self, ip):
        """Two-stage search: header table first, then one index partition.

        param: ip -- dotted-quad string, decimal string or int
        Raises ``Exception`` when the header or the index has no entry
        covering ``ip``.
        """
        ip = self._toLong(ip)

        if len(self.__headerSip) < 1:
            # Skip the 8-byte super block and read the header area.
            self.__f.seek(8)
            b = self.__f.read(self.__TOTAL_HEADER_LENGTH)
            # Each header entry is 8 bytes: start ip + index pointer.
            # A zero pointer marks the end of the used entries.
            headerSip, headerPtr = [], []
            for i in range(0, len(b), 8):
                sip = self.getLong(b, i)
                ptr = self.getLong(b, i + 4)
                if ptr == 0:
                    break
                headerSip.append(sip)
                headerPtr.append(ptr)
            self.__headerSip = headerSip
            self.__headerPtr = headerPtr
            self.__headerLen = len(headerSip)

        if self.__headerLen == 0:
            # Nothing to search; fail with the same error as a miss instead
            # of an IndexError from the empty header lists.
            raise Exception("Index pointer not found")

        # Stage 1: find the pair of header pointers bracketing ``ip``.
        l, h, sptr, eptr = (0, self.__headerLen, 0, 0)
        while l <= h:
            m = (l + h) >> 1

            if ip == self.__headerSip[m]:
                if m > 0:
                    sptr = self.__headerPtr[m - 1]
                    eptr = self.__headerPtr[m]
                else:
                    sptr = self.__headerPtr[m]
                    eptr = self.__headerPtr[m + 1]
                break

            if ip < self.__headerSip[m]:
                if m == 0:
                    sptr = self.__headerPtr[m]
                    eptr = self.__headerPtr[m + 1]
                    break
                elif ip > self.__headerSip[m - 1]:
                    sptr = self.__headerPtr[m - 1]
                    eptr = self.__headerPtr[m]
                    break
                h = m - 1
            else:
                if m == self.__headerLen - 1:
                    sptr = self.__headerPtr[m - 1]
                    eptr = self.__headerPtr[m]
                    break
                elif ip <= self.__headerSip[m + 1]:
                    sptr = self.__headerPtr[m]
                    eptr = self.__headerPtr[m + 1]
                    break
                l = m + 1

        if sptr == 0:
            raise Exception("Index pointer not found")

        # Stage 2: load that index partition (including the entry that
        # starts at eptr) and binary-search it in memory.
        indexLen = eptr - sptr
        self.__f.seek(sptr)
        index = self.__f.read(indexLen + self.__INDEX_BLOCK_LENGTH)

        dataPtr = self._searchBlocks(
            ip, index, 0, indexLen // self.__INDEX_BLOCK_LENGTH
        )
        if dataPtr == 0:
            raise Exception("Data pointer not found")

        return self.returnData(dataPtr)

    def initDatabase(self, dbfile):
        """Open the database file for binary reading.

        param: dbfile -- path of the ip2region database
        On ``IOError`` the error is printed and the process exits via
        ``sys.exit()`` (behavior kept for existing callers).
        """
        try:
            self.__f = io.open(dbfile, "rb")
        except IOError as e:
            print("[Error]: %s" % e)
            sys.exit()

    def returnData(self, dataPtr):
        """Read the record addressed by a packed data pointer.

        param: dataPtr -- high 8 bits are the record length, low 24 bits
                          are the record's file offset
        Returns ``{"city_id": int, "region": bytes}`` where ``region`` is
        everything after the record's first 4 bytes.
        """
        dataLen = (dataPtr >> 24) & 0xFF
        dataPtr = dataPtr & 0x00FFFFFF

        self.__f.seek(dataPtr)
        data = self.__f.read(dataLen)

        return {
            "city_id": self.getLong(data, 0),
            "region": data[4:]
        }

    def ip2long(self, ip):
        """Convert a dotted-quad IPv4 string to its unsigned 32-bit value."""
        _ip = socket.inet_aton(ip)
        return struct.unpack("!L", _ip)[0]

    def isip(self, ip):
        """Return True when ``ip`` is four dot-separated numbers in 0..255."""
        p = ip.split(".")

        if len(p) != 4:
            return False
        for pp in p:
            if not pp.isdigit():
                return False
            if len(pp) > 3:
                return False
            if int(pp) > 255:
                return False

        return True

    def getLong(self, b, offset):
        """Read an unsigned 32-bit integer from ``b`` at ``offset``.

        Returns 0 when fewer than 4 bytes are available there. The byte
        order is pinned to little-endian: the previous native-order format
        ('I') only decoded the database correctly on little-endian hosts.
        """
        chunk = b[offset:offset + 4]
        if len(chunk) == 4:
            return struct.unpack('<I', chunk)[0]
        return 0

    def close(self):
        """Close the database file and drop the cached data."""
        if self.__f is not None:
            self.__f.close()

        self.__dbBinStr = None
        self.__headerPtr = None
        self.__headerSip = None


# --------------------------------------------------------------------------------
# /ipgeo.py:
# --------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
from ip2Region import Ip2Region
import socket
import time


def domain_to_ip(domain):
    """Resolve ``domain`` to an IPv4 address string via the system resolver."""
    return socket.gethostbyname(domain)


class IpGeo:
    """Resolve hosts and map them to location fields using ``Ip2Region``."""

    def __init__(self, db_file):
        """Open the ip2region database stored at ``db_file``."""
        self.searcher = Ip2Region(db_file)

    def find(self, ip):
        """Return location details for an IPv4 address or a host name.

        The result dict carries the keys ``ip``, ``city_id``, ``country``,
        ``province``, ``city`` and ``operator``. Lookups are best-effort:
        on any failure the error is printed and whatever was collected so
        far (possibly an empty dict) is returned.
        """
        result = {}
        try:
            if not self.searcher.isip(ip):
                ip = domain_to_ip(ip)

            data = self.searcher.btreeSearch(ip)
            # The region payload is a '|'-separated record; fields 0, 2, 3
            # and 4 are used below (field 1 is not read here).
            loc = data["region"].decode('utf-8').split('|')
            result['ip'] = ip
            result['city_id'] = data["city_id"]
            result['country'] = loc[0]
            result['province'] = loc[2]
            result['city'] = loc[3]
            result['operator'] = loc[4]
        except Exception as err:
            # Deliberately broad: one unresolvable host must not abort a
            # batch run, so report it and carry on.
            print(err, ip)
        return result

    def close(self):
        """Release the underlying database file."""
        self.searcher.close()


if __name__ == '__main__':
    ipgeo = IpGeo('./geo/ip2region.db')
    result = ipgeo.find('www.baidu.com')
    print(result)
    ipgeo.close()

# --------------------------------------------------------------------------------
# /trim_data.py:
# --------------------------------------------------------------------------------
import pandas as pd
from ipgeo import IpGeo


def read_log_file(fname, out_csv, db_file='./geo/ip2region.db'):
    """Extract "connect to" requests from a log and save them as CSV.

    Each matching line is split on single spaces; fields 1, 2 and 7 are
    taken as day, time and ``host:port``. Every host is geo-located with
    ``IpGeo`` and the combined table is written to ``out_csv``.

    param: fname   -- path of the raw text log
    param: out_csv -- path of the CSV file to write
    param: db_file -- path of the ip2region database used for lookups
    """
    str_flag = 'connect to'
    print('reading raw text {}'.format(fname))
    with open(fname, 'r') as fp:
        content = [line.strip() for line in fp]

    filterd_lines = [line for line in content if str_flag in line]
    print('found {} connect to requests in total'.format(len(filterd_lines)))
    filterd_lines = [line.split(' ') for line in filterd_lines]

    list_day = [fields[1] for fields in filterd_lines]
    list_time = [fields[2] for fields in filterd_lines]
    list_url = [fields[7] for fields in filterd_lines]

    list_domain = []
    list_port = []
    for url in list_url:
        parts = url.split(':')
        list_domain.append(parts[0])
        # A target without an explicit port yields an empty CSV cell
        # instead of aborting the whole run with an IndexError.
        list_port.append(parts[1] if len(parts) > 1 else None)

    ipgeo = IpGeo(db_file)
    try:
        # Logs repeat the same hosts many times; resolve and locate each
        # distinct host once and reuse the answer.
        lookup_cache = {}
        list_ipgeo = []
        for domain in list_domain:
            if domain not in lookup_cache:
                lookup_cache[domain] = ipgeo.find(domain)
            list_ipgeo.append(lookup_cache[domain])
    finally:
        # Always release the database file handle.
        ipgeo.close()

    list_ip4 = [geo.get('ip') for geo in list_ipgeo]
    list_city_id = [geo.get('city_id') for geo in list_ipgeo]
    list_country = [geo.get('country') for geo in list_ipgeo]
    list_province = [geo.get('province') for geo in list_ipgeo]
    list_city = [geo.get('city') for geo in list_ipgeo]
    list_operator = [geo.get('operator') for geo in list_ipgeo]

    df = pd.DataFrame({
        'day': list_day,
        'time': list_time,
        'domain': list_domain,
        'ip': list_ip4,
        'port': list_port,
        'city_id': list_city_id,
        'country': list_country,
        'province': list_province,
        'city': list_city,
        'operator': list_operator
    })
    df.to_csv(out_csv, index=False)
    print('saved to {}'.format(out_csv))


if __name__ == '__main__':
    log_file = './data/logs.txt'
    out_csv = './data/logs.csv'
    read_log_file(log_file, out_csv)

# --------------------------------------------------------------------------------