├── .gitignore ├── LICENSE ├── Pipfile ├── Pipfile.lock ├── README.md ├── doc └── 说明.md ├── proxy_pool ├── config.ini.default ├── data │ ├── squid.conf │ └── squid.conf.default ├── manager.py ├── proxy_pool │ ├── __init__.py │ ├── items.py │ ├── middlewares.py │ ├── models.py │ ├── pipelines.py │ ├── script │ │ ├── __init__.py │ │ ├── check_proxy.py │ │ └── sync_squid.py │ ├── settings.py │ ├── spiders │ │ ├── __init__.py │ │ ├── ihuan.py │ │ ├── ip66.py │ │ ├── kuaidaili.py │ │ └── xici.py │ └── utils.py └── scrapy.cfg └── readme └── 1.png /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | proxy.db 107 | test.py 108 | config.ini -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.mirrors.ustc.edu.cn/simple/" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | scrapy = "*" 8 | arrow = "*" 9 | click = "*" 10 | requests = ">=2.20.0" 11 | sqlalchemy = ">=1.3.0" 12 | pymysql = "*" 13 | ipython = "*" 14 | 15 | [dev-packages] 16 | yapf = "*" 17 | pylint = "*" 18 | 19 | [requires] 20 | python_version = "3.6" 21 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "69c99def4e3846a06bda5343092c6022f28742388b63caafd8f3926fb7e173f3" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.6" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.mirrors.ustc.edu.cn/simple/", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "appnope": { 20 | "hashes": [ 21 | "sha256:5b26757dc6f79a3b7dc9fab95359328d5747fcb2409d331ea66d0272b90ab2a0", 22 | "sha256:8b995ffe925347a2138d7ac0fe77155e4311a0ea6d6da4f5128fe4b3cbe5ed71" 23 | ], 24 | "markers": "sys_platform == 'darwin'", 25 | "version": "==0.1.0" 26 | }, 27 | "arrow": { 28 | "hashes": [ 29 | "sha256:a558d3b7b6ce7ffc74206a86c147052de23d3d4ef0e17c210dd478c53575c4cd" 30 | ], 31 | "index": "pypi", 32 | "version": "==0.12.1" 33 | }, 34 | "asn1crypto": { 35 | "hashes": [ 36 | "sha256:2f1adbb7546ed199e3c90ef23ec95c5cf3585bac7d11fb7eb562a3fe89c64e87", 37 | "sha256:9d5c20441baf0cb60a4ac34cc447c6c189024b6b4c6cd7877034f4965c464e49" 38 | ], 39 | "version": "==0.24.0" 40 | }, 41 | "attrs": { 42 | "hashes": [ 43 | "sha256:10cbf6e27dbce8c30807caf056c8eb50917e0eaafe86347671b57254006c3e69", 44 | "sha256:ca4be454458f9dec299268d472aaa5a11f67a4ff70093396e1ceae9c76cf4bbb" 45 | ], 46 | "version": "==18.2.0" 47 | }, 48 | "automat": { 49 | "hashes": [ 50 | "sha256:cbd78b83fa2d81fe2a4d23d258e1661dd7493c9a50ee2f1a5b2cac61c1793b0e", 51 | "sha256:fdccab66b68498af9ecfa1fa43693abe546014dd25cf28543cbe9d1334916a58" 52 | ], 53 | "version": "==0.7.0" 54 | }, 55 | "backcall": { 56 | "hashes": [ 57 | "sha256:38ecd85be2c1e78f77fd91700c76e14667dc21e2713b63876c0eb901196e01e4", 58 | "sha256:bbbf4b1e5cd2bdb08f915895b51081c041bac22394fdfcfdfbe9f14b77c08bf2" 59 | ], 60 | "version": "==0.1.0" 61 | }, 62 | "certifi": { 63 | "hashes": [ 64 | "sha256:376690d6f16d32f9d1fe8932551d80b23e9d393a8578c5633a2ed39a64861638", 65 | "sha256:456048c7e371c089d0a77a5212fb37a2c2dce1e24146e3b7e0261736aaeaa22a" 66 | ], 67 | "version": "==2018.8.24" 68 | }, 69 | "cffi": { 70 | "hashes": [ 71 | "sha256:151b7eefd035c56b2b2e1eb9963c90c6302dc15fbd8c1c0a83a163ff2c7d7743", 72 | "sha256:1553d1e99f035ace1c0544050622b7bc963374a00c467edafac50ad7bd276aef", 73 | "sha256:1b0493c091a1898f1136e3f4f991a784437fac3673780ff9de3bcf46c80b6b50", 74 | "sha256:2ba8a45822b7aee805ab49abfe7eec16b90587f7f26df20c71dd89e45a97076f", 75 | "sha256:3bb6bd7266598f318063e584378b8e27c67de998a43362e8fce664c54ee52d30", 76 | "sha256:3c85641778460581c42924384f5e68076d724ceac0f267d66c757f7535069c93", 77 | "sha256:3eb6434197633b7748cea30bf0ba9f66727cdce45117a712b29a443943733257", 78 | "sha256:495c5c2d43bf6cebe0178eb3e88f9c4aa48d8934aa6e3cddb865c058da76756b", 79 | "sha256:4c91af6e967c2015729d3e69c2e51d92f9898c330d6a851bf8f121236f3defd3", 80 | "sha256:57b2533356cb2d8fac1555815929f7f5f14d68ac77b085d2326b571310f34f6e", 81 | "sha256:770f3782b31f50b68627e22f91cb182c48c47c02eb405fd689472aa7b7aa16dc", 82 | "sha256:79f9b6f7c46ae1f8ded75f68cf8ad50e5729ed4d590c74840471fc2823457d04", 83 | "sha256:7a33145e04d44ce95bcd71e522b478d282ad0eafaf34fe1ec5bbd73e662f22b6", 84 | "sha256:857959354ae3a6fa3da6651b966d13b0a8bed6bbc87a0de7b38a549db1d2a359", 85 | "sha256:87f37fe5130574ff76c17cab61e7d2538a16f843bb7bca8ebbc4b12de3078596", 86 | "sha256:95d5251e4b5ca00061f9d9f3d6fe537247e145a8524ae9fd30a2f8fbce993b5b", 87 | "sha256:9d1d3e63a4afdc29bd76ce6aa9d58c771cd1599fbba8cf5057e7860b203710dd", 88 | "sha256:a36c5c154f9d42ec176e6e620cb0dd275744aa1d804786a71ac37dc3661a5e95", 89 | "sha256:a6a5cb8809091ec9ac03edde9304b3ad82ad4466333432b16d78ef40e0cce0d5", 90 | "sha256:ae5e35a2c189d397b91034642cb0eab0e346f776ec2eb44a49a459e6615d6e2e", 91 | "sha256:b0f7d4a3df8f06cf49f9f121bead236e328074de6449866515cea4907bbc63d6", 92 | "sha256:b75110fb114fa366b29a027d0c9be3709579602ae111ff61674d28c93606acca", 93 | "sha256:ba5e697569f84b13640c9e193170e89c13c6244c24400fc57e88724ef610cd31", 94 | "sha256:be2a9b390f77fd7676d80bc3cdc4f8edb940d8c198ed2d8c0be1319018c778e1", 95 | "sha256:ca1bd81f40adc59011f58159e4aa6445fc585a32bb8ac9badf7a2c1aa23822f2", 96 | "sha256:d5d8555d9bfc3f02385c1c37e9f998e2011f0db4f90e250e5bc0c0a85a813085", 97 | "sha256:e55e22ac0a30023426564b1059b035973ec82186ddddbac867078435801c7801", 98 | "sha256:e90f17980e6ab0f3c2f3730e56d1fe9bcba1891eeea58966e89d352492cc74f4", 99 | "sha256:ecbb7b01409e9b782df5ded849c178a0aa7c906cf8c5a67368047daab282b184", 100 | "sha256:ed01918d545a38998bfa5902c7c00e0fee90e957ce036a4000a88e3fe2264917", 101 | "sha256:edabd457cd23a02965166026fd9bfd196f4324fe6032e866d0f3bd0301cd486f", 102 | "sha256:fdf1c1dc5bafc32bc5d08b054f94d659422b05aba244d6be4ddc1c72d9aa70fb" 103 | ], 104 | "version": "==1.11.5" 105 | }, 106 | "chardet": { 107 | "hashes": [ 108 | "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", 109 | "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" 110 | ], 111 | "version": "==3.0.4" 112 | }, 113 | "click": { 114 | "hashes": [ 115 | "sha256:29f99fc6125fbc931b758dc053b3114e55c77a6e4c6c3a2674a2dc986016381d", 116 | "sha256:f15516df478d5a56180fbf80e68f206010e6d160fc39fa508b65e035fd75130b" 117 | ], 118 | "index": "pypi", 119 | "version": "==6.7" 120 | }, 121 | "constantly": { 122 | "hashes": [ 123 | "sha256:586372eb92059873e29eba4f9dec8381541b4d3834660707faf8ba59146dfc35", 124 | "sha256:dd2fa9d6b1a51a83f0d7dd76293d734046aa176e384bf6e33b7e44880eb37c5d" 125 | ], 126 | "version": "==15.1.0" 127 | }, 128 | "cryptography": { 129 | "hashes": [ 130 | "sha256:02602e1672b62e803e08617ec286041cc453e8d43f093a5f4162095506bc0beb", 131 | "sha256:10b48e848e1edb93c1d3b797c83c72b4c387ab0eb4330aaa26da8049a6cbede0", 132 | "sha256:17db09db9d7c5de130023657be42689d1a5f60502a14f6f745f6f65a6b8195c0", 133 | "sha256:227da3a896df1106b1a69b1e319dce218fa04395e8cc78be7e31ca94c21254bc", 134 | "sha256:2cbaa03ac677db6c821dac3f4cdfd1461a32d0615847eedbb0df54bb7802e1f7", 135 | "sha256:31db8febfc768e4b4bd826750a70c79c99ea423f4697d1dab764eb9f9f849519", 136 | "sha256:4a510d268e55e2e067715d728e4ca6cd26a8e9f1f3d174faf88e6f2cb6b6c395", 137 | "sha256:6a88d9004310a198c474d8a822ee96a6dd6c01efe66facdf17cb692512ae5bc0", 138 | "sha256:76936ec70a9b72eb8c58314c38c55a0336a2b36de0c7ee8fb874a4547cadbd39", 139 | "sha256:7e3b4aecc4040928efa8a7cdaf074e868af32c58ffc9bb77e7bf2c1a16783286", 140 | "sha256:8168bcb08403ef144ff1fb880d416f49e2728101d02aaadfe9645883222c0aa5", 141 | "sha256:8229ceb79a1792823d87779959184a1bf95768e9248c93ae9f97c7a2f60376a1", 142 | "sha256:8a19e9f2fe69f6a44a5c156968d9fc8df56d09798d0c6a34ccc373bb186cee86", 143 | "sha256:8d10113ca826a4c29d5b85b2c4e045ffa8bad74fb525ee0eceb1d38d4c70dfd6", 144 | "sha256:be495b8ec5a939a7605274b6e59fbc35e76f5ad814ae010eb679529671c9e119", 145 | "sha256:dc2d3f3b1548f4d11786616cf0f4415e25b0fbecb8a1d2cd8c07568f13fdde38", 146 | "sha256:e4aecdd9d5a3d06c337894c9a6e2961898d3f64fe54ca920a72234a3de0f9cb3", 147 | "sha256:e79ab4485b99eacb2166f3212218dd858258f374855e1568f728462b0e6ee0d9", 148 | "sha256:f995d3667301e1754c57b04e0bae6f0fa9d710697a9f8d6712e8cca02550910f" 149 | ], 150 | "version": "==2.3.1" 151 | }, 152 | "cssselect": { 153 | "hashes": [ 154 | "sha256:066d8bc5229af09617e24b3ca4d52f1f9092d9e061931f4184cd572885c23204", 155 | "sha256:3b5103e8789da9e936a68d993b70df732d06b8bb9a337a05ed4eb52c17ef7206" 156 | ], 157 | "markers": "python_version != '3.0.*' and python_version != '3.2.*' and python_version != '3.1.*' and python_version != '3.3.*' and python_version >= '2.7'", 158 | "version": "==1.0.3" 159 | }, 160 | "decorator": { 161 | "hashes": [ 162 | "sha256:2c51dff8ef3c447388fe5e4453d24a2bf128d3a4c32af3fabef1f01c6851ab82", 163 | "sha256:c39efa13fbdeb4506c476c9b3babf6a718da943dab7811c206005a4a956c080c" 164 | ], 165 | "version": "==4.3.0" 166 | }, 167 | "hyperlink": { 168 | "hashes": [ 169 | "sha256:98da4218a56b448c7ec7d2655cb339af1f7d751cf541469bb4fc28c4a4245b34", 170 | "sha256:f01b4ff744f14bc5d0a22a6b9f1525ab7d6312cb0ff967f59414bbac52f0a306" 171 | ], 172 | "version": "==18.0.0" 173 | }, 174 | "idna": { 175 | "hashes": [ 176 | "sha256:156a6814fb5ac1fc6850fb002e0852d56c0c8d2531923a51032d1b70760e186e", 177 | "sha256:684a38a6f903c1d71d6d5fac066b58d7768af4de2b832e426ec79c30daa94a16" 178 | ], 179 | "version": "==2.7" 180 | }, 181 | "incremental": { 182 | "hashes": [ 183 | "sha256:717e12246dddf231a349175f48d74d93e2897244939173b01974ab6661406b9f", 184 | "sha256:7b751696aaf36eebfab537e458929e194460051ccad279c72b755a167eebd4b3" 185 | ], 186 | "version": "==17.5.0" 187 | }, 188 | "ipython": { 189 | "hashes": [ 190 | "sha256:007dcd929c14631f83daff35df0147ea51d1af420da303fd078343878bd5fb62", 191 | "sha256:b0f2ef9eada4a68ef63ee10b6dde4f35c840035c50fd24265f8052c98947d5a4" 192 | ], 193 | "index": "pypi", 194 | "version": "==6.5.0" 195 | }, 196 | "ipython-genutils": { 197 | "hashes": [ 198 | "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8", 199 | "sha256:eb2e116e75ecef9d4d228fdc66af54269afa26ab4463042e33785b887c628ba8" 200 | ], 201 | "version": "==0.2.0" 202 | }, 203 | "jedi": { 204 | "hashes": [ 205 | "sha256:b409ed0f6913a701ed474a614a3bb46e6953639033e31f769ca7581da5bd1ec1", 206 | "sha256:c254b135fb39ad76e78d4d8f92765ebc9bf92cbc76f49e97ade1d5f5121e1f6f" 207 | ], 208 | "version": "==0.12.1" 209 | }, 210 | "lxml": { 211 | "hashes": [ 212 | "sha256:02bc220d61f46e9b9d5a53c361ef95e9f5e1d27171cd461dddb17677ae2289a5", 213 | "sha256:22f253b542a342755f6cfc047fe4d3a296515cf9b542bc6e261af45a80b8caf6", 214 | "sha256:2f31145c7ff665b330919bfa44aacd3a0211a76ca7e7b441039d2a0b0451e415", 215 | "sha256:36720698c29e7a9626a0dc802ef8885f8f0239bfd1689628ecd459a061f2807f", 216 | "sha256:438a1b0203545521f6616132bfe0f4bca86f8a401364008b30e2b26ec408ce85", 217 | "sha256:4815892904c336bbaf73dafd54f45f69f4021c22b5bad7332176bbf4fb830568", 218 | "sha256:5be031b0f15ad63910d8e5038b489d95a79929513b3634ad4babf77100602588", 219 | "sha256:5c93ae37c3c588e829b037fdfbd64a6e40c901d3f93f7beed6d724c44829a3ad", 220 | "sha256:60842230678674cdac4a1cf0f707ef12d75b9a4fc4a565add4f710b5fcf185d5", 221 | "sha256:62939a8bb6758d1bf923aa1c13f0bcfa9bf5b2fc0f5fa917a6e25db5fe0cfa4e", 222 | "sha256:75830c06a62fe7b8fe3bbb5f269f0b308f19f3949ac81cfd40062f47c1455faf", 223 | "sha256:81992565b74332c7c1aff6a913a3e906771aa81c9d0c68c68113cffcae45bc53", 224 | "sha256:8c892fb0ee52c594d9a7751c7d7356056a9682674b92cc1c4dc968ff0f30c52f", 225 | "sha256:9d862e3cf4fc1f2837dedce9c42269c8c76d027e49820a548ac89fdcee1e361f", 226 | "sha256:a623965c086a6e91bb703d4da62dabe59fe88888e82c4117d544e11fd74835d6", 227 | "sha256:a7783ab7f6a508b0510490cef9f857b763d796ba7476d9703f89722928d1e113", 228 | "sha256:aab09fbe8abfa3b9ce62aaf45aca2d28726b1b9ee44871dbe644050a2fff4940", 229 | "sha256:abf181934ac3ef193832fb973fd7f6149b5c531903c2ec0f1220941d73eee601", 230 | "sha256:ae07fa0c115733fce1e9da96a3ac3fa24801742ca17e917e0c79d63a01eeb843", 231 | "sha256:b9c78242219f674ab645ec571c9a95d70f381319a23911941cd2358a8e0521cf", 232 | "sha256:bccb267678b870d9782c3b44d0cefe3ba0e329f9af8c946d32bf3778e7a4f271", 233 | "sha256:c4df4d27f4c93b2cef74579f00b1d3a31a929c7d8023f870c4b476f03a274db4", 234 | "sha256:caf0e50b546bb60dfa99bb18dfa6748458a83131ecdceaf5c071d74907e7e78a", 235 | "sha256:d3266bd3ac59ac4edcd5fa75165dee80b94a3e5c91049df5f7c057ccf097551c", 236 | "sha256:db0d213987bcd4e6d41710fb4532b22315b0d8fb439ff901782234456556aed1", 237 | "sha256:dbbd5cf7690a40a9f0a9325ab480d0fccf46d16b378eefc08e195d84299bfae1", 238 | "sha256:e16e07a0ec3a75b5ee61f2b1003c35696738f937dc8148fbda9fe2147ccb6e61", 239 | "sha256:e175a006725c7faadbe69e791877d09936c0ef2cf49d01b60a6c1efcb0e8be6f", 240 | "sha256:edd9c13a97f6550f9da2236126bb51c092b3b1ce6187f2bd966533ad794bbb5e", 241 | "sha256:fa39ea60d527fbdd94215b5e5552f1c6a912624521093f1384a491a8ad89ad8b" 242 | ], 243 | "version": "==4.2.5" 244 | }, 245 | "parsel": { 246 | "hashes": [ 247 | "sha256:a4d581260eb845a762b9a354b0fc5e1c5c42df009dc8163c181097bd5314db58", 248 | "sha256:b24618fe81dce29d717aa8c4a9534c46e807dd6a5c8d5e1bb3b1fdb3fbd22b56" 249 | ], 250 | "markers": "python_version != '3.0.*' and python_version != '3.2.*' and python_version != '3.1.*' and python_version != '3.3.*' and python_version >= '2.7'", 251 | "version": "==1.5.0" 252 | }, 253 | "parso": { 254 | "hashes": [ 255 | "sha256:35704a43a3c113cce4de228ddb39aab374b8004f4f2407d070b6a2ca784ce8a2", 256 | "sha256:895c63e93b94ac1e1690f5fdd40b65f07c8171e3e53cbd7793b5b96c0e0a7f24" 257 | ], 258 | "version": "==0.3.1" 259 | }, 260 | "pexpect": { 261 | "hashes": [ 262 | "sha256:2a8e88259839571d1251d278476f3eec5db26deb73a70be5ed5dc5435e418aba", 263 | "sha256:3fbd41d4caf27fa4a377bfd16fef87271099463e6fa73e92a52f92dfee5d425b" 264 | ], 265 | "markers": "sys_platform != 'win32'", 266 | "version": "==4.6.0" 267 | }, 268 | "pickleshare": { 269 | "hashes": [ 270 | "sha256:84a9257227dfdd6fe1b4be1319096c20eb85ff1e82c7932f36efccfe1b09737b", 271 | "sha256:c9a2541f25aeabc070f12f452e1f2a8eae2abd51e1cd19e8430402bdf4c1d8b5" 272 | ], 273 | "version": "==0.7.4" 274 | }, 275 | "prompt-toolkit": { 276 | "hashes": [ 277 | "sha256:1df952620eccb399c53ebb359cc7d9a8d3a9538cb34c5a1344bdbeb29fbcc381", 278 | "sha256:3f473ae040ddaa52b52f97f6b4a493cfa9f5920c255a12dc56a7d34397a398a4", 279 | "sha256:858588f1983ca497f1cf4ffde01d978a3ea02b01c8a26a8bbc5cd2e66d816917" 280 | ], 281 | "version": "==1.0.15" 282 | }, 283 | "ptyprocess": { 284 | "hashes": [ 285 | "sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0", 286 | "sha256:d7cc528d76e76342423ca640335bd3633420dc1366f258cb31d05e865ef5ca1f" 287 | ], 288 | "version": "==0.6.0" 289 | }, 290 | "pyasn1": { 291 | "hashes": [ 292 | "sha256:b9d3abc5031e61927c82d4d96c1cec1e55676c1a991623cfed28faea73cdd7ca", 293 | "sha256:f58f2a3d12fd754aa123e9fa74fb7345333000a035f3921dbdaa08597aa53137" 294 | ], 295 | "version": "==0.4.4" 296 | }, 297 | "pyasn1-modules": { 298 | "hashes": [ 299 | "sha256:a0cf3e1842e7c60fde97cb22d275eb6f9524f5c5250489e292529de841417547", 300 | "sha256:a38a8811ea784c0136abfdba73963876328f66172db21a05a82f9515909bfb4e" 301 | ], 302 | "version": "==0.2.2" 303 | }, 304 | "pycparser": { 305 | "hashes": [ 306 | "sha256:a988718abfad80b6b157acce7bf130a30876d27603738ac39f140993246b25b3" 307 | ], 308 | "markers": "python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.1.*' and python_version != '3.2.*' and python_version >= '2.7'", 309 | "version": "==2.19" 310 | }, 311 | "pydispatcher": { 312 | "hashes": [ 313 | "sha256:5570069e1b1769af1fe481de6dd1d3a388492acddd2cdad7a3bde145615d5caf", 314 | "sha256:5be4a8be12805ef7d712dd9a93284fb8bc53f309867e573f653a72e5fd10e433" 315 | ], 316 | "version": "==2.0.5" 317 | }, 318 | "pygments": { 319 | "hashes": [ 320 | "sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d", 321 | "sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc" 322 | ], 323 | "version": "==2.2.0" 324 | }, 325 | "pyhamcrest": { 326 | "hashes": [ 327 | "sha256:6b672c02fdf7470df9674ab82263841ce8333fb143f32f021f6cb26f0e512420", 328 | "sha256:8ffaa0a53da57e89de14ced7185ac746227a8894dbd5a3c718bf05ddbd1d56cd" 329 | ], 330 | "version": "==1.9.0" 331 | }, 332 | "pymysql": { 333 | "hashes": [ 334 | "sha256:95f057328357e0e13a30e67857a8c694878b0175797a9a203ee7adbfb9b1ec5f", 335 | "sha256:9ec760cbb251c158c19d6c88c17ca00a8632bac713890e465b2be01fdc30713f" 336 | ], 337 | "index": "pypi", 338 | "version": "==0.9.2" 339 | }, 340 | "pyopenssl": { 341 | "hashes": [ 342 | "sha256:26ff56a6b5ecaf3a2a59f132681e2a80afcc76b4f902f612f518f92c2a1bf854", 343 | "sha256:6488f1423b00f73b7ad5167885312bb0ce410d3312eb212393795b53c8caa580" 344 | ], 345 | "version": "==18.0.0" 346 | }, 347 | "python-dateutil": { 348 | "hashes": [ 349 | "sha256:1adb80e7a782c12e52ef9a8182bebeb73f1d7e24e374397af06fb4956c8dc5c0", 350 | "sha256:e27001de32f627c22380a688bcc43ce83504a7bc5da472209b4c70f02829f0b8" 351 | ], 352 | "version": "==2.7.3" 353 | }, 354 | "queuelib": { 355 | "hashes": [ 356 | "sha256:42b413295551bdc24ed9376c1a2cd7d0b1b0fa4746b77b27ca2b797a276a1a17", 357 | "sha256:ff43b5b74b9266f8df4232a8f768dc4d67281a271905e2ed4a3689d4d304cd02" 358 | ], 359 | "version": "==1.5.0" 360 | }, 361 | "requests": { 362 | "hashes": [ 363 | "sha256:63b52e3c866428a224f97cab011de738c36aec0185aa91cfacd418b5d58911d1", 364 | "sha256:ec22d826a36ed72a7358ff3fe56cbd4ba69dd7a6718ffd450ff0e9df7a47ce6a" 365 | ], 366 | "index": "pypi", 367 | "version": ">=2.20.0" 368 | }, 369 | "scrapy": { 370 | "hashes": [ 371 | "sha256:5a398bf6818f87dcc817c919408a195f19ba46414ae12f259119336cfa862bb6", 372 | "sha256:5b9621731e26b0d195ca3e25ab34d559f45b0b906c0a0cc359199f1b6b612184" 373 | ], 374 | "index": "pypi", 375 | "version": "==1.5.1" 376 | }, 377 | "service-identity": { 378 | "hashes": [ 379 | "sha256:0e76f3c042cc0f5c7e6da002cf646f59dc4023962d1d1166343ce53bdad39e17", 380 | "sha256:4001fbb3da19e0df22c47a06d29681a398473af4aa9d745eca525b3b2c2302ab" 381 | ], 382 | "version": "==17.0.0" 383 | }, 384 | "simplegeneric": { 385 | "hashes": [ 386 | "sha256:dc972e06094b9af5b855b3df4a646395e43d1c9d0d39ed345b7393560d0b9173" 387 | ], 388 | "version": "==0.8.1" 389 | }, 390 | "six": { 391 | "hashes": [ 392 | "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9", 393 | "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb" 394 | ], 395 | "version": "==1.11.0" 396 | }, 397 | "sqlalchemy": { 398 | "hashes": [ 399 | "sha256:c5951d9ef1d5404ed04bae5a16b60a0779087378928f997a294d1229c6ca4d3e" 400 | ], 401 | "index": "pypi", 402 | "version": "==1.2.12" 403 | }, 404 | "traitlets": { 405 | "hashes": [ 406 | "sha256:9c4bd2d267b7153df9152698efb1050a5d84982d3384a37b2c1f7723ba3e7835", 407 | "sha256:c6cb5e6f57c5a9bdaa40fa71ce7b4af30298fbab9ece9815b5d995ab6217c7d9" 408 | ], 409 | "version": "==4.3.2" 410 | }, 411 | "twisted": { 412 | "hashes": [ 413 | "sha256:5de7b79b26aee64efe63319bb8f037af88c21287d1502b39706c818065b3d5a4", 414 | "sha256:95ae985716e8107816d8d0df249d558dbaabb677987cc2ace45272c166b267e4" 415 | ], 416 | "version": ">=19.2.1" 417 | }, 418 | "urllib3": { 419 | "hashes": [ 420 | "sha256:a68ac5e15e76e7e5dd2b8f94007233e01effe3e50e8daddf69acfd81cb686baf", 421 | "sha256:b5725a0bd4ba422ab0e66e89e030c806576753ea3ee08554382c14e685d117b5" 422 | ], 423 | "markers": "python_version < '4' and python_version != '3.0.*' and python_version != '3.3.*' and python_version != '3.2.*' and python_version != '3.1.*' and python_version >= '2.6'", 424 | "version": ">=1.24.2" 425 | }, 426 | "w3lib": { 427 | "hashes": [ 428 | "sha256:55994787e93b411c2d659068b51b9998d9d0c05e0df188e6daf8f45836e1ea38", 429 | "sha256:aaf7362464532b1036ab0092e2eee78e8fd7b56787baa9ed4967457b083d011b" 430 | ], 431 | "version": "==1.19.0" 432 | }, 433 | "wcwidth": { 434 | "hashes": [ 435 | "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e", 436 | "sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c" 437 | ], 438 | "version": "==0.1.7" 439 | }, 440 | "zope.interface": { 441 | "hashes": [ 442 | "sha256:21506674d30c009271fe68a242d330c83b1b9d76d62d03d87e1e9528c61beea6", 443 | "sha256:3d184aff0756c44fff7de69eb4cd5b5311b6f452d4de28cb08343b3f21993763", 444 | "sha256:467d364b24cb398f76ad5e90398d71b9325eb4232be9e8a50d6a3b3c7a1c8789", 445 | "sha256:57c38470d9f57e37afb460c399eb254e7193ac7fb8042bd09bdc001981a9c74c", 446 | "sha256:9ada83f4384bbb12dedc152bcdd46a3ac9f5f7720d43ac3ce3e8e8b91d733c10", 447 | "sha256:a1daf9c5120f3cc6f2b5fef8e1d2a3fb7bbbb20ed4bfdc25bc8364bc62dcf54b", 448 | "sha256:e6b77ae84f2b8502d99a7855fa33334a1eb6159de45626905cb3e454c023f339", 449 | "sha256:e881ef610ff48aece2f4ee2af03d2db1a146dc7c705561bd6089b2356f61641f", 450 | "sha256:f41037260deaacb875db250021fe883bf536bf6414a4fd25b25059b02e31b120" 451 | ], 452 | "markers": "python_version != '3.0.*' and python_version != '3.2.*' and python_version != '3.1.*' and python_version != '3.3.*' and python_version >= '2.7'", 453 | "version": "==4.5.0" 454 | } 455 | }, 456 | "develop": { 457 | "astroid": { 458 | "hashes": [ 459 | "sha256:292fa429e69d60e4161e7612cb7cc8fa3609e2e309f80c224d93a76d5e7b58be", 460 | "sha256:c7013d119ec95eb626f7a2011f0b63d0c9a095df9ad06d8507b37084eada1a8d" 461 | ], 462 | "version": "==2.0.4" 463 | }, 464 | "isort": { 465 | "hashes": [ 466 | "sha256:1153601da39a25b14ddc54955dbbacbb6b2d19135386699e2ad58517953b34af", 467 | "sha256:b9c40e9750f3d77e6e4d441d8b0266cf555e7cdabdcff33c4fd06366ca761ef8", 468 | "sha256:ec9ef8f4a9bc6f71eec99e1806bfa2de401650d996c59330782b89a5555c1497" 469 | ], 470 | "markers": "python_version != '3.1.*' and python_version != '3.2.*' and python_version != '3.0.*' and python_version >= '2.7' and python_version != '3.3.*'", 471 | "version": "==4.3.4" 472 | }, 473 | "lazy-object-proxy": { 474 | "hashes": [ 475 | "sha256:0ce34342b419bd8f018e6666bfef729aec3edf62345a53b537a4dcc115746a33", 476 | "sha256:1b668120716eb7ee21d8a38815e5eb3bb8211117d9a90b0f8e21722c0758cc39", 477 | "sha256:209615b0fe4624d79e50220ce3310ca1a9445fd8e6d3572a896e7f9146bbf019", 478 | "sha256:27bf62cb2b1a2068d443ff7097ee33393f8483b570b475db8ebf7e1cba64f088", 479 | "sha256:27ea6fd1c02dcc78172a82fc37fcc0992a94e4cecf53cb6d73f11749825bd98b", 480 | "sha256:2c1b21b44ac9beb0fc848d3993924147ba45c4ebc24be19825e57aabbe74a99e", 481 | "sha256:2df72ab12046a3496a92476020a1a0abf78b2a7db9ff4dc2036b8dd980203ae6", 482 | "sha256:320ffd3de9699d3892048baee45ebfbbf9388a7d65d832d7e580243ade426d2b", 483 | "sha256:50e3b9a464d5d08cc5227413db0d1c4707b6172e4d4d915c1c70e4de0bbff1f5", 484 | "sha256:5276db7ff62bb7b52f77f1f51ed58850e315154249aceb42e7f4c611f0f847ff", 485 | "sha256:61a6cf00dcb1a7f0c773ed4acc509cb636af2d6337a08f362413c76b2b47a8dd", 486 | "sha256:6ae6c4cb59f199d8827c5a07546b2ab7e85d262acaccaacd49b62f53f7c456f7", 487 | "sha256:7661d401d60d8bf15bb5da39e4dd72f5d764c5aff5a86ef52a042506e3e970ff", 488 | "sha256:7bd527f36a605c914efca5d3d014170b2cb184723e423d26b1fb2fd9108e264d", 489 | "sha256:7cb54db3535c8686ea12e9535eb087d32421184eacc6939ef15ef50f83a5e7e2", 490 | "sha256:7f3a2d740291f7f2c111d86a1c4851b70fb000a6c8883a59660d95ad57b9df35", 491 | "sha256:81304b7d8e9c824d058087dcb89144842c8e0dea6d281c031f59f0acf66963d4", 492 | "sha256:933947e8b4fbe617a51528b09851685138b49d511af0b6c0da2539115d6d4514", 493 | "sha256:94223d7f060301b3a8c09c9b3bc3294b56b2188e7d8179c762a1cda72c979252", 494 | "sha256:ab3ca49afcb47058393b0122428358d2fbe0408cf99f1b58b295cfeb4ed39109", 495 | "sha256:bd6292f565ca46dee4e737ebcc20742e3b5be2b01556dafe169f6c65d088875f", 496 | "sha256:cb924aa3e4a3fb644d0c463cad5bc2572649a6a3f68a7f8e4fbe44aaa6d77e4c", 497 | "sha256:d0fc7a286feac9077ec52a927fc9fe8fe2fabab95426722be4c953c9a8bede92", 498 | "sha256:ddc34786490a6e4ec0a855d401034cbd1242ef186c20d79d2166d6a4bd449577", 499 | "sha256:e34b155e36fa9da7e1b7c738ed7767fc9491a62ec6af70fe9da4a057759edc2d", 500 | "sha256:e5b9e8f6bda48460b7b143c3821b21b452cb3a835e6bbd5dd33aa0c8d3f5137d", 501 | "sha256:e81ebf6c5ee9684be8f2c87563880f93eedd56dd2b6146d8a725b50b7e5adb0f", 502 | "sha256:eb91be369f945f10d3a49f5f9be8b3d0b93a4c2be8f8a5b83b0571b8123e0a7a", 503 | "sha256:f460d1ceb0e4a5dcb2a652db0904224f367c9b3c1470d5a7683c0480e582468b" 504 | ], 505 | "version": "==1.3.1" 506 | }, 507 | "mccabe": { 508 | "hashes": [ 509 | "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", 510 | "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" 511 | ], 512 | "version": "==0.6.1" 513 | }, 514 | "pylint": { 515 | "hashes": [ 516 | "sha256:1d6d3622c94b4887115fe5204982eee66fdd8a951cf98635ee5caee6ec98c3ec", 517 | "sha256:31142f764d2a7cd41df5196f9933b12b7ee55e73ef12204b648ad7e556c119fb" 518 | ], 519 | "index": "pypi", 520 | "version": "==2.1.1" 521 | }, 522 | "six": { 523 | "hashes": [ 524 | "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9", 525 | "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb" 526 | ], 527 | "version": "==1.11.0" 528 | }, 529 | "typed-ast": { 530 | "hashes": [ 531 | "sha256:0948004fa228ae071054f5208840a1e88747a357ec1101c17217bfe99b299d58", 532 | "sha256:10703d3cec8dcd9eef5a630a04056bbc898abc19bac5691612acba7d1325b66d", 533 | "sha256:1f6c4bd0bdc0f14246fd41262df7dfc018d65bb05f6e16390b7ea26ca454a291", 534 | "sha256:25d8feefe27eb0303b73545416b13d108c6067b846b543738a25ff304824ed9a", 535 | "sha256:29464a177d56e4e055b5f7b629935af7f49c196be47528cc94e0a7bf83fbc2b9", 536 | "sha256:2e214b72168ea0275efd6c884b114ab42e316de3ffa125b267e732ed2abda892", 537 | "sha256:3e0d5e48e3a23e9a4d1a9f698e32a542a4a288c871d33ed8df1b092a40f3a0f9", 538 | "sha256:519425deca5c2b2bdac49f77b2c5625781abbaf9a809d727d3a5596b30bb4ded", 539 | "sha256:57fe287f0cdd9ceaf69e7b71a2e94a24b5d268b35df251a88fef5cc241bf73aa", 540 | "sha256:668d0cec391d9aed1c6a388b0d5b97cd22e6073eaa5fbaa6d2946603b4871efe", 541 | "sha256:68ba70684990f59497680ff90d18e756a47bf4863c604098f10de9716b2c0bdd", 542 | "sha256:6de012d2b166fe7a4cdf505eee3aaa12192f7ba365beeefaca4ec10e31241a85", 543 | "sha256:79b91ebe5a28d349b6d0d323023350133e927b4de5b651a8aa2db69c761420c6", 544 | "sha256:8550177fa5d4c1f09b5e5f524411c44633c80ec69b24e0e98906dd761941ca46", 545 | "sha256:898f818399cafcdb93cbbe15fc83a33d05f18e29fb498ddc09b0214cdfc7cd51", 546 | "sha256:94b091dc0f19291adcb279a108f5d38de2430411068b219f41b343c03b28fb1f", 547 | "sha256:a26863198902cda15ab4503991e8cf1ca874219e0118cbf07c126bce7c4db129", 548 | "sha256:a8034021801bc0440f2e027c354b4eafd95891b573e12ff0418dec385c76785c", 549 | "sha256:bc978ac17468fe868ee589c795d06777f75496b1ed576d308002c8a5756fb9ea", 550 | "sha256:c05b41bc1deade9f90ddc5d988fe506208019ebba9f2578c622516fd201f5863", 551 | "sha256:c9b060bd1e5a26ab6e8267fd46fc9e02b54eb15fffb16d112d4c7b1c12987559", 552 | "sha256:edb04bdd45bfd76c8292c4d9654568efaedf76fe78eb246dde69bdb13b2dad87", 553 | "sha256:f19f2a4f547505fe9072e15f6f4ae714af51b5a681a97f187971f50c283193b6" 554 | ], 555 | "markers": "python_version < '3.7' and implementation_name == 'cpython'", 556 | "version": "==1.1.0" 557 | }, 558 | "wrapt": { 559 | "hashes": [ 560 | "sha256:d4d560d479f2c21e1b5443bbd15fe7ec4b37fe7e53d335d3b9b0a7b1226fe3c6" 561 | ], 562 | "version": "==1.10.11" 563 | }, 564 | "yapf": { 565 | "hashes": [ 566 | "sha256:b96815bd0bbd2ab290f2ae9e610756940b17a0523ef2f6b2d31da749fc395137", 567 | "sha256:cebb6faf35c9027c08996c07831b8971f3d67c0eb615269f66dfd7e6815fdc2a" 568 | ], 569 | "index": "pypi", 570 | "version": "==0.24.0" 571 | } 572 | } 573 | } 574 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # aox_proxy_pool | Ip代理池项目 2 | 本项目是为了解决在抓取代理ip后, 代理ip失效快, 不稳定的问题 以及代理ip使用不方便等问题。 3 | 4 | 1. 可以自己去增加抓取的代理ip网站, 项目会自动去重, 并且之前抓取过的ip会保存在数据库中, 不会删除, 所以放心不会出现重复抓取的问题 5 | 2. 通过校验服务器开放的端口、访问的速度、校验多个网址访问的情况来对代理ip设置权重排序, 达到过滤垃圾ip的目的, 运行一段时间后剩下的ip, 则可以进入使用, 而且实测比较稳定 6 | 3. 通过脚本自动更新squid配置文件, 这样使用的客户端只需要指定squid服务器的地址即可。 7 | 4. 自动网络监测, 断网等情况下不会进行ip监测, 防止意外数据出错。 8 | 9 |  10 | 11 | ## 功能特色 12 | 13 | 1. 可自行增加代理抓取渠道 14 | 2. ip校验模块 15 | 3. squid配置自动更新 16 | 4. 客户端使用简单 17 | 18 | ## 运行环境 19 | 1. python 3.6 20 | 2. pipenv 21 | 3. scrapy模块 22 | 3. mysql5.6 23 | 24 | ## 运行部署 25 | 1. 克隆代码 26 | ``` 27 | git clone "https://github.com/aox-lei/aox_proxy_pool" 28 | ``` 29 | 30 | 2. 本机安装python3.6、pip、pipenv、mysql 31 | 3. 安装虚拟环境以及python模块 32 | ``` 33 | > pipenv --three 34 | > pipenv shell 35 | > pipenv install 36 | ``` 37 | 4. 复制配置文件 38 | ``` 39 | > cp proxy_pool/config.ini.default proxy_pool/config.ini 40 | ``` 41 | 5. 修改配置信息和创建数据表 42 | ```sql 43 | CREATE TABLE `ip` ( 44 | `id` int(11) NOT NULL AUTO_INCREMENT, 45 | `ip` char(15) NOT NULL DEFAULT '', 46 | `port` int(6) NOT NULL DEFAULT '0', 47 | `score` tinyint(5) NOT NULL DEFAULT '5' COMMENT '得分, 默认5分, 抓取成功一次, 分数+1, 失败一次-1, 到0则不抓取', 48 | `weight` int(3) NOT NULL DEFAULT '0' COMMENT '权重', 49 | `speed` int(11) NOT NULL DEFAULT '0' COMMENT '平均速度', 50 | `http_type` tinyint(1) NOT NULL DEFAULT '1' COMMENT '支持的http类型: 1:http 2:https 3:all', 51 | `country` char(5) NOT NULL DEFAULT '' COMMENT '所属国家', 52 | `open_port` varchar(255) NOT NULL DEFAULT '' COMMENT '开放端口, 逗号分隔', 53 | `create_time` datetime NOT NULL DEFAULT '0000-00-00 00:00:00', 54 | `update_time` datetime NOT NULL DEFAULT '0000-00-00 00:00:00', 55 | PRIMARY KEY (`id`), 56 | UNIQUE KEY `uniq_ip_port` (`ip`,`port`) USING BTREE 57 | ) ENGINE=InnoDB AUTO_INCREMENT=4538 DEFAULT CHARSET=utf8 58 | ``` 59 | 6. 运行抓取项目 60 | ``` 61 | > scrapy crawl xici 62 | > scrapy crawl kuaidaili 63 | > scrapy crawl ip66 64 | ``` 65 | 7. 运行ip检测 66 | ``` 67 | > python manager.py check_proxy 68 | ``` 69 | 8. 运行squid同步 70 | ``` 71 | > python manager.py sync_squid -d 默认配置文件地址 -c squid的配置文件地址 72 | ``` 73 | 74 | ## squid同步配置文件说明 75 | 1. 安装完squid后修改配置文件, 之后复制一份squid.conf为squid.conf.default 76 | 2. 执行squid同步的命令, python会读取有效的代理ip和squid.conf.default, 拼合成新的配置文件squid.conf。 77 | 78 | ## 运行效果检测 79 | 1. 抓取了4537个免费代理ip, 但是在运行一段时间检测后, 长期比较稳定的基本只有60多个, 所以如果需要大量的代理ip, 那么必须得增加抓取量 80 | 2. 代理ip的速度以及稳定性还是比较不错的, 偶尔会发生无法访问的情况, 但是总体来说, 还是比较ok的, 可以在生产环境中使用。 81 | 82 | ## 目前支持的抓取网站 83 | 1. 西祠代理 84 | 2. 快代理 85 | 3. ip66 86 | 87 | 如果有什么新的好一些的免费代理网站, 可以提ISSUE或者qq:2387813033, 微信: 18500402623 88 | -------------------------------------------------------------------------------- /doc/说明.md: -------------------------------------------------------------------------------- 1 | http://www.89ip.cn/index_1.html 2 | http://www.ip3366.net/free/ 3 | http://ip.zdaye.com/FreeIPlist.html 4 | http://www.xsdaili.com/ 5 | http://www.mayidaili.com/free 6 | http://www.data5u.com/free/index.html 7 | http://ip.seofangfa.com/ -------------------------------------------------------------------------------- /proxy_pool/config.ini.default: -------------------------------------------------------------------------------- 1 | [mysql] 2 | dsn=mysql+pymysql://root:123123@127.0.0.1:3306/test -------------------------------------------------------------------------------- /proxy_pool/data/squid.conf: -------------------------------------------------------------------------------- 1 | acl SSL_ports port 443 2 | acl Safe_ports port 80 # http 3 | acl Safe_ports port 21 # ftp 4 | acl Safe_ports port 443 # https 5 | acl Safe_ports port 70 # gopher 6 | acl Safe_ports port 210 # wais 7 | acl Safe_ports port 1025-65535 # unregistered ports 8 | acl Safe_ports port 280 # http-mgmt 9 | acl Safe_ports port 488 # gss-http 10 | acl Safe_ports port 591 # filemaker 11 | acl Safe_ports port 777 # multiling http 12 | acl CONNECT method CONNECT 13 | http_access deny !Safe_ports 14 | http_access deny CONNECT !SSL_ports 15 | http_access allow all 16 | http_access deny manager 17 | http_access allow localhost 18 | http_access deny all 19 | http_port 3128 20 | coredump_dir /var/spool/squid 21 | refresh_pattern ^ftp: 1440 20% 10080 22 | refresh_pattern ^gopher: 1440 0% 1440 23 | refresh_pattern -i (/cgi-bin/|\?) 0 0% 0 24 | refresh_pattern (Release|Packages(.gz)*)$ 0 20% 2880 25 | refresh_pattern . 0 20% 4320 26 | request_header_access Via deny all 27 | request_header_access X-Forwarded-For deny all 28 | request_header_access From deny all 29 | never_direct allow all 30 | cache_peer 101.4.136.34 parent 8080 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-546 31 | cache_peer 115.159.31.195 parent 8080 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-2958 32 | cache_peer 39.135.10.99 parent 8080 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-3174 33 | cache_peer 39.135.9.227 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-4023 34 | cache_peer 39.135.9.163 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-4032 35 | cache_peer 118.140.151.98 parent 3128 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-4097 36 | cache_peer 39.135.9.98 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-550 37 | cache_peer 221.7.255.168 parent 8080 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1477 38 | cache_peer 125.39.9.34 parent 9000 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-853 39 | cache_peer 125.39.9.35 parent 9000 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1086 40 | cache_peer 123.161.62.150 parent 9000 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1408 41 | cache_peer 221.7.255.168 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1286 42 | cache_peer 117.131.75.134 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1247 43 | cache_peer 39.134.68.4 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-698 44 | cache_peer 218.207.212.86 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1184 45 | cache_peer 222.212.143.109 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1559 46 | cache_peer 117.127.0.209 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1352 47 | cache_peer 117.127.0.210 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1905 48 | cache_peer 117.127.0.210 parent 8080 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-2385 49 | cache_peer 117.127.0.209 parent 8080 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-2240 50 | cache_peer 111.7.130.101 parent 8080 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-4145 51 | cache_peer 223.203.0.14 parent 8000 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-643 52 | cache_peer 218.60.8.83 parent 3129 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1006 53 | cache_peer 218.60.8.98 parent 3129 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-947 54 | cache_peer 58.240.170.108 parent 8080 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-2215 55 | cache_peer 119.28.37.58 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-3216 56 | cache_peer 218.60.8.99 parent 3129 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-820 57 | cache_peer 120.78.215.151 parent 808 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-644 58 | cache_peer 119.27.177.169 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1084 59 | cache_peer 221.122.91.64 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-4142 60 | cache_peer 117.127.0.196 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-887 61 | cache_peer 117.127.0.196 parent 8080 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1894 62 | cache_peer 117.127.0.203 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1166 63 | cache_peer 117.127.0.203 parent 8080 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1285 64 | cache_peer 218.59.139.238 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1334 65 | cache_peer 117.127.0.204 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1337 66 | cache_peer 117.127.0.204 parent 8080 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1779 67 | cache_peer 39.135.10.102 parent 8080 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-666 68 | cache_peer 27.208.91.121 parent 8060 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-3266 69 | cache_peer 112.247.171.247 parent 8060 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-4485 70 | cache_peer 221.2.175.238 parent 8060 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-545 71 | cache_peer 27.203.240.7 parent 8060 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-4341 72 | cache_peer 221.2.174.6 parent 8060 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-3189 73 | cache_peer 119.180.174.99 parent 8060 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-4143 74 | cache_peer 121.8.98.197 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-587 75 | cache_peer 59.44.43.238 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1365 76 | cache_peer 124.225.176.82 parent 8010 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-737 77 | cache_peer 221.122.91.61 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-4122 78 | cache_peer 222.33.192.238 parent 8118 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1702 79 | cache_peer 183.129.207.73 parent 14051 0 no-query weighted-round-robin weight=4 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-4500 80 | cache_peer 124.225.176.83 parent 8010 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-747 81 | cache_peer 39.108.128.242 parent 8000 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-2783 82 | cache_peer 106.75.164.15 parent 3128 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-3786 83 | cache_peer 39.134.134.149 parent 8080 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-711 84 | cache_peer 111.7.130.101 parent 80 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1026 85 | cache_peer 39.104.119.128 parent 3128 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-4342 86 | cache_peer 119.179.133.149 parent 8060 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-3641 87 | cache_peer 221.2.174.99 parent 8060 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-1063 88 | cache_peer 221.1.205.74 parent 8060 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-4355 89 | cache_peer 183.250.250.73 parent 63000 0 no-query weighted-round-robin weight=3 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-4064 90 | cache_peer 221.2.174.28 parent 8060 0 no-query weighted-round-robin weight=5 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-3526 91 | cache_peer 203.130.46.108 parent 9090 0 no-query weighted-round-robin weight=1 connect-fail-limit=2 allow-miss max_conn=2 name=proxy-4237 92 | -------------------------------------------------------------------------------- /proxy_pool/data/squid.conf.default: -------------------------------------------------------------------------------- 1 | acl SSL_ports port 443 2 | acl Safe_ports port 80 # http 3 | acl Safe_ports port 21 # ftp 4 | acl Safe_ports port 443 # https 5 | acl Safe_ports port 70 # gopher 6 | acl Safe_ports port 210 # wais 7 | acl Safe_ports port 1025-65535 # unregistered ports 8 | acl Safe_ports port 280 # http-mgmt 9 | acl Safe_ports port 488 # gss-http 10 | acl Safe_ports port 591 # filemaker 11 | acl Safe_ports port 777 # multiling http 12 | acl CONNECT method CONNECT 13 | http_access deny !Safe_ports 14 | http_access deny CONNECT !SSL_ports 15 | http_access allow all 16 | http_access deny manager 17 | http_access allow localhost 18 | http_access deny all 19 | http_port 3128 20 | coredump_dir /var/spool/squid 21 | refresh_pattern ^ftp: 1440 20% 10080 22 | refresh_pattern ^gopher: 1440 0% 1440 23 | refresh_pattern -i (/cgi-bin/|\?) 0 0% 0 24 | refresh_pattern (Release|Packages(.gz)*)$ 0 20% 2880 25 | refresh_pattern . 0 20% 4320 26 | request_header_access Via deny all 27 | request_header_access X-Forwarded-For deny all 28 | request_header_access From deny all 29 | never_direct allow all -------------------------------------------------------------------------------- /proxy_pool/manager.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import click 3 | from proxy_pool.script.check_proxy import check_proxy 4 | from proxy_pool.script.sync_squid import update_squid_conf 5 | 6 | 7 | @click.group() 8 | def cli(): 9 | pass 10 | 11 | 12 | @click.command() 13 | def check_ip(): 14 | check_proxy().run() 15 | 16 | 17 | @click.command() 18 | @click.option('--default-conf-path', '-d', required=True, help='默认的squid的配置文件地址') 19 | @click.option('--conf-path', '-c', required=True, help='配置文件地址') 20 | def sync_squid(default_conf_path, conf_path): 21 | update_squid_conf(default_conf_path, conf_path) 22 | 23 | 24 | cli.add_command(check_ip) 25 | cli.add_command(sync_squid) 26 | 27 | if __name__ == '__main__': 28 | cli() -------------------------------------------------------------------------------- /proxy_pool/proxy_pool/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | import configparser 4 | from sqlalchemy import create_engine 5 | from sqlalchemy.orm import sessionmaker 6 | logging.basicConfig( 7 | level=logging.INFO, 8 | format= 9 | '%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s') 10 | 11 | config = configparser.ConfigParser() 12 | config.read('config.ini') 13 | 14 | engine = create_engine(config.get('mysql', 'dsn'), echo=False, pool_size=500, pool_recycle=3600) 15 | Session = sessionmaker(engine) 16 | -------------------------------------------------------------------------------- /proxy_pool/proxy_pool/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # https://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | import scrapy 9 | 10 | 11 | class ProxyPoolItem(scrapy.Item): 12 | ip = scrapy.Field() 13 | port = scrapy.Field() 14 | city = scrapy.Field() 15 | http_type = scrapy.Field() 16 | country = scrapy.Field() -------------------------------------------------------------------------------- /proxy_pool/proxy_pool/middlewares.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your spider middleware 4 | # 5 | # See documentation in: 6 | # https://doc.scrapy.org/en/latest/topics/spider-middleware.html 7 | 8 | from scrapy import signals 9 | 10 | 11 | class ProxyPoolSpiderMiddleware(object): 12 | # Not all methods need to be defined. If a method is not defined, 13 | # scrapy acts as if the spider middleware does not modify the 14 | # passed objects. 15 | 16 | @classmethod 17 | def from_crawler(cls, crawler): 18 | # This method is used by Scrapy to create your spiders. 19 | s = cls() 20 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 21 | return s 22 | 23 | def process_spider_input(self, response, spider): 24 | # Called for each response that goes through the spider 25 | # middleware and into the spider. 26 | 27 | # Should return None or raise an exception. 28 | return None 29 | 30 | def process_spider_output(self, response, result, spider): 31 | # Called with the results returned from the Spider, after 32 | # it has processed the response. 33 | 34 | # Must return an iterable of Request, dict or Item objects. 35 | for i in result: 36 | yield i 37 | 38 | def process_spider_exception(self, response, exception, spider): 39 | # Called when a spider or process_spider_input() method 40 | # (from other spider middleware) raises an exception. 41 | 42 | # Should return either None or an iterable of Response, dict 43 | # or Item objects. 44 | pass 45 | 46 | def process_start_requests(self, start_requests, spider): 47 | # Called with the start requests of the spider, and works 48 | # similarly to the process_spider_output() method, except 49 | # that it doesn’t have a response associated. 50 | 51 | # Must return only requests (not items). 52 | for r in start_requests: 53 | yield r 54 | 55 | def spider_opened(self, spider): 56 | spider.logger.info('Spider opened: %s' % spider.name) 57 | 58 | 59 | class ProxyPoolDownloaderMiddleware(object): 60 | # Not all methods need to be defined. If a method is not defined, 61 | # scrapy acts as if the downloader middleware does not modify the 62 | # passed objects. 63 | 64 | @classmethod 65 | def from_crawler(cls, crawler): 66 | # This method is used by Scrapy to create your spiders. 67 | s = cls() 68 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 69 | return s 70 | 71 | def process_request(self, request, spider): 72 | # Called for each request that goes through the downloader 73 | # middleware. 74 | 75 | # Must either: 76 | # - return None: continue processing this request 77 | # - or return a Response object 78 | # - or return a Request object 79 | # - or raise IgnoreRequest: process_exception() methods of 80 | # installed downloader middleware will be called 81 | return None 82 | 83 | def process_response(self, request, response, spider): 84 | # Called with the response returned from the downloader. 85 | 86 | # Must either; 87 | # - return a Response object 88 | # - return a Request object 89 | # - or raise IgnoreRequest 90 | return response 91 | 92 | def process_exception(self, request, exception, spider): 93 | # Called when a download handler or a process_request() 94 | # (from other downloader middleware) raises an exception. 95 | 96 | # Must either: 97 | # - return None: continue processing this exception 98 | # - return a Response object: stops process_exception() chain 99 | # - return a Request object: stops process_exception() chain 100 | pass 101 | 102 | def spider_opened(self, spider): 103 | spider.logger.info('Spider opened: %s' % spider.name) 104 | -------------------------------------------------------------------------------- /proxy_pool/proxy_pool/models.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | from sqlalchemy import Column, Integer, String, DateTime 4 | from sqlalchemy.ext.declarative import declarative_base 5 | from sqlalchemy.orm.exc import NoResultFound 6 | 7 | Base = declarative_base() 8 | 9 | 10 | class Ip(Base): 11 | __tablename__ = 'ip' 12 | 13 | id = Column(Integer, primary_key=True) 14 | ip = Column(String(length=15), default='') 15 | port = Column(Integer, default=0) 16 | score = Column(Integer, default=5) 17 | weight = Column(Integer, default=0) 18 | speed = Column(Integer, default=0) 19 | http_type = Column(Integer, default=1) 20 | country = Column(Integer, default='') 21 | open_port = Column(String(length=255), default='') 22 | create_time = Column(DateTime, default='0000-00-00 00:00:00') 23 | update_time = Column(DateTime, default='0000-00-00 00:00:00') 24 | -------------------------------------------------------------------------------- /proxy_pool/proxy_pool/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | import arrow 8 | import logging 9 | from proxy_pool import Session 10 | from proxy_pool.models import Ip 11 | from sqlalchemy.orm.exc import NoResultFound 12 | 13 | 14 | class ProxyPoolPipeline(object): 15 | def process_item(self, item, spider): 16 | if not self.check(item['ip'], item['port']): 17 | session = Session() 18 | try: 19 | session.add( 20 | Ip(ip=item['ip'], 21 | port=item['port'], 22 | http_type=item['http_type'], 23 | country=item['country'], 24 | create_time=arrow.now().datetime, 25 | update_time=arrow.now().datetime)) 26 | session.commit() 27 | except Exception as e: 28 | logging.exception(e) 29 | session.rollback() 30 | finally: 31 | session.close() 32 | 33 | 34 | def check(self, ip, port): 35 | session = Session() 36 | try: 37 | info = session.query(Ip).filter(Ip.ip == ip).filter( 38 | Ip.port==port).with_entities(Ip.id).one() 39 | except NoResultFound as e: 40 | return False 41 | 42 | return True -------------------------------------------------------------------------------- /proxy_pool/proxy_pool/script/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aox-lei/aox_proxy_pool/f734a9969c9344a4bda4ed3c14be2d056044f7a2/proxy_pool/proxy_pool/script/__init__.py -------------------------------------------------------------------------------- /proxy_pool/proxy_pool/script/check_proxy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import requests 3 | import arrow 4 | import time 5 | import logging 6 | import re 7 | import logging 8 | from concurrent.futures import ThreadPoolExecutor 9 | from requests.exceptions import ConnectionError 10 | from proxy_pool import Session 11 | from proxy_pool.models import Ip 12 | from proxy_pool import utils 13 | 14 | 15 | class check_proxy(object): 16 | check_urls_str = { 17 | 'http://www.baidu.com': '百度', 18 | 'http://www.qq.com': '腾讯', 19 | 'http://www.ccidcom.com/': '通信', 20 | 'https://www.taobao.com/': '淘宝', 21 | 'https://www.zhihu.com/': '知乎', 22 | 'https://www.baidu.com': '百度', 23 | } 24 | check_urls = { 25 | 'http': [ 26 | 'http://www.baidu.com', 'http://www.qq.com', 27 | 'http://www.ccidcom.com/' 28 | ], 29 | 'https': [ 30 | 'https://www.taobao.com/', 'https://www.zhihu.com/', 31 | 'https://www.baidu.com' 32 | ] 33 | } 34 | 35 | check_ports = { 36 | 80: 500, 37 | 8080: 100, 38 | 3128: 100, 39 | 8081: 100, 40 | 9080: 100, 41 | 1080: 100, 42 | 21: 300, 43 | 23: 200, 44 | 53: 300, 45 | 1863: 200, 46 | 2289: 100, 47 | 443: 500, 48 | 69: 100, 49 | 22: 500, 50 | 25: 200, 51 | 110: 200, 52 | 7001: 100, 53 | 9090: 100, 54 | 3389: 500, 55 | 1521: 500, 56 | 1158: 300, 57 | 2100: 100, 58 | 1433: 200, 59 | 3306: 500, 60 | 5631: 100, 61 | 5632: 100, 62 | 5000: 200, 63 | 8888: 200 64 | } 65 | 66 | def run(self): 67 | while 1: 68 | ip_list = self.get_proxy_list() 69 | if (not ip_list): 70 | return False 71 | pool = ThreadPoolExecutor(max_workers=100) 72 | 73 | threads = [] 74 | for _info in ip_list: 75 | threads.append( 76 | pool.submit( 77 | self.check_ip, 78 | _info.ip, 79 | _info.port, 80 | _info.http_type, 81 | _info.score, 82 | is_new=1 83 | if _info.create_time == _info.update_time else 0)) 84 | 85 | pool.shutdown() 86 | 87 | def get_proxy_list(self): 88 | session = Session() 89 | try: 90 | lists = session.query(Ip).filter(Ip.score > 0).order_by( 91 | Ip.update_time.desc()).with_entities( 92 | Ip.id, Ip.ip, Ip.port, Ip.http_type, Ip.score, 93 | Ip.create_time, Ip.update_time).all() 94 | return lists 95 | except Exception as e: 96 | logging.exception(e) 97 | return False 98 | 99 | def check_ip(self, ip, port, http_type, score, is_new=1): 100 | open_ports = self.check_port(ip, port) 101 | session = Session() 102 | if open_ports is False: 103 | try: 104 | session.query(Ip).filter(Ip.ip == ip).filter( 105 | Ip.port == port).update({ 106 | 'score': 0, 107 | 'update_time': arrow.now().datetime 108 | }) 109 | session.commit() 110 | except Exception as e: 111 | logging.exception(e) 112 | session.rollback() 113 | logging.warning('%s:%d ---- 端口未开放' % (ip, int(port))) 114 | return False 115 | 116 | _update_data = { 117 | 'open_port': ','.join(map(lambda x: str(x), open_ports)), 118 | 'update_time': arrow.now().datetime 119 | } 120 | if is_new == 1: 121 | new_http_type = self.check_http_type(ip, port) 122 | if (new_http_type): 123 | _update_data['http_type'] = new_http_type 124 | 125 | speed_time = self.check_visit(ip, port, http_type) 126 | 127 | if speed_time is False: 128 | _update_data['score'] = score - 1 if score - 1 >= 0 else 0 129 | 130 | logging.warning('%s:%d ------ 无法访问' % (ip, int(port))) 131 | else: 132 | _update_data['score'] = score + 1 if score + 1 <= 5 else 5 133 | _update_data['speed'] = speed_time 134 | _update_data['weight'] = self.calculate_weight( 135 | score, speed_time, open_ports) 136 | 137 | logging.info('%s:%d ------ 代理有效, speed:%d, open_ports:%s' % 138 | (ip, port, speed_time, _update_data['open_port'])) 139 | if utils.check_network(): 140 | try: 141 | session.query(Ip).filter(Ip.ip == ip).filter( 142 | Ip.port == port).update(_update_data) 143 | session.commit() 144 | except Exception as e: 145 | logging.exception(e) 146 | session.rollback() 147 | 148 | def check_http_type(self, ip, port): 149 | check_urls = self.check_urls['http'] + self.check_urls['https'] 150 | success_visit_count = { 151 | 'http': len(self.check_urls['http']), 152 | 'https': len(self.check_urls['https']) 153 | } 154 | for _url in check_urls: 155 | try: 156 | requests.get( 157 | _url, 158 | timeout=5, 159 | proxies={ 160 | 'http': 'http://%s:%d' % (ip, port), 161 | 'https': 'http://%s:%d' % (ip, port) 162 | }) 163 | except ConnectionError: 164 | if _url[0:5] == 'https': 165 | success_visit_count['https'] -= 1 166 | elif _url[0:4] == 'http': 167 | success_visit_count['http'] -= 1 168 | except Exception: 169 | pass 170 | 171 | if success_visit_count['http'] > 0 and success_visit_count[ 172 | 'https'] == 0: 173 | return 1 174 | elif success_visit_count['https'] > 0 and success_visit_count[ 175 | 'http'] == 0: 176 | return 2 177 | elif success_visit_count['https'] > 0 and success_visit_count['http'] > 0: 178 | return 3 179 | else: 180 | return False 181 | 182 | def check_visit(self, ip, port, http_type): 183 | if http_type == 1: 184 | check_urls = self.check_urls['http'] 185 | elif http_type == 2: 186 | check_urls = self.check_urls['https'] 187 | elif http_type == 3: 188 | check_urls = self.check_urls['http'] + self.check_urls['https'] 189 | 190 | total_speed_time = 0 191 | visit_success_count = 0 192 | 193 | for _url in check_urls: 194 | _start_time = time.time() 195 | try: 196 | result = requests.get( 197 | _url, 198 | timeout=5, 199 | proxies={ 200 | 'http': 'http://%s:%d' % (ip, port), 201 | 'https': 'http://%s:%d' % (ip, port) 202 | }) 203 | 204 | if self.check_html_title(result.text, 205 | self.check_urls_str.get(_url)): 206 | total_speed_time += int((time.time() - _start_time) * 1000) 207 | visit_success_count += 1 208 | except Exception: 209 | pass 210 | 211 | if total_speed_time == 0 or visit_success_count == 0: 212 | return False 213 | else: 214 | total_speed_time += (len(check_urls) - visit_success_count) * 5000 215 | visit_success_count = len(check_urls) 216 | 217 | return int(total_speed_time / visit_success_count) 218 | 219 | def check_html_title(self, html, check_str): 220 | title = re.findall('