├── .editorconfig ├── .gitattributes ├── .gitignore ├── LICENSE ├── ProxyCrawler.sln ├── ProxyCrawler ├── Entity │ └── ProxyIpEntity.cs ├── IProxyProvider.cs ├── Job │ ├── BaseQuartzJob.cs │ └── SyncProxyJob.cs ├── Program.cs ├── ProxyCrawler.csproj ├── ProxyProviders │ ├── BaibianIpProxyProvider.cs │ ├── BaseProxyProvider.cs │ ├── CoderBusyProxyProvider.cs │ ├── KuaidailiProxyProvider.cs │ ├── MayiProxyProvider.cs │ ├── SixIpProxyProvider.cs │ ├── XicidailiProxyProvider.cs │ └── YundailiProxyProvider.cs ├── QuartzService.cs ├── app.config ├── log4net.config └── quartz_jobs.config ├── README.md └── nuget.config /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome:http://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | # Don't use tabs for indentation. 7 | [*] 8 | indent_style = space 9 | # (Please don't specify an indent_size here; that has too many unintended consequences.) 10 | 11 | # Code files 12 | [*.{cs,csx,vb,vbx}] 13 | indent_size = 4 14 | insert_final_newline = true 15 | charset = utf-8-bom 16 | 17 | # Xml project files 18 | [*.{csproj,vbproj,vcxproj,vcxproj.filters,proj,projitems,shproj}] 19 | indent_size = 2 20 | 21 | # Xml config files 22 | [*.{props,targets,ruleset,config,nuspec,resx,vsixmanifest,vsct}] 23 | indent_size = 2 24 | 25 | # JSON files 26 | [*.json] 27 | indent_size = 2 28 | 29 | # Dotnet code style settings: 30 | [*.{cs,vb}] 31 | # Sort using and Import directives with System.* appearing first 32 | dotnet_sort_system_directives_first = true 33 | # Avoid "this." and "Me." 
if not necessary 34 | dotnet_style_qualification_for_field = false:suggestion 35 | dotnet_style_qualification_for_property = false:suggestion 36 | dotnet_style_qualification_for_method = false:suggestion 37 | dotnet_style_qualification_for_event = false:suggestion 38 | 39 | # Use language keywords instead of framework type names for type references 40 | dotnet_style_predefined_type_for_locals_parameters_members = true:suggestion 41 | dotnet_style_predefined_type_for_member_access = true:suggestion 42 | 43 | # Suggest more modern language features when available 44 | dotnet_style_object_initializer = true:suggestion 45 | dotnet_style_collection_initializer = true:suggestion 46 | dotnet_style_coalesce_expression = true:suggestion 47 | dotnet_style_null_propagation = true:suggestion 48 | dotnet_style_explicit_tuple_names = true:suggestion 49 | 50 | # CSharp code style settings: 51 | [*.cs] 52 | # Prefer "var" everywhere 53 | csharp_style_var_for_built_in_types = true:suggestion 54 | csharp_style_var_when_type_is_apparent = true:suggestion 55 | csharp_style_var_elsewhere = true:suggestion 56 | 57 | # Prefer method-like constructs to have a block body 58 | csharp_style_expression_bodied_methods = false:none 59 | csharp_style_expression_bodied_constructors = false:none 60 | csharp_style_expression_bodied_operators = false:none 61 | 62 | # Prefer property-like constructs to have an expression-body 63 | csharp_style_expression_bodied_properties = true:none 64 | csharp_style_expression_bodied_indexers = true:none 65 | csharp_style_expression_bodied_accessors = true:none 66 | 67 | # Suggest more modern language features when available 68 | csharp_style_pattern_matching_over_is_with_cast_check = true:suggestion 69 | csharp_style_pattern_matching_over_as_with_null_check = true:suggestion 70 | csharp_style_inlined_variable_declaration = true:suggestion 71 | csharp_style_throw_expression = true:suggestion 72 | csharp_style_conditional_delegate_call = true:suggestion 73 | 74 | # 
Newline settings 75 | csharp_new_line_before_open_brace = all 76 | csharp_new_line_before_else = true 77 | csharp_new_line_before_catch = true 78 | csharp_new_line_before_finally = true 79 | csharp_new_line_before_members_in_object_initializers = true 80 | csharp_new_line_before_members_in_anonymous_types = true -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is needed for earlier builds of msysgit that do not have it on by 10 | # default for csharp files. 11 | # Note: This is only used by command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. 
To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.suo 8 | *.user 9 | *.userosscache 10 | *.sln.docstates 11 | 12 | # User-specific files (MonoDevelop/Xamarin Studio) 13 | *.userprefs 14 | 15 | # Build results 16 | [Dd]ebug/ 17 | [Dd]ebugPublic/ 18 | [Rr]elease/ 19 | [Rr]eleases/ 20 | x64/ 21 | x86/ 22 | bld/ 23 | [Bb]in/ 24 | [Oo]bj/ 25 | [Ll]og/ 26 | 27 | # Visual Studio 2015 cache/options directory 28 | .vs/ 29 | # Uncomment if you have tasks that create the project's static files in wwwroot 30 | #wwwroot/ 31 | 32 | # MSTest test Results 33 | [Tt]est[Rr]esult*/ 34 | [Bb]uild[Ll]og.* 35 | 36 | # NUNIT 37 | *.VisualState.xml 38 | TestResult.xml 39 | 40 | # Build Results of an ATL Project 41 | [Dd]ebugPS/ 42 | [Rr]eleasePS/ 43 | dlldata.c 44 | 45 | # Benchmark Results 46 | BenchmarkDotNet.Artifacts/ 47 | 48 | # .NET Core 49 | project.lock.json 50 | project.fragment.lock.json 51 | artifacts/ 52 | **/Properties/launchSettings.json 53 | 54 | *_i.c 55 | *_p.c 56 | *_i.h 57 | *.ilk 58 | *.meta 59 | *.obj 60 | *.pch 61 | *.pdb 62 | *.pgc 63 | *.pgd 64 | *.rsp 65 | *.sbr 66 | *.tlb 67 | *.tli 68 | *.tlh 69 | *.tmp 70 | *.tmp_proj 71 | *.log 72 | *.vspscc 73 | *.vssscc 74 | .builds 75 | *.pidb 76 | *.svclog 77 | *.scc 78 | 79 | # Chutzpah Test files 80 | _Chutzpah* 81 | 82 | # Visual C++ cache files 83 | ipch/ 84 | *.aps 85 | *.ncb 86 | *.opendb 87 | *.opensdf 88 | *.sdf 89 | *.cachefile 90 | *.VC.db 91 | *.VC.VC.opendb 92 | 93 | # Visual Studio profiler 94 | *.psess 95 | *.vsp 96 | *.vspx 97 | *.sap 98 | 99 | # Visual Studio Trace Files 100 | *.e2e 101 | 102 | # TFS 2012 Local Workspace 103 | $tf/ 104 | 105 | # Guidance Automation Toolkit 106 | *.gpState 107 | 108 | # ReSharper is a .NET coding add-in 109 | _ReSharper*/ 110 | *.[Rr]e[Ss]harper 111 | *.DotSettings.user 112 | 113 | # JustCode is a .NET coding add-in 114 | .JustCode 115 | 116 | # TeamCity is a build add-in 117 | _TeamCity* 
118 | 119 | # DotCover is a Code Coverage Tool 120 | *.dotCover 121 | 122 | # AxoCover is a Code Coverage Tool 123 | .axoCover/* 124 | !.axoCover/settings.json 125 | 126 | # Visual Studio code coverage results 127 | *.coverage 128 | *.coveragexml 129 | 130 | # NCrunch 131 | _NCrunch_* 132 | .*crunch*.local.xml 133 | nCrunchTemp_* 134 | 135 | # MightyMoose 136 | *.mm.* 137 | AutoTest.Net/ 138 | 139 | # Web workbench (sass) 140 | .sass-cache/ 141 | 142 | # Installshield output folder 143 | [Ee]xpress/ 144 | 145 | # DocProject is a documentation generator add-in 146 | DocProject/buildhelp/ 147 | DocProject/Help/*.HxT 148 | DocProject/Help/*.HxC 149 | DocProject/Help/*.hhc 150 | DocProject/Help/*.hhk 151 | DocProject/Help/*.hhp 152 | DocProject/Help/Html2 153 | DocProject/Help/html 154 | 155 | # Click-Once directory 156 | publish/ 157 | 158 | # Publish Web Output 159 | *.[Pp]ublish.xml 160 | *.azurePubxml 161 | # Note: Comment the next line if you want to checkin your web deploy settings, 162 | # but database connection strings (with potential passwords) will be unencrypted 163 | *.pubxml 164 | *.publishproj 165 | 166 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 167 | # checkin your Azure Web App publish settings, but sensitive information contained 168 | # in these scripts will be unencrypted 169 | PublishScripts/ 170 | 171 | # NuGet Packages 172 | *.nupkg 173 | # The packages folder can be ignored because of Package Restore 174 | **/[Pp]ackages/* 175 | # except build/, which is used as an MSBuild target. 
176 | !**/[Pp]ackages/build/ 177 | # Uncomment if necessary however generally it will be regenerated when needed 178 | #!**/[Pp]ackages/repositories.config 179 | # NuGet v3's project.json files produces more ignorable files 180 | *.nuget.props 181 | *.nuget.targets 182 | 183 | # Microsoft Azure Build Output 184 | csx/ 185 | *.build.csdef 186 | 187 | # Microsoft Azure Emulator 188 | ecf/ 189 | rcf/ 190 | 191 | # Windows Store app package directories and files 192 | AppPackages/ 193 | BundleArtifacts/ 194 | Package.StoreAssociation.xml 195 | _pkginfo.txt 196 | *.appx 197 | 198 | # Visual Studio cache files 199 | # files ending in .cache can be ignored 200 | *.[Cc]ache 201 | # but keep track of directories ending in .cache 202 | !*.[Cc]ache/ 203 | 204 | # Others 205 | ClientBin/ 206 | ~$* 207 | *~ 208 | *.dbmdl 209 | *.dbproj.schemaview 210 | *.jfm 211 | *.pfx 212 | *.publishsettings 213 | orleans.codegen.cs 214 | 215 | # Since there are multiple workflows, uncomment next line to ignore bower_components 216 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 217 | #bower_components/ 218 | 219 | # RIA/Silverlight projects 220 | Generated_Code/ 221 | 222 | # Backup & report files from converting an old project file 223 | # to a newer Visual Studio version. 
Backup files are not needed, 224 | # because we have git ;-) 225 | _UpgradeReport_Files/ 226 | Backup*/ 227 | UpgradeLog*.XML 228 | UpgradeLog*.htm 229 | 230 | # SQL Server files 231 | *.mdf 232 | *.ldf 233 | *.ndf 234 | 235 | # Business Intelligence projects 236 | *.rdl.data 237 | *.bim.layout 238 | *.bim_*.settings 239 | 240 | # Microsoft Fakes 241 | FakesAssemblies/ 242 | 243 | # GhostDoc plugin setting file 244 | *.GhostDoc.xml 245 | 246 | # Node.js Tools for Visual Studio 247 | .ntvs_analysis.dat 248 | node_modules/ 249 | 250 | # Typescript v1 declaration files 251 | typings/ 252 | 253 | # Visual Studio 6 build log 254 | *.plg 255 | 256 | # Visual Studio 6 workspace options file 257 | *.opt 258 | 259 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 260 | *.vbw 261 | 262 | # Visual Studio LightSwitch build output 263 | **/*.HTMLClient/GeneratedArtifacts 264 | **/*.DesktopClient/GeneratedArtifacts 265 | **/*.DesktopClient/ModelManifest.xml 266 | **/*.Server/GeneratedArtifacts 267 | **/*.Server/ModelManifest.xml 268 | _Pvt_Extensions 269 | 270 | # Paket dependency manager 271 | .paket/paket.exe 272 | paket-files/ 273 | 274 | # FAKE - F# Make 275 | .fake/ 276 | 277 | # JetBrains Rider 278 | .idea/ 279 | *.sln.iml 280 | 281 | # CodeRush 282 | .cr/ 283 | 284 | # Python Tools for Visual Studio (PTVS) 285 | __pycache__/ 286 | *.pyc 287 | 288 | # Cake - Uncomment if you are using it 289 | # tools/** 290 | # !tools/packages.config 291 | 292 | # Tabs Studio 293 | *.tss 294 | 295 | # Telerik's JustMock configuration file 296 | *.jmconfig 297 | 298 | # BizTalk build output 299 | *.btp.cs 300 | *.btm.cs 301 | *.odx.cs 302 | *.xsd.cs 303 | 304 | # OpenCover UI analysis results 305 | OpenCover/ 306 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | 
http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, and 10 | distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by the copyright 13 | owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all other entities 16 | that control, are controlled by, or are under common control with that entity. 17 | For the purposes of this definition, "control" means (i) the power, direct or 18 | indirect, to cause the direction or management of such entity, whether by 19 | contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the 20 | outstanding shares, or (iii) beneficial ownership of such entity. 21 | 22 | "You" (or "Your") shall mean an individual or Legal Entity exercising 23 | permissions granted by this License. 24 | 25 | "Source" form shall mean the preferred form for making modifications, including 26 | but not limited to software source code, documentation source, and configuration 27 | files. 28 | 29 | "Object" form shall mean any form resulting from mechanical transformation or 30 | translation of a Source form, including but not limited to compiled object code, 31 | generated documentation, and conversions to other media types. 32 | 33 | "Work" shall mean the work of authorship, whether in Source or Object form, made 34 | available under the License, as indicated by a copyright notice that is included 35 | in or attached to the work (an example is provided in the Appendix below). 36 | 37 | "Derivative Works" shall mean any work, whether in Source or Object form, that 38 | is based on (or derived from) the Work and for which the editorial revisions, 39 | annotations, elaborations, or other modifications represent, as a whole, an 40 | original work of authorship. 
For the purposes of this License, Derivative Works 41 | shall not include works that remain separable from, or merely link (or bind by 42 | name) to the interfaces of, the Work and Derivative Works thereof. 43 | 44 | "Contribution" shall mean any work of authorship, including the original version 45 | of the Work and any modifications or additions to that Work or Derivative Works 46 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 47 | by the copyright owner or by an individual or Legal Entity authorized to submit 48 | on behalf of the copyright owner. For the purposes of this definition, 49 | "submitted" means any form of electronic, verbal, or written communication sent 50 | to the Licensor or its representatives, including but not limited to 51 | communication on electronic mailing lists, source code control systems, and 52 | issue tracking systems that are managed by, or on behalf of, the Licensor for 53 | the purpose of discussing and improving the Work, but excluding communication 54 | that is conspicuously marked or otherwise designated in writing by the copyright 55 | owner as "Not a Contribution." 56 | 57 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf 58 | of whom a Contribution has been received by Licensor and subsequently 59 | incorporated within the Work. 60 | 61 | 2. Grant of Copyright License. 62 | 63 | Subject to the terms and conditions of this License, each Contributor hereby 64 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 65 | irrevocable copyright license to reproduce, prepare Derivative Works of, 66 | publicly display, publicly perform, sublicense, and distribute the Work and such 67 | Derivative Works in Source or Object form. 68 | 69 | 3. Grant of Patent License. 
70 | 71 | Subject to the terms and conditions of this License, each Contributor hereby 72 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 73 | irrevocable (except as stated in this section) patent license to make, have 74 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where 75 | such license applies only to those patent claims licensable by such Contributor 76 | that are necessarily infringed by their Contribution(s) alone or by combination 77 | of their Contribution(s) with the Work to which such Contribution(s) was 78 | submitted. If You institute patent litigation against any entity (including a 79 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a 80 | Contribution incorporated within the Work constitutes direct or contributory 81 | patent infringement, then any patent licenses granted to You under this License 82 | for that Work shall terminate as of the date such litigation is filed. 83 | 84 | 4. Redistribution. 
85 | 86 | You may reproduce and distribute copies of the Work or Derivative Works thereof 87 | in any medium, with or without modifications, and in Source or Object form, 88 | provided that You meet the following conditions: 89 | 90 | You must give any other recipients of the Work or Derivative Works a copy of 91 | this License; and 92 | You must cause any modified files to carry prominent notices stating that You 93 | changed the files; and 94 | You must retain, in the Source form of any Derivative Works that You distribute, 95 | all copyright, patent, trademark, and attribution notices from the Source form 96 | of the Work, excluding those notices that do not pertain to any part of the 97 | Derivative Works; and 98 | If the Work includes a "NOTICE" text file as part of its distribution, then any 99 | Derivative Works that You distribute must include a readable copy of the 100 | attribution notices contained within such NOTICE file, excluding those notices 101 | that do not pertain to any part of the Derivative Works, in at least one of the 102 | following places: within a NOTICE text file distributed as part of the 103 | Derivative Works; within the Source form or documentation, if provided along 104 | with the Derivative Works; or, within a display generated by the Derivative 105 | Works, if and wherever such third-party notices normally appear. The contents of 106 | the NOTICE file are for informational purposes only and do not modify the 107 | License. You may add Your own attribution notices within Derivative Works that 108 | You distribute, alongside or as an addendum to the NOTICE text from the Work, 109 | provided that such additional attribution notices cannot be construed as 110 | modifying the License. 
111 | You may add Your own copyright statement to Your modifications and may provide 112 | additional or different license terms and conditions for use, reproduction, or 113 | distribution of Your modifications, or for any such Derivative Works as a whole, 114 | provided Your use, reproduction, and distribution of the Work otherwise complies 115 | with the conditions stated in this License. 116 | 117 | 5. Submission of Contributions. 118 | 119 | Unless You explicitly state otherwise, any Contribution intentionally submitted 120 | for inclusion in the Work by You to the Licensor shall be under the terms and 121 | conditions of this License, without any additional terms or conditions. 122 | Notwithstanding the above, nothing herein shall supersede or modify the terms of 123 | any separate license agreement you may have executed with Licensor regarding 124 | such Contributions. 125 | 126 | 6. Trademarks. 127 | 128 | This License does not grant permission to use the trade names, trademarks, 129 | service marks, or product names of the Licensor, except as required for 130 | reasonable and customary use in describing the origin of the Work and 131 | reproducing the content of the NOTICE file. 132 | 133 | 7. Disclaimer of Warranty. 134 | 135 | Unless required by applicable law or agreed to in writing, Licensor provides the 136 | Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, 137 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, 138 | including, without limitation, any warranties or conditions of TITLE, 139 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are 140 | solely responsible for determining the appropriateness of using or 141 | redistributing the Work and assume any risks associated with Your exercise of 142 | permissions under this License. 143 | 144 | 8. Limitation of Liability. 
145 | 146 | In no event and under no legal theory, whether in tort (including negligence), 147 | contract, or otherwise, unless required by applicable law (such as deliberate 148 | and grossly negligent acts) or agreed to in writing, shall any Contributor be 149 | liable to You for damages, including any direct, indirect, special, incidental, 150 | or consequential damages of any character arising as a result of this License or 151 | out of the use or inability to use the Work (including but not limited to 152 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or 153 | any and all other commercial damages or losses), even if such Contributor has 154 | been advised of the possibility of such damages. 155 | 156 | 9. Accepting Warranty or Additional Liability. 157 | 158 | While redistributing the Work or Derivative Works thereof, You may choose to 159 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or 160 | other liability obligations and/or rights consistent with this License. However, 161 | in accepting such obligations, You may act only on Your own behalf and on Your 162 | sole responsibility, not on behalf of any other Contributor, and only if You 163 | agree to indemnify, defend, and hold each Contributor harmless for any liability 164 | incurred by, or claims asserted against, such Contributor by reason of your 165 | accepting any such warranty or additional liability. 166 | 167 | END OF TERMS AND CONDITIONS 168 | 169 | APPENDIX: How to apply the Apache License to your work 170 | 171 | To apply the Apache License to your work, attach the following boilerplate 172 | notice, with the fields enclosed by brackets "{}" replaced with your own 173 | identifying information. (Don't include the brackets!) The text should be 174 | enclosed in the appropriate comment syntax for the file format. 
We also 175 | recommend that a file or class name and description of purpose be included on 176 | the same "printed page" as the copyright notice for easier identification within 177 | third-party archives. 178 | 179 | Copyright 2018 WeihanLi 180 | 181 | Licensed under the Apache License, Version 2.0 (the "License"); 182 | you may not use this file except in compliance with the License. 183 | You may obtain a copy of the License at 184 | 185 | http://www.apache.org/licenses/LICENSE-2.0 186 | 187 | Unless required by applicable law or agreed to in writing, software 188 | distributed under the License is distributed on an "AS IS" BASIS, 189 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 190 | See the License for the specific language governing permissions and 191 | limitations under the License. -------------------------------------------------------------------------------- /ProxyCrawler.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 15 4 | VisualStudioVersion = 15.0.27428.2037 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ProxyCrawler", "ProxyCrawler\ProxyCrawler.csproj", "{4C3E07B4-E8AF-4F84-9290-66C97320362B}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Release|Any CPU = Release|Any CPU 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {4C3E07B4-E8AF-4F84-9290-66C97320362B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 15 | {4C3E07B4-E8AF-4F84-9290-66C97320362B}.Debug|Any CPU.Build.0 = Debug|Any CPU 16 | {4C3E07B4-E8AF-4F84-9290-66C97320362B}.Release|Any CPU.ActiveCfg = Release|Any CPU 17 | {4C3E07B4-E8AF-4F84-9290-66C97320362B}.Release|Any CPU.Build.0 = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = 
preSolution
20 |         HideSolutionNode = FALSE
21 |     EndGlobalSection
22 |     GlobalSection(ExtensibilityGlobals) = postSolution
23 |         SolutionGuid = {8DCB028B-D123-49B8-ADD3-B89EB7D022ED}
24 |     EndGlobalSection
25 | EndGlobal
26 |
--------------------------------------------------------------------------------
/ProxyCrawler/Entity/ProxyIpEntity.cs:
--------------------------------------------------------------------------------
1 | using System.Collections.Generic;
2 | using Newtonsoft.Json;
3 |
4 | namespace ProxyCrawler.Entity
5 | {
6 |     /// <summary>
7 |     /// One proxy endpoint (IP address and port) plus where it came from.
8 |     /// </summary>
9 |     public class ProxyIpEntity
10 |     {
11 |         public string Ip { get; set; }
12 |
13 |         public int Port { get; set; }
14 |
15 |         public string Location { get; set; }
16 |
17 |         /// <summary>Name of the provider that supplied this proxy.</summary>
18 |         public string Channel { get; set; }
19 |
20 |         /// <summary>Validation flag; excluded from JSON serialization.</summary>
21 |         [JsonIgnore]
22 |         public bool IsValid { get; set; }
23 |     }
24 |
25 |     /// <summary>
26 |     /// Equality comparer that identifies a proxy by its IP and port only.
27 |     /// </summary>
28 |     internal class ProxyEntityEqualityComparer : IEqualityComparer
29 |     {
30 |         public bool Equals(ProxyIpEntity x, ProxyIpEntity y)
31 |         {
32 |             // A null operand is equal only to another null.
33 |             if (x == null)
34 |             {
35 |                 return y == null;
36 |             }
37 |             if (y == null)
38 |             {
39 |                 return false;
40 |             }
41 |             return x.Port == y.Port && x.Ip == y.Ip;
42 |         }
43 |
44 |         public int GetHashCode(ProxyIpEntity obj)
45 |         {
46 |             // Hash the same "ip:port" pair that Equals compares.
47 |             var key = $"{obj.Ip}:{obj.Port}";
48 |             return key.GetHashCode();
49 |         }
50 |     }
51 | }
52 |
--------------------------------------------------------------------------------
/ProxyCrawler/IProxyProvider.cs:
--------------------------------------------------------------------------------
1 | using ProxyCrawler.Entity;
2 | using System.Collections.Generic;
3 | using System.Threading.Tasks;
4 |
5 | namespace ProxyCrawler
6 | {
7 |     /// <summary>
8 |     /// A source of proxy addresses.
9 |     /// </summary>
10 |     public interface IProxyProvider
11 |     {
12 |         /// <summary>Provider name.</summary>
13 |         string ProxyProviderName { get; }
14 |
15 |         /// <summary>Fetches this provider's proxies asynchronously.</summary>
16 |         Task> SyncProxyIp();
17 |     }
18 | }
--------------------------------------------------------------------------------
/ProxyCrawler/Job/BaseQuartzJob.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Threading.Tasks;
3 | using Quartz;
4 | using WeihanLi.Common.Log;
5 |
6 | namespace ProxyCrawler.Job
7
| {
8 |     public abstract class BaseQuartzJob : IJob
9 |     {
10 |         protected readonly ILogHelper Logger;
11 |
12 |         protected BaseQuartzJob(ILogHelper logger) => Logger = logger;
13 |
14 |         public virtual void Execute(IJobExecutionContext context)
15 |         {
16 |             try
17 |             {
18 |                 Logger.Info("Job 开始执行");
19 |                 ExecuteAsync(context).Wait();
20 |             }
21 |             catch (Exception ex)
22 |             {
23 |                 Logger.Error("Job 执行出错", ex);
24 |             }
25 |             finally
26 |             {
27 |                 Logger.Info("Job 执行结束");
28 |             }
29 |         }
30 |
31 |         protected abstract Task ExecuteAsync(IJobExecutionContext context);
32 |     }
33 | }
34 |
--------------------------------------------------------------------------------
/ProxyCrawler/Job/SyncProxyJob.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Net;
5 | using System.Threading.Tasks;
6 | using ProxyCrawler.Entity;
7 | using Quartz;
8 | using WeihanLi.Common;
9 | using WeihanLi.Common.Helpers;
10 | using WeihanLi.Common.Log;
11 | using WeihanLi.Extensions;
12 | using WeihanLi.Redis;
13 |
14 | namespace ProxyCrawler.Job
15 | {
16 |     /// <summary>
17 |     /// Collects proxies from every registered provider, merges them with the list
18 |     /// already stored in Redis under "proxyList", validates them and writes the
19 |     /// valid ones back to that list.
20 |     /// </summary>
21 |     public class SyncProxyJob : BaseQuartzJob
22 |     {
23 |         private readonly IReadOnlyCollection _proxyProviders;
24 |
25 |         public SyncProxyJob() : this(DependencyResolver.Current.ResolveServices().ToArray())
26 |         {
27 |         }
28 |
29 |         public SyncProxyJob(IReadOnlyCollection proxyProviders) : base(LogHelper.GetLogHelper()) => _proxyProviders = proxyProviders;
30 |
31 |         /// <summary>
32 |         /// Runs one sync pass: gather, de-duplicate by IP and port, validate, save.
33 |         /// </summary>
34 |         protected override async Task ExecuteAsync(IJobExecutionContext context)
35 |         {
36 |             var ips = (await Task.WhenAll(_proxyProviders.Select(_ => _.SyncProxyIp()))).SelectMany(_ => _).ToList();
37 |             // Re-check the previously stored proxies together with the fresh ones.
38 |             ips.AddRange(RedisManager.GetListClient("proxyList").ListRange() ?? Enumerable.Empty());
39 |             if (ips.Count > 0)
40 |             {
41 |                 // Validate that the proxies are usable
42 |                 var result = SaveProxy(
43 |                     await ValidateProxyAsync(ips.Distinct(new ProxyEntityEqualityComparer()).ToArray())
44 |                 );
45 |                 if (result > 0)
46 |                 {
47 |                     Logger.Info("代理同步成功");
48 |                 }
49 |                 else
50 |                 {
51 |                     Logger.Warn("代理同步失败");
52 |                 }
53 |             }
54 |         }
55 |
56 |         /// <summary>
57 |         /// Replaces the Redis "proxyList" list with the given proxies.
58 |         /// </summary>
59 |         /// <returns>0 when there is nothing to save, otherwise 1.</returns>
60 |         private int SaveProxy(ProxyIpEntity[] proxyIpEntities)
61 |         {
62 |             if (proxyIpEntities.Length == 0)
63 |             {
64 |                 Logger.Info("没有可用的代理");
65 |                 return 0;
66 |             }
67 |             Logger.Info($"可用代理IP数量:{proxyIpEntities.Length}");
68 |             var commonClient = RedisManager.GetCommonRedisClient(RedisDataType.List);
69 |             if (commonClient.KeyExists("proxyList"))
70 |             {
71 |                 commonClient.KeyDelete("proxyList");
72 |             }
73 |             var proxyList = RedisManager.GetListClient("proxyList");
74 |             proxyList.Push(proxyIpEntities);
75 |             return 1;
76 |         }
77 |
78 |         /// <summary>
79 |         /// Validates every proxy and returns those flagged valid. DEBUG builds skip
80 |         /// the network check and flag everything as valid.
81 |         /// </summary>
82 |         private async Task ValidateProxyAsync(IReadOnlyCollection proxyList)
83 |         {
84 | #if DEBUG
85 |             await Task.Run(() => proxyList.ForEach(p => p.IsValid = true));
86 | #else
87 |
88 |             await Task.WhenAll(proxyList.Select(ValidateProxyAsync));
89 | #endif
90 |             return proxyList.Where(_ => _.IsValid).ToArray();
91 |         }
92 |
93 |         /// <summary>
94 |         /// Requests https://baidu.com through the proxy and sets IsValid to true
95 |         /// only when the response status is 200 OK. Request failures are logged
96 |         /// and leave the flag unchanged.
97 |         /// </summary>
98 |         private async Task ValidateProxyAsync(ProxyIpEntity proxyEntity)
99 |         {
100 |             var client = new HttpRequestClient("https://baidu.com");
101 |             client.AddProxy(new WebProxy(proxyEntity.Ip, proxyEntity.Port));
102 |             HttpWebResponse response = null;
103 |             try
104 |             {
105 |                 response = await RetryHelper.TryInvokeAsync(() => client.ExecuteForResponseAsync(), res => res.StatusCode == HttpStatusCode.OK);
106 |             }
107 |             catch (Exception ex)
108 |             {
109 |                 Logger.Warn($"验证代理【{proxyEntity.Ip}:{proxyEntity.Port}】发生异常", ex);
110 |             }
111 |             if (response == null)
112 |             {
113 |                 return;
114 |             }
115 |             // Dispose the response so the underlying connection is released.
116 |             using (response)
117 |             {
118 |                 // StatusCode is a non-nullable enum, so the former "!= null" test passed
119 |                 // for every response, including error statuses; require 200 OK instead.
120 |                 proxyEntity.IsValid = response.StatusCode == HttpStatusCode.OK;
121 |                 Logger.Info($"验证代理【{proxyEntity.Ip}:{proxyEntity.Port}】,response StatusCode:{response.StatusCode}");
122 |             }
123 |         }
124 |     }
125 | }
126 |
--------------------------------------------------------------------------------
/ProxyCrawler/Program.cs: -------------------------------------------------------------------------------- 1 | // System and Topshelf are referenced only by the non-DEBUG (#else) branch of Main. 2 | #if !DEBUG 3 | using System; 4 | using Topshelf; 5 | #endif 6 | using Autofac; 7 | using ProxyCrawler.Job; 8 | using ProxyCrawler.ProxyProviders; 9 | using WeihanLi.Common; 10 | using WeihanLi.Common.Helpers; 11 | using WeihanLi.Redis; 12 | 13 | namespace ProxyCrawler 14 | { 15 | public class Program 16 | { 17 | // Entry point: DEBUG builds run the sync job once in-process; other builds host QuartzService through HostFactory.Run. 18 | public static void Main(string[] args) 19 | { 20 | Init(); 21 | #if DEBUG 22 | new SyncProxyJob().Execute(null); 23 | #else 24 | try 25 | { 26 | HostFactory.Run(host => 27 | { 28 | host.RunAsLocalSystem(); 29 | host.StartAutomatically(); 30 | 31 | host.SetServiceName("ProxyCrawler"); 32 | host.SetDisplayName("ProxyCrawler"); 33 | host.SetDescription("代理爬虫"); 34 | 35 | host.DependsOn("Redis"); 36 | 37 | host.Service(service => 38 | { 39 | service.ConstructUsing(() => new QuartzService()); 40 | service.WhenStarted(x => x.Start()); 41 | service.WhenStopped(x => x.Stop()); 42 | }); 43 | }); 44 | } 45 | catch (Exception e) 46 | { 47 | LogHelper.GetLogHelper().Error(e); 48 | } 49 | #endif 50 | } 51 | 52 | // Initializes logging, the DI container (proxy provider registrations) and the Redis configuration. 53 | private static void Init() 54 | { 55 | //Log 56 | LogHelper.LogInit(); 57 | 58 | // DI 59 | var builder = new ContainerBuilder(); 60 | #if DEBUG 61 | builder.RegisterType().As(); 62 | #else 63 | // TODO: Baibian Ip, decode the IP 64 | // builder.RegisterType().As(); 65 | builder.RegisterType().As(); 66 | builder.RegisterType().As(); 67 | builder.RegisterType().As(); 68 | builder.RegisterType().As(); 69 | builder.RegisterType().As(); 70 | builder.RegisterType().As(); 71 | #endif 72 | var container = builder.Build(); 73 | DependencyResolver.SetDependencyResolver(t => container.Resolve(t)); 74 | 75 | // Redis 76 | RedisManager.AddRedisConfig(config => 77 | { 78 | config.DefaultDatabase = 2; 79 | config.EnableCompress = false; 80 | }); 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /ProxyCrawler/ProxyCrawler.csproj: 
-------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | Exe 5 | net462 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | Never 25 | 26 | 27 | PreserveNewest 28 | 29 | 30 | PreserveNewest 31 | 32 | 33 | -------------------------------------------------------------------------------- /ProxyCrawler/ProxyProviders/BaibianIpProxyProvider.cs: -------------------------------------------------------------------------------- 1 | using ProxyCrawler.Entity; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Threading.Tasks; 5 | using WeihanLi.Common.Helpers; 6 | using WeihanLi.Extensions; 7 | 8 | namespace ProxyCrawler.ProxyProviders 9 | { 10 | /// 11 | /// https://www.baibianip.com/home/free.html 12 | /// 13 | internal class BaibianIpProxyProvider : BaseProxyProvider 14 | { 15 | public BaibianIpProxyProvider() : base(LogHelper.GetLogHelper()) 16 | { 17 | } 18 | 19 | public override string ProxyProviderName => "BaibianIp"; 20 | 21 | protected override string PageUrlFormat => "https://www.baibianip.com/home/free.html"; 22 | 23 | protected override async Task> SyncProxyInternal(int pageIndex) 24 | { 25 | var doc = await Parser.ParseAsync(await Client.GetStringAsync(PageUrlFormat.FormatWith(pageIndex))); 26 | return doc.QuerySelectorAll("table>tbody>tr").Select(tr => 27 | { 28 | var tds = tr.QuerySelectorAll("td"); 29 | if (!tds[4].TextContent.Trim().EqualsIgnoreCase("优质高匿")) 30 | { 31 | return null; 32 | } 33 | 34 | return new ProxyIpEntity 35 | { 36 | Ip = tds[0].TextContent.Trim(), 37 | Port = tds[1].TextContent.Trim().To(), 38 | Channel = ProxyProviderName, 39 | Location = tds[2].TextContent + tds[3].TextContent 40 | }; 41 | }).Where(_ => _ != null); 42 | } 43 | } 44 | } -------------------------------------------------------------------------------- /ProxyCrawler/ProxyProviders/BaseProxyProvider.cs: 
-------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Diagnostics; 4 | using System.Linq; 5 | using System.Net.Http; 6 | using System.Threading.Tasks; 7 | using AngleSharp.Parser.Html; 8 | using ProxyCrawler.Entity; 9 | using WeihanLi.Common.Helpers; 10 | using WeihanLi.Common.Log; 11 | 12 | namespace ProxyCrawler.ProxyProviders 13 | { 14 | // Base class for page-scraping proxy providers: owns the HttpClient, the shared HTML parser and the fan-out over pages 1..TotalPage. 15 | internal abstract class BaseProxyProvider : IProxyProvider, IDisposable 16 | { 17 | public abstract string ProxyProviderName { get; } 18 | 19 | protected abstract string PageUrlFormat { get; } 20 | 21 | protected virtual int TotalPage { get; } = 1; 22 | 23 | protected readonly HttpClient Client; 24 | 25 | // a single (reusable) parser front end 26 | protected static readonly HtmlParser Parser = new HtmlParser(); 27 | 28 | protected readonly ILogHelper Logger; 29 | 30 | protected BaseProxyProvider(ILogHelper logger) 31 | { 32 | Logger = logger; 33 | Client = new HttpClient 34 | { 35 | Timeout = TimeSpan.FromSeconds(10) 36 | }; 37 | Client.DefaultRequestHeaders.TryAddWithoutValidation("User-Agent", HttpHelper.GetUserAgent()); 38 | } 39 | 40 | // Fetches and parses one listing page; SyncProxyIp calls it with pageIndex from 1 to TotalPage. 41 | protected abstract Task> SyncProxyInternal(int pageIndex); 42 | 43 | // Fetches pages 1..TotalPage concurrently and returns every proxy found. A page that throws is logged and contributes no proxies, so one broken page cannot fault the whole SyncProxyIp task. 44 | public async Task> SyncProxyIp() 45 | { 46 | var proxyes = (await Task.WhenAll(Enumerable.Range(1, TotalPage).Select(async pageIndex => 47 | { 48 | try 49 | { 50 | // Materialize inside the try so failures of lazily evaluated row projections are caught here too. 51 | return (await SyncProxyInternal(pageIndex)).ToArray(); 52 | } 53 | catch (Exception ex) 54 | { 55 | Logger.Error($"{ProxyProviderName} 同步第{pageIndex}页代理出错", ex); 56 | return new ProxyIpEntity[0]; 57 | } 58 | }))).SelectMany(e => e).ToArray(); 59 | if (proxyes.Length > 0) 60 | { 61 | Logger.Info($"{ProxyProviderName} 同步代理完成,同步成功{proxyes.Length}个代理"); 62 | } 63 | return proxyes; 64 | } 65 | 66 | #region IDisposable Support 67 | 68 | private bool _disposed; // guards against redundant Dispose calls 69 | 70 | protected virtual void Dispose(bool disposing) 71 | { 72 | if (!_disposed) 73 | { 74 | if (disposing) 75 | { 76 | // HttpClient is a managed object, so it is released only on the explicit Dispose() path. 77 | Client?.Dispose(); 78 | } 79 | 80 | _disposed = true; 81 | } 82 | } 83 | 
84 | // Override a finalizer only if Dispose(bool disposing) above gains code that frees unmanaged resources. 85 | // ~BaseProxyProvider() { 86 | // // Do not change this code. Put cleanup code in Dispose(bool disposing) above. 87 | // Dispose(false); 88 | // } 89 | 90 | public void Dispose() 91 | { 92 | // Do not change this code. Put cleanup code in Dispose(bool disposing) above. 93 | Dispose(true); 94 | // No finalizer is declared in this class; the call keeps the pattern correct if one is added later. 95 | GC.SuppressFinalize(this); 96 | } 97 | 98 | #endregion IDisposable Support 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /ProxyCrawler/ProxyProviders/CoderBusyProxyProvider.cs: -------------------------------------------------------------------------------- 1 | using ProxyCrawler.Entity; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Threading.Tasks; 5 | using WeihanLi.Common.Helpers; 6 | using WeihanLi.Extensions; 7 | 8 | namespace ProxyCrawler.ProxyProviders 9 | { 10 | /// 11 | /// https://proxy.coderbusy.com/classical/anonymous-type/highanonymous.aspx?page=2 12 | /// 13 | internal class CoderBusyProxyProvider : BaseProxyProvider 14 | { 15 | public CoderBusyProxyProvider() : base(LogHelper.GetLogHelper()) 16 | { 17 | } 18 | 19 | public override string ProxyProviderName => "CoderBusy"; 20 | 21 | protected override int TotalPage => 3; 22 | 23 | protected override string PageUrlFormat => 24 | "https://proxy.coderbusy.com/classical/anonymous-type/highanonymous.aspx?page={0}"; 25 | 26 | protected override async Task> SyncProxyInternal(int pageIndex) 27 | { 28 | var doc = await Parser.ParseAsync(await Client.GetStringAsync(PageUrlFormat.FormatWith(pageIndex))); 29 | return doc.QuerySelectorAll("table>tbody>tr").Select(tr => 30 | { 31 | var tds = tr.QuerySelectorAll("td"); 32 | if (tds[2].TextContent.To() < 40) // NOTE(review): column 2 is also read as Port below - confirm which column this filter is meant to test 33 | { 34 | return null; 35 | } 36 | return new ProxyIpEntity 37 | { 38 | Ip = tds[0].TextContent.Trim(), 39 | Port = tds[2].TextContent.Trim().To(), 40 | Channel = ProxyProviderName, 41 | Location = tds[3].TextContent 42 | }; 43 | }).Where(_ => null != _); 44 | } 45 
| } 46 | } -------------------------------------------------------------------------------- /ProxyCrawler/ProxyProviders/KuaidailiProxyProvider.cs: -------------------------------------------------------------------------------- 1 | using ProxyCrawler.Entity; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | using System.Net.Http; 6 | using System.Threading.Tasks; 7 | using WeihanLi.Common.Helpers; 8 | using WeihanLi.Common.Log; 9 | using WeihanLi.Extensions; 10 | 11 | namespace ProxyCrawler.ProxyProviders 12 | { 13 | /// 14 | /// 15 | /// https://www.kuaidaili.com/free/inha/{pageIndex}/ 16 | /// 17 | internal class KuaidailiProxyProvider : BaseProxyProvider 18 | { 19 | public override string ProxyProviderName => "Kuaidaili"; 20 | protected override string PageUrlFormat => "https://www.kuaidaili.com/free/inha/{0}/"; 21 | 22 | protected override int TotalPage => 3; 23 | 24 | public KuaidailiProxyProvider() : base(LogHelper.GetLogHelper()) 25 | { 26 | } 27 | 28 | // Downloads one listing page and maps each table row (IP, port and location cells) to a ProxyIpEntity; any failure is logged and yields an empty result. 29 | protected override async Task> SyncProxyInternal(int pageIndex) 30 | { 31 | try 32 | { 33 | // Use the base-class Client (10s timeout, User-Agent header) rather than a second, never-disposed HttpClient. 34 | var result = await Client.GetStringAsync(PageUrlFormat.FormatWith(pageIndex)); 35 | 36 | var doc = await Parser.ParseAsync(result); 37 | return doc.QuerySelectorAll("#list>table>tbody>tr").Select(tr => 38 | { 39 | var tds = tr.QuerySelectorAll("td"); // query the cells once per row 40 | return new ProxyIpEntity 41 | { 42 | Ip = tds[0].TextContent.Trim(), 43 | Port = tds[1].TextContent.Trim().To(), 44 | Location = tds[4].TextContent.Trim(), 45 | Channel = ProxyProviderName 46 | }; 47 | }).ToArray(); 48 | } 49 | catch (Exception ex) 50 | { 51 | Logger.Error(ex); 52 | return Enumerable.Empty(); 53 | } 54 | } 55 | } 56 | } -------------------------------------------------------------------------------- /ProxyCrawler/ProxyProviders/MayiProxyProvider.cs: -------------------------------------------------------------------------------- 1 | using ProxyCrawler.Entity; 2 | using 
System.Collections.Generic; 3 | using System.Linq; 4 | using System.Threading.Tasks; 5 | using WeihanLi.Common.Helpers; 6 | 7 | namespace ProxyCrawler.ProxyProviders 8 | { 9 | /// 10 | /// http://www.mayidaili.com/free/anonymous/%E9%AB%98%E5%8C%BF/2 11 | /// 12 | internal class MayiProxyProvider : BaseProxyProvider 13 | { 14 | public MayiProxyProvider() : base(LogHelper.GetLogHelper()) 15 | { 16 | } 17 | 18 | public override string ProxyProviderName => "Mayi"; 19 | 20 | protected override string PageUrlFormat => "http://www.mayidaili.com/free/anonymous/%E9%AB%98%E5%8C%BF/{0}"; 21 | 22 | protected override Task> SyncProxyInternal(int pageIndex) 23 | { 24 | // TODO:识别网页上的图片端口号 25 | return Task.FromResult(Enumerable.Empty()); 26 | } 27 | } 28 | } -------------------------------------------------------------------------------- /ProxyCrawler/ProxyProviders/SixIpProxyProvider.cs: -------------------------------------------------------------------------------- 1 | using ProxyCrawler.Entity; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Threading.Tasks; 5 | using WeihanLi.Common.Helpers; 6 | 7 | namespace ProxyCrawler.ProxyProviders 8 | { 9 | internal class SixIpProxyProvider : BaseProxyProvider 10 | { 11 | public SixIpProxyProvider() : base(LogHelper.GetLogHelper()) 12 | { 13 | } 14 | 15 | public override string ProxyProviderName => "66Ip"; 16 | protected override string PageUrlFormat => ""; 17 | 18 | protected override Task> SyncProxyInternal(int pageIndex) 19 | { 20 | // TODO:66ip 21 | return Task.FromResult(Enumerable.Empty()); 22 | } 23 | } 24 | } -------------------------------------------------------------------------------- /ProxyCrawler/ProxyProviders/XicidailiProxyProvider.cs: -------------------------------------------------------------------------------- 1 | using ProxyCrawler.Entity; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Threading.Tasks; 5 | using WeihanLi.Common.Helpers; 6 | 7 | namespace 
ProxyCrawler.ProxyProviders 8 | { 9 | /// 10 | /// http://www.xicidaili.com/nn/{pageIndex} 11 | /// 12 | internal class XicidailiProxyProvider : BaseProxyProvider 13 | { 14 | public override string ProxyProviderName => "Xicidaili"; 15 | protected override string PageUrlFormat => "http://www.xicidaili.com/nn/{0}"; 16 | 17 | public XicidailiProxyProvider() : base(LogHelper.GetLogHelper()) 18 | { 19 | } 20 | 21 | protected override Task> SyncProxyInternal(int pageIndex) 22 | { 23 | // TODO:Xicidaili 24 | return Task.FromResult(Enumerable.Empty()); 25 | } 26 | } 27 | } -------------------------------------------------------------------------------- /ProxyCrawler/ProxyProviders/YundailiProxyProvider.cs: -------------------------------------------------------------------------------- 1 | using ProxyCrawler.Entity; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | using System.Threading.Tasks; 6 | using WeihanLi.Common.Helpers; 7 | using WeihanLi.Common.Log; 8 | using WeihanLi.Extensions; 9 | 10 | namespace ProxyCrawler.ProxyProviders 11 | { 12 | /// 13 | /// 14 | /// http://www.ip3366.net/free/?page={pageIndex} 15 | /// 16 | internal class YundailiProxyProvider : BaseProxyProvider 17 | { 18 | public override string ProxyProviderName => "Yundaili"; 19 | protected override string PageUrlFormat => "http://www.ip3366.net/free/?page={0}"; 20 | 21 | protected override async Task> SyncProxyInternal(int pageIndex) 22 | { 23 | try 24 | { 25 | var result = await Client.GetStringAsync(PageUrlFormat.FormatWith(pageIndex)); 26 | var doc = await Parser.ParseAsync(result); 27 | return doc.QuerySelectorAll("#list>table>tbody>tr").Select(tr => new ProxyIpEntity 28 | { 29 | Ip = tr.QuerySelectorAll("td")[0].TextContent.Trim(), 30 | Port = tr.QuerySelectorAll("td")[1].TextContent.Trim().To(), 31 | Channel = ProxyProviderName, 32 | Location = tr.QuerySelectorAll("td")[4].TextContent.Trim() 33 | }); 34 | } 35 | catch (Exception e) 36 | { 37 | Logger.Error(e); 
38 | return Enumerable.Empty(); 39 | } 40 | } 41 | 42 | public YundailiProxyProvider() : base(LogHelper.GetLogHelper()) 43 | { 44 | } 45 | } 46 | } -------------------------------------------------------------------------------- /ProxyCrawler/QuartzService.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using CrystalQuartz.Application; 3 | using CrystalQuartz.Owin; 4 | using Microsoft.Owin.Hosting; 5 | using Quartz; 6 | using Quartz.Impl; 7 | using WeihanLi.Common.Helpers; 8 | using WeihanLi.Common.Log; 9 | 10 | namespace ProxyCrawler 11 | { 12 | public class QuartzService 13 | { 14 | private readonly IScheduler _scheduler = StdSchedulerFactory.GetDefaultScheduler(); 15 | private static readonly ILogHelper Logger = LogHelper.GetLogHelper(); 16 | 17 | private IDisposable _webApp; 18 | 19 | public bool Start() 20 | { 21 | if (_webApp != null) 22 | { 23 | _webApp.Dispose(); 24 | _webApp = null; 25 | } 26 | 27 | _scheduler.Start(); 28 | 29 | try 30 | { 31 | // https://stackoverflow.com/questions/27168432/the-server-factory-could-not-be-located-for-the-given-input-microsoft-owin-host 32 | // 33 | _webApp = WebApp.Start("http://*:8200", app => 34 | { 35 | app.UseCrystalQuartz(_scheduler, new CrystalQuartzOptions 36 | { 37 | Path = "/quartz" 38 | }); 39 | }); 40 | Logger.Info("webapp started"); 41 | } 42 | catch (Exception ex) 43 | { 44 | Logger.Error("服务启动 webApp 失败", ex); 45 | } 46 | 47 | return true; 48 | } 49 | 50 | public bool Stop() 51 | { 52 | if (_webApp != null) 53 | { 54 | _webApp.Dispose(); 55 | _webApp = null; 56 | } 57 | _scheduler.Shutdown(false); 58 | return true; 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /ProxyCrawler/app.config: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 |
5 |
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /ProxyCrawler/log4net.config: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 |
5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /ProxyCrawler/quartz_jobs.config: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | true 5 | 6 | 7 | 8 | SyncProxyJob 9 | DefaultJobGroup 10 | 抓取代理 11 | ProxyCrawler.Job.SyncProxyJob, ProxyCrawler 12 | true 13 | false 14 | 15 | 16 | 17 | SyncProxyJobTrigger 18 | DefaultTriggerGroup 19 | SyncProxyJob 20 | DefaultJobGroup 21 | 3 14 5/2 * * ? 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ProxyCrawler 2 | 3 | ## Intro 4 | 5 | 代理爬虫,爬取网络上的代理并验证可用性,将可用的代理存放到 Redis 中的 windows 服务。 6 | 7 | ## Deploy 8 | 9 | 1. 安装 `Redis` 环境 10 | 11 | redis 下载 12 | 13 | - linux : 14 | - windows : 15 | 16 | 1. 在 Release 模式下编译项目 17 | 18 | 1. 在生成目录下执行命令 `ProxyCrawler install --sudo` 进行安装服务 19 | 20 | ## Contact 21 | 22 | Contact me: weihanli@outlook.com -------------------------------------------------------------------------------- /nuget.config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | --------------------------------------------------------------------------------