├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── build.gradle.kts ├── gradle.properties ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── settings.gradle.kts └── src └── main ├── kotlin ├── plugin │ └── httpclient │ │ ├── checkerproxy │ │ ├── CheckerProxy.kt │ │ └── CheckerProxyData.kt │ │ ├── freeproxyapi │ │ ├── FreeProxyApi.kt │ │ └── FreeProxyApiData.kt │ │ ├── geonode │ │ ├── GeoNode.kt │ │ └── GeoNodeData.kt │ │ ├── openproxylist │ │ └── OpenProxyList.kt │ │ ├── proxyscrape │ │ └── ProxyScrape.kt │ │ └── vpnfail │ │ ├── VpnFail.kt │ │ └── VpnFailData.kt └── scraper │ ├── Main.kt │ ├── net │ ├── ChromeWebDriver.kt │ └── CoroutinesHttpClient.kt │ ├── plugin │ ├── Plugin.kt │ ├── PluginFactory.kt │ └── hook │ │ └── ProxyWebsite.kt │ └── util │ ├── NetworkUtils.kt │ ├── ScraperExecutor.kt │ ├── ScraperThreadFactory.kt │ ├── data │ ├── ProxyData.kt │ └── ProxyOutputData.kt │ └── scripts │ └── GetAllCheckerProxies.kt └── resources └── logback.xml /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | HELP.md 2 | .gradle 3 | build/ 4 | !build/libs/* 5 | !gradle/wrapper/gradle-wrapper.jar 6 | !**/src/main/**/build/ 7 | !**/src/test/**/build/ 8 | 9 | ### STS ### 10 | .apt_generated 11 | .classpath 12 | .factorypath 13 | .project 14 | .settings 15 | .springBeans 16 | .sts4-cache 17 | bin/ 18 | !**/src/main/**/bin/ 19 | !**/src/test/**/bin/ 20 | 21 | ### IntelliJ IDEA ### 22 | .idea 23 | *.iws 24 | *.iml 25 | *.ipr 26 | out/ 27 | !**/src/main/**/out/ 28 | !**/src/test/**/out/ 29 | 30 | ### NetBeans ### 31 | /nbproject/private/ 32 | /nbbuild/ 33 | /dist/ 34 | /nbdist/ 35 | /.nb-gradle/ 36 | 37 | ### VS Code ### 38 | .vscode/ 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Kai o((ω ))o 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Proxy Scraper 2 | 3 | This is an application that scrapes various Proxy API Endpoints, then compiles the proxies into files within the `"/proxies/"` directory. 4 | 5 | You'll just need to open the application, a console will pop-up and then the proxies will be saved. 6 | 7 | Downloads 8 | --- 9 | - [Windows (x64)](https://github.com/jetkai/proxy-scraper/releases/download/1.0/windows-64-exec.zip) 10 | - [Any OS (With Java 11 or Greater)](https://github.com/jetkai/proxy-scraper/releases/download/1.0/proxy-scraper.jar) 11 | 12 | Output Files (After Running) 13 | --- 14 | - `/proxies/http.txt` 15 | - `/proxies/https.txt` 16 | - `/proxies/socks4.txt` 17 | - `/proxies/socks5.txt` 18 | - `/proxies/proxies.txt` 19 | - `/proxies/proxies.json` 20 | 21 | ![image](https://user-images.githubusercontent.com/26250917/219645876-df1a6609-c75d-479a-aa60-146d8a4847bc.png) 22 | 23 | 24 | Compile Requirements: 25 | --- 26 | ☑️ Kotlin 1.8.10 27 | 28 | ☑️ JDK-11 29 | 30 | ☑️ Gradle 8.0 31 | 32 | ☑️ IntelliJ IDEA 33 | -------------------------------------------------------------------------------- /build.gradle.kts: -------------------------------------------------------------------------------- 1 | plugins { 2 | kotlin("jvm") version "1.8.10" 3 | id("edu.sc.seis.launch4j") version "2.5.4" 4 | } 5 | 6 | group = "scraper" 7 | version = "1.0-SNAPSHOT" 8 | 9 | repositories { 10 | mavenCentral() 11 | } 12 | 13 | dependencies { 14 | //Jackson - Serialize/Deserialize 15 | implementation("com.fasterxml.jackson.core:jackson-core:2.14.2") 16 | implementation("com.fasterxml.jackson.core:jackson-annotations:2.14.2") 17 | implementation("com.fasterxml.jackson.module:jackson-module-kotlin:2.14.2") 18 | 19 | //Emulate Web Browser - Some proxy sites require JavaScript 20 | implementation("org.seleniumhq.selenium:selenium-chrome-driver:4.8.0") 21 | implementation("org.seleniumhq.selenium:selenium-devtools-v109:4.8.0") 22 | 23 | //Reflection for Plugins 24 | implementation("org.reflections:reflections:0.10.2") 25 | 26 | //Coroutines 27 | implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.6.4") 28 | 29 | //Logging 30 | implementation("io.github.microutils:kotlin-logging-jvm:3.0.5") 31 | implementation("ch.qos.logback:logback-classic:1.4.5") 32 | } 33 | 34 | tasks.test { 35 | useJUnitPlatform() 36 | } 37 | 38 | launch4j { 39 | mainClassName = "scraper.Main" 40 | headerType = "console" 41 | bundledJrePath = "jre11" 42 | bundledJre64Bit = true 43 | fileDescription = "Proxy Scraping Tool - https://github.com/jetkai/proxy-scraper" 44 | productName = "Proxy Scraper" 45 | copyright = "MIT License" 46 | companyName = "jetkai" 47 | version = "1.0" 48 | textVersion = "1.0" 49 | language = "ENGLISH_UK" 50 | } 51 | 52 | lateinit var jarFile: File 53 | 54 | tasks.withType { 55 | archiveFileName.set("proxy-scraper.jar") 56 | duplicatesStrategy = DuplicatesStrategy.EXCLUDE 57 | manifest { 58 | attributes["Main-Class"] = "scraper/Main" 59 | } 60 | from(sourceSets.main.get().output) 61 | dependsOn(configurations.runtimeClasspath) 62 | from({ 63 | configurations.runtimeClasspath.get().filter { it.name.endsWith("jar") }.map { zipTree(it) } 64 | }) 65 | jarFile = archiveFile.get().asFile 66 | } 67 | 68 | kotlin { 69 | jvmToolchain(11) 70 | } -------------------------------------------------------------------------------- /gradle.properties: 
-------------------------------------------------------------------------------- 1 | kotlin.code.style=official 2 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jetkai/proxy-scraper/88f61c3253802ed6123919d279d7572dc47479a7/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.0-rc-5-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Copyright © 2015-2021 the original authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | ############################################################################## 20 | # 21 | # Gradle start up script for POSIX generated by Gradle. 22 | # 23 | # Important for running: 24 | # 25 | # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is 26 | # noncompliant, but you have some other compliant shell such as ksh or 27 | # bash, then to run this script, type that shell name before the whole 28 | # command line, like: 29 | # 30 | # ksh Gradle 31 | # 32 | # Busybox and similar reduced shells will NOT work, because this script 33 | # requires all of these POSIX shell features: 34 | # * functions; 35 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}», 36 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»; 37 | # * compound commands having a testable exit status, especially «case»; 38 | # * various built-in commands including «command», «set», and «ulimit». 39 | # 40 | # Important for patching: 41 | # 42 | # (2) This script targets any POSIX shell, so it avoids extensions provided 43 | # by Bash, Ksh, etc; in particular arrays are avoided. 44 | # 45 | # The "traditional" practice of packing multiple parameters into a 46 | # space-separated string is a well documented source of bugs and security 47 | # problems, so this is (mostly) avoided, by progressively accumulating 48 | # options in "$@", and eventually passing that to Java. 49 | # 50 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, 51 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; 52 | # see the in-line comments for details. 53 | # 54 | # There are tweaks for specific operating systems such as AIX, CygWin, 55 | # Darwin, MinGW, and NonStop. 
56 | # 57 | # (3) This script is generated from the Groovy template 58 | # https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt 59 | # within the Gradle project. 60 | # 61 | # You can find Gradle at https://github.com/gradle/gradle/. 62 | # 63 | ############################################################################## 64 | 65 | # Attempt to set APP_HOME 66 | 67 | # Resolve links: $0 may be a link 68 | app_path=$0 69 | 70 | # Need this for daisy-chained symlinks. 71 | while 72 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path 73 | [ -h "$app_path" ] 74 | do 75 | ls=$( ls -ld "$app_path" ) 76 | link=${ls#*' -> '} 77 | case $link in #( 78 | /*) app_path=$link ;; #( 79 | *) app_path=$APP_HOME$link ;; 80 | esac 81 | done 82 | 83 | APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit 84 | 85 | APP_NAME="Gradle" 86 | APP_BASE_NAME=${0##*/} 87 | 88 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 89 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 90 | 91 | # Use the maximum available, or set MAX_FD != -1 to use that value. 92 | MAX_FD=maximum 93 | 94 | warn () { 95 | echo "$*" 96 | } >&2 97 | 98 | die () { 99 | echo 100 | echo "$*" 101 | echo 102 | exit 1 103 | } >&2 104 | 105 | # OS specific support (must be 'true' or 'false'). 106 | cygwin=false 107 | msys=false 108 | darwin=false 109 | nonstop=false 110 | case "$( uname )" in #( 111 | CYGWIN* ) cygwin=true ;; #( 112 | Darwin* ) darwin=true ;; #( 113 | MSYS* | MINGW* ) msys=true ;; #( 114 | NONSTOP* ) nonstop=true ;; 115 | esac 116 | 117 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 118 | 119 | 120 | # Determine the Java command to use to start the JVM. 121 | if [ -n "$JAVA_HOME" ] ; then 122 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 123 | # IBM's JDK on AIX uses strange locations for the executables 124 | JAVACMD=$JAVA_HOME/jre/sh/java 125 | else 126 | JAVACMD=$JAVA_HOME/bin/java 127 | fi 128 | if [ ! -x "$JAVACMD" ] ; then 129 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 130 | 131 | Please set the JAVA_HOME variable in your environment to match the 132 | location of your Java installation." 133 | fi 134 | else 135 | JAVACMD=java 136 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 137 | 138 | Please set the JAVA_HOME variable in your environment to match the 139 | location of your Java installation." 140 | fi 141 | 142 | # Increase the maximum file descriptors if we can. 143 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then 144 | case $MAX_FD in #( 145 | max*) 146 | MAX_FD=$( ulimit -H -n ) || 147 | warn "Could not query maximum file descriptor limit" 148 | esac 149 | case $MAX_FD in #( 150 | '' | soft) :;; #( 151 | *) 152 | ulimit -n "$MAX_FD" || 153 | warn "Could not set maximum file descriptor limit to $MAX_FD" 154 | esac 155 | fi 156 | 157 | # Collect all arguments for the java command, stacking in reverse order: 158 | # * args from the command line 159 | # * the main class name 160 | # * -classpath 161 | # * -D...appname settings 162 | # * --module-path (only if needed) 163 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 
164 | 165 | # For Cygwin or MSYS, switch paths to Windows format before running java 166 | if "$cygwin" || "$msys" ; then 167 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) 168 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) 169 | 170 | JAVACMD=$( cygpath --unix "$JAVACMD" ) 171 | 172 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 173 | for arg do 174 | if 175 | case $arg in #( 176 | -*) false ;; # don't mess with options #( 177 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath 178 | [ -e "$t" ] ;; #( 179 | *) false ;; 180 | esac 181 | then 182 | arg=$( cygpath --path --ignore --mixed "$arg" ) 183 | fi 184 | # Roll the args list around exactly as many times as the number of 185 | # args, so each arg winds up back in the position where it started, but 186 | # possibly modified. 187 | # 188 | # NB: a `for` loop captures its iteration list before it begins, so 189 | # changing the positional parameters here affects neither the number of 190 | # iterations, nor the values presented in `arg`. 191 | shift # remove old arg 192 | set -- "$@" "$arg" # push replacement arg 193 | done 194 | fi 195 | 196 | # Collect all arguments for the java command; 197 | # * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of 198 | # shell script including quotes and variable substitutions, so put them in 199 | # double quotes to make sure that they get re-expanded; and 200 | # * put everything else in single quotes, so that it's not re-expanded. 201 | 202 | set -- \ 203 | "-Dorg.gradle.appname=$APP_BASE_NAME" \ 204 | -classpath "$CLASSPATH" \ 205 | org.gradle.wrapper.GradleWrapperMain \ 206 | "$@" 207 | 208 | # Use "xargs" to parse quoted args. 209 | # 210 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed. 211 | # 212 | # In Bash we could simply go: 213 | # 214 | # readarray ARGS < <( xargs -n1 <<<"$var" ) && 215 | # set -- "${ARGS[@]}" "$@" 216 | # 217 | # but POSIX shell has neither arrays nor command substitution, so instead we 218 | # post-process each arg (as a line of input to sed) to backslash-escape any 219 | # character that might be a shell metacharacter, then use eval to reverse 220 | # that process (while maintaining the separation between arguments), and wrap 221 | # the whole thing up as a single "set" statement. 222 | # 223 | # This will of course break if any of these variables contains a newline or 224 | # an unmatched quote. 225 | # 226 | 227 | eval "set -- $( 228 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | 229 | xargs -n1 | 230 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | 231 | tr '\n' ' ' 232 | )" '"$@"' 233 | 234 | exec "$JAVACMD" "$@" 235 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 33 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 34 | 35 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 36 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 37 | 38 | @rem Find java.exe 39 | if defined JAVA_HOME goto findJavaFromJavaHome 40 | 41 | set JAVA_EXE=java.exe 42 | %JAVA_EXE% -version >NUL 2>&1 43 | if "%ERRORLEVEL%" == "0" goto execute 44 | 45 | echo. 46 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 47 | echo. 48 | echo Please set the JAVA_HOME variable in your environment to match the 49 | echo location of your Java installation. 50 | 51 | goto fail 52 | 53 | :findJavaFromJavaHome 54 | set JAVA_HOME=%JAVA_HOME:"=% 55 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 56 | 57 | if exist "%JAVA_EXE%" goto execute 58 | 59 | echo. 60 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 61 | echo. 62 | echo Please set the JAVA_HOME variable in your environment to match the 63 | echo location of your Java installation. 64 | 65 | goto fail 66 | 67 | :execute 68 | @rem Setup the command line 69 | 70 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 71 | 72 | 73 | @rem Execute Gradle 74 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 75 | 76 | :end 77 | @rem End local scope for the variables with windows NT shell 78 | if "%ERRORLEVEL%"=="0" goto mainEnd 79 | 80 | :fail 81 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 82 | rem the _cmd.exe /c_ return code! 
83 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 84 | exit /b 1 85 | 86 | :mainEnd 87 | if "%OS%"=="Windows_NT" endlocal 88 | 89 | :omega 90 | -------------------------------------------------------------------------------- /settings.gradle.kts: -------------------------------------------------------------------------------- 1 | 2 | rootProject.name = "proxy-scraper" 3 | 4 | -------------------------------------------------------------------------------- /src/main/kotlin/plugin/httpclient/checkerproxy/CheckerProxy.kt: -------------------------------------------------------------------------------- 1 | package plugin.httpclient.checkerproxy 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper 4 | import com.fasterxml.jackson.module.kotlin.readValue 5 | import kotlinx.coroutines.Dispatchers 6 | import kotlinx.coroutines.async 7 | import kotlinx.coroutines.runBlocking 8 | import mu.KotlinLogging 9 | import scraper.net.CoroutinesHttpClient 10 | import scraper.plugin.Plugin 11 | import scraper.plugin.PluginFactory 12 | import scraper.plugin.hook.ProxyWebsite 13 | import scraper.util.NetworkUtils 14 | import scraper.util.data.ProxyData 15 | import java.net.http.HttpResponse 16 | import java.time.LocalDateTime 17 | import java.time.format.DateTimeFormatter 18 | 19 | /** 20 | * CheckerProxy - 16/02/2023 21 | * @author Kai 22 | * 23 | * Source: https://checkerproxy.net/ 24 | * Endpoint: https://checkerproxy.net/api/archive/yyyy-MM-dd 25 | * Method: GET 26 | * 27 | * ContentType: JSON 28 | */ 29 | @Suppress("unused") 30 | class CheckerProxy : Plugin, ProxyWebsite { 31 | 32 | private val dateFormat = DateTimeFormatter.ofPattern("yyyy-MM-dd") 33 | private val currentDate = LocalDateTime.now().format(dateFormat) 34 | 35 | private val endpointUrl = "https://checkerproxy.net/api/archive/$currentDate" 36 | 37 | private val logger = KotlinLogging.logger { } 38 | 39 | override val proxies : MutableList = mutableListOf() 40 | 41 | private var completed : Boolean = false 42 | 43 | override fun register() { 44 | PluginFactory.register(this) 45 | } 46 | 47 | override fun initialize() : Boolean { 48 | logger.info { "Initializing" } 49 | 50 | try { 51 | this.thenConnect() 52 | } catch (ex : Exception) { 53 | completed = true 54 | logger.error { ex.message } 55 | } 56 | 57 | return true 58 | } 59 | 60 | override fun thenConnect() { 61 | logger.info { "Connecting" } 62 | 63 | val response = mutableMapOf?>() 64 | runBlocking { 65 | val result = async(Dispatchers.IO) { 66 | val client = CoroutinesHttpClient() 67 | client.contentType = arrayOf("content-type", "application/json") 68 | val data = client.fetch(endpointUrl, null) 69 | data 70 | } 71 | response["json"] = result.await() 72 | } 73 | 74 | if (response.isNotEmpty()) { 75 | this.thenHandleData(response) 76 | } else { 77 | logger.error { "Failed to connect to $endpointUrl" } 78 | } 79 | } 80 | 81 | override fun thenHandleData(data : MutableMap) { 82 | logger.info { "Handling Data" } 83 | 84 | val mapper = ObjectMapper() 85 | val proxyList = mapper.readValue>((data.getValue("json") as HttpResponse).body()) 86 | for(proxy in proxyList) { 87 | if(!NetworkUtils.isValidIpAndPort(proxy.host)) { 88 | continue 89 | } 90 | val ip = proxy.host.split(":")[0] 91 | val port = proxy.host.split(":")[1].toInt() 92 | val protocol : String = when (proxy.protocolAsInt) { 93 | 1 -> { "HTTP" } 94 | 2 -> { "HTTPS" } 95 | 4 -> { "SOCKS5" } 96 | else -> { null } 97 | } ?: continue 98 | proxies.add(ProxyData(ip, port, protocol)) 99 | } 100 | 101 | logger.info { "Collected 
${proxies.size} proxies" } 102 | completed = true 103 | this.finallyComplete() 104 | } 105 | 106 | override fun finallyComplete() : Boolean { 107 | return completed 108 | } 109 | 110 | } -------------------------------------------------------------------------------- /src/main/kotlin/plugin/httpclient/checkerproxy/CheckerProxyData.kt: -------------------------------------------------------------------------------- 1 | package plugin.httpclient.checkerproxy 2 | 3 | import com.fasterxml.jackson.annotation.JsonIgnoreProperties 4 | import com.fasterxml.jackson.annotation.JsonInclude 5 | import com.fasterxml.jackson.annotation.JsonProperty 6 | import com.fasterxml.jackson.databind.annotation.JsonSerialize 7 | 8 | @JsonInclude(JsonInclude.Include.NON_NULL) 9 | @JsonIgnoreProperties(ignoreUnknown = true) 10 | @JsonSerialize 11 | data class CheckerProxyData( 12 | @JsonProperty("addr") //ip:port 13 | val host : String, 14 | @JsonProperty("type") //0=http 15 | val protocolAsInt : Int, 16 | @JsonProperty("timeout") 17 | val timeout : Int, 18 | @JsonProperty("post") 19 | val post : Boolean 20 | ) 21 | -------------------------------------------------------------------------------- /src/main/kotlin/plugin/httpclient/freeproxyapi/FreeProxyApi.kt: -------------------------------------------------------------------------------- 1 | package plugin.httpclient.freeproxyapi 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper 4 | import com.fasterxml.jackson.module.kotlin.readValue 5 | import kotlinx.coroutines.Dispatchers 6 | import kotlinx.coroutines.async 7 | import kotlinx.coroutines.runBlocking 8 | import mu.KotlinLogging 9 | import scraper.net.CoroutinesHttpClient 10 | import scraper.plugin.Plugin 11 | import scraper.plugin.PluginFactory 12 | import scraper.plugin.hook.ProxyWebsite 13 | import scraper.util.data.ProxyData 14 | import java.net.http.HttpResponse 15 | 16 | /** 17 | * GeoNode - 11/02/2023 18 | * @author Kai 19 | * 20 | * Source: https://freeproxyapi.com/ 21 | * Endpoint: https://public.freeproxyapi.com/api/Download/Json 22 | * Method: POST 23 | * 24 | * ContentType: JSON 25 | * Format: 26 | * [ 27 | * { 28 | * "Host": "39.104.62.128", 29 | * "Port": 8123, 30 | * "Type": 2, 31 | * "ProxyLevel": 1 32 | * } 33 | * ] 34 | */ 35 | @Suppress("unused") 36 | class FreeProxyApi : Plugin, ProxyWebsite { 37 | 38 | private val endpointUrl = "https://public.freeproxyapi.com/api/Download/Json" 39 | private val postData = "{\"types\":[],\"levels\":[],\"countries\":[],\"type\":\"json\",\"resultModel\":\"Mini\"}" 40 | 41 | private val logger = KotlinLogging.logger { } 42 | 43 | override val proxies : MutableList = mutableListOf() 44 | 45 | private var completed : Boolean = false 46 | 47 | override fun register() { 48 | PluginFactory.register(this) 49 | } 50 | 51 | override fun initialize() : Boolean { 52 | logger.info { "Initializing" } 53 | 54 | try { 55 | this.thenConnect() 56 | } catch (ex : Exception) { 57 | completed = true 58 | logger.error { ex.message } 59 | } 60 | 61 | return true 62 | } 63 | 64 | override fun thenConnect() { 65 | logger.info { "Connecting" } 66 | 67 | val response = mutableMapOf?>() 68 | runBlocking { 69 | val result = async(Dispatchers.IO) { 70 | val client = CoroutinesHttpClient() 71 | client.contentType = arrayOf("content-type", "application/json") 72 | val data = client.fetch(endpointUrl, postData) 73 | data 74 | } 75 | response["json"] = result.await() 76 | } 77 | 78 | if (response.isNotEmpty()) { 79 | this.thenHandleData(response) 80 | } else { 81 | logger.error { 
"Failed to connect to $endpointUrl" } 82 | } 83 | } 84 | 85 | override fun thenHandleData(data : MutableMap) { 86 | logger.info { "Handling Data" } 87 | 88 | val mapper = ObjectMapper() 89 | val values = mapper.readValue>((data.getValue("json") as HttpResponse).body()) 90 | for(value in values) { 91 | val protocol : String = when (value.protocolAsInt) { 92 | 1 -> "SOCKS4" 93 | 2 -> "SOCKS5" 94 | 3 -> "HTTP" 95 | 4 -> "HTTPS" 96 | else -> null 97 | } ?: continue 98 | 99 | val proxy = ProxyData(value.host, value.port, protocol) 100 | proxies.add(proxy) 101 | } 102 | 103 | logger.info { "Collected ${proxies.size} proxies" } 104 | completed = true 105 | this.finallyComplete() 106 | } 107 | 108 | override fun finallyComplete() : Boolean { 109 | return completed 110 | } 111 | 112 | } -------------------------------------------------------------------------------- /src/main/kotlin/plugin/httpclient/freeproxyapi/FreeProxyApiData.kt: -------------------------------------------------------------------------------- 1 | package plugin.httpclient.freeproxyapi 2 | 3 | import com.fasterxml.jackson.annotation.JsonIgnoreProperties 4 | import com.fasterxml.jackson.annotation.JsonInclude 5 | import com.fasterxml.jackson.annotation.JsonProperty 6 | import com.fasterxml.jackson.databind.annotation.JsonSerialize 7 | 8 | @JsonInclude(JsonInclude.Include.NON_NULL) 9 | @JsonIgnoreProperties(ignoreUnknown = true) 10 | @JsonSerialize 11 | data class FreeProxyApiData( 12 | @JsonProperty("Host") 13 | val host : String, 14 | @JsonProperty("Port") 15 | val port : Int, 16 | @JsonProperty("Type") 17 | val protocolAsInt : Int, 18 | @JsonProperty("ProxyLevel") 19 | val proxyLevel : Int 20 | ) 21 | -------------------------------------------------------------------------------- /src/main/kotlin/plugin/httpclient/geonode/GeoNode.kt: -------------------------------------------------------------------------------- 1 | package plugin.httpclient.geonode 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper 4 | import com.fasterxml.jackson.module.kotlin.readValue 5 | import kotlinx.coroutines.Dispatchers 6 | import kotlinx.coroutines.async 7 | import kotlinx.coroutines.runBlocking 8 | import mu.KotlinLogging 9 | import scraper.net.CoroutinesHttpClient 10 | import scraper.plugin.Plugin 11 | import scraper.plugin.PluginFactory 12 | import scraper.plugin.hook.ProxyWebsite 13 | import scraper.util.data.ProxyData 14 | import java.net.http.HttpResponse 15 | 16 | /** 17 | * GeoNode - 16/02/2023 18 | * @author Kai 19 | * 20 | * Description: Grabs 2000 proxies (HTTP, HTTPS, SOCKS4 & SOCKS5) 21 | * 22 | * Source: https://proxylist.geonode.com/ 23 | * Endpoint (HTTP): https://proxylist.geonode.com/api/proxy-list?limit=500&sort_by=lastChecked&sort_type=desc&filterUpTime=90&protocols=http 24 | * Endpoint (HTTPS): https://proxylist.geonode.com/api/proxy-list?limit=500&sort_by=lastChecked&sort_type=desc&filterUpTime=90&protocols=http 25 | * Endpoint (SOCKS4): https://proxylist.geonode.com/api/proxy-list?limit=500&sort_by=lastChecked&sort_type=desc&filterUpTime=90&protocols=http 26 | * Endpoint (SOCKS5): https://proxylist.geonode.com/api/proxy-list?limit=500&sort_by=lastChecked&sort_type=desc&filterUpTime=90&protocols=http 27 | * Endpoint (ALL): https://proxylist.geonode.com/api/proxy-list?limit=500&sort_by=lastChecked&sort_type=desc&filterUpTime=90&protocols=http,https,socks4,socks5 28 | * Method: GET 29 | * 30 | * ContentType: JSON 31 | * Format: 32 | * { 33 | * "data": [ 34 | * { 35 | * "_id": "60d65a0cce5b3bb0e93f237b", 36 | * "ip": 
"136.243.174.243", 37 | * "port": "1080", 38 | * "anonymityLevel": "elite", 39 | * "asn": "AS24940", 40 | * "city": "Falkenstein", 41 | * "country": "DE", 42 | * "created_at": "2021-06-25T22:34:52.034Z", 43 | * "google": false, 44 | * "hostName": null, 45 | * "isp": "Hetzner Online GmbH", 46 | * "lastChecked": 1676567264, 47 | * "latency": 15.8, 48 | * "org": "Hetzner", 49 | * "protocols": [ 50 | * "socks4" 51 | * ], 52 | * "region": null, 53 | * "responseTime": 1435, 54 | * "speed": 1, 55 | * "updated_at": "2023-02-16T17:07:44.778Z", 56 | * "workingPercent": null, 57 | * "upTime": 99.94736842105263, 58 | * "upTimeSuccessCount": 7596, 59 | * "upTimeTryCount": 7600 60 | * } 61 | * ] 62 | * } 63 | */ 64 | @Suppress("unused") 65 | class GeoNode : Plugin, ProxyWebsite { 66 | 67 | private val endpointUrls = mapOf( 68 | "HTTP" to "https://proxylist.geonode.com/api/proxy-list?limit=500&sort_by=lastChecked&sort_type=desc&filterUpTime=90&protocols=http", 69 | "HTTPS" to "https://proxylist.geonode.com/api/proxy-list?limit=500&sort_by=lastChecked&sort_type=desc&filterUpTime=90&protocols=https", 70 | "SOCKS4" to "https://proxylist.geonode.com/api/proxy-list?limit=500&sort_by=lastChecked&sort_type=desc&filterUpTime=90&protocols=socks4", 71 | "SOCKS5" to "https://proxylist.geonode.com/api/proxy-list?limit=500&sort_by=lastChecked&sort_type=desc&filterUpTime=90&protocols=socks5" 72 | ) 73 | 74 | private val logger = KotlinLogging.logger { } 75 | 76 | override val proxies : MutableList = mutableListOf() 77 | 78 | private var completed : Boolean = false 79 | 80 | override fun register() { 81 | PluginFactory.register(this) 82 | } 83 | 84 | override fun initialize() : Boolean { 85 | logger.info { "Initializing" } 86 | 87 | try { 88 | this.thenConnect() 89 | } catch (ex : Exception) { 90 | completed = true 91 | logger.error { ex.message } 92 | } 93 | 94 | return true 95 | } 96 | 97 | override fun thenConnect() { 98 | logger.info { "Connecting" } 99 | 100 | val responses = mutableMapOf?>() 101 | for (endpointEntry in endpointUrls.entries.iterator()) { 102 | runBlocking { 103 | val result = async(Dispatchers.IO) { 104 | val data = CoroutinesHttpClient().fetch(endpointEntry.value, null) 105 | data 106 | } 107 | responses[endpointEntry.key] = result.await() 108 | } 109 | } 110 | 111 | this.thenHandleData(responses) 112 | } 113 | 114 | override fun thenHandleData(data : MutableMap) { 115 | logger.info { "Handling Data" } 116 | 117 | for(entry in data.entries.iterator()) { 118 | if (entry.value !is HttpResponse<*>) { 119 | continue 120 | } 121 | val json = (entry.value as HttpResponse).body() 122 | val mapper = ObjectMapper() 123 | val geonodeArray = mapper.readValue(json) 124 | geonodeArray.data 125 | .filterNot { it.protocols.isEmpty() } 126 | .mapTo(proxies) { ProxyData(it.host, it.port, it.protocols[0]) } 127 | } 128 | 129 | logger.info { "Collected ${proxies.size} proxies" } 130 | completed = true 131 | this.finallyComplete() 132 | } 133 | 134 | override fun finallyComplete() : Boolean { 135 | return completed 136 | } 137 | 138 | } -------------------------------------------------------------------------------- /src/main/kotlin/plugin/httpclient/geonode/GeoNodeData.kt: -------------------------------------------------------------------------------- 1 | package plugin.httpclient.geonode 2 | 3 | import com.fasterxml.jackson.annotation.JsonIgnoreProperties 4 | import com.fasterxml.jackson.annotation.JsonInclude 5 | import com.fasterxml.jackson.annotation.JsonProperty 6 | import 
com.fasterxml.jackson.databind.annotation.JsonSerialize 7 | 8 | @JsonInclude(JsonInclude.Include.NON_NULL) 9 | @JsonIgnoreProperties(ignoreUnknown = true) 10 | @JsonSerialize 11 | data class GeoNodeData(@JsonProperty("data") val data : List) 12 | 13 | @JsonInclude(JsonInclude.Include.NON_NULL) 14 | @JsonIgnoreProperties(ignoreUnknown = true) 15 | @JsonSerialize 16 | data class GeoNodeDataArray( 17 | @JsonProperty("ip") 18 | val host : String, 19 | @JsonProperty("port") 20 | val port : Int, 21 | @JsonProperty("protocols") 22 | val protocols : List, 23 | ) 24 | 25 | -------------------------------------------------------------------------------- /src/main/kotlin/plugin/httpclient/openproxylist/OpenProxyList.kt: -------------------------------------------------------------------------------- 1 | package plugin.httpclient.openproxylist 2 | 3 | import kotlinx.coroutines.Dispatchers 4 | import kotlinx.coroutines.async 5 | import kotlinx.coroutines.runBlocking 6 | import mu.KotlinLogging 7 | import scraper.net.CoroutinesHttpClient 8 | import scraper.plugin.Plugin 9 | import scraper.plugin.PluginFactory 10 | import scraper.plugin.hook.ProxyWebsite 11 | import scraper.util.NetworkUtils 12 | import scraper.util.data.ProxyData 13 | import java.net.http.HttpResponse 14 | 15 | /** 16 | * OpenProxyList - 17/02/2023 17 | * @author Kai 18 | * 19 | * Source: https://openproxylist.xyz/ 20 | * Endpoint (HTTP): https://openproxylist.xyz/http.txt 21 | * Endpoint (SOCKS4): https://openproxylist.xyz/socks4.txt 22 | * Endpoint (SOCKS5): https://openproxylist.xyz/socks5.txt 23 | * Endpoint (ALL): https://openproxylist.xyz/all.txt 24 | * Method: GET 25 | * 26 | * ContentType: Plain-Text 27 | * Format: 28 | * 127.0.0.1:80 29 | * 127.0.0.2:8080 30 | */ 31 | @Suppress("unused") 32 | class OpenProxyList : Plugin, ProxyWebsite { 33 | 34 | private val endpointUrls = mapOf( 35 | "HTTP" to "https://openproxylist.xyz/http.txt", 36 | "SOCKS4" to "https://openproxylist.xyz/socks4.txt", 37 | "SOCKS5" to "https://openproxylist.xyz/socks5.txt" 38 | ) 39 | 40 | private val logger = KotlinLogging.logger { } 41 | 42 | override val proxies : MutableList = mutableListOf() 43 | 44 | private var completed : Boolean = false 45 | 46 | override fun register() { 47 | PluginFactory.register(this) 48 | } 49 | 50 | override fun initialize() : Boolean { 51 | logger.info { "Initializing" } 52 | 53 | try { 54 | this.thenConnect() 55 | } catch (ex : Exception) { 56 | completed = true 57 | logger.error { ex.message } 58 | } 59 | 60 | return true 61 | } 62 | 63 | override fun thenConnect() { 64 | logger.info { "Connecting" } 65 | 66 | val responses = mutableMapOf?>() 67 | for (endpointEntry in endpointUrls.entries.iterator()) { 68 | runBlocking { 69 | val result = async(Dispatchers.IO) { 70 | val data = CoroutinesHttpClient().fetch(endpointEntry.value, null) 71 | data 72 | } 73 | responses[endpointEntry.key] = result.await() 74 | } 75 | } 76 | 77 | this.thenHandleData(responses) 78 | } 79 | 80 | override fun thenHandleData(data : MutableMap) { 81 | logger.info { "Handling Data" } 82 | 83 | for(entry in data.entries.iterator()) { 84 | val type = entry.key 85 | if(entry.value is HttpResponse<*>) { 86 | val proxyIpPortArray = (entry.value as HttpResponse).body().split("\n") 87 | for (proxyIpPort in proxyIpPortArray) { 88 | if(!NetworkUtils.isValidIpAndPort(proxyIpPort)) { 89 | continue 90 | } 91 | val ip = proxyIpPort.split(":")[0] 92 | val port = proxyIpPort.split(":")[1] 93 | val proxy = ProxyData(ip, port.toInt(), type) 94 | proxies.add(proxy) 95 | 
} 96 | } 97 | 98 | } 99 | 100 | logger.info { "Collected ${proxies.size} proxies" } 101 | completed = true 102 | this.finallyComplete() 103 | } 104 | 105 | override fun finallyComplete() : Boolean { 106 | return completed 107 | } 108 | 109 | } -------------------------------------------------------------------------------- /src/main/kotlin/plugin/httpclient/proxyscrape/ProxyScrape.kt: -------------------------------------------------------------------------------- 1 | package plugin.httpclient.proxyscrape 2 | 3 | import kotlinx.coroutines.Dispatchers 4 | import kotlinx.coroutines.async 5 | import kotlinx.coroutines.runBlocking 6 | import mu.KotlinLogging 7 | import scraper.net.CoroutinesHttpClient 8 | import scraper.plugin.Plugin 9 | import scraper.plugin.PluginFactory 10 | import scraper.plugin.hook.ProxyWebsite 11 | import scraper.util.NetworkUtils 12 | import scraper.util.data.ProxyData 13 | import java.net.http.HttpResponse 14 | 15 | /** 16 | * ProxyScrape - 12/02/2023 17 | * @author Kai 18 | * 19 | * Source: https://proxyscrape.com/ 20 | * Endpoint (HTTP): https://api.proxyscrape.com/v2/?request=displayproxies&protocol=http&timeout=10000 21 | * Endpoint (HTTPS): https://api.proxyscrape.com/v2/?request=displayproxies&protocol=https&timeout=10000 22 | * Endpoint (SOCKS4): https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4&timeout=10000 23 | * Endpoint (SOCKS5): https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5&timeout=10000 24 | * Endpoint (ALL): https://api.proxyscrape.com/v2/?request=displayproxies&protocol=http,https,socks4,socks5&timeout=10000 25 | * Method: GET 26 | * 27 | * ContentType: Plain-Text 28 | * Format: 29 | * 127.0.0.1:80 30 | * 127.0.0.2:8080 31 | */ 32 | @Suppress("unused") 33 | class ProxyScrape : Plugin, ProxyWebsite { 34 | 35 | private val endpointUrls = mapOf( 36 | "HTTP" to "https://api.proxyscrape.com/v2/?request=displayproxies&protocol=http&timeout=10000", 37 | "HTTPS" to "https://api.proxyscrape.com/v2/?request=displayproxies&protocol=https&timeout=10000", 38 | "SOCKS4" to "https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4&timeout=10000", 39 | "SOCKS5" to "https://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5&timeout=10000" 40 | ) 41 | 42 | private val logger = KotlinLogging.logger { } 43 | 44 | override val proxies : MutableList = mutableListOf() 45 | 46 | private var completed : Boolean = false 47 | 48 | override fun register() { 49 | PluginFactory.register(this) 50 | } 51 | 52 | override fun initialize() : Boolean { 53 | logger.info { "Initializing" } 54 | 55 | try { 56 | this.thenConnect() 57 | } catch (ex : Exception) { 58 | completed = true 59 | logger.error { ex.message } 60 | } 61 | 62 | return true 63 | } 64 | 65 | override fun thenConnect() { 66 | logger.info { "Connecting" } 67 | 68 | val responses = mutableMapOf?>() 69 | for (endpointEntry in endpointUrls.entries.iterator()) { 70 | runBlocking { 71 | val result = async(Dispatchers.IO) { 72 | val data = CoroutinesHttpClient().fetch(endpointEntry.value, null) 73 | data 74 | } 75 | responses[endpointEntry.key] = result.await() 76 | } 77 | } 78 | 79 | this.thenHandleData(responses) 80 | } 81 | 82 | override fun thenHandleData(data : MutableMap) { 83 | logger.info { "Handling Data" } 84 | 85 | for(entry in data.entries.iterator()) { 86 | val type = entry.key 87 | if(entry.value is HttpResponse<*>) { 88 | val proxyIpPortArray = (entry.value as HttpResponse).body().split("\r\n") 89 | for (proxyIpPort in proxyIpPortArray) { 90 | 
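// Keep only well-formed "ip:port" entries: the plain-text body is split on CRLF above,
// which can leave blank trailing lines, and anything malformed is skipped before the ip/port split below.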
if(!NetworkUtils.isValidIpAndPort(proxyIpPort)) { 91 | continue 92 | } 93 | val ip = proxyIpPort.split(":")[0] 94 | val port = proxyIpPort.split(":")[1] 95 | val proxy = ProxyData(ip, port.toInt(), type) 96 | proxies.add(proxy) 97 | } 98 | } 99 | 100 | } 101 | 102 | logger.info { "Collected ${proxies.size} proxies" } 103 | completed = true 104 | this.finallyComplete() 105 | } 106 | 107 | override fun finallyComplete() : Boolean { 108 | return completed 109 | } 110 | 111 | } -------------------------------------------------------------------------------- /src/main/kotlin/plugin/httpclient/vpnfail/VpnFail.kt: -------------------------------------------------------------------------------- 1 | package plugin.httpclient.vpnfail 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper 4 | import com.fasterxml.jackson.module.kotlin.readValue 5 | import kotlinx.coroutines.Dispatchers 6 | import kotlinx.coroutines.async 7 | import kotlinx.coroutines.runBlocking 8 | import mu.KotlinLogging 9 | import scraper.net.CoroutinesHttpClient 10 | import scraper.plugin.Plugin 11 | import scraper.plugin.PluginFactory 12 | import scraper.plugin.hook.ProxyWebsite 13 | import scraper.util.NetworkUtils 14 | import scraper.util.data.ProxyData 15 | import java.net.http.HttpResponse 16 | 17 | /** 18 | * VpnFail - 17/02/2023 19 | * @author Kai 20 | * 21 | * Source: https://vpn.fail/ 22 | * Endpoint: https://vpn.fail/free-proxy/json 23 | * Method: GET 24 | * 25 | * ContentType: JSON 26 | * Format: 27 | * [ 28 | * { 29 | * "proxy": "39.104.62.128:8123", 30 | * "type": "socks5", 31 | * } 32 | * ] 33 | */ 34 | @Suppress("unused") 35 | class VpnFail : Plugin, ProxyWebsite { 36 | 37 | private val endpointUrl = "https://vpn.fail/free-proxy/json" 38 | 39 | private val logger = KotlinLogging.logger { } 40 | 41 | override val proxies : MutableList = mutableListOf() 42 | 43 | private var completed : Boolean = false 44 | 45 | override fun register() { 46 | PluginFactory.register(this) 47 | } 48 | 49 | override fun initialize() : Boolean { 50 | logger.info { "Initializing" } 51 | 52 | try { 53 | this.thenConnect() 54 | } catch (ex : Exception) { 55 | completed = true 56 | logger.error { ex.message } 57 | } 58 | 59 | return true 60 | } 61 | 62 | override fun thenConnect() { 63 | logger.info { "Connecting" } 64 | 65 | val response = mutableMapOf?>() 66 | runBlocking { 67 | val result = async(Dispatchers.IO) { 68 | val client = CoroutinesHttpClient() 69 | client.contentType = arrayOf("content-type", "application/json") 70 | val data = client.fetch(endpointUrl, null) 71 | data 72 | } 73 | response["json"] = result.await() 74 | } 75 | 76 | if (response.isNotEmpty()) { 77 | this.thenHandleData(response) 78 | } else { 79 | logger.error { "Failed to connect to $endpointUrl" } 80 | } 81 | } 82 | 83 | override fun thenHandleData(data : MutableMap) { 84 | logger.info { "Handling Data" } 85 | 86 | val mapper = ObjectMapper() 87 | val values = mapper.readValue>((data.getValue("json") as HttpResponse).body()) 88 | val allowedProtocols = arrayOf("socks4", "socks5", "http", "https") 89 | 90 | for(value in values) { 91 | if(!NetworkUtils.isValidIpAndPort(value.host) || !allowedProtocols.contains(value.protocol)) { 92 | continue 93 | } 94 | val ip = value.host.split(":")[0] 95 | val port = value.host.split(":")[1] 96 | val proxy = ProxyData(ip, port.toInt(), value.protocol) 97 | proxies.add(proxy) 98 | } 99 | 100 | logger.info { "Collected ${proxies.size} proxies" } 101 | completed = true 102 | this.finallyComplete() 103 | } 104 | 105 | override
fun finallyComplete() : Boolean { 106 | return completed 107 | } 108 | 109 | } -------------------------------------------------------------------------------- /src/main/kotlin/plugin/httpclient/vpnfail/VpnFailData.kt: -------------------------------------------------------------------------------- 1 | package plugin.httpclient.vpnfail 2 | 3 | import com.fasterxml.jackson.annotation.JsonIgnoreProperties 4 | import com.fasterxml.jackson.annotation.JsonInclude 5 | import com.fasterxml.jackson.annotation.JsonProperty 6 | import com.fasterxml.jackson.databind.annotation.JsonSerialize 7 | 8 | @JsonInclude(JsonInclude.Include.NON_NULL) 9 | @JsonIgnoreProperties(ignoreUnknown = true) 10 | @JsonSerialize 11 | data class VpnFailData( 12 | @JsonProperty("proxy") 13 | val host : String, 14 | @JsonProperty("type") 15 | val protocol : String 16 | ) 17 | -------------------------------------------------------------------------------- /src/main/kotlin/scraper/Main.kt: -------------------------------------------------------------------------------- 1 | package scraper 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper 4 | import mu.KotlinLogging 5 | import scraper.plugin.PluginFactory 6 | import scraper.util.ScraperExecutor 7 | import scraper.util.data.ProxyOutputData 8 | import java.nio.file.Files 9 | import java.nio.file.Path 10 | import kotlin.io.path.exists 11 | import kotlin.system.exitProcess 12 | 13 | /** 14 | * Main - 12/02/2023 15 | * @author Kai 16 | * 17 | * Description: Main class, runs the application 18 | **/ 19 | class Main { 20 | 21 | private val logger = KotlinLogging.logger { } 22 | 23 | companion object { 24 | var outputPath = Path.of("proxies") 25 | @JvmStatic 26 | fun main(args : Array) { 27 | if(args.contentToString().contains("proxybuilder")) { 28 | outputPath = Path.of("/home/proxybuilder/IntelliJProjects/proxy-builder-2/proxies") 29 | } 30 | Main().init() 31 | } 32 | } 33 | 34 | fun init() { 35 | PluginFactory.init() 36 | for(proxy in PluginFactory.proxyWebsites) { 37 | ScraperExecutor.submitTask(proxy::initialize) 38 | } 39 | val websitesLength = PluginFactory.proxyWebsites.size 40 | while(PluginFactory.proxyWebsites.count { it.finallyComplete() } != websitesLength) { 41 | val sites = PluginFactory.proxyWebsites.count { it.finallyComplete() } 42 | logger.info { "Scraped [$sites/${PluginFactory.proxyWebsites.size}] sites, waiting on remaining sites..." 
} 43 | Thread.sleep(1000L) 44 | } 45 | output() 46 | exitProcess(0) 47 | } 48 | 49 | private fun output() { 50 | val mapper = ObjectMapper() 51 | 52 | val proxyList = ProxyOutputData(mutableListOf(), mutableListOf(), mutableListOf(), mutableListOf()) 53 | 54 | for(website in PluginFactory.proxyWebsites) { 55 | val websiteProxies = website.proxies.distinctBy { it.values() }.toMutableList() 56 | for(proxy in websiteProxies) { 57 | when (proxy.protocol) { 58 | "HTTP" -> { proxyList.http.add(proxy.ip + ":" + proxy.port) } 59 | "HTTPS" -> { proxyList.https.add(proxy.ip + ":" + proxy.port) } 60 | "SOCKS4" -> { proxyList.socks4.add(proxy.ip + ":" + proxy.port) } 61 | "SOCKS5" -> { proxyList.socks5.add(proxy.ip + ":" + proxy.port) } 62 | } 63 | } 64 | } 65 | 66 | val allProxies = mutableListOf() 67 | allProxies.addAll(proxyList.http) 68 | allProxies.addAll(proxyList.https) 69 | allProxies.addAll(proxyList.socks4) 70 | allProxies.addAll(proxyList.socks5) 71 | 72 | if(!outputPath.exists()) { 73 | Files.createDirectory(outputPath) 74 | } 75 | 76 | val entries = mapOf( 77 | proxyList.http to Path.of("$outputPath/http.txt"), 78 | proxyList.https to Path.of("$outputPath/https.txt"), 79 | proxyList.socks4 to Path.of("$outputPath/socks4.txt"), 80 | proxyList.socks5 to Path.of("$outputPath/socks5.txt"), 81 | allProxies to Path.of("$outputPath/proxies.txt"), 82 | ) 83 | 84 | entries.iterator().forEach { entry -> 85 | Files.write(entry.value, entry.key) 86 | } 87 | 88 | val jsonFile = Path.of("$outputPath/proxies.json").toFile() 89 | mapper.writeValue(jsonFile, proxyList) 90 | 91 | logger.info { "Total unique proxies collected: ${allProxies.size}" } 92 | logger.info { "HTTP:[${proxyList.http.size}] | HTTPS:[${proxyList.https.size}] " + 93 | "| SOCKS4:[${proxyList.socks4.size}] | SOCKS5:[${proxyList.socks5.size}]" } 94 | } 95 | 96 | } -------------------------------------------------------------------------------- /src/main/kotlin/scraper/net/ChromeWebDriver.kt: -------------------------------------------------------------------------------- 1 | package scraper.net 2 | 3 | import com.fasterxml.jackson.core.JsonParseException 4 | import com.fasterxml.jackson.databind.ObjectMapper 5 | import com.fasterxml.jackson.module.kotlin.readValue 6 | import com.google.common.collect.ImmutableMap 7 | import kotlinx.coroutines.Dispatchers 8 | import kotlinx.coroutines.withContext 9 | import org.openqa.selenium.chrome.ChromeDriver 10 | import org.openqa.selenium.chrome.ChromeOptions 11 | import org.openqa.selenium.devtools.Command 12 | import org.openqa.selenium.devtools.DevTools 13 | import org.openqa.selenium.devtools.v109.network.Network 14 | 15 | /** 16 | * ChromeWebDriver - 12/02/2023 17 | * @author Kai 18 | * 19 | * Description: Used for more complex proxy sites, ones that require JavaScript & obfuscate their data 20 | **/ 21 | class ChromeWebDriver { 22 | 23 | private val options = ChromeOptions() 24 | private var driver : ChromeDriver ? = null 25 | private var devTools : DevTools? = null 26 | private var responseBody : String? 
= null 27 | 28 | init { createInstance() } 29 | 30 | private fun createInstance() { 31 | //Create ObjectMapper 32 | val mapper = ObjectMapper() 33 | //Config Options 34 | options.addArguments("-headless", "start-maximized") 35 | //Init Driver 36 | driver = ChromeDriver(options) 37 | //Create DevTools Session 38 | devTools = driver?.devTools 39 | devTools?.createSession() 40 | devTools?.send(Command("Network.enable", ImmutableMap.of())) 41 | //Adds a listener to read all data received, parse json from network log for proxies (if there is no public api) 42 | devTools?.addListener(Network.dataReceived()) { received -> 43 | val networkResponseBody = Network.getResponseBody(received.requestId) 44 | val body = devTools?.send(networkResponseBody)?.body 45 | if(body != null && received.dataLength > 0) { 46 | try { 47 | mapper.readValue(body) 48 | responseBody = body 49 | } catch (ex : JsonParseException) { 50 | //Unable to parse because data is not JSON 51 | } 52 | } 53 | } 54 | } 55 | 56 | suspend fun browse(url : String) : String? { 57 | val driver = driver ?: return null 58 | //Ensure the browser is fully maximized 59 | driver.manage().window().maximize() 60 | return withContext(Dispatchers.IO) { 61 | //Browse the URL 62 | driver.get(url) 63 | //Close+quit driver 64 | driver.close() 65 | driver.quit() 66 | //Finally return responseBody 67 | responseBody 68 | } 69 | } 70 | 71 | } -------------------------------------------------------------------------------- /src/main/kotlin/scraper/net/CoroutinesHttpClient.kt: -------------------------------------------------------------------------------- 1 | package scraper.net 2 | 3 | import kotlinx.coroutines.Dispatchers 4 | import kotlinx.coroutines.withContext 5 | import java.net.URI 6 | import java.net.http.HttpClient 7 | import java.net.http.HttpRequest 8 | import java.net.http.HttpResponse 9 | import java.net.http.HttpTimeoutException 10 | import java.time.Duration 11 | 12 | /** 13 | * CoroutinesHttpClient - 12/02/2023 14 | * @author Kai 15 | * 16 | * Description: Used for simple web calls, such as sending GET/POST data for text content (JSON etc) 17 | **/ 18 | class CoroutinesHttpClient { 19 | 20 | companion object { 21 | 22 | val timeout : Duration = Duration.ofSeconds(15) 23 | 24 | private val httpClient : HttpClient = HttpClient.newBuilder() 25 | .version(HttpClient.Version.HTTP_2) 26 | .connectTimeout(timeout) 27 | .build() 28 | 29 | } 30 | 31 | var userAgent = arrayOf("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/109.0") 32 | var accept = arrayOf("accept", "*/*") 33 | var acceptLanguage = arrayOf("accept-language", "en-US,en;q=0.9") 34 | var acceptEncoding = arrayOf("accept-encoding", "text/plain; charset=UTF-8") 35 | var contentType = arrayOf("content-type", "text/plain; charset=UTF-8") 36 | 37 | suspend fun fetch(url : String, postData : String?) : HttpResponse? 
{ 38 | val request : HttpRequest = if(postData == null) { 39 | HttpRequest.newBuilder() 40 | .uri(URI.create(url)) 41 | .timeout(timeout) 42 | .headers(*userAgent) 43 | .headers(*accept) 44 | .headers(*acceptLanguage) 45 | .headers(*acceptEncoding) 46 | .headers(*contentType) 47 | .GET() 48 | .build() 49 | } else { 50 | HttpRequest.newBuilder() 51 | .uri(URI.create(url)) 52 | .timeout(timeout) 53 | .headers(*userAgent) 54 | .headers(*accept) 55 | .headers(*acceptLanguage) 56 | .headers(*acceptEncoding) 57 | .headers(*contentType) 58 | .POST(HttpRequest.BodyPublishers.ofString(postData)) 59 | .build() 60 | } 61 | val response = httpClient.sendAsync(request, HttpResponse.BodyHandlers.ofString()) 62 | return withContext(Dispatchers.IO) { 63 | try { 64 | response.get() 65 | } catch (timeout : HttpTimeoutException) { 66 | null 67 | } 68 | } 69 | } 70 | 71 | } -------------------------------------------------------------------------------- /src/main/kotlin/scraper/plugin/Plugin.kt: -------------------------------------------------------------------------------- 1 | package scraper.plugin 2 | 3 | /** 4 | * Plugin - 12/02/2023 5 | * @author Kai 6 | * 7 | * Description: Interface for Plugin, this will append the class to a List within PluginFactory 8 | **/ 9 | interface Plugin { 10 | 11 | fun register() 12 | 13 | } -------------------------------------------------------------------------------- /src/main/kotlin/scraper/plugin/PluginFactory.kt: -------------------------------------------------------------------------------- 1 | package scraper.plugin 2 | 3 | import mu.KotlinLogging 4 | import org.reflections.Reflections 5 | import scraper.plugin.hook.ProxyWebsite 6 | import java.lang.reflect.Modifier 7 | 8 | /** 9 | * PluginFactory - 12/02/2023 10 | * @author Kai 11 | * 12 | * Description: This is used for loading the Proxy Website Plugins from /src/main/kotlin/plugins/ 13 | * Makes it easier for APIs to be added / removed 14 | **/ 15 | object PluginFactory { 16 | 17 | private val logger = KotlinLogging.logger { } 18 | 19 | private val plugins : MutableMap = mutableMapOf() 20 | val proxyWebsites : MutableList = mutableListOf() 21 | 22 | fun init() { 23 | logger.info { "Loading Plugin Factory..." } 24 | val pluginClasses : Set> = Reflections("plugin") 25 | .getSubTypesOf(Plugin::class.java) 26 | for (classz in pluginClasses) { 27 | if (Modifier.isAbstract(classz.modifiers)) { 28 | continue 29 | } 30 | try { 31 | val instance: Plugin = classz.getDeclaredConstructor().newInstance() 32 | instance.register() 33 | plugins[classz.name] = instance 34 | } catch (t : Throwable) { 35 | logger.error {"Failed to initialize website plugin: ${classz.simpleName}." } 36 | } 37 | } 38 | logger.info { "Loaded " + plugins.size + " website plugins." 
} 39 | } 40 | 41 | fun register(proxyWebsite : ProxyWebsite) { 42 | proxyWebsites.add(proxyWebsite) 43 | } 44 | 45 | } -------------------------------------------------------------------------------- /src/main/kotlin/scraper/plugin/hook/ProxyWebsite.kt: -------------------------------------------------------------------------------- 1 | package scraper.plugin.hook 2 | 3 | import scraper.util.data.ProxyData 4 | 5 | /** 6 | * ProxyWebsite - 12/02/2023 7 | * @author Kai 8 | * 9 | * Description: Interface for the Website Plugins within /src/main/kotlin/plugin/ 10 | * Plugins will follow this interface format 11 | **/ 12 | interface ProxyWebsite { 13 | 14 | val proxies : MutableList 15 | 16 | fun initialize() : Boolean 17 | 18 | fun thenConnect() 19 | 20 | fun thenHandleData(data : MutableMap) 21 | 22 | fun finallyComplete() : Boolean = false 23 | 24 | } -------------------------------------------------------------------------------- /src/main/kotlin/scraper/util/NetworkUtils.kt: -------------------------------------------------------------------------------- 1 | package scraper.util 2 | 3 | import java.util.regex.Pattern 4 | 5 | /** 6 | * NetworkUtils - 17/02/2023 7 | * @author Kai 8 | * 9 | * Description: Any reusable network functions are in this class 10 | **/ 11 | object NetworkUtils { 12 | 13 | fun isValidIpAddress(ipAddress : String) : Boolean { 14 | val pattern = Pattern.compile("^((25[0-5]|(2[0-4]|1\\d|[1-9]|)\\d)\\.?\\b){4}\$") 15 | return pattern.matcher(ipAddress).matches() 16 | } 17 | 18 | fun isValidIpAndPort(host : String): Boolean { 19 | val pattern = Pattern.compile("^((25[0-5]|(2[0-4]|1\\d|[1-9]|)\\d)\\.?\\b){4}:\\d{1,5}\$") 20 | return pattern.matcher(host).matches() 21 | } 22 | 23 | } -------------------------------------------------------------------------------- /src/main/kotlin/scraper/util/ScraperExecutor.kt: -------------------------------------------------------------------------------- 1 | package scraper.util 2 | 3 | import java.util.concurrent.ExecutorService 4 | import java.util.concurrent.Executors 5 | 6 | /** 7 | * ScraperExecutor - 17/02/2023 8 | * @author Kai 9 | * 10 | * Description: This is used for multi-threading to speed up the application 11 | **/ 12 | object ScraperExecutor { 13 | 14 | private val PROCESSORS = Runtime.getRuntime().availableProcessors() + 1 15 | 16 | private val scraperWorker : ExecutorService = Executors.newFixedThreadPool(PROCESSORS, ScraperThreadFactory("Scraper-Worker")) 17 | 18 | fun submitTask(task : Runnable) { 19 | scraperWorker.submit(task) 20 | } 21 | 22 | } -------------------------------------------------------------------------------- /src/main/kotlin/scraper/util/ScraperThreadFactory.kt: -------------------------------------------------------------------------------- 1 | package scraper.util 2 | 3 | import java.util.concurrent.ThreadFactory 4 | import java.util.concurrent.atomic.AtomicInteger 5 | 6 | /** 7 | * ScraperThreadFactory - 12/02/2023 8 | * @author Kai 9 | * 10 | * Description: This is used for multi-threading to speed up the application 11 | **/ 12 | class ScraperThreadFactory(private val name : String) : ThreadFactory { 13 | private val threadCount = AtomicInteger() 14 | override fun newThread(r : Runnable): Thread { 15 | return Thread(r, name + "-" + threadCount.getAndIncrement()) 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/main/kotlin/scraper/util/data/ProxyData.kt: -------------------------------------------------------------------------------- 1 | 
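/**
 * ProxyData - holder for a single scraped proxy (ip, port, protocol).
 * values() concatenates the three fields so callers can de-duplicate
 * entries via distinctBy { it.values() }, as done in Main.output().
 */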
package scraper.util.data

data class ProxyData(var ip : String, var port : Int, var protocol : String) {
    fun values() : String {
        return ip + port + protocol
    }
}
--------------------------------------------------------------------------------
/src/main/kotlin/scraper/util/data/ProxyOutputData.kt:
--------------------------------------------------------------------------------
package scraper.util.data

data class ProxyOutputData(
    val http : MutableList<String>,
    val https : MutableList<String>,
    val socks4 : MutableList<String>,
    val socks5 : MutableList<String>
)
--------------------------------------------------------------------------------
/src/main/kotlin/scraper/util/scripts/GetAllCheckerProxies.kt:
--------------------------------------------------------------------------------
package scraper.util.scripts

import com.fasterxml.jackson.databind.JsonMappingException
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.kotlin.readValue
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.async
import kotlinx.coroutines.runBlocking
import plugin.httpclient.checkerproxy.CheckerProxyData
import scraper.net.CoroutinesHttpClient
import scraper.util.data.ProxyData
import scraper.util.data.ProxyOutputData
import java.net.http.HttpResponse
import java.nio.file.Files
import java.nio.file.Path
import java.time.LocalDate
import java.time.format.DateTimeFormatter
import kotlin.io.path.exists
import kotlin.system.exitProcess

/**
 * GetAllCheckerProxies - 17/02/2023
 * @author Kai
 *
 * Description: Quick and easy script for getting all of the proxies from CheckerProxy's archive API
 **/
private const val endpoint = "https://checkerproxy.net/api/archive/"

private val proxies : MutableList<ProxyData> = mutableListOf()

fun main() {
    loopThroughDates()
}

private fun loopThroughDates() {
    val format = DateTimeFormatter.ofPattern("yyyy-MM-dd")
    val fromDate = LocalDate.parse("2023-01-18", format)
    val toDate = LocalDate.parse("2023-02-17", format)
    fromDate.datesUntil(toDate).forEach {
        println("Scraping day: $it")
        scrape(it.toString())
        Thread.sleep(5000L)
    }
    output()
    exitProcess(0)
}

private fun scrape(date : String) {
    val client = CoroutinesHttpClient()
    val response = mutableMapOf<String, HttpResponse<String>?>()
    runBlocking {
        val result = async(Dispatchers.IO) {
            val data = client.fetch(endpoint + date, null)
            data
        }
        response["json"] = result.await()
    }
    parseJson(response)
}

private fun parseJson(response : MutableMap<String, HttpResponse<String>?>) {
    val mapper = ObjectMapper()
    val json = response.getValue("json")?.body() ?: return
    var proxyList : List<CheckerProxyData>? = null
    try {
        proxyList = mapper.readValue<List<CheckerProxyData>>(json)
    } catch (ex : JsonMappingException) {
        println(ex.message)
    }
    if(proxyList == null) {
        return
    }
    for(proxy in proxyList) {
        if(proxy.host.isEmpty() || !proxy.host.contains(":")) {
            continue
        }
        val ip = proxy.host.split(":")[0]
        val port = proxy.host.split(":")[1].toInt()
        val protocol : String = when (proxy.protocolAsInt) {
            1 -> { "HTTP" }
            2 -> { "HTTPS" }
            4 -> { "SOCKS5" }
            else -> { null }
        } ?: continue
        proxies.add(ProxyData(ip, port, protocol))
    }
}

private fun output() {
    val mapper = ObjectMapper()

    val proxyList = ProxyOutputData(mutableListOf(), mutableListOf(), mutableListOf(), mutableListOf())

    val distinctProxies = proxies.distinctBy { it.values() }.toMutableList()
    for(proxy in distinctProxies) {
        when (proxy.protocol) {
            "HTTP" -> { proxyList.http.add(proxy.ip + ":" + proxy.port) }
            "HTTPS" -> { proxyList.https.add(proxy.ip + ":" + proxy.port) }
            "SOCKS4" -> { proxyList.socks4.add(proxy.ip + ":" + proxy.port) }
            "SOCKS5" -> { proxyList.socks5.add(proxy.ip + ":" + proxy.port) }
        }
    }

    val allProxies = mutableListOf<String>()
    allProxies.addAll(proxyList.http)
    allProxies.addAll(proxyList.https)
    allProxies.addAll(proxyList.socks4)
    allProxies.addAll(proxyList.socks5)

    val outputDir = Path.of("proxies/")
    if(!outputDir.exists()) {
        Files.createDirectory(outputDir)
    }
    //Resolve the output files directly against the proxies/ directory
    val entries = mapOf(
        proxyList.http to outputDir.resolve("http.txt"),
        proxyList.https to outputDir.resolve("https.txt"),
        proxyList.socks4 to outputDir.resolve("socks4.txt"),
        proxyList.socks5 to outputDir.resolve("socks5.txt"),
        allProxies to outputDir.resolve("proxies.txt"),
    )
    entries.iterator().forEach { entry ->
        Files.write(entry.value, entry.key)
    }
    val jsonFile = outputDir.resolve("proxies.json").toFile()
    mapper.writeValue(jsonFile, proxyList)
}
--------------------------------------------------------------------------------
/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
<configuration>
    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} MDC=%X{user} - %msg%n</pattern>
        </encoder>
    </appender>

    <root level="info">
        <appender-ref ref="STDOUT"/>
    </root>
</configuration>
--------------------------------------------------------------------------------
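
Taken together, `Plugin`, `ProxyWebsite`, and `PluginFactory` define the plugin architecture: `PluginFactory.init()` reflectively instantiates every concrete `Plugin` under the `plugin` package and calls its `register()`, which is expected to hand a `ProxyWebsite` back to the factory. The sketch below shows what a minimal website plugin could look like. It is illustrative only: `ExampleProxySite`, its endpoint URL, and the line-based parsing are invented, the `initialize()` → `thenConnect()` → `thenHandleData()` → `finallyComplete()` order is assumed from the method names rather than taken from the driver code, and only the interfaces and helpers shown above are relied on.

```kotlin
package plugin.httpclient.example

import kotlinx.coroutines.runBlocking
import scraper.net.CoroutinesHttpClient
import scraper.plugin.Plugin
import scraper.plugin.PluginFactory
import scraper.plugin.hook.ProxyWebsite
import scraper.util.NetworkUtils
import scraper.util.data.ProxyData

//Hypothetical plugin - not part of the repository, shown only to illustrate the hook interface
class ExampleProxySite : Plugin, ProxyWebsite {

    //Made-up endpoint that returns one "ip:port" entry per line
    private val apiUrl = "https://example.com/proxy-list.txt"

    override val proxies : MutableList<ProxyData> = mutableListOf()

    //PluginFactory.init() discovers this class via reflection and calls register()
    override fun register() {
        PluginFactory.register(this)
    }

    override fun initialize() : Boolean {
        thenConnect()
        return proxies.isNotEmpty()
    }

    override fun thenConnect() {
        //fetch() is a suspending call, so bridge it with runBlocking here
        val body = runBlocking { CoroutinesHttpClient().fetch(apiUrl, null) }?.body() ?: return
        thenHandleData(mutableMapOf(apiUrl to body))
    }

    override fun thenHandleData(data : MutableMap<String, String>) {
        for (line in data.values.flatMap { it.lines() }) {
            val host = line.trim()
            if (!NetworkUtils.isValidIpAndPort(host)) {
                continue
            }
            val (ip, port) = host.split(":")
            proxies.add(ProxyData(ip, port.toInt(), "HTTP")) //protocol assumed for the example
        }
    }

    override fun finallyComplete() : Boolean = proxies.isNotEmpty()

}
```

In a real plugin the protocol would come from the API response itself, as it does for the CheckerProxy data handled by the script above.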
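
The two regular expressions in `NetworkUtils` are compact, so a few concrete inputs help show what they accept. The snippet below is a throwaway check harness, not a file from the repository; the expected results follow from the patterns as written.

```kotlin
import scraper.util.NetworkUtils

fun main() {
    //Four octets, each 0-255
    println(NetworkUtils.isValidIpAddress("192.168.1.1"))    // true
    println(NetworkUtils.isValidIpAddress("256.1.1.1"))      // false - first octet out of range
    println(NetworkUtils.isValidIpAddress("192.168.1"))      // false - only three octets

    //Same IP rule, followed by ":" and a 1-5 digit port
    println(NetworkUtils.isValidIpAndPort("127.0.0.1:8080")) // true
    println(NetworkUtils.isValidIpAndPort("127.0.0.1"))      // false - no port
}
```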