├── .gitignore ├── .idea ├── compiler.xml ├── copyright │ └── profiles_settings.xml ├── gradle.xml ├── misc.xml ├── modules.xml └── runConfigurations.xml ├── README.md ├── app ├── .gitignore ├── build.gradle ├── proguard-rules.pro └── src │ └── main │ ├── AndroidManifest.xml │ ├── java │ └── net │ │ └── vrgsoft │ │ └── rxurlparser_android │ │ └── MainActivity.java │ └── res │ ├── layout │ └── activity_main.xml │ ├── mipmap-hdpi │ ├── ic_launcher.png │ └── ic_launcher_round.png │ ├── mipmap-mdpi │ ├── ic_launcher.png │ └── ic_launcher_round.png │ ├── mipmap-xhdpi │ ├── ic_launcher.png │ └── ic_launcher_round.png │ ├── mipmap-xxhdpi │ ├── ic_launcher.png │ └── ic_launcher_round.png │ ├── mipmap-xxxhdpi │ ├── ic_launcher.png │ └── ic_launcher_round.png │ └── values │ ├── colors.xml │ ├── dimens.xml │ ├── strings.xml │ └── styles.xml ├── build.gradle ├── gradle.properties ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── image.png ├── library ├── .gitignore ├── build.gradle ├── proguard-rules.pro └── src │ └── main │ ├── AndroidManifest.xml │ ├── java │ └── net │ │ └── vrgsoft │ │ └── library │ │ ├── LinkCrawler.java │ │ ├── OnPreloadCallback.java │ │ ├── ParseContent.java │ │ ├── Regex.java │ │ ├── Result.java │ │ └── SearchUrls.java │ └── res │ └── values │ └── strings.xml └── settings.gradle /.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | .gradle 3 | /local.properties 4 | /.idea/workspace.xml 5 | /.idea/libraries 6 | .DS_Store 7 | /build 8 | /captures 9 | .externalNativeBuild 10 | -------------------------------------------------------------------------------- /.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- 
/.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /.idea/gradle.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 18 | 19 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 19 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | Abstraction issuesJava 39 | 40 | 41 | Android 42 | 43 | 44 | Android > Lint > Accessibility 45 | 46 | 47 | Android > Lint > Correctness 48 | 49 | 50 | Android > Lint > Correctness > Chrome OS 51 | 52 | 53 | Android > Lint > Correctness > Messages 54 | 55 | 56 | Android > Lint > Internationalization 57 | 58 | 59 | Android > Lint > Internationalization > Bidirectional Text 60 | 61 | 62 | Android > Lint > Lint 63 | 64 | 65 | Android > Lint > Performance 66 | 67 | 68 | Android > Lint > Security 69 | 70 | 71 | Android > Lint > Usability 72 | 73 | 74 | Android > Lint > Usability > Icons 75 | 76 | 77 | Android > Lint > Usability > Typography 78 | 79 | 80 | Annotations verifyingGroovy 81 | 82 | 83 | Assignment issuesGroovy 84 | 85 | 86 | Assignment issuesJava 87 | 88 | 89 | Bitwise operation issuesJava 90 | 91 | 92 | C/C++ 93 | 94 | 95 | Class metricsJava 96 | 97 | 98 | Class structureJava 99 | 100 | 101 | Cloning issuesJava 102 | 103 | 104 | Code maturity issuesJava 105 | 106 | 107 | Code style issuesJava 108 | 109 | 110 | Compiler issuesJava 111 | 112 | 113 | Concurrency annotation issuesJava 114 | 115 | 116 | Control FlowGroovy 117 | 118 | 119 | Control flow issuesJava 120 | 121 | 122 | CorrectnessLintAndroid 123 | 124 | 125 | Data flow analysisC/C++ 126 | 127 | 128 | Data flow issuesGroovy 129 | 130 | 131 | Data flow issuesJava 132 | 133 | 134 | Declaration orderC/C++ 135 | 136 
| 137 | Declaration redundancyGroovy 138 | 139 | 140 | Declaration redundancyJava 141 | 142 | 143 | DeclarationGroovy 144 | 145 | 146 | Dependency issuesJava 147 | 148 | 149 | Encapsulation issuesJava 150 | 151 | 152 | Error handlingGroovy 153 | 154 | 155 | Error handlingJava 156 | 157 | 158 | Finalization issuesJava 159 | 160 | 161 | FunctionsC/C++ 162 | 163 | 164 | GPath inspectionsGroovy 165 | 166 | 167 | General 168 | 169 | 170 | GeneralC/C++ 171 | 172 | 173 | GeneralJava 174 | 175 | 176 | Google Cloud Endpoints 177 | 178 | 179 | Gradle 180 | 181 | 182 | Groovy 183 | 184 | 185 | HTML 186 | 187 | 188 | ImportsJava 189 | 190 | 191 | Inheritance issuesJava 192 | 193 | 194 | Initialization issuesJava 195 | 196 | 197 | Internationalization issues 198 | 199 | 200 | Internationalization issuesJava 201 | 202 | 203 | J2ME issuesJava 204 | 205 | 206 | JSON 207 | 208 | 209 | JUnit issuesJava 210 | 211 | 212 | Java 213 | 214 | 215 | Java language level issuesJava 216 | 217 | 218 | Java language level migration aidsJava 219 | 220 | 221 | JavaBeans issuesJava 222 | 223 | 224 | Javadoc issuesJava 225 | 226 | 227 | Language Injection 228 | 229 | 230 | LintAndroid 231 | 232 | 233 | Logging issuesJava 234 | 235 | 236 | Manifest 237 | 238 | 239 | Memory issuesJava 240 | 241 | 242 | Method MetricsGroovy 243 | 244 | 245 | Method metricsJava 246 | 247 | 248 | Modularization issuesJava 249 | 250 | 251 | Naming ConventionsGroovy 252 | 253 | 254 | Naming conventionsJava 255 | 256 | 257 | Numeric issuesJava 258 | 259 | 260 | OtherGroovy 261 | 262 | 263 | Packaging issuesJava 264 | 265 | 266 | Pattern Validation 267 | 268 | 269 | Performance issuesJava 270 | 271 | 272 | Portability issuesJava 273 | 274 | 275 | Potentially confusing code constructsGroovy 276 | 277 | 278 | Probable bugsGradle 279 | 280 | 281 | Probable bugsGroovy 282 | 283 | 284 | Probable bugsJava 285 | 286 | 287 | Properties Files 288 | 289 | 290 | Properties FilesJava 291 | 292 | 293 | RELAX NG 294 | 295 | 296 | 
Resource management issuesJava 297 | 298 | 299 | Security issuesJava 300 | 301 | 302 | Serialization issuesJava 303 | 304 | 305 | Spelling 306 | 307 | 308 | StyleGroovy 309 | 310 | 311 | TestNGJava 312 | 313 | 314 | Threading issuesGroovy 315 | 316 | 317 | Threading issuesJava 318 | 319 | 320 | Type checksC/C++ 321 | 322 | 323 | Unused codeC/C++ 324 | 325 | 326 | Validity issuesGroovy 327 | 328 | 329 | Verbose or redundant code constructsJava 330 | 331 | 332 | Visibility issuesJava 333 | 334 | 335 | XML 336 | 337 | 338 | toString() issuesJava 339 | 340 | 341 | 342 | 343 | Since15 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 378 | 379 | 380 | 381 | 382 | 383 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /.idea/runConfigurations.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 | 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Java Link Parser 2 | ### Here is our implementation of Link Preview written in Java with RxJava2 3 | ![](https://github.com/VRGsoftUA/Java-Link-Parser/blob/master/image.png) 4 | ## Usage 5 | 1. Include the library as a local library project. 
6 | ```gradle 7 | allprojects { 8 | repositories { 9 | maven { url 'https://jitpack.io' } 10 | } 11 | } 12 | 13 | dependencies { 14 | 15 | compile 'com.github.VRGsoftUA:Java-Link-Parser:1.0.0' 16 | 17 | } 18 | ``` 19 | Initialize LinkCrawler 20 | ```java 21 | LinkCrawler crawler = new LinkCrawler(); 22 | ``` 23 | If you need to do something before a URL is parsed, you can implement OnPreloadCallback 24 | ```java 25 | public class MainActivity extends AppCompatActivity implements OnPreloadCallback 26 | 27 | crawler.setPreloadCallback(this); 28 | ``` 29 | To start parsing, call crawler.parseUrl and pass the desired URL; it returns a ```Flowable``` 30 | ```java 31 | crawler.parseUrl("https://github.com/") 32 | .subscribe(new Consumer() { 33 | @Override 34 | public void accept(@NonNull Result result) throws Exception { 35 | mainBinding.setContent(result.getmParseContent()); 36 | } 37 | }); 38 | ``` 39 | The Result object contains a ParseContent field which holds all parsed data for the passed URL, such as title, description, etc. 40 | #### [Kotlin version](https://github.com/VRGsoftUA/Kotlin-Link-Parser) 41 | License 42 | ================================= 43 | 44 | Copyright 2016 VRG Soft 45 | 46 | Licensed under the Apache License, Version 2.0 (the "License"); 47 | you may not use this file except in compliance with the License. 48 | You may obtain a copy of the License at 49 | 50 | http://www.apache.org/licenses/LICENSE-2.0 51 | 52 | Unless required by applicable law or agreed to in writing, software 53 | distributed under the License is distributed on an "AS IS" BASIS, 54 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 55 | See the License for the specific language governing permissions and 56 | limitations under the License. 
57 | -------------------------------------------------------------------------------- /app/.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | -------------------------------------------------------------------------------- /app/build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'com.android.application' 2 | 3 | android { 4 | compileSdkVersion 25 5 | buildToolsVersion "25.0.3" 6 | defaultConfig { 7 | applicationId "net.vrgsoft.rxurlparser_android" 8 | minSdkVersion 16 9 | targetSdkVersion 25 10 | versionCode 1 11 | versionName "1.0" 12 | testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner" 13 | } 14 | dataBinding 15 | { 16 | enabled = true 17 | } 18 | buildTypes { 19 | release { 20 | minifyEnabled false 21 | proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro' 22 | } 23 | } 24 | } 25 | 26 | dependencies { 27 | compile fileTree(dir: 'libs', include: ['*.jar']) 28 | 29 | compile 'com.android.support:appcompat-v7:25.3.1' 30 | compile 'com.android.support.constraint:constraint-layout:1.0.2' 31 | compile project(path: ':library') 32 | } 33 | -------------------------------------------------------------------------------- /app/proguard-rules.pro: -------------------------------------------------------------------------------- 1 | # Add project specific ProGuard rules here. 2 | # By default, the flags in this file are appended to flags specified 3 | # in F:\AndroidSDK/tools/proguard/proguard-android.txt 4 | # You can edit the include path and order by changing the proguardFiles 5 | # directive in build.gradle. 
6 | # 7 | # For more details, see 8 | # http://developer.android.com/guide/developing/tools/proguard.html 9 | 10 | # Add any project specific keep options here: 11 | 12 | # If your project uses WebView with JS, uncomment the following 13 | # and specify the fully qualified class name to the JavaScript interface 14 | # class: 15 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview { 16 | # public *; 17 | #} 18 | 19 | # Uncomment this to preserve the line number information for 20 | # debugging stack traces. 21 | #-keepattributes SourceFile,LineNumberTable 22 | 23 | # If you keep the line number information, uncomment this to 24 | # hide the original source file name. 25 | #-renamesourcefileattribute SourceFile 26 | -------------------------------------------------------------------------------- /app/src/main/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /app/src/main/java/net/vrgsoft/rxurlparser_android/MainActivity.java: -------------------------------------------------------------------------------- 1 | package net.vrgsoft.rxurlparser_android; 2 | 3 | import android.databinding.DataBindingUtil; 4 | import android.support.v7.app.AppCompatActivity; 5 | import android.os.Bundle; 6 | 7 | import net.vrgsoft.library.LinkCrawler; 8 | import net.vrgsoft.library.Result; 9 | import net.vrgsoft.rxurlparser_android.databinding.ActivityMainBinding; 10 | 11 | import io.reactivex.annotations.NonNull; 12 | import io.reactivex.functions.Consumer; 13 | 14 | public class MainActivity extends AppCompatActivity { 15 | private ActivityMainBinding mainBinding; 16 | 17 | @Override 18 | protected void onCreate(Bundle savedInstanceState) { 19 | super.onCreate(savedInstanceState); 20 | mainBinding = DataBindingUtil.setContentView(this, R.layout.activity_main); 21 | 
LinkCrawler crawler = new LinkCrawler(); 22 | crawler.parseUrl("https://github.com/") 23 | .subscribe(new Consumer() { 24 | @Override 25 | public void accept(@NonNull Result result) throws Exception { 26 | mainBinding.setContent(result.getmParseContent()); 27 | } 28 | }); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /app/src/main/res/layout/activity_main.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 11 | 12 | 13 | 17 | 18 | 31 | 32 | 45 | 46 | 59 | 60 | -------------------------------------------------------------------------------- /app/src/main/res/mipmap-hdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VRGsoftUA/Java-Link-Parser/02a03c4ee125105451ff746c96e697e17c6ac312/app/src/main/res/mipmap-hdpi/ic_launcher.png -------------------------------------------------------------------------------- /app/src/main/res/mipmap-hdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VRGsoftUA/Java-Link-Parser/02a03c4ee125105451ff746c96e697e17c6ac312/app/src/main/res/mipmap-hdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /app/src/main/res/mipmap-mdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VRGsoftUA/Java-Link-Parser/02a03c4ee125105451ff746c96e697e17c6ac312/app/src/main/res/mipmap-mdpi/ic_launcher.png -------------------------------------------------------------------------------- /app/src/main/res/mipmap-mdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/VRGsoftUA/Java-Link-Parser/02a03c4ee125105451ff746c96e697e17c6ac312/app/src/main/res/mipmap-mdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /app/src/main/res/mipmap-xhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VRGsoftUA/Java-Link-Parser/02a03c4ee125105451ff746c96e697e17c6ac312/app/src/main/res/mipmap-xhdpi/ic_launcher.png -------------------------------------------------------------------------------- /app/src/main/res/mipmap-xhdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VRGsoftUA/Java-Link-Parser/02a03c4ee125105451ff746c96e697e17c6ac312/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /app/src/main/res/mipmap-xxhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VRGsoftUA/Java-Link-Parser/02a03c4ee125105451ff746c96e697e17c6ac312/app/src/main/res/mipmap-xxhdpi/ic_launcher.png -------------------------------------------------------------------------------- /app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VRGsoftUA/Java-Link-Parser/02a03c4ee125105451ff746c96e697e17c6ac312/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /app/src/main/res/mipmap-xxxhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VRGsoftUA/Java-Link-Parser/02a03c4ee125105451ff746c96e697e17c6ac312/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png 
-------------------------------------------------------------------------------- /app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VRGsoftUA/Java-Link-Parser/02a03c4ee125105451ff746c96e697e17c6ac312/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png -------------------------------------------------------------------------------- /app/src/main/res/values/colors.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | #3F51B5 4 | #303F9F 5 | #FF4081 6 | 7 | -------------------------------------------------------------------------------- /app/src/main/res/values/dimens.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 60dp 4 | 8dp 5 | -------------------------------------------------------------------------------- /app/src/main/res/values/strings.xml: -------------------------------------------------------------------------------- 1 | 2 | RxUrlParser-Android 3 | 4 | -------------------------------------------------------------------------------- /app/src/main/res/values/styles.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | // Top-level build file where you can add configuration options common to all sub-projects/modules. 
2 | 3 | buildscript { 4 | repositories { 5 | jcenter() 6 | } 7 | dependencies { 8 | classpath 'com.android.tools.build:gradle:2.3.2' 9 | 10 | // NOTE: Do not place your application dependencies here; they belong 11 | // in the individual module build.gradle files 12 | } 13 | } 14 | 15 | allprojects { 16 | repositories { 17 | jcenter() 18 | } 19 | } 20 | 21 | task clean(type: Delete) { 22 | delete rootProject.buildDir 23 | } 24 | -------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | # Project-wide Gradle settings. 2 | 3 | # IDE (e.g. Android Studio) users: 4 | # Gradle settings configured through the IDE *will override* 5 | # any settings specified in this file. 6 | 7 | # For more details on how to configure your build environment visit 8 | # http://www.gradle.org/docs/current/userguide/build_environment.html 9 | 10 | # Specifies the JVM arguments used for the daemon process. 11 | # The setting is particularly useful for tweaking memory settings. 12 | org.gradle.jvmargs=-Xmx1536m 13 | 14 | # When configured, Gradle will run in incubating parallel mode. 15 | # This option should only be used with decoupled projects. 
More details, visit 16 | # http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects 17 | # org.gradle.parallel=true 18 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VRGsoftUA/Java-Link-Parser/02a03c4ee125105451ff746c96e697e17c6ac312/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Mon Jun 12 12:16:12 EEST 2017 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=https\://services.gradle.org/distributions/gradle-3.3-all.zip 7 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 10 | DEFAULT_JVM_OPTS="" 11 | 12 | APP_NAME="Gradle" 13 | APP_BASE_NAME=`basename "$0"` 14 | 15 | # Use the maximum available, or set MAX_FD != -1 to use that value. 16 | MAX_FD="maximum" 17 | 18 | warn ( ) { 19 | echo "$*" 20 | } 21 | 22 | die ( ) { 23 | echo 24 | echo "$*" 25 | echo 26 | exit 1 27 | } 28 | 29 | # OS specific support (must be 'true' or 'false'). 
30 | cygwin=false 31 | msys=false 32 | darwin=false 33 | case "`uname`" in 34 | CYGWIN* ) 35 | cygwin=true 36 | ;; 37 | Darwin* ) 38 | darwin=true 39 | ;; 40 | MINGW* ) 41 | msys=true 42 | ;; 43 | esac 44 | 45 | # Attempt to set APP_HOME 46 | # Resolve links: $0 may be a link 47 | PRG="$0" 48 | # Need this for relative symlinks. 49 | while [ -h "$PRG" ] ; do 50 | ls=`ls -ld "$PRG"` 51 | link=`expr "$ls" : '.*-> \(.*\)$'` 52 | if expr "$link" : '/.*' > /dev/null; then 53 | PRG="$link" 54 | else 55 | PRG=`dirname "$PRG"`"/$link" 56 | fi 57 | done 58 | SAVED="`pwd`" 59 | cd "`dirname \"$PRG\"`/" >/dev/null 60 | APP_HOME="`pwd -P`" 61 | cd "$SAVED" >/dev/null 62 | 63 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 64 | 65 | # Determine the Java command to use to start the JVM. 66 | if [ -n "$JAVA_HOME" ] ; then 67 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 68 | # IBM's JDK on AIX uses strange locations for the executables 69 | JAVACMD="$JAVA_HOME/jre/sh/java" 70 | else 71 | JAVACMD="$JAVA_HOME/bin/java" 72 | fi 73 | if [ ! -x "$JAVACMD" ] ; then 74 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 75 | 76 | Please set the JAVA_HOME variable in your environment to match the 77 | location of your Java installation." 78 | fi 79 | else 80 | JAVACMD="java" 81 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 82 | 83 | Please set the JAVA_HOME variable in your environment to match the 84 | location of your Java installation." 85 | fi 86 | 87 | # Increase the maximum file descriptors if we can. 88 | if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then 89 | MAX_FD_LIMIT=`ulimit -H -n` 90 | if [ $? -eq 0 ] ; then 91 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 92 | MAX_FD="$MAX_FD_LIMIT" 93 | fi 94 | ulimit -n $MAX_FD 95 | if [ $? 
-ne 0 ] ; then 96 | warn "Could not set maximum file descriptor limit: $MAX_FD" 97 | fi 98 | else 99 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 100 | fi 101 | fi 102 | 103 | # For Darwin, add options to specify how the application appears in the dock 104 | if $darwin; then 105 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 106 | fi 107 | 108 | # For Cygwin, switch paths to Windows format before running java 109 | if $cygwin ; then 110 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 111 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 112 | JAVACMD=`cygpath --unix "$JAVACMD"` 113 | 114 | # We build the pattern for arguments to be converted via cygpath 115 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 116 | SEP="" 117 | for dir in $ROOTDIRSRAW ; do 118 | ROOTDIRS="$ROOTDIRS$SEP$dir" 119 | SEP="|" 120 | done 121 | OURCYGPATTERN="(^($ROOTDIRS))" 122 | # Add a user-defined pattern to the cygpath arguments 123 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 124 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 125 | fi 126 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 127 | i=0 128 | for arg in "$@" ; do 129 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 130 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 131 | 132 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 133 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 134 | else 135 | eval `echo args$i`="\"$arg\"" 136 | fi 137 | i=$((i+1)) 138 | done 139 | case $i in 140 | (0) set -- ;; 141 | (1) set -- "$args0" ;; 142 | (2) set -- "$args0" "$args1" ;; 143 | (3) set -- "$args0" "$args1" "$args2" ;; 144 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 145 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 146 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 147 | (7) set -- "$args0" "$args1" "$args2" "$args3" 
"$args4" "$args5" "$args6" ;; 148 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 149 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 150 | esac 151 | fi 152 | 153 | # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules 154 | function splitJvmOpts() { 155 | JVM_OPTS=("$@") 156 | } 157 | eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS 158 | JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME" 159 | 160 | exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@" 161 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 12 | set DEFAULT_JVM_OPTS= 13 | 14 | set DIRNAME=%~dp0 15 | if "%DIRNAME%" == "" set DIRNAME=. 16 | set APP_BASE_NAME=%~n0 17 | set APP_HOME=%DIRNAME% 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 
31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windowz variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | if "%@eval[2+2]" == "4" goto 4NT_args 53 | 54 | :win9xME_args 55 | @rem Slurp the command line arguments. 56 | set CMD_LINE_ARGS= 57 | set _SKIP=2 58 | 59 | :win9xME_args_slurp 60 | if "x%~1" == "x" goto execute 61 | 62 | set CMD_LINE_ARGS=%* 63 | goto execute 64 | 65 | :4NT_args 66 | @rem Get arguments from the 4NT Shell from JP Software 67 | set CMD_LINE_ARGS=%$ 68 | 69 | :execute 70 | @rem Setup the command line 71 | 72 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 73 | 74 | @rem Execute Gradle 75 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 76 | 77 | :end 78 | @rem End local scope for the variables with windows NT shell 79 | if "%ERRORLEVEL%"=="0" goto mainEnd 80 | 81 | :fail 82 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 83 | rem the _cmd.exe /c_ return code! 
84 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 85 | exit /b 1 86 | 87 | :mainEnd 88 | if "%OS%"=="Windows_NT" endlocal 89 | 90 | :omega 91 | -------------------------------------------------------------------------------- /image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VRGsoftUA/Java-Link-Parser/02a03c4ee125105451ff746c96e697e17c6ac312/image.png -------------------------------------------------------------------------------- /library/.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | -------------------------------------------------------------------------------- /library/build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'com.android.library' 2 | 3 | android { 4 | compileSdkVersion 25 5 | buildToolsVersion "25.0.3" 6 | 7 | defaultConfig { 8 | minSdkVersion 16 9 | targetSdkVersion 25 10 | versionCode 1 11 | versionName "1.0" 12 | 13 | testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner" 14 | 15 | } 16 | buildTypes { 17 | release { 18 | minifyEnabled false 19 | proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro' 20 | } 21 | } 22 | } 23 | 24 | dependencies { 25 | compile fileTree(dir: 'libs', include: ['*.jar']) 26 | compile "io.reactivex.rxjava2:rxjava:2.1.0" 27 | compile 'org.jsoup:jsoup:1.10.3' 28 | } 29 | -------------------------------------------------------------------------------- /library/proguard-rules.pro: -------------------------------------------------------------------------------- 1 | # Add project specific ProGuard rules here. 2 | # By default, the flags in this file are appended to flags specified 3 | # in F:\AndroidSDK/tools/proguard/proguard-android.txt 4 | # You can edit the include path and order by changing the proguardFiles 5 | # directive in build.gradle. 
6 | # 7 | # For more details, see 8 | # http://developer.android.com/guide/developing/tools/proguard.html 9 | 10 | # Add any project specific keep options here: 11 | 12 | # If your project uses WebView with JS, uncomment the following 13 | # and specify the fully qualified class name to the JavaScript interface 14 | # class: 15 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview { 16 | # public *; 17 | #} 18 | 19 | # Uncomment this to preserve the line number information for 20 | # debugging stack traces. 21 | #-keepattributes SourceFile,LineNumberTable 22 | 23 | # If you keep the line number information, uncomment this to 24 | # hide the original source file name. 25 | #-renamesourcefileattribute SourceFile 26 | -------------------------------------------------------------------------------- /library/src/main/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /library/src/main/java/net/vrgsoft/library/LinkCrawler.java: -------------------------------------------------------------------------------- 1 | package net.vrgsoft.library; 2 | 3 | import org.jsoup.Jsoup; 4 | import org.jsoup.nodes.Document; 5 | import org.jsoup.nodes.Element; 6 | import org.jsoup.select.Elements; 7 | 8 | import java.io.IOException; 9 | import java.net.MalformedURLException; 10 | import java.net.URL; 11 | import java.net.URLConnection; 12 | import java.util.ArrayList; 13 | import java.util.HashMap; 14 | import java.util.List; 15 | import java.util.Map; 16 | import java.util.concurrent.Callable; 17 | 18 | import io.reactivex.Flowable; 19 | import io.reactivex.Single; 20 | import io.reactivex.annotations.NonNull; 21 | import io.reactivex.functions.Consumer; 22 | import io.reactivex.processors.PublishProcessor; 23 | import io.reactivex.schedulers.Schedulers; 24 | 25 | public class LinkCrawler { 26 | 27 | private 
final String HTTP_PROTOCOL = "http://"; 28 | private final String HTTPS_PROTOCOL = "https://"; 29 | 30 | /* Optional hook; when it is non-null, initUrl calls its onPre() before doing anything else. */ private OnPreloadCallback callback; 31 | /* Parsed results, keyed by the exact url string handed to parseUrl. */ private Map mCache = new HashMap<>(); 32 | /* The one stream handed out by parseUrl; every finished lookup is pushed into it with onNext. */ private PublishProcessor mProcessor = PublishProcessor.create(); 33 | 34 | /** Stores the hook that initUrl invokes at the start of each lookup; passing null disables it. */ public void setPreloadCallback(OnPreloadCallback callback) { 35 | this.callback = callback; 36 | } 37 | 38 | /** Starts a lookup for url and returns the shared processor. The same processor instance is returned on every call, so one subscriber receives the results of all lookups; each Result carries the url it belongs to. NOTE(review): on a cache hit onNext runs inside initUrl, before this method has returned the stream - confirm that a caller subscribing afterwards still receives that item. */ public Flowable parseUrl(String url) { 39 | initUrl(url); 40 | return mProcessor; 41 | } 42 | 43 | /** Runs the preload hook if one is set, then either re-emits the cached ParseContent for url or subscribes to getCode(url) on Schedulers.io(), caching and emitting the result when it arrives. A failure in that chain only has its stack trace printed; nothing is emitted for the url. NOTE(review): mCache is a plain HashMap that is read here by the caller and written from the subscribe callback - confirm which thread that callback runs on. */ private void initUrl(final String url) { 44 | if (callback != null) { 45 | callback.onPre(); 46 | } 47 | if (mCache.containsKey(url)) { 48 | mProcessor.onNext(new Result(mCache.get(url), isNull(mCache.get(url)), url)); 49 | } else { 50 | getCode(url) 51 | .subscribeOn(Schedulers.io()) 52 | .subscribe(new Consumer() { 53 | @Override 54 | public void accept(@NonNull ParseContent parseContent) throws Exception { 55 | mCache.put(url, parseContent); 56 | mProcessor.onNext(new Result(parseContent, isNull(parseContent), url)); 57 | } 58 | }, new Consumer() { 59 | @Override 60 | public void accept(@NonNull Throwable throwable) throws Exception { 61 | throwable.printStackTrace(); 62 | } 63 | }); 64 | } 65 | } 66 | 67 | /** 68 | * Builds a Single that, once subscribed, takes the first URL that SearchUrls.matches finds in url, resolves it with unshortenUrl and fills a ParseContent from the result. NOTE(review): the ParseContent is created once per getCode call, outside the Callable, so subscribing to the returned Single twice would reuse the same instance. 69 | */ 70 | private Single getCode(final String url) { 71 | 72 | final ParseContent sourceContent = new ParseContent(); 73 | 74 | return Single.fromCallable(new Callable() { 75 | @Override 76 | public ParseContent call() throws Exception { 77 | ArrayList urls; 78 | urls = SearchUrls.matches(url); 79 | 80 | if (urls.size() > 0) 81 | sourceContent 82 | .setFinalUrl(unshortenUrl(extendedTrim(urls.get(0)))); 83 | else 84 | sourceContent.setFinalUrl(""); 85 | 86 | if (!sourceContent.getFinalUrl().equals("")) { 87 | /* a direct image link that does not contain the word dropbox is not fetched; the url itself becomes the only image */ if (isImage(sourceContent.getFinalUrl()) 88 | && !sourceContent.getFinalUrl().contains("dropbox")) { 89 | sourceContent.setSuccess(true); 90 | 91 | sourceContent.getImages().add(sourceContent.getFinalUrl()); 92 | 93 | sourceContent.setTitle(""); 94 | 
sourceContent.setDescription(""); 95 | 96 | } else { 97 | try { 98 | /* fetch and parse the page, sending Mozilla as the user agent */ Document doc = Jsoup 99 | .connect(sourceContent.getFinalUrl()) 100 | .userAgent("Mozilla").get(); 101 | 102 | sourceContent.setHtmlCode(extendedTrim(doc.toString())); 103 | 104 | HashMap metaTags = getMetaTags(sourceContent 105 | .getHtmlCode()); 106 | 107 | sourceContent.setMetaTags(metaTags); 108 | 109 | sourceContent.setTitle(metaTags.get("title")); 110 | sourceContent.setDescription(metaTags 111 | .get("description")); 112 | 113 | /* no title meta tag: fall back to group 2 of Regex.TITLE_PATTERN */ if (sourceContent.getTitle().equals("")) { 114 | String matchTitle = Regex.match( 115 | sourceContent.getHtmlCode(), 116 | Regex.TITLE_PATTERN, 2); 117 | 118 | if (!matchTitle.equals("")) 119 | sourceContent.setTitle(htmlDecode(matchTitle)); 120 | } 121 | 122 | /* no description meta tag: fall back to the text picked by crawlCode */ if (sourceContent.getDescription().equals("")) 123 | sourceContent 124 | .setDescription(crawlCode(sourceContent 125 | .getHtmlCode())); 126 | 127 | sourceContent.setDescription(sourceContent 128 | .getDescription().replaceAll( 129 | Regex.SCRIPT_PATTERN, "")); 130 | 131 | /* prefer the image meta tag; otherwise collect the img sources found by getImages */ if (!metaTags.get("image").equals("")) 132 | sourceContent.getImages().add( 133 | metaTags.get("image")); 134 | else { 135 | sourceContent.setImages(getImages(doc)); 136 | } 137 | 138 | sourceContent.setSuccess(true); 139 | } catch (Exception e) { 140 | /* any failure while fetching or parsing only clears the success flag; the exception itself is dropped */ sourceContent.setSuccess(false); 141 | } 142 | } 143 | } 144 | 145 | /* url is finalUrl cut at the first ampersand */ String[] finalLinkSet = sourceContent.getFinalUrl().split("&"); 146 | sourceContent.setUrl(finalLinkSet[0]); 147 | 148 | sourceContent.setCannonicalUrl(cannonicalPage(sourceContent 149 | .getFinalUrl())); 150 | sourceContent.setDescription(stripTags(sourceContent 151 | .getDescription())); 152 | return sourceContent; 153 | } 154 | }); 155 | // Don't forget the http:// or https:// 156 | 157 | 158 | } 159 | 160 | /** Returns true only when all three hold: the content is not marked successful, its html code is empty after extendedTrim, and its final url is not recognised by isImage. */ private boolean isNull(ParseContent parseContent) { 161 | return !parseContent.isSuccess() && 162 | extendedTrim(parseContent.getHtmlCode()).equals("") && 163 | !isImage(parseContent.getFinalUrl()); 164 | } 165 | 166 
| /** 167 | * Picks text for the given tag out of content: the first match whose tag-stripped text is at least 120 characters long, or, when no match is that long, the first match of the same pattern. The result then goes through the replaceAll call below and htmlDecode. 168 | */ 169 | 170 | private String getTagContent(String tag, String content) { 171 | 172 | String pattern = "<" + tag + "(.*?)>(.*?)"; 173 | String result = "", currentMatch = ""; 174 | 175 | List matches = Regex.matchAll(content, pattern, 2); 176 | 177 | int matchesSize = matches.size(); 178 | for (int i = 0; i < matchesSize; i++) { 179 | currentMatch = stripTags(matches.get(i)); 180 | /* 120 characters is the minimum length for a match to be accepted outright */ if (currentMatch.length() >= 120) { 181 | result = extendedTrim(currentMatch); 182 | break; 183 | } 184 | } 185 | 186 | /* nothing was long enough: settle for the first match, whatever its length */ if (result.equals("")) { 187 | String matchFinal = Regex.match(content, pattern, 2); 188 | result = extendedTrim(matchFinal); 189 | } 190 | 191 | result = result.replaceAll(" ", ""); 192 | 193 | return htmlDecode(result); 194 | } 195 | 196 | /** 197 | * Collects the abs:src attribute of every element selected by [src] whose tag name is img, in document order. 198 | */ 199 | private List getImages(Document document) { 200 | List matches = new ArrayList(); 201 | 202 | Elements media = document.select("[src]"); 203 | 204 | for (Element srcElement : media) { 205 | if (srcElement.tagName().equals("img")) { 206 | matches.add(srcElement.attr("abs:src")); 207 | } 208 | } 209 | 210 | return matches; 211 | } 212 | 213 | /** 214 | * Parses content with Jsoup and returns the text() of the resulting document. 215 | */ 216 | private String htmlDecode(String content) { 217 | return Jsoup.parse(content).text(); 218 | } 219 | 220 | /** 221 | * Chooses a description candidate from the span, p and div text returned by getTagContent: the paragraph when it is longer than the span and at least as long as the div, the div when the paragraph is longer than the span but shorter than the div, and the paragraph in every other case. NOTE(review): resultSpan is assigned to result first but every branch overwrites it, so the span text is never returned - confirm this is intended. 222 | */ 223 | private String crawlCode(String content) { 224 | String result = ""; 225 | String resultSpan = ""; 226 | String resultParagraph = ""; 227 | String resultDiv = ""; 228 | 229 | resultSpan = getTagContent("span", content); 230 | resultParagraph = getTagContent("p", content); 231 | resultDiv = getTagContent("div", content); 232 | 233 | result = resultSpan; 234 | 235 | if (resultParagraph.length() > resultSpan.length() 236 | && resultParagraph.length() >= resultDiv.length()) 237 | result = resultParagraph; 238 | else if (resultParagraph.length() > 
resultSpan.length() 239 | && resultParagraph.length() < resultDiv.length()) 240 | result = resultDiv; 241 | else 242 | result = resultParagraph; 243 | 244 | return htmlDecode(result); 245 | } 246 | 247 | /** 248 | * Returns the canonical url: the part of url that follows an optional HTTP_PROTOCOL or HTTPS_PROTOCOL prefix, up to but not including the first slash (the whole remainder when there is no slash). 249 | */ 250 | private String cannonicalPage(String url) { 251 | 252 | String cannonical = ""; 253 | if (url.startsWith(HTTP_PROTOCOL)) { 254 | url = url.substring(HTTP_PROTOCOL.length()); 255 | } else if (url.startsWith(HTTPS_PROTOCOL)) { 256 | url = url.substring(HTTPS_PROTOCOL.length()); 257 | } 258 | 259 | int urlLength = url.length(); 260 | /* copy characters one by one until the first slash is reached */ for (int i = 0; i < urlLength; i++) { 261 | if (url.charAt(i) != '/') 262 | cannonical += url.charAt(i); 263 | else 264 | break; 265 | } 266 | 267 | return cannonical; 268 | 269 | } 270 | 271 | /** 272 | * Parses content with Jsoup and returns its text(); the body is the same as htmlDecode. 273 | */ 274 | private String stripTags(String content) { 275 | return Jsoup.parse(content).text(); 276 | } 277 | 278 | /** 279 | * Returns true when the whole url matches Regex.IMAGE_PATTERN. 280 | */ 281 | private boolean isImage(String url) { 282 | return url.matches(Regex.IMAGE_PATTERN); 283 | } 284 | 285 | /** 286 | * Builds a map with the keys url, title, description and image, each starting as the empty string, and fills it from every match of Regex.METATAG_PATTERN in content. A match is assigned to a key when its lower-cased text contains the og: property or the plain name for that key, written with either double or single quotes. 287 | */ 288 | private HashMap getMetaTags(String content) { 289 | 290 | HashMap metaTags = new HashMap(); 291 | metaTags.put("url", ""); 292 | metaTags.put("title", ""); 293 | metaTags.put("description", ""); 294 | metaTags.put("image", ""); 295 | 296 | List matches = Regex.matchAll(content, 297 | Regex.METATAG_PATTERN, 1); 298 | 299 | for (String match : matches) { 300 | /* the key is detected on a lower-cased copy, while the value is extracted from the original text */ final String lowerCase = match.toLowerCase(); 301 | if (lowerCase.contains("property=\"og:url\"") 302 | || lowerCase.contains("property='og:url'") 303 | || lowerCase.contains("name=\"url\"") 304 | || lowerCase.contains("name='url'")) 305 | updateMetaTag(metaTags, "url", separeMetaTagsContent(match)); 306 | else if (lowerCase.contains("property=\"og:title\"") 307 | || lowerCase.contains("property='og:title'") 308 | || lowerCase.contains("name=\"title\"") 309 | 
|| lowerCase.contains("name='title'")) 310 | updateMetaTag(metaTags, "title", separeMetaTagsContent(match)); 311 | else if (lowerCase 312 | .contains("property=\"og:description\"") 313 | || lowerCase 314 | .contains("property='og:description'") 315 | || lowerCase.contains("name=\"description\"") 316 | || lowerCase.contains("name='description'")) 317 | updateMetaTag(metaTags, "description", separeMetaTagsContent(match)); 318 | else if (lowerCase.contains("property=\"og:image\"") 319 | || lowerCase.contains("property='og:image'") 320 | || lowerCase.contains("name=\"image\"") 321 | || lowerCase.contains("name='image'")) 322 | updateMetaTag(metaTags, "image", separeMetaTagsContent(match)); 323 | } 324 | 325 | return metaTags; 326 | } 327 | 328 | /** Stores value in metaTags only when it is non-null and non-empty, so an empty extraction never replaces a value found earlier. The parameter named url is the map key, not an address. */ private void updateMetaTag(HashMap metaTags, String url, String value) { 329 | if (value != null && (value.length() > 0)) { 330 | metaTags.put(url, value); 331 | } 332 | } 333 | 334 | /** 335 | * Extracts group 1 of Regex.METATAG_CONTENT_PATTERN from a single meta tag and decodes it with htmlDecode. 336 | */ 337 | private String separeMetaTagsContent(String content) { 338 | String result = Regex.match(content, Regex.METATAG_CONTENT_PATTERN, 339 | 1); 340 | return htmlDecode(result); 341 | } 342 | 343 | /** 344 | * Resolves shortURL by opening a connection, reading its header fields and taking the URL the connection reports, then repeating that once on the result. Returns the empty string when shortURL starts with neither HTTP_PROTOCOL nor HTTPS_PROTOCOL. NOTE(review): connectURL returns null when the URL is malformed or cannot be opened, and that value is dereferenced here without a check. NOTE(review): shortURL is not reassigned inside the while loop, so the loop ends only once a recursive call returns a value equal to it - confirm this terminates for every redirect chain. 345 | */ 346 | private String unshortenUrl(String shortURL) { 347 | if (!shortURL.startsWith(HTTP_PROTOCOL) 348 | && !shortURL.startsWith(HTTPS_PROTOCOL)) 349 | return ""; 350 | 351 | URLConnection urlConn = connectURL(shortURL); 352 | urlConn.getHeaderFields(); 353 | 354 | String finalResult = urlConn.getURL().toString(); 355 | 356 | /* second pass: resolve the address obtained from the first pass */ urlConn = connectURL(finalResult); 357 | urlConn.getHeaderFields(); 358 | 359 | shortURL = urlConn.getURL().toString(); 360 | 361 | /* the two passes disagree: keep resolving until the result equals the second-pass address */ while (!shortURL.equals(finalResult)) { 362 | finalResult = unshortenUrl(finalResult); 363 | } 364 | 365 | return finalResult; 366 | } 367 | 368 | /** 369 | * Takes a valid url and return a URL object representing the url address. 
370 | */ 371 | /* Returns the URLConnection opened for strURL (not a URL object), or null when the string is not a valid URL or the connection cannot be opened; both failures are only reported on standard output. */ private URLConnection connectURL(String strURL) { 372 | URLConnection conn = null; 373 | try { 374 | URL inputURL = new URL(strURL); 375 | conn = inputURL.openConnection(); 376 | } catch (MalformedURLException e) { 377 | System.out.println("Please input a valid URL"); 378 | } catch (IOException ioe) { 379 | System.out.println("Can not connect to the URL"); 380 | } 381 | return conn; 382 | } 383 | 384 | /** 385 | * Collapses every run of whitespace in content into a single space and trims both ends. Package-private and static because Regex calls it as well. content is dereferenced directly, so it must not be null. NOTE(review): the whitespace class used by the first call already covers newline and carriage return, so the two replace calls that follow look redundant. 386 | */ 387 | static String extendedTrim(String content) { 388 | return content.replaceAll("\\s+", " ").replace("\n", " ") 389 | .replace("\r", " ").trim(); 390 | } 391 | } 392 | -------------------------------------------------------------------------------- /library/src/main/java/net/vrgsoft/library/OnPreloadCallback.java: -------------------------------------------------------------------------------- 1 | package net.vrgsoft.library; 2 | 3 | /** Hook that LinkCrawler.initUrl invokes, when one has been set, before it looks up or fetches a url. */ public interface OnPreloadCallback { 4 | /** Called once at the start of every lookup, on the thread that called LinkCrawler.parseUrl. */ void onPre(); 5 | } 6 | -------------------------------------------------------------------------------- /library/src/main/java/net/vrgsoft/library/ParseContent.java: -------------------------------------------------------------------------------- 1 | package net.vrgsoft.library; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | 7 | /** Mutable holder for what LinkCrawler.getCode extracts for one url. Every String field starts as the empty string, success starts as false and every collection starts empty. */ public class ParseContent { 8 | private boolean success = false; 9 | private String htmlCode = ""; 10 | /* raw and urlData are not written by the LinkCrawler code in this file set; presumably kept for callers - verify before removing. */ private String raw = ""; 11 | private String title = ""; 12 | private String description = ""; 13 | private String url = ""; 14 | private String finalUrl = ""; 15 | private String cannonicalUrl = ""; 16 | private HashMap metaTags = new HashMap(); 17 | 18 | private List images = new ArrayList(); 19 | private List urlData = new ArrayList<>(); 20 | 21 | public boolean isSuccess() { 22 | return success; 23 | } 24 | 25 | public void setSuccess(boolean success) { 26 | this.success = success; 27 | } 28 | 29 | public String getHtmlCode() { 30 | 
return htmlCode; 31 | } 32 | 33 | public void setHtmlCode(String htmlCode) { 34 | this.htmlCode = htmlCode; 35 | } 36 | 37 | public String getRaw() { 38 | return raw; 39 | } 40 | 41 | public void setRaw(String raw) { 42 | this.raw = raw; 43 | } 44 | 45 | public String getTitle() { 46 | return title; 47 | } 48 | 49 | public void setTitle(String title) { 50 | this.title = title; 51 | } 52 | 53 | public String getDescription() { 54 | return description; 55 | } 56 | 57 | public void setDescription(String description) { 58 | this.description = description; 59 | } 60 | 61 | /** Returns the value LinkCrawler.getCode stores as url: the final url cut at the first ampersand. */ public String getUrl() { 62 | return url; 63 | } 64 | 65 | public void setUrl(String url) { 66 | this.url = url; 67 | } 68 | 69 | /** Returns the address LinkCrawler.getCode obtained from unshortenUrl, or the empty string when no URL was found. */ public String getFinalUrl() { 70 | return finalUrl; 71 | } 72 | 73 | public void setFinalUrl(String finalUrl) { 74 | this.finalUrl = finalUrl; 75 | } 76 | 77 | /** Returns the value LinkCrawler.cannonicalPage computed from the final url. */ public String getCannonicalUrl() { 78 | return cannonicalUrl; 79 | } 80 | 81 | public void setCannonicalUrl(String cannonicalUrl) { 82 | this.cannonicalUrl = cannonicalUrl; 83 | } 84 | 85 | public HashMap getMetaTags() { 86 | return metaTags; 87 | } 88 | 89 | public void setMetaTags(HashMap metaTags) { 90 | this.metaTags = metaTags; 91 | } 92 | 93 | /** Returns the internal list itself, not a copy; LinkCrawler.getCode adds to it directly. */ public List getImages() { 94 | return images; 95 | } 96 | 97 | public void setImages(List images) { 98 | this.images = images; 99 | } 100 | 101 | public List getUrlData() { 102 | return urlData; 103 | } 104 | 105 | public void setUrlData(List urlData) { 106 | this.urlData = urlData; 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /library/src/main/java/net/vrgsoft/library/Regex.java: -------------------------------------------------------------------------------- 1 | package net.vrgsoft.library; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.regex.Matcher; 6 | import java.util.regex.Pattern; 7 | 8 | /** Pattern strings plus two small matching helpers used by LinkCrawler. NOTE(review): several pattern literals in this copy look truncated, for example METATAG_PATTERN is the empty string although LinkCrawler.getMetaTags asks for group 1 of it - compare against the upstream file before relying on them. */ public class Regex { 9 | public static final String IMAGE_PATTERN = 
"(.+?)\\.(jpg|png|gif|bmp)$"; 10 | public static final String IMAGE_TAG_PATTERN = "()?"; 11 | public static final String ICON_TAG_PATTERN = "()?"; 12 | public static final String ICON_REV_TAG_PATTERN = "()?"; 13 | public static final String ITEMPROP_IMAGE_TAG_PATTERN = "()?"; 14 | public static final String ITEMPROP_IMAGE_REV_TAG_PATTERN = "()?"; 15 | public static final String TITLE_PATTERN = "(.*?)"; 16 | public static final String SCRIPT_PATTERN = "(.*?)"; 17 | public static final String METATAG_PATTERN = ""; 18 | public static final String METATAG_CONTENT_PATTERN = "content=\"(.*?)\""; 19 | public static final String URL_PATTERN = "<\\b(https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]>"; 20 | 21 | /** Returns the requested capture group of the first match of pattern in content, passed through LinkCrawler.extendedTrim, or the empty string when nothing matches. The pattern is compiled on every call. */ public static String match(String content, String pattern, int index) { 22 | 23 | String match = ""; 24 | Matcher matcher = Pattern.compile(pattern).matcher(content); 25 | 26 | /* the loop body breaks immediately, so only the first match is ever read */ while (matcher.find()) { 27 | match = matcher.group(index); 28 | break; 29 | } 30 | 31 | return LinkCrawler.extendedTrim(match); 32 | } 33 | 34 | /** Returns the requested capture group of every match of pattern in content, in order of appearance, each passed through LinkCrawler.extendedTrim. The list is empty when nothing matches. */ public static List matchAll(String content, String pattern, 35 | int index) { 36 | 37 | List matches = new ArrayList(); 38 | Matcher matcher = Pattern.compile(pattern).matcher(content); 39 | 40 | while (matcher.find()) { 41 | matches.add(LinkCrawler.extendedTrim(matcher.group(index))); 42 | } 43 | 44 | return matches; 45 | } 46 | 47 | 48 | } 49 | -------------------------------------------------------------------------------- /library/src/main/java/net/vrgsoft/library/Result.java: -------------------------------------------------------------------------------- 1 | package net.vrgsoft.library; 2 | 3 | /** Value that LinkCrawler pushes into its processor for each finished lookup: the parsed content, the flag computed by LinkCrawler.isNull and the url that was requested. The fields are set in the constructor and the class has no setters. */ public class Result { 4 | private ParseContent mParseContent; 5 | private boolean isNull; 6 | private String mUrl; 7 | 8 | public Result(ParseContent mParseContent, boolean isNull, String mUrl) { 9 | this.mParseContent = mParseContent; 10 | this.isNull = isNull; 11 | this.mUrl = mUrl; 12 | } 13 | 14 | public ParseContent 
getmParseContent() { 15 | return mParseContent; 16 | } 17 | 18 | public boolean isNull() { 19 | return isNull; 20 | } 21 | 22 | public String getmUrl() { 23 | return mUrl; 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /library/src/main/java/net/vrgsoft/library/SearchUrls.java: -------------------------------------------------------------------------------- 1 | package net.vrgsoft.library; 2 | 3 | import java.net.URL; 4 | import java.util.ArrayList; 5 | 6 | /** Finds URLs in free text by splitting it on single spaces and keeping every piece that java.net.URL accepts. */ public class SearchUrls { 7 | 8 | /* Mode flag: collect every URL in the text. */ public static final int ALL = 0; 9 | /* Mode flag: stop after the first URL found. */ public static final int FIRST = 1; 10 | 11 | /** Returns every URL found in text; same as calling matches(text, ALL). */ 12 | public static ArrayList matches(String text) { 13 | return matches(text, ALL); 14 | } 15 | 16 | /** Returns the URLs found in text, in order of appearance. With results equal to FIRST the scan stops after the first hit; any other value behaves like ALL. The text is split on a single space character only, so pieces separated by other whitespace such as a line break stay joined as one token. */ 17 | public static ArrayList matches(String text, int results) { 18 | 19 | ArrayList urls = new ArrayList<>(); 20 | 21 | String[] splitString = (text.split(" ")); 22 | for (String string : splitString) { 23 | 24 | try { 25 | URL item = new URL(string); 26 | urls.add(item.toString()); 27 | } catch (Exception e) { 28 | /* deliberately ignored: a token that the URL constructor rejects is simply not a url */ } 29 | 30 | if (results == FIRST && urls.size() > 0) 31 | break; 32 | } 33 | 34 | return urls; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /library/src/main/res/values/strings.xml: -------------------------------------------------------------------------------- 1 | 2 | library 3 | 4 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | include ':app', ':library' 2 | --------------------------------------------------------------------------------