├── app ├── .gitignore ├── src │ ├── main │ │ ├── res │ │ │ ├── values │ │ │ │ ├── strings.xml │ │ │ │ ├── colors.xml │ │ │ │ ├── dimens.xml │ │ │ │ └── styles.xml │ │ │ ├── mipmap-xhdpi │ │ │ │ ├── code.jpg │ │ │ │ └── ic_launcher.png │ │ │ ├── mipmap-hdpi │ │ │ │ └── ic_launcher.png │ │ │ ├── mipmap-mdpi │ │ │ │ └── ic_launcher.png │ │ │ ├── mipmap-xxhdpi │ │ │ │ └── ic_launcher.png │ │ │ ├── mipmap-xxxhdpi │ │ │ │ └── ic_launcher.png │ │ │ ├── values-w820dp │ │ │ │ └── dimens.xml │ │ │ └── layout │ │ │ │ ├── activity_radiumdetail.xml │ │ │ │ ├── adapter_layout.xml │ │ │ │ ├── my_list_item.xml │ │ │ │ └── activity_main.xml │ │ ├── AndroidManifest.xml │ │ └── java │ │ │ └── bruce │ │ │ └── chang │ │ │ └── testandroidcrawler │ │ │ ├── RadiumBean.java │ │ │ ├── RadiumDetailActivity.java │ │ │ ├── MyAdapter.java │ │ │ └── MainActivity.java │ ├── test │ │ └── java │ │ │ └── bruce │ │ │ └── chang │ │ │ └── testandroidcrawler │ │ │ └── ExampleUnitTest.java │ └── androidTest │ │ └── java │ │ └── bruce │ │ └── chang │ │ └── testandroidcrawler │ │ └── ExampleInstrumentedTest.java ├── libs │ └── jsoup-1.10.1.jar ├── proguard-rules.pro └── build.gradle ├── settings.gradle ├── .idea ├── copyright │ └── profiles_settings.xml ├── encodings.xml ├── modules.xml ├── runConfigurations.xml ├── gradle.xml ├── compiler.xml └── misc.xml ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── .gitignore ├── gradle.properties ├── gradlew.bat ├── gradlew └── README.md /app/.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | include ':app' 2 | -------------------------------------------------------------------------------- /app/src/main/res/values/strings.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 爬虫 3 | 4 | -------------------------------------------------------------------------------- /app/libs/jsoup-1.10.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/201216323/TestAndroidCrawler/HEAD/app/libs/jsoup-1.10.1.jar -------------------------------------------------------------------------------- /.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/201216323/TestAndroidCrawler/HEAD/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /app/src/main/res/mipmap-xhdpi/code.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/201216323/TestAndroidCrawler/HEAD/app/src/main/res/mipmap-xhdpi/code.jpg -------------------------------------------------------------------------------- /app/src/main/res/mipmap-hdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/201216323/TestAndroidCrawler/HEAD/app/src/main/res/mipmap-hdpi/ic_launcher.png -------------------------------------------------------------------------------- /app/src/main/res/mipmap-mdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/201216323/TestAndroidCrawler/HEAD/app/src/main/res/mipmap-mdpi/ic_launcher.png -------------------------------------------------------------------------------- /app/src/main/res/mipmap-xhdpi/ic_launcher.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/201216323/TestAndroidCrawler/HEAD/app/src/main/res/mipmap-xhdpi/ic_launcher.png -------------------------------------------------------------------------------- /app/src/main/res/mipmap-xxhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/201216323/TestAndroidCrawler/HEAD/app/src/main/res/mipmap-xxhdpi/ic_launcher.png -------------------------------------------------------------------------------- /app/src/main/res/mipmap-xxxhdpi/ic_launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/201216323/TestAndroidCrawler/HEAD/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | .gradle 3 | /local.properties 4 | /.idea/workspace.xml 5 | /.idea/libraries 6 | .DS_Store 7 | /build 8 | /captures 9 | .externalNativeBuild 10 | -------------------------------------------------------------------------------- /.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /app/src/main/res/values/colors.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | #3F51B5 4 | #303F9F 5 | #FF4081 6 | 7 | -------------------------------------------------------------------------------- /app/src/main/res/values/dimens.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16dp 4 | 16dp 5 | 6 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: 
-------------------------------------------------------------------------------- 1 | #Mon Dec 28 10:00:20 PST 2015 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=https\://services.gradle.org/distributions/gradle-2.14.1-all.zip 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /app/src/main/res/values-w820dp/dimens.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 64dp 6 | 7 | -------------------------------------------------------------------------------- /app/src/main/res/values/styles.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /app/src/test/java/bruce/chang/testandroidcrawler/ExampleUnitTest.java: -------------------------------------------------------------------------------- 1 | package bruce.chang.testandroidcrawler; 2 | 3 | import org.junit.Test; 4 | 5 | import static org.junit.Assert.*; 6 | 7 | /** 8 | * Example local unit test, which will execute on the development machine (host). 
9 | * 10 | * @see Testing documentation 11 | */ 12 | public class ExampleUnitTest { 13 | @Test 14 | public void addition_isCorrect() throws Exception { 15 | assertEquals(4, 2 + 2); 16 | } 17 | } -------------------------------------------------------------------------------- /.idea/runConfigurations.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 | 12 | -------------------------------------------------------------------------------- /app/proguard-rules.pro: -------------------------------------------------------------------------------- 1 | # Add project specific ProGuard rules here. 2 | # By default, the flags in this file are appended to flags specified 3 | # in D:\sdk/tools/proguard/proguard-android.txt 4 | # You can edit the include path and order by changing the proguardFiles 5 | # directive in build.gradle. 6 | # 7 | # For more details, see 8 | # http://developer.android.com/guide/developing/tools/proguard.html 9 | 10 | # Add any project specific keep options here: 11 | 12 | # If your project uses WebView with JS, uncomment the following 13 | # and specify the fully qualified class name to the JavaScript interface 14 | # class: 15 | #-keepclassmembers class fqcn.of.javascript.interface.for.webview { 16 | # public *; 17 | #} 18 | -------------------------------------------------------------------------------- /.idea/gradle.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 18 | 19 | -------------------------------------------------------------------------------- /.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | # 
Project-wide Gradle settings. 2 | 3 | # IDE (e.g. Android Studio) users: 4 | # Gradle settings configured through the IDE *will override* 5 | # any settings specified in this file. 6 | 7 | # For more details on how to configure your build environment visit 8 | # http://www.gradle.org/docs/current/userguide/build_environment.html 9 | 10 | # Specifies the JVM arguments used for the daemon process. 11 | # The setting is particularly useful for tweaking memory settings. 12 | org.gradle.jvmargs=-Xmx1536m 13 | 14 | # When configured, Gradle will run in incubating parallel mode. 15 | # This option should only be used with decoupled projects. More details, visit 16 | # http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects 17 | # org.gradle.parallel=true 18 | -------------------------------------------------------------------------------- /app/src/androidTest/java/bruce/chang/testandroidcrawler/ExampleInstrumentedTest.java: -------------------------------------------------------------------------------- 1 | package bruce.chang.testandroidcrawler; 2 | 3 | import android.content.Context; 4 | import android.support.test.InstrumentationRegistry; 5 | import android.support.test.runner.AndroidJUnit4; 6 | 7 | import org.junit.Test; 8 | import org.junit.runner.RunWith; 9 | 10 | import static org.junit.Assert.*; 11 | 12 | /** 13 | * Instrumentation test, which will execute on an Android device. 14 | * 15 | * @see Testing documentation 16 | */ 17 | @RunWith(AndroidJUnit4.class) 18 | public class ExampleInstrumentedTest { 19 | @Test 20 | public void useAppContext() throws Exception { 21 | // Context of the app under test. 
22 | Context appContext = InstrumentationRegistry.getTargetContext(); 23 | 24 | assertEquals("bruce.chang.testandroidcrawler", appContext.getPackageName()); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /app/src/main/AndroidManifest.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | 8 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /app/src/main/res/layout/activity_radiumdetail.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 8 | 17 | 18 | 23 | 24 | 30 | -------------------------------------------------------------------------------- /app/src/main/java/bruce/chang/testandroidcrawler/RadiumBean.java: -------------------------------------------------------------------------------- 1 | package bruce.chang.testandroidcrawler; 2 | 3 | import java.io.Serializable; 4 | 5 | /** 6 | * Created by: BruceChang 7 | * Date on : 2016/12/11. 
8 | * Time on: 15:30 9 | * Progect_Name:TestAndroidCrawler 10 | * Source Github: 11 | * Description: 12 | */ 13 | 14 | public class RadiumBean implements Serializable { 15 | private String targetUrl; 16 | private String img; 17 | private String name; 18 | private String address; 19 | 20 | public String getTargetUrl() { 21 | return targetUrl; 22 | } 23 | 24 | public void setTargetUrl(String targetUrl) { 25 | this.targetUrl = targetUrl; 26 | } 27 | 28 | public String getImg() { 29 | return img; 30 | } 31 | 32 | public void setImg(String img) { 33 | this.img = img; 34 | } 35 | 36 | public String getName() { 37 | return name; 38 | } 39 | 40 | public void setName(String name) { 41 | this.name = name; 42 | } 43 | 44 | public String getAddress() { 45 | return address; 46 | } 47 | 48 | public void setAddress(String address) { 49 | this.address = address; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /app/build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'com.android.application' 2 | apply plugin: 'android-apt' 3 | android { 4 | compileSdkVersion 25 5 | buildToolsVersion "25.0.0" 6 | defaultConfig { 7 | applicationId "bruce.chang.testandroidcrawler" 8 | minSdkVersion 15 9 | targetSdkVersion 25 10 | versionCode 1 11 | versionName "1.0" 12 | testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner" 13 | } 14 | buildTypes { 15 | release { 16 | minifyEnabled false 17 | proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro' 18 | } 19 | } 20 | } 21 | 22 | dependencies { 23 | compile fileTree(include: ['*.jar'], dir: 'libs') 24 | androidTestCompile('com.android.support.test.espresso:espresso-core:2.2.2', { 25 | exclude group: 'com.android.support', module: 'support-annotations' 26 | }) 27 | compile 'com.android.support:appcompat-v7:25.0.1' 28 | testCompile 'junit:junit:4.12' 29 | compile 
'com.jakewharton:butterknife:8.2.1' 30 | apt 'com.jakewharton:butterknife-compiler:8.2.1' 31 | compile files('libs/jsoup-1.10.1.jar') 32 | compile 'com.squareup.picasso:picasso:2.5.1' 33 | } 34 | -------------------------------------------------------------------------------- /app/src/main/res/layout/adapter_layout.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 16 | 17 | 24 | 25 | 31 | 32 | 42 | 43 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /app/src/main/res/layout/my_list_item.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 12 | 18 | 22 | 23 | 24 | 28 | 34 | 38 | 39 | 40 | 44 | 50 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /app/src/main/java/bruce/chang/testandroidcrawler/RadiumDetailActivity.java: -------------------------------------------------------------------------------- 1 | package bruce.chang.testandroidcrawler; 2 | 3 | import android.content.Intent; 4 | import android.os.Bundle; 5 | import android.support.annotation.Nullable; 6 | import android.support.v7.app.AppCompatActivity; 7 | import android.webkit.WebSettings; 8 | import android.webkit.WebView; 9 | import android.webkit.WebViewClient; 10 | import android.widget.TextView; 11 | 12 | import butterknife.BindView; 13 | import butterknife.ButterKnife; 14 | 15 | /** 16 | * Created by: BruceChang 17 | * Date on : 2016/12/11. 
18 | * Time on: 16:07 19 | * Progect_Name:TestAndroidCrawler 20 | * Source Github: 21 | * Description: 22 | */ 23 |
/**
 * Detail screen: shows the target address as text and renders that page in a WebView.
 *
 * <p>The address is read from the {@code "url"} string extra of the launching intent.
 * When that extra is missing or blank the activity finishes immediately instead of
 * displaying the text "null" and handing a null address to the WebView.
 */
public class RadiumDetailActivity extends AppCompatActivity {
    @BindView(R.id.wvRadiumDetail)
    WebView wvRadiumDetail;
    Intent mIntent;
    String url;
    @BindView(R.id.tvTargetUrl)
    TextView tvTargetUrl;

    @Override
    protected void onCreate(@Nullable Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_radiumdetail);
        ButterKnife.bind(this);

        mIntent = getIntent();
        url = mIntent.getStringExtra("url");
        if (url == null || url.trim().length() == 0) {
            // Nothing to show: the caller did not supply a usable address.
            finish();
            return;
        }

        tvTargetUrl.setText("页面跳转地址是:"+url);
        configureWebView();
        wvRadiumDetail.loadUrl(url);
    }

    /**
     * Applies the WebView settings used by this screen and installs a client that
     * keeps link navigation inside the WebView.
     */
    private void configureWebView() {
        WebSettings settings = wvRadiumDetail.getSettings();
        // NOTE(review): JavaScript is enabled for an address that comes from scraped
        // content; confirm this is acceptable for every page that can be opened here.
        settings.setJavaScriptEnabled(true);
        settings.setDomStorageEnabled(true);
        settings.setCacheMode(WebSettings.LOAD_NO_CACHE);
        wvRadiumDetail.setHorizontalScrollBarEnabled(false); // hide the horizontal scroll bar
        wvRadiumDetail.setVerticalScrollBarEnabled(false);   // hide the vertical scroll bar
        wvRadiumDetail.setWebViewClient(new WebViewClient() {

            @Override
            public boolean shouldOverrideUrlLoading(WebView view, String url) {
                // Returning false lets the WebView continue loading the link itself.
                // Calling view.loadUrl(url) and returning true would cancel the load
                // that is already in progress and start a second one.
                return false;
            }
        });
    }

    /**
     * Stops any running load, detaches the WebView from its parent and destroys it
     * so it does not outlive the activity.
     */
    @Override
    protected void onDestroy() {
        if (wvRadiumDetail != null) {
            wvRadiumDetail.stopLoading();
            // destroy() is meant to be called once the view has left the hierarchy.
            android.view.ViewParent parent = wvRadiumDetail.getParent();
            if (parent instanceof android.view.ViewGroup) {
                ((android.view.ViewGroup) parent).removeView(wvRadiumDetail);
            }
            wvRadiumDetail.destroy();
        }
        super.onDestroy();
    }
}
69 | -------------------------------------------------------------------------------- /app/src/main/java/bruce/chang/testandroidcrawler/MyAdapter.java: -------------------------------------------------------------------------------- 1 | package bruce.chang.testandroidcrawler; 2 | 3 | import android.content.Context; 4 | import android.view.LayoutInflater; 5 | import android.view.View; 6 | import android.view.ViewGroup; 7 | import
android.widget.BaseAdapter; 8 | import android.widget.ImageView; 9 | import android.widget.TextView; 10 | 11 | import com.squareup.picasso.Picasso; 12 | 13 | import java.util.List; 14 | 15 | import butterknife.BindView; 16 | import butterknife.ButterKnife; 17 |
/**
 * Created by: BruceChang
 * Date on : 2016/12/11.
 * Time on: 15:29
 * Project_Name:TestAndroidCrawler
 * Source Github:
 * Description: List adapter that renders every {@link RadiumBean} as one row made of
 * a logo image, a name and an address (layout {@code adapter_layout}).
 */
public class MyAdapter extends BaseAdapter {

    /** Rows to display; the adapter reads this list directly and never copies it. */
    List<RadiumBean> mRadiumBeanList;
    /** Context used for inflating rows and for image loading. */
    Context mContext;

    /**
     * @param radiumBeanList rows to display, in display order
     * @param context        context used to inflate row views and to load images
     */
    public MyAdapter(List<RadiumBean> radiumBeanList, Context context) {
        mRadiumBeanList = radiumBeanList;
        mContext = context;
    }

    @Override
    public int getCount() {
        return mRadiumBeanList.size();
    }

    @Override
    public Object getItem(int i) {
        return mRadiumBeanList.get(i);
    }

    /** Uses the row position as the item id. */
    @Override
    public long getItemId(int i) {
        return i;
    }

    /**
     * Returns the row view for position {@code i}, reusing {@code view} when the
     * list hands back a recycled row.
     */
    @Override
    public View getView(int i, View view, ViewGroup viewGroup) {
        ViewHolder viewHolder;
        if (view == null) {
            view = LayoutInflater.from(mContext).inflate(R.layout.adapter_layout, viewGroup, false);
            viewHolder = new ViewHolder(view);
            view.setTag(viewHolder);
        } else {
            viewHolder = (ViewHolder) view.getTag();
        }

        RadiumBean radiumBean = mRadiumBeanList.get(i);

        // Picasso.load(String) throws IllegalArgumentException for an empty or blank
        // path, while a null path only clears the target. Map blank to null so a row
        // without an image cannot crash the list.
        String logoUrl = radiumBean.getImg();
        if (logoUrl != null && logoUrl.trim().length() == 0) {
            logoUrl = null;
        }
        Picasso.with(mContext)
                .load(logoUrl)
                .into(viewHolder.ivRadiumLogo);

        viewHolder.tvRadiumName.setText(radiumBean.getName());
        viewHolder.tvRadiumAddress.setText(radiumBean.getAddress());
        return view;
    }

    /**
     * Caches the child views of one row. Declared static so a holder stored as a
     * view tag does not keep a hidden reference to the adapter.
     */
    static class ViewHolder {

        @BindView(R.id.ivRadiumLogo)
        ImageView ivRadiumLogo;
        @BindView(R.id.tvRadiumName)
        TextView tvRadiumName;
        @BindView(R.id.tvRadiumAddress)
        TextView tvRadiumAddress;

        public ViewHolder(View view) {
            ButterKnife.bind(this, view);
        }
    }
}
85 |
-------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 12 | set DEFAULT_JVM_OPTS= 13 | 14 | set DIRNAME=%~dp0 15 | if "%DIRNAME%" == "" set DIRNAME=. 16 | set APP_BASE_NAME=%~n0 17 | set APP_HOME=%DIRNAME% 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windowz variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | if "%@eval[2+2]" == "4" goto 4NT_args 53 | 54 | :win9xME_args 55 | @rem Slurp the command line arguments. 
56 | set CMD_LINE_ARGS= 57 | set _SKIP=2 58 | 59 | :win9xME_args_slurp 60 | if "x%~1" == "x" goto execute 61 | 62 | set CMD_LINE_ARGS=%* 63 | goto execute 64 | 65 | :4NT_args 66 | @rem Get arguments from the 4NT Shell from JP Software 67 | set CMD_LINE_ARGS=%$ 68 | 69 | :execute 70 | @rem Setup the command line 71 | 72 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 73 | 74 | @rem Execute Gradle 75 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 76 | 77 | :end 78 | @rem End local scope for the variables with windows NT shell 79 | if "%ERRORLEVEL%"=="0" goto mainEnd 80 | 81 | :fail 82 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 83 | rem the _cmd.exe /c_ return code! 84 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 85 | exit /b 1 86 | 87 | :mainEnd 88 | if "%OS%"=="Windows_NT" endlocal 89 | 90 | :omega 91 | -------------------------------------------------------------------------------- /app/src/main/res/layout/activity_main.xml: -------------------------------------------------------------------------------- 1 | 2 | 9 | 10 | 16 | 17 | 23 | 24 | 32 | 33 | 43 | 44 | 55 | 56 | 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 19 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 46 | 47 | C:\Users\hosa2015\AppData\Roaming\Subversion 48 | 49 | 50 | 51 | 52 | 53 | 1.8 54 | 55 | 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 
############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 10 | DEFAULT_JVM_OPTS="" 11 | 12 | APP_NAME="Gradle" 13 | APP_BASE_NAME=`basename "$0"` 14 | 15 | # Use the maximum available, or set MAX_FD != -1 to use that value. 16 | MAX_FD="maximum" 17 | 18 | warn ( ) { 19 | echo "$*" 20 | } 21 | 22 | die ( ) { 23 | echo 24 | echo "$*" 25 | echo 26 | exit 1 27 | } 28 | 29 | # OS specific support (must be 'true' or 'false'). 30 | cygwin=false 31 | msys=false 32 | darwin=false 33 | case "`uname`" in 34 | CYGWIN* ) 35 | cygwin=true 36 | ;; 37 | Darwin* ) 38 | darwin=true 39 | ;; 40 | MINGW* ) 41 | msys=true 42 | ;; 43 | esac 44 | 45 | # Attempt to set APP_HOME 46 | # Resolve links: $0 may be a link 47 | PRG="$0" 48 | # Need this for relative symlinks. 49 | while [ -h "$PRG" ] ; do 50 | ls=`ls -ld "$PRG"` 51 | link=`expr "$ls" : '.*-> \(.*\)$'` 52 | if expr "$link" : '/.*' > /dev/null; then 53 | PRG="$link" 54 | else 55 | PRG=`dirname "$PRG"`"/$link" 56 | fi 57 | done 58 | SAVED="`pwd`" 59 | cd "`dirname \"$PRG\"`/" >/dev/null 60 | APP_HOME="`pwd -P`" 61 | cd "$SAVED" >/dev/null 62 | 63 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 64 | 65 | # Determine the Java command to use to start the JVM. 66 | if [ -n "$JAVA_HOME" ] ; then 67 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 68 | # IBM's JDK on AIX uses strange locations for the executables 69 | JAVACMD="$JAVA_HOME/jre/sh/java" 70 | else 71 | JAVACMD="$JAVA_HOME/bin/java" 72 | fi 73 | if [ ! -x "$JAVACMD" ] ; then 74 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 75 | 76 | Please set the JAVA_HOME variable in your environment to match the 77 | location of your Java installation." 
78 | fi 79 | else 80 | JAVACMD="java" 81 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 82 | 83 | Please set the JAVA_HOME variable in your environment to match the 84 | location of your Java installation." 85 | fi 86 | 87 | # Increase the maximum file descriptors if we can. 88 | if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then 89 | MAX_FD_LIMIT=`ulimit -H -n` 90 | if [ $? -eq 0 ] ; then 91 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 92 | MAX_FD="$MAX_FD_LIMIT" 93 | fi 94 | ulimit -n $MAX_FD 95 | if [ $? -ne 0 ] ; then 96 | warn "Could not set maximum file descriptor limit: $MAX_FD" 97 | fi 98 | else 99 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 100 | fi 101 | fi 102 | 103 | # For Darwin, add options to specify how the application appears in the dock 104 | if $darwin; then 105 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 106 | fi 107 | 108 | # For Cygwin, switch paths to Windows format before running java 109 | if $cygwin ; then 110 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 111 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 112 | JAVACMD=`cygpath --unix "$JAVACMD"` 113 | 114 | # We build the pattern for arguments to be converted via cygpath 115 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 116 | SEP="" 117 | for dir in $ROOTDIRSRAW ; do 118 | ROOTDIRS="$ROOTDIRS$SEP$dir" 119 | SEP="|" 120 | done 121 | OURCYGPATTERN="(^($ROOTDIRS))" 122 | # Add a user-defined pattern to the cygpath arguments 123 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 124 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 125 | fi 126 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 127 | i=0 128 | for arg in "$@" ; do 129 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 130 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 131 | 132 | if [ $CHECK -ne 0 ] 
&& [ $CHECK2 -eq 0 ] ; then ### Added a condition 133 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 134 | else 135 | eval `echo args$i`="\"$arg\"" 136 | fi 137 | i=$((i+1)) 138 | done 139 | case $i in 140 | (0) set -- ;; 141 | (1) set -- "$args0" ;; 142 | (2) set -- "$args0" "$args1" ;; 143 | (3) set -- "$args0" "$args1" "$args2" ;; 144 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 145 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 146 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 147 | (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; 148 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 149 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 150 | esac 151 | fi 152 | 153 | # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules 154 | function splitJvmOpts() { 155 | JVM_OPTS=("$@") 156 | } 157 | eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS 158 | JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME" 159 | 160 | exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@" 161 | -------------------------------------------------------------------------------- /app/src/main/java/bruce/chang/testandroidcrawler/MainActivity.java: -------------------------------------------------------------------------------- 1 | package bruce.chang.testandroidcrawler; 2 | 3 | import android.app.Activity; 4 | import android.app.AlertDialog; 5 | import android.app.ProgressDialog; 6 | import android.content.Context; 7 | import android.content.DialogInterface; 8 | import android.content.Intent; 9 | import android.net.ConnectivityManager; 10 | import android.net.NetworkInfo; 11 | import android.os.Bundle; 12 | import android.os.Handler; 13 | import android.os.Message; 14 | import 
android.support.v7.app.AppCompatActivity; 15 | import android.view.View; 16 | import android.widget.AdapterView; 17 | import android.widget.ListView; 18 | import android.widget.TextView; 19 | import android.widget.Toast; 20 | 21 | import org.jsoup.Connection; 22 | import org.jsoup.Jsoup; 23 | import org.jsoup.nodes.Document; 24 | import org.jsoup.nodes.Element; 25 | import org.jsoup.select.Elements; 26 | 27 | import java.io.IOException; 28 | import java.util.ArrayList; 29 | import java.util.HashMap; 30 | import java.util.List; 31 | import java.util.Map; 32 | 33 | import butterknife.BindView; 34 | import butterknife.ButterKnife; 35 | 36 | public class MainActivity extends AppCompatActivity implements View.OnClickListener { 37 | 38 | @BindView(R.id.info_list_view) 39 | ListView info_list_view; 40 | private List list = new ArrayList<>(); 41 | private ProgressDialog dialog; 42 | private String url = "http://www.dianping.com/search/category/2/45"; 43 | private String userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"; 44 | @BindView(R.id.tvLastPage) 45 | TextView tvLastPage; 46 | 47 | @BindView(R.id.tvCurrentPage) 48 | TextView tvCurrentPage; 49 | 50 | @BindView(R.id.tvNextPage) 51 | TextView tvNextPage; 52 | 53 | List> mMapList = new ArrayList<>(); 54 | private String curPage; 55 | boolean firstLoad = true; 56 | 57 | 58 | @Override 59 | protected void onCreate(Bundle savedInstanceState) { 60 | super.onCreate(savedInstanceState); 61 | setContentView(R.layout.activity_main); 62 | ButterKnife.bind(this); 63 | switchOver(); 64 | info_list_view.setOnItemClickListener(new AdapterView.OnItemClickListener() { 65 | @Override 66 | public void onItemClick(AdapterView adapterView, View view, int i, long l) { 67 | Intent intent = new Intent(MainActivity.this, RadiumDetailActivity.class); 68 | intent.putExtra("url", list.get(i).getTargetUrl()); 69 | startActivity(intent); 70 | } 71 | }); 72 | 
tvLastPage.setOnClickListener(this); 73 | tvCurrentPage.setOnClickListener(this); 74 | tvNextPage.setOnClickListener(this); 75 | } 76 | 77 | Runnable runnable = new Runnable() { 78 | @Override 79 | public void run() { 80 | Connection conn = Jsoup.connect(url); 81 | // 修改http包中的header,伪装成浏览器进行抓取 82 | conn.header("User-Agent", userAgent); 83 | Document doc = null; 84 | try { 85 | doc = conn.get(); 86 | } catch (IOException e) { 87 | e.printStackTrace(); 88 | } 89 | 90 | // 获取页数的链接 91 | if (firstLoad) { 92 | Elements elementsPages = doc.getElementsByClass("content-wrap"); 93 | Elements elementsPageA = elementsPages.first().getElementsByClass("shop-wrap").first().child(1).getElementsByTag("a"); 94 | for (int i = 0; i < elementsPageA.size() - 2; i++) { 95 | Element element = elementsPageA.get(i); 96 | Element element1 = element.getElementsByClass("cur").first(); 97 | Map map = new HashMap<>(); 98 | if (element1 != null) { 99 | curPage = element1.text(); 100 | map.put("page", "" + (i + 1)); 101 | map.put("url", url); 102 | mMapList.add(map); 103 | } else { 104 | map.put("page", "" + (i + 1)); 105 | map.put("url", element.attr("href")); 106 | mMapList.add(map); 107 | } 108 | 109 | } 110 | } 111 | firstLoad = false; 112 | //获取场馆的数据 113 | Element elementDiv = doc.getElementById("shop-all-list"); 114 | Elements elementsUl = elementDiv.getElementsByTag("ul"); 115 | Elements elements = elementsUl.first().getElementsByTag("li"); 116 | for (Element element : elements) { 117 | Elements elements1 = element.children(); 118 | String targetUrl = elements1.get(0).getElementsByTag("a").attr("href"); 119 | 120 | String img = elements1.get(0).getElementsByTag("img").first().attr("data-src"); 121 | if (img.contains(".jpg")) { 122 | int a = img.indexOf(".jpg"); 123 | img = img.substring(0, a + 4); 124 | } 125 | 126 | String radiumName = elements1.get(1).child(0).getElementsByTag("h4").text(); 127 | String address0 = elements1.get(1).child(2).getElementsByTag("a").get(1).text(); 128 | 129 | 
String address1 = elements1.get(1).child(2).getElementsByClass("addr").text(); 130 | // StringBuilder stringBuilder = new StringBuilder(); 131 | // 132 | // if (elements1.get(2).child(0).children().size()>0){ 133 | // String youhui = ""; 134 | // if (!"".equals(elements1.get(2).child(0).child(0).getElementsByClass("more").text())){ 135 | // youhui = elements1.get(2).child(0).getElementsByTag("a").get(1).attr("title"); 136 | // }else { 137 | // youhui = elements1.get(2).child(0).getElementsByTag("a").get(1).attr("title"); 138 | // 139 | // } 140 | // 141 | // stringBuilder.append(youhui+"+++"); 142 | // } 143 | RadiumBean radiumBean = new RadiumBean(); 144 | radiumBean.setTargetUrl("http://www.dianping.com" + targetUrl); 145 | radiumBean.setImg(img); 146 | radiumBean.setName(radiumName); 147 | radiumBean.setAddress(address0 + " " + address1); 148 | list.add(radiumBean); 149 | } 150 | // 执行完毕后给handler发送一个空消息 151 | Message message = new Message(); 152 | message.arg1 = Integer.parseInt(curPage); 153 | handler.sendMessage(message); 154 | } 155 | }; 156 | 157 | 158 | Handler handler = new Handler() { 159 | @Override 160 | public void handleMessage(Message msg) { 161 | super.handleMessage(msg); 162 | // 收到消息后执行handler 163 | show(); 164 | tvCurrentPage.setText("" + msg.arg1); 165 | } 166 | }; 167 | 168 | // 将数据填充到ListView中 169 | private void show() { 170 | if (!list.isEmpty()) { 171 | MyAdapter adapter = new MyAdapter(list, MainActivity.this); 172 | info_list_view.setAdapter(adapter); 173 | } 174 | dialog.dismiss(); 175 | } 176 | 177 | // 重新抓取 178 | public void switchOver() { 179 | if (isNetworkAvailable(MainActivity.this)) { 180 | // 显示“正在加载”窗口 181 | dialog = new ProgressDialog(this); 182 | dialog.setMessage("正在抓取数据..."); 183 | dialog.setCancelable(false); 184 | dialog.show(); 185 | 186 | list.clear(); 187 | new Thread(runnable).start(); // 子线程 188 | 189 | } else { 190 | // 弹出提示框 191 | new AlertDialog.Builder(this) 192 | .setTitle("提示") 193 | .setMessage("当前没有网络连接!") 194 
| .setPositiveButton("重试", new DialogInterface.OnClickListener() { 195 | @Override 196 | public void onClick(DialogInterface dialog, int which) { 197 | switchOver(); 198 | } 199 | }).setNegativeButton("退出", new DialogInterface.OnClickListener() { 200 | @Override 201 | public void onClick(DialogInterface dialog, int which) { 202 | System.exit(0); // 退出程序 203 | } 204 | }).show(); 205 | } 206 | } 207 | 208 | // 判断是否有可用的网络连接 209 | public boolean isNetworkAvailable(Activity activity) { 210 | Context context = activity.getApplicationContext(); 211 | ConnectivityManager cm = (ConnectivityManager) context.getSystemService(Context.CONNECTIVITY_SERVICE); 212 | if (cm == null) 213 | return false; 214 | else { // 获取所有NetworkInfo对象 215 | NetworkInfo[] networkInfo = cm.getAllNetworkInfo(); 216 | if (networkInfo != null && networkInfo.length > 0) { 217 | for (int i = 0; i < networkInfo.length; i++) 218 | if (networkInfo[i].getState() == NetworkInfo.State.CONNECTED) 219 | return true; // 存在可用的网络连接 220 | } 221 | } 222 | return false; 223 | } 224 | 225 | @Override 226 | public void onClick(View view) { 227 | switch (view.getId()) { 228 | case R.id.tvLastPage: 229 | if (curPage.equals("1")) { 230 | Toast.makeText(this, "首页", Toast.LENGTH_SHORT).show(); 231 | } else { 232 | curPage = "" + (Integer.parseInt(curPage) - 1); 233 | 234 | if (curPage.equals(1)) { 235 | url = "http://www.dianping.com/search/category/2/45"; 236 | } else { 237 | 238 | url = "http://www.dianping.com" + mMapList.get(Integer.parseInt(curPage) - 1).get("url").toString(); 239 | } 240 | switchOver(); 241 | tvCurrentPage.setText(curPage); 242 | } 243 | 244 | break; 245 | case R.id.tvCurrentPage: 246 | switchOver(); 247 | break; 248 | case R.id.tvNextPage: 249 | 250 | if (curPage.equals("" + (mMapList.size()))) { 251 | Toast.makeText(this, "末页", Toast.LENGTH_SHORT).show(); 252 | } else { 253 | curPage = "" + (Integer.parseInt(curPage) + 1); 254 | url = "http://www.dianping.com" + mMapList.get(Integer.parseInt(curPage) 
- 1).get("url").toString(); 255 | switchOver(); 256 | tvCurrentPage.setText(curPage); 257 | } 258 | break; 259 | } 260 | } 261 | } 262 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TestAndroidCrawler 2 | 网络爬虫技术Jsoup的使用,来获取大众点评 健身场馆的有关数据 3 | 4 | > 本文由我的微信公众号(bruce常)原创首发, 5 | 并同步发表到csdn博客,欢迎转载,2016年12月11日。 6 | 7 | ### 概述: 8 | 9 | 本周五,接到一个任务,要使用爬虫技术来获取某点评网站里面关于健身场馆的数据,之前从未接触过爬虫技术,于是就从网上搜了一点学习资料,本篇文章就记录爬虫技术Jsoup技术,爬虫技术听名称很牛叉,其实没什么难点,慢慢的用心学习就会了。 10 | ### Jsoup介绍: 11 | Jsoup 是一个 Java 的开源HTML解析器,可直接解析某个URL地址、HTML文本内容,Jsoup官网jar包[下载地址](https://jsoup.org/download)。 12 | 13 | Jsoup主要有以下功能: 14 | 1. 从一个URL,文件或字符串中解析HTML 15 | 2. 使用DOM或CSS选择器来查找、取出数据 16 | 3. 对HTML元素、属性、文本进行操作 17 | 4. 清除不受信任的HTML (来防止XSS攻击) 18 | 19 | 20 | ### 使用Jsoup爬虫技术你需要的能力有: 21 | 1. 我们是用安卓开发的,首先肯定要有一定的安卓开发能力,会写简单的页面。 22 | 2. Jsoup中用到了Javascript语言,没有此语言能力在获取数据的时候就比较吃力,这是此爬虫技术的重中之重。 23 | 3. 查阅文档与解决问题的能力和技巧(有点废话) 24 | 25 | 上面三条中对于一个安卓开发者来说,最难的就是熟练使用Javascript语言,小编就遇到了这个问题,小编还有一定的javascript基础,系统的学习过此语言,但是在使用中还是很吃力的,问同学、问朋友、问同事,最后还是靠自己来获取自己想要的数据。 26 | 27 | ### 爬虫技术没那么难,思路就是这么的简单 28 | 29 | 1. 得到自己想要爬取数据的url. 30 | 2. 通过Jsoup的jar包中的方法将Html解析成Document, 31 | 3. 使用Document中的一些列get、first、children等方法获取自己想要的数据,如图片地址、名称、时间。 32 | 4. 将得到的数据封装成自己的实体类。 33 | 5. 将实体中的数据在页面加载出来。 34 | 35 | ### 实战,获取**点评网站中的场馆数据: 36 | 37 | ###### 先奉上效果图,没有图不说话: 38 | ![image](http://a4.qpic.cn/psb?/V10Llwbb1wSOar/COp7gE.LCkWeebjFo0FHzIvXONixFaqsOQsBICE2WVE!/b/dGcBAAAAAAAA&ek=1&kp=1&pt=0&bo=TgFeAk4BXgICCCw!&tm=1481464800&sce=0-12-12&rf=0-18) 39 | 40 | 这就是今天要实现的效果,左边图片是场馆的logo,右边上方是场馆的名称,下边是场馆的地址信息,点击进去可以根据超链接地址跳转新的页面,页面的Url地址小编已经拿到,但可能是因为重定向的问题,webview没有加载出来,有兴趣的可以输入链接地址来验证。 41 | 42 | #### 首先:新建一个空的项目. 
43 | 44 | 上面的效果,只要接触过安卓开发的都能写出来,所以不是本篇文章的重点,这里就不过多说明,大家可以使用ListView或者RecyclerView来实现,我这里用ListView。 45 | 46 | 小编这里是为了加入侧边栏所以使用的是DrawerLayout,但后来没有用到,所以也就没有侧边栏的效果,不过后期如有时间会加上去的,上一页下一页是为了简单的模仿浏览器中的操作,此效果只能显示前9页数据,网页链接中有50页的数据,为什么没有实现呢? 47 | 48 | 很简单,因为50页的链接地址不是一次性返回的,小编为了方便,只获取了前9页数据的url,毕竟是为了抓取数据显示而已。 49 | 50 | #### 其次:主程序设计 51 | 52 | 1. 通过网页得到**点评健身场馆的url地址是:http://www.dianping.com/search/category/2/45 53 | 2. 抓取数据是一个耗时的操作,需要在一个线程中完成,这里使用 new Thread(runnable).start()方式,在runnable代码中获取场馆的logo、名称、地址如下: 54 | ``` 55 | Runnable runnable = new Runnable() { 56 | @Override 57 | public void run() { 58 | Connection conn = Jsoup.connect(url); 59 | // 修改http包中的header,伪装成浏览器进行抓取 60 | conn.header("User-Agent", userAgent); 61 | Document doc = null; 62 | try { 63 | doc = conn.get(); 64 | } catch (IOException e) { 65 | e.printStackTrace(); 66 | } 67 | //获取场馆的数据 68 | Element elementDiv = doc.getElementById("shop-all-list"); 69 | Elements elementsUl = elementDiv.getElementsByTag("ul"); 70 | Elements elements = elementsUl.first().getElementsByTag("li"); 71 | for (Element element : elements) { 72 | Elements elements1 = element.children(); 73 | String targetUrl = elements1.get(0).getElementsByTag("a").attr("href"); 74 | 75 | String img = elements1.get(0).getElementsByTag("img").first().attr("data-src"); 76 | if (img.contains(".jpg")) { 77 | int a = img.indexOf(".jpg"); 78 | img = img.substring(0, a + 4); 79 | } 80 | 81 | String radiumName = elements1.get(1).child(0).getElementsByTag("h4").text(); 82 | String address0 = elements1.get(1).child(2).getElementsByTag("a").get(1).text(); 83 | 84 | String address1 = elements1.get(1).child(2).getElementsByClass("addr").text(); 85 | 86 | RadiumBean radiumBean = new RadiumBean(); 87 | radiumBean.setImg(img); 88 | radiumBean.setName(radiumName); 89 | radiumBean.setAddress(address0 + " " + address1); 90 | list.add(radiumBean); 91 | } 92 | // 执行完毕后给handler发送一个空消息 93 | Message message = new Message(); 94 | message.arg1 = Integer.parseInt(curPage); 95 | 
handler.sendMessage(message); 96 | 97 | } 98 | }; 99 | ``` 100 | 101 | >1. 通过Jsoup.connect()方法,根据目标地址url来得到Connection对象, 102 | >2. 将我们的app伪装成浏览器,防止人家后台发现我们在爬取人家的数据,这需要修改http包中的header,来设置User-Agent,此值可以在谷歌浏览器中输入“about:version”来查看,也可以访问此[地址查看](http://www.966266.com/jishu/user-agent-chaxun.html)。 103 | >3. 通过Connection对象的get()方法来获得整个页面源代码所在的Document 104 | >4. 通过分析源代码,使用Document的对象来得到我们想要的数据,上面程序中img代表场馆logo的url,radiumName是小编得到的场馆的名称,address0和address1是小编得到的场馆地址的信息,这里通过组合来使用。 105 | >5. 构造我们ListView所用到的数据 106 | >6. 通过Handler来更新页面信息,curPage(当前页)稍后说明。 107 | 108 | 3. 在得到数据后页面加载显示 109 | ``` 110 | if (!list.isEmpty()) { 111 | MyAdapter adapter = new MyAdapter(list, MainActivity.this); 112 | info_list_view.setAdapter(adapter); 113 | } 114 | ``` 115 | 116 | 4.点击跳转到场馆的详情页,这里本想用Webview加载的,但是可能是网页重定向的问题,webview也能加载出来,但一会就显示无法连接网络,所以场馆详情页就显示出了我们得到的场馆详情页的url。 117 | 118 | 基本的抓取数据、加载数据流程就是这样的,但是仅仅靠上面的数据还是不能完全实现我们的效果的。 119 | 120 | ### 完善页面,实现上下页翻页功能。 121 | 122 | 1. 页面在爬取数据的时候显示一个ProgressDialog来提示用户。 123 | 124 | ``` 125 | ProgressDialog dialog = new ProgressDialog(this); 126 | dialog.setMessage("正在抓取数据..."); 127 | dialog.setCancelable(false); 128 | dialog.show(); 129 | ``` 130 | 数据加载完毕,关闭此dialog。 131 | 132 | ``` 133 | dialog.dismiss(); 134 | ``` 135 | 2.ProgressDialog加载前做是否有网络的判断,有网的时候才显示ProgressDialog,无网络的时候给出提示。 136 | 137 | ``` 138 | public boolean isNetworkAvailable(Activity activity) { 139 | Context context = activity.getApplicationContext(); 140 | ConnectivityManager cm = (ConnectivityManager) context.getSystemService(Context.CONNECTIVITY_SERVICE); 141 | if (cm == null) 142 | return false; 143 | else { // 获取所有NetworkInfo对象 144 | NetworkInfo[] networkInfo = cm.getAllNetworkInfo(); 145 | if (networkInfo != null && networkInfo.length > 0) { 146 | for (int i = 0; i < networkInfo.length; i++) 147 | if (networkInfo[i].getState() == NetworkInfo.State.CONNECTED) 148 | return true; // 存在可用的网络连接 149 | } 150 | } 151 | return false; 152 | } 153 | ``` 154 | 155 | 
3.完善runnable,抓取当前页码、上一页、下一页的链接地址。 156 | 157 | ``` 158 | // 获取页数的链接 159 | if (firstLoad) { 160 | Elements elementsPages = doc.getElementsByClass("content-wrap"); 161 | Elements elementsPageA = elementsPages.first().getElementsByClass("shop-wrap").first().child(1).getElementsByTag("a"); 162 | for (int i = 0; i < elementsPageA.size() - 2; i++) { 163 | Element element = elementsPageA.get(i); 164 | Element element1 = element.getElementsByClass("cur").first(); 165 | Map map = new HashMap<>(); 166 | if (element1 != null) { 167 | curPage = element1.text(); 168 | map.put("page", "" + (i + 1)); 169 | map.put("url", url); 170 | mMapList.add(map); 171 | } else { 172 | map.put("page", "" + (i + 1)); 173 | map.put("url", element.attr("href")); 174 | mMapList.add(map); 175 | } 176 | 177 | } 178 | } 179 | firstLoad = false; 180 | ``` 181 | 182 | 因为在网页中,第一次进入返回了前9页和第50页的数据,这里只取前9页的数据,firstLoad代表第一次加载,mMapList用来存放页码和页面跳转时候的url,对js中的代码不明白的朋友们,要好好学学js,这里小编就不介绍js了,至于我为什么知道取这些字段,那是小编盯着网页源程序代码看了半天看出来的。 183 | 184 | 4. 这个时候就用到了之前runnable中的Message对象中的curPage 185 | 186 | curPage代表当前页码,从1开始………………在handle接收到消息后显示此页码信息。 187 | 188 | ``` 189 | tvCurrentPage.setText("" + msg.arg1); 190 | ``` 191 | 192 | 5. 
模仿网页的上一页下一页,我们需要处理TextView的点击事件。 193 | 194 | 下一页事件: 195 | ``` 196 | if (curPage.equals("" + (mMapList.size()))) { 197 | Toast.makeText(this, "末页", Toast.LENGTH_SHORT).show(); 198 | } else { 199 | curPage = "" + (Integer.parseInt(curPage) + 1); 200 | url = "http://www.dianping.com" + mMapList.get(Integer.parseInt(curPage) - 1).get("url").toString(); 201 | switchOver(); 202 | tvCurrentPage.setText(curPage); 203 | } 204 | ``` 205 | 206 | 上一页事件: 207 | 208 | ``` 209 | if (curPage.equals("1")) { 210 | Toast.makeText(this, "首页", Toast.LENGTH_SHORT).show(); 211 | } else { 212 | curPage = "" + (Integer.parseInt(curPage) - 1); 213 | 214 | if (curPage.equals(1)) { 215 | url = "http://www.dianping.com/search/category/2/45"; 216 | } else { 217 | 218 | url = "http://www.dianping.com" + mMapList.get(Integer.parseInt(curPage) - 1).get("url").toString(); 219 | } 220 | switchOver(); 221 | tvCurrentPage.setText(curPage); 222 | } 223 | ``` 224 | 225 | 经过小编测试,在点击下一页的时候没有bug,在点击上一页的时候,会出现doc为null,从而崩溃的bug,小编在努力解决中,但还没解决掉。(原因:`curPage.equals(1)` 是拿 String 与 Integer 比较,结果恒为 false,回到第1页时走了 else 分支,而第1页保存的已是完整地址,再拼接域名后得到无效的url,请求失败,doc 即为 null;改为 `curPage.equals("1")` 即可。) 226 | 227 | 6. 
附上完整的runnable代码,毕竟这是此程序的关键部分。 228 | 229 | ``` 230 | Runnable runnable = new Runnable() { 231 | @Override 232 | public void run() { 233 | Connection conn = Jsoup.connect(url); 234 | // 修改http包中的header,伪装成浏览器进行抓取 235 | conn.header("User-Agent", userAgent); 236 | Document doc = null; 237 | try { 238 | doc = conn.get(); 239 | } catch (IOException e) { 240 | e.printStackTrace(); 241 | } 242 | 243 | // 获取页数的链接 244 | if (firstLoad) { 245 | Elements elementsPages = doc.getElementsByClass("content-wrap"); 246 | Elements elementsPageA = elementsPages.first().getElementsByClass("shop-wrap").first().child(1).getElementsByTag("a"); 247 | for (int i = 0; i < elementsPageA.size() - 2; i++) { 248 | Element element = elementsPageA.get(i); 249 | Element element1 = element.getElementsByClass("cur").first(); 250 | Map map = new HashMap<>(); 251 | if (element1 != null) { 252 | curPage = element1.text(); 253 | map.put("page", "" + (i + 1)); 254 | map.put("url", url); 255 | mMapList.add(map); 256 | } else { 257 | map.put("page", "" + (i + 1)); 258 | map.put("url", element.attr("href")); 259 | mMapList.add(map); 260 | } 261 | 262 | } 263 | } 264 | firstLoad = false; 265 | //获取场馆的数据 266 | Element elementDiv = doc.getElementById("shop-all-list"); 267 | Elements elementsUl = elementDiv.getElementsByTag("ul"); 268 | Elements elements = elementsUl.first().getElementsByTag("li"); 269 | for (Element element : elements) { 270 | Elements elements1 = element.children(); 271 | String targetUrl = elements1.get(0).getElementsByTag("a").attr("href"); 272 | 273 | String img = elements1.get(0).getElementsByTag("img").first().attr("data-src"); 274 | if (img.contains(".jpg")) { 275 | int a = img.indexOf(".jpg"); 276 | img = img.substring(0, a + 4); 277 | } 278 | 279 | String radiumName = elements1.get(1).child(0).getElementsByTag("h4").text(); 280 | String address0 = elements1.get(1).child(2).getElementsByTag("a").get(1).text(); 281 | 282 | String address1 = 
elements1.get(1).child(2).getElementsByClass("addr").text(); 283 | // StringBuilder stringBuilder = new StringBuilder(); 284 | // 285 | // if (elements1.get(2).child(0).children().size()>0){ 286 | // String youhui = ""; 287 | // if (!"".equals(elements1.get(2).child(0).child(0).getElementsByClass("more").text())){ 288 | // youhui = elements1.get(2).child(0).getElementsByTag("a").get(1).attr("title"); 289 | // }else { 290 | // youhui = elements1.get(2).child(0).getElementsByTag("a").get(1).attr("title"); 291 | // 292 | // } 293 | // 294 | // stringBuilder.append(youhui+"+++"); 295 | // } 296 | RadiumBean radiumBean = new RadiumBean(); 297 | radiumBean.setTargetUrl("http://www.dianping.com" + targetUrl); 298 | radiumBean.setImg(img); 299 | radiumBean.setName(radiumName); 300 | radiumBean.setAddress(address0 + " " + address1); 301 | list.add(radiumBean); 302 | } 303 | // 执行完毕后给handler发送一个空消息 304 | Message message = new Message(); 305 | message.arg1 = Integer.parseInt(curPage); 306 | handler.sendMessage(message); 307 | } 308 | }; 309 | ``` 310 | 有不明白的可以对照完整的runnable代码来理解。 311 | 312 | ### 通过上面的步骤,我们已经完成了抓取、加载、上下页切换的效果。但请看下面。 313 | 通过小编的切身体验,发现jsoup爬虫获取数据时候的几个需要注意的地方。 314 | 1. 个人要会js,再强调一遍,不会js,上面我写的js的程序应该会非常的迷糊,即便会的人,因为每个人写的也不一样,也是不好看懂的。 315 | 2. 我们在爬取数据的时候所用的class id 等字段一旦发生变化,那就得不到相应的标签了,页面就会发生崩溃,这一点也是致命的一点吧。 316 | 3. 要想非常逼真的实现网页中的效果,那你就要好好的看看网页的源代码了,网页代码有很大的灵活性,需要你仔细分析记录规律。 317 | 318 | 319 | ### 测试程序已经上传到了github,有需要的可以下载源程序。 320 | 321 | 322 | 下载地址:[点我点我点我](https://github.com/201216323/TestAndroidCrawler) 323 | 324 | 微信公众号,欢迎添加关注,不定时为大家分享个人开发中的技术文章。 325 | 326 | QQ邮箱:1060140613@qq.com,如有疑问可以QQ或者邮件联系我。 327 | 328 | ![image](http://a3.qpic.cn/psb?/V10Llwbb1wSOar/f.mXHYCxXqBA9FHOkSaPIaaFK8bXsj11Qd190qFsdpw!/b/dGYBAAAAAAAA&ek=1&kp=1&pt=0&bo=AgECAQIBAgEFACM!&tm=1481468400&sce=0-12-12&rf=viewer_311) 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | --------------------------------------------------------------------------------