├── README.md ├── .mvn └── wrapper │ ├── maven-wrapper.jar │ └── maven-wrapper.properties ├── src ├── main │ ├── resources │ │ └── application.yml │ └── java │ │ └── com │ │ └── ezio │ │ ├── repository │ │ ├── MusicRepository.java │ │ └── CommentRepository.java │ │ ├── NetEaseMusicApplication.java │ │ ├── service │ │ └── MusicService.java │ │ ├── pipeline │ │ └── NetEaseMusicPipeline.java │ │ ├── entity │ │ ├── Music.java │ │ └── Comment.java │ │ ├── utils │ │ └── NetEaseMusicUtils.java │ │ └── processor │ │ └── NetEaseMusicPageProcessor.java └── test │ └── java │ └── com │ └── ezio │ └── NetEaseMusicApplicationTests.java ├── .gitignore ├── package.json ├── pom.xml ├── mvnw.cmd └── mvnw /README.md: -------------------------------------------------------------------------------- 1 | # neteasemusic 2 | webmagic 爬取我喜欢的网易云歌单+评论 3 | -------------------------------------------------------------------------------- /.mvn/wrapper/maven-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EzioL/neteasemusic/HEAD/.mvn/wrapper/maven-wrapper.jar -------------------------------------------------------------------------------- /.mvn/wrapper/maven-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionUrl=https://repo1.maven.org/maven2/org/apache/maven/apache-maven/3.5.0/apache-maven-3.5.0-bin.zip 2 | -------------------------------------------------------------------------------- /src/main/resources/application.yml: -------------------------------------------------------------------------------- 1 | server: 2 | port: 8888 3 | context-path: / 4 | spring: 5 | datasource: 6 | url: jdbc:mysql://localhost:3306/music?useUnicode=true&characterEncoding=utf8 7 | username: root 8 | password: root 9 | jpa: 10 | database: mysql 11 | show-sql: true 12 | hibernate: 13 | ddl-auto: update 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | 12 | ### IntelliJ IDEA ### 13 | .idea 14 | *.iws 15 | *.iml 16 | *.ipr 17 | 18 | ### NetBeans ### 19 | nbproject/private/ 20 | build/ 21 | nbbuild/ 22 | dist/ 23 | nbdist/ 24 | .nb-gradle/ -------------------------------------------------------------------------------- /src/main/java/com/ezio/repository/MusicRepository.java: -------------------------------------------------------------------------------- 1 | package com.ezio.repository; 2 | 3 | import com.ezio.entity.Music; 4 | 5 | import org.springframework.data.jpa.repository.JpaRepository; 6 | 7 | /** 8 | * Created by Ezio on 2017/6/28. 9 | */ 10 | public interface MusicRepository extends JpaRepository { 11 | int countBySongId(String songId); 12 | 13 | } 14 | -------------------------------------------------------------------------------- /src/main/java/com/ezio/repository/CommentRepository.java: -------------------------------------------------------------------------------- 1 | package com.ezio.repository; 2 | 3 | import com.ezio.entity.Comment; 4 | 5 | import org.springframework.data.jpa.repository.JpaRepository; 6 | 7 | /** 8 | * Created by Ezio on 2017/6/28. 
9 | */ 10 | 11 | public interface CommentRepository extends JpaRepository { 12 | int countByCommentId(int commentId); 13 | } 14 | -------------------------------------------------------------------------------- /src/test/java/com/ezio/NetEaseMusicApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.ezio; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class NetEaseMusicApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "neteasemusic", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "git+https://github.com/EzioL/neteasemusic.git" 12 | }, 13 | "author": "", 14 | "license": "ISC", 15 | "bugs": { 16 | "url": "https://github.com/EzioL/neteasemusic/issues" 17 | }, 18 | "homepage": "https://github.com/EzioL/neteasemusic#readme", 19 | "dependencies": { 20 | "element-ui": "^1.4.4" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/com/ezio/NetEaseMusicApplication.java: -------------------------------------------------------------------------------- 1 | package com.ezio; 2 | 3 | import com.ezio.pipeline.NetEaseMusicPipeline; 4 | import com.ezio.processor.NetEaseMusicPageProcessor; 5 | 6 | import org.springframework.beans.factory.annotation.Autowired; 7 | import org.springframework.boot.SpringApplication; 8 | import 
org.springframework.boot.autoconfigure.SpringBootApplication; 9 | import org.springframework.web.bind.annotation.GetMapping; 10 | import org.springframework.web.bind.annotation.RestController; 11 | 12 | @RestController 13 | @SpringBootApplication 14 | public class NetEaseMusicApplication { 15 | 16 | @Autowired 17 | NetEaseMusicPageProcessor mProcessor; 18 | @Autowired 19 | NetEaseMusicPipeline mPipeline; 20 | 21 | @GetMapping("/") 22 | public String index() { 23 | new Thread(() -> mProcessor.start(mProcessor, mPipeline)).start(); 24 | 25 | return "爬虫开启"; 26 | } 27 | 28 | public static void main(String[] args) { 29 | SpringApplication.run(NetEaseMusicApplication.class, args); 30 | 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/com/ezio/service/MusicService.java: -------------------------------------------------------------------------------- 1 | package com.ezio.service; 2 | 3 | import com.ezio.entity.Comment; 4 | import com.ezio.entity.Music; 5 | import com.ezio.repository.CommentRepository; 6 | import com.ezio.repository.MusicRepository; 7 | 8 | import org.springframework.beans.factory.annotation.Autowired; 9 | import org.springframework.stereotype.Service; 10 | 11 | import java.util.List; 12 | 13 | /** 14 | * Created by Ezio on 2017/6/28. 
15 | */ 16 | @Service 17 | public class MusicService { 18 | @Autowired 19 | private MusicRepository mMusicRepository; 20 | @Autowired 21 | private CommentRepository mCommentRepository; 22 | 23 | public void addMusic(Music music) { 24 | //判断数据是否存在 25 | if (mMusicRepository.countBySongId(music.getSongId()) == 0) { 26 | mMusicRepository.save(music); 27 | } 28 | 29 | } 30 | 31 | public void addComment(Comment comment) { 32 | //判断数据是否存在 33 | if (mCommentRepository.countByCommentId(comment.getCommentId()) == 0) { 34 | mCommentRepository.save(comment); 35 | } 36 | } 37 | 38 | 39 | 40 | public void addComments(List comments) { 41 | mCommentRepository.save(comments); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/com/ezio/pipeline/NetEaseMusicPipeline.java: -------------------------------------------------------------------------------- 1 | package com.ezio.pipeline; 2 | 3 | import com.ezio.entity.Comment; 4 | import com.ezio.entity.Music; 5 | import com.ezio.repository.CommentRepository; 6 | import com.ezio.repository.MusicRepository; 7 | 8 | import org.springframework.beans.factory.annotation.Autowired; 9 | import org.springframework.stereotype.Component; 10 | 11 | import java.util.Map; 12 | 13 | import us.codecraft.webmagic.ResultItems; 14 | import us.codecraft.webmagic.Task; 15 | import us.codecraft.webmagic.pipeline.Pipeline; 16 | 17 | /** 18 | * Created by Ezio on 2017/6/28. 
19 | */ 20 | @Component 21 | public class NetEaseMusicPipeline implements Pipeline { 22 | 23 | @Autowired 24 | public MusicRepository mMusicDao; 25 | 26 | @Autowired 27 | public CommentRepository mCommentDao; 28 | 29 | @Override 30 | public void process(ResultItems resultItems, Task task) { 31 | 32 | for (Map.Entry entry : resultItems.getAll().entrySet()) { 33 | if (entry.getKey().equals("music")) { 34 | Music music = (Music) entry.getValue(); 35 | System.out.println("mMusicDao--->null" + mMusicDao == null); 36 | if (mMusicDao.countBySongId(music.getSongId()) == 0) { 37 | mMusicDao.save(music); 38 | } 39 | } else { 40 | Comment comment = (Comment) entry.getValue(); 41 | System.out.println("mCommentDao--->null" + mCommentDao == null); 42 | if (mCommentDao.countByCommentId(comment.getCommentId()) == 0) { 43 | mCommentDao.save(comment); 44 | } 45 | } 46 | 47 | } 48 | } 49 | 50 | 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/com/ezio/entity/Music.java: -------------------------------------------------------------------------------- 1 | package com.ezio.entity; 2 | 3 | import javax.persistence.Entity; 4 | import javax.persistence.GeneratedValue; 5 | import javax.persistence.Id; 6 | import javax.persistence.Table; 7 | 8 | /** 9 | * Created by Ezio on 2017/6/27. 
10 | */ 11 | @Entity 12 | @Table(name = "music") 13 | public class Music { 14 | 15 | @Id 16 | @GeneratedValue 17 | private Integer id; 18 | private String songId; 19 | private String title; 20 | private String author; 21 | private String album; 22 | private String URL; 23 | private int commentCount; 24 | 25 | public String getSongId() { 26 | return songId; 27 | } 28 | 29 | public void setSongId(String songId) { 30 | this.songId = songId; 31 | } 32 | 33 | public int getCommentCount() { 34 | return commentCount; 35 | } 36 | 37 | public void setCommentCount(int commentCount) { 38 | this.commentCount = commentCount; 39 | } 40 | 41 | public Integer getId() { 42 | return id; 43 | } 44 | 45 | public void setId(Integer id) { 46 | this.id = id; 47 | } 48 | 49 | public String getTitle() { 50 | return title; 51 | } 52 | 53 | public void setTitle(String title) { 54 | this.title = title; 55 | } 56 | 57 | public String getAuthor() { 58 | return author; 59 | } 60 | 61 | public void setAuthor(String author) { 62 | this.author = author; 63 | } 64 | 65 | public String getAlbum() { 66 | return album; 67 | } 68 | 69 | public void setAlbum(String album) { 70 | this.album = album; 71 | } 72 | 73 | public String getURL() { 74 | return URL; 75 | } 76 | 77 | public void setURL(String URL) { 78 | this.URL = URL; 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/main/java/com/ezio/entity/Comment.java: -------------------------------------------------------------------------------- 1 | package com.ezio.entity; 2 | 3 | import javax.persistence.Entity; 4 | import javax.persistence.GeneratedValue; 5 | import javax.persistence.Id; 6 | import javax.persistence.Table; 7 | 8 | /** 9 | * Created by Ezio on 2017/6/28. 
10 | */ 11 | @Entity 12 | @Table(name = "comment") 13 | public class Comment { 14 | 15 | @Id 16 | @GeneratedValue 17 | private Integer id; 18 | private String songId; 19 | private String nickname; 20 | private Integer likedCount; 21 | private String content; 22 | private String time; 23 | private int commentId; 24 | 25 | 26 | public int getCommentId() { 27 | return commentId; 28 | } 29 | 30 | public void setCommentId(int commentId) { 31 | this.commentId = commentId; 32 | } 33 | 34 | 35 | 36 | public Integer getId() { 37 | return id; 38 | } 39 | 40 | public void setId(Integer id) { 41 | this.id = id; 42 | } 43 | 44 | public String getSongId() { 45 | return songId; 46 | } 47 | 48 | public void setSongId(String songId) { 49 | this.songId = songId; 50 | } 51 | 52 | public String getNickname() { 53 | return nickname; 54 | } 55 | 56 | public void setNickname(String nickname) { 57 | this.nickname = nickname; 58 | } 59 | 60 | public Integer getLikedCount() { 61 | return likedCount; 62 | } 63 | 64 | public void setLikedCount(Integer likedCount) { 65 | this.likedCount = likedCount; 66 | } 67 | 68 | public String getTime() { 69 | return time; 70 | } 71 | 72 | public void setTime(String time) { 73 | this.time = time; 74 | } 75 | 76 | 77 | 78 | public String getContent() { 79 | return content; 80 | } 81 | 82 | public void setContent(String content) { 83 | this.content = content; 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.ezio 7 | neteasemusic 8 | 0.0.1-SNAPSHOT 9 | jar 10 | 11 | NetEaseMusic 12 | Demo project for Spring Boot 13 | 14 | 15 | org.springframework.boot 16 | spring-boot-starter-parent 17 | 1.5.4.RELEASE 18 | 19 | 20 | 21 | 22 | UTF-8 23 | UTF-8 24 | 1.8 25 | 26 | 27 | 28 | 29 | org.springframework.boot 30 | spring-boot-starter-data-jpa 31 | 32 | 33 | org.springframework.boot 34 | 
spring-boot-starter-web 35 | 36 | 37 | 38 | mysql 39 | mysql-connector-java 40 | runtime 41 | 42 | 43 | org.springframework.boot 44 | spring-boot-starter-test 45 | test 46 | 47 | 48 | us.codecraft 49 | webmagic-core 50 | 0.7.2 51 | 52 | 53 | us.codecraft 54 | webmagic-extension 55 | 0.7.2 56 | 57 | 58 | 59 | 60 | 61 | 62 | org.springframework.boot 63 | spring-boot-maven-plugin 64 | 65 | 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /mvnw.cmd: -------------------------------------------------------------------------------- 1 | @REM ---------------------------------------------------------------------------- 2 | @REM Licensed to the Apache Software Foundation (ASF) under one 3 | @REM or more contributor license agreements. See the NOTICE file 4 | @REM distributed with this work for additional information 5 | @REM regarding copyright ownership. The ASF licenses this file 6 | @REM to you under the Apache License, Version 2.0 (the 7 | @REM "License"); you may not use this file except in compliance 8 | @REM with the License. You may obtain a copy of the License at 9 | @REM 10 | @REM http://www.apache.org/licenses/LICENSE-2.0 11 | @REM 12 | @REM Unless required by applicable law or agreed to in writing, 13 | @REM software distributed under the License is distributed on an 14 | @REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | @REM KIND, either express or implied. See the License for the 16 | @REM specific language governing permissions and limitations 17 | @REM under the License. 
18 | @REM ---------------------------------------------------------------------------- 19 | 20 | @REM ---------------------------------------------------------------------------- 21 | @REM Maven2 Start Up Batch script 22 | @REM 23 | @REM Required ENV vars: 24 | @REM JAVA_HOME - location of a JDK home dir 25 | @REM 26 | @REM Optional ENV vars 27 | @REM M2_HOME - location of maven2's installed home dir 28 | @REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands 29 | @REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a key stroke before ending 30 | @REM MAVEN_OPTS - parameters passed to the Java VM when running Maven 31 | @REM e.g. to debug Maven itself, use 32 | @REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 33 | @REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files 34 | @REM ---------------------------------------------------------------------------- 35 | 36 | @REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on' 37 | @echo off 38 | @REM enable echoing my setting MAVEN_BATCH_ECHO to 'on' 39 | @if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO% 40 | 41 | @REM set %HOME% to equivalent of $HOME 42 | if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%") 43 | 44 | @REM Execute a user defined script before this one 45 | if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre 46 | @REM check for pre script, once with legacy .bat ending and once with .cmd ending 47 | if exist "%HOME%\mavenrc_pre.bat" call "%HOME%\mavenrc_pre.bat" 48 | if exist "%HOME%\mavenrc_pre.cmd" call "%HOME%\mavenrc_pre.cmd" 49 | :skipRcPre 50 | 51 | @setlocal 52 | 53 | set ERROR_CODE=0 54 | 55 | @REM To isolate internal variables from possible post scripts, we use another setlocal 56 | @setlocal 57 | 58 | @REM ==== START VALIDATION ==== 59 | if not "%JAVA_HOME%" == "" goto OkJHome 60 | 61 | echo. 62 | echo Error: JAVA_HOME not found in your environment. 
>&2 63 | echo Please set the JAVA_HOME variable in your environment to match the >&2 64 | echo location of your Java installation. >&2 65 | echo. 66 | goto error 67 | 68 | :OkJHome 69 | if exist "%JAVA_HOME%\bin\java.exe" goto init 70 | 71 | echo. 72 | echo Error: JAVA_HOME is set to an invalid directory. >&2 73 | echo JAVA_HOME = "%JAVA_HOME%" >&2 74 | echo Please set the JAVA_HOME variable in your environment to match the >&2 75 | echo location of your Java installation. >&2 76 | echo. 77 | goto error 78 | 79 | @REM ==== END VALIDATION ==== 80 | 81 | :init 82 | 83 | @REM Find the project base dir, i.e. the directory that contains the folder ".mvn". 84 | @REM Fallback to current working directory if not found. 85 | 86 | set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR% 87 | IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir 88 | 89 | set EXEC_DIR=%CD% 90 | set WDIR=%EXEC_DIR% 91 | :findBaseDir 92 | IF EXIST "%WDIR%"\.mvn goto baseDirFound 93 | cd .. 94 | IF "%WDIR%"=="%CD%" goto baseDirNotFound 95 | set WDIR=%CD% 96 | goto findBaseDir 97 | 98 | :baseDirFound 99 | set MAVEN_PROJECTBASEDIR=%WDIR% 100 | cd "%EXEC_DIR%" 101 | goto endDetectBaseDir 102 | 103 | :baseDirNotFound 104 | set MAVEN_PROJECTBASEDIR=%EXEC_DIR% 105 | cd "%EXEC_DIR%" 106 | 107 | :endDetectBaseDir 108 | 109 | IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig 110 | 111 | @setlocal EnableExtensions EnableDelayedExpansion 112 | for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! 
%%a 113 | @endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS% 114 | 115 | :endReadAdditionalConfig 116 | 117 | SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe" 118 | 119 | set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar" 120 | set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain 121 | 122 | %MAVEN_JAVA_EXE% %JVM_CONFIG_MAVEN_PROPS% %MAVEN_OPTS% %MAVEN_DEBUG_OPTS% -classpath %WRAPPER_JAR% "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %* 123 | if ERRORLEVEL 1 goto error 124 | goto end 125 | 126 | :error 127 | set ERROR_CODE=1 128 | 129 | :end 130 | @endlocal & set ERROR_CODE=%ERROR_CODE% 131 | 132 | if not "%MAVEN_SKIP_RC%" == "" goto skipRcPost 133 | @REM check for post script, once with legacy .bat ending and once with .cmd ending 134 | if exist "%HOME%\mavenrc_post.bat" call "%HOME%\mavenrc_post.bat" 135 | if exist "%HOME%\mavenrc_post.cmd" call "%HOME%\mavenrc_post.cmd" 136 | :skipRcPost 137 | 138 | @REM pause the script if MAVEN_BATCH_PAUSE is set to 'on' 139 | if "%MAVEN_BATCH_PAUSE%" == "on" pause 140 | 141 | if "%MAVEN_TERMINATE_CMD%" == "on" exit %ERROR_CODE% 142 | 143 | exit /B %ERROR_CODE% 144 | -------------------------------------------------------------------------------- /src/main/java/com/ezio/utils/NetEaseMusicUtils.java: -------------------------------------------------------------------------------- 1 | package com.ezio.utils; 2 | 3 | import com.ezio.processor.NetEaseMusicPageProcessor; 4 | 5 | import org.apache.commons.lang3.StringUtils; 6 | import org.apache.http.HttpEntity; 7 | import org.apache.http.NameValuePair; 8 | import org.apache.http.client.entity.UrlEncodedFormEntity; 9 | import org.apache.http.client.methods.CloseableHttpResponse; 10 | import org.apache.http.client.methods.HttpPost; 11 | import org.apache.http.impl.client.CloseableHttpClient; 12 | import org.apache.http.impl.client.HttpClients; 13 | import 
org.apache.http.message.BasicNameValuePair; 14 | import org.apache.http.util.EntityUtils; 15 | 16 | import java.io.IOException; 17 | import java.text.SimpleDateFormat; 18 | import java.util.ArrayList; 19 | import java.util.Date; 20 | import java.util.List; 21 | 22 | import javax.crypto.Cipher; 23 | import javax.crypto.spec.IvParameterSpec; 24 | import javax.crypto.spec.SecretKeySpec; 25 | 26 | import sun.misc.BASE64Encoder; 27 | 28 | /** 29 | * Created by Ezio on 2017/7/12. 30 | */ 31 | public class NetEaseMusicUtils { 32 | public static String crawlAjaxUrl(String songId, int offset) { 33 | 34 | CloseableHttpClient httpclient = HttpClients.createDefault(); 35 | CloseableHttpResponse response = null; 36 | String first_param = "{rid:\"\", offset:\"offset_param\", total:\"true\", limit:\"20\", csrf_token:\"\"}"; 37 | first_param = first_param.replace("offset_param", offset + ""); 38 | //first_param = first_param.replace("limit_param", ONE_PAGE + ""); 39 | try { 40 | // 参数加密 41 | // 16位随机字符串,直接FFF 42 | // String secKey = new BigInteger(100, new SecureRandom()).toString(32).substring(0, 16); 43 | String secKey = "FFFFFFFFFFFFFFFF"; 44 | // 两遍ASE加密 45 | String encText = NetEaseMusicUtils.aesEncrypt(aesEncrypt(first_param, "0CoJUm6Qyw8W8jud"), secKey); 46 | // 47 | String encSecKey = rsaEncrypt(); 48 | 49 | HttpPost httpPost = new HttpPost("http://music.163.com/weapi/v1/resource/comments/R_SO_4_" + songId + "/?csrf_token="); 50 | httpPost.addHeader("Referer", NetEaseMusicPageProcessor.BASE_URL); 51 | 52 | List ls = new ArrayList(); 53 | ls.add(new BasicNameValuePair("params", encText)); 54 | ls.add(new BasicNameValuePair("encSecKey", encSecKey)); 55 | 56 | UrlEncodedFormEntity paramEntity = new UrlEncodedFormEntity(ls, "utf-8"); 57 | httpPost.setEntity(paramEntity); 58 | 59 | response = httpclient.execute(httpPost); 60 | HttpEntity entity = response.getEntity(); 61 | 62 | if (entity != null) { 63 | return EntityUtils.toString(entity, "utf-8"); 64 | } 65 | 66 | } catch 
(Exception e) { 67 | e.printStackTrace(); 68 | } finally { 69 | try { 70 | response.close(); 71 | httpclient.close(); 72 | } catch (IOException e) { 73 | e.printStackTrace(); 74 | } 75 | } 76 | 77 | return ""; 78 | } 79 | 80 | /** 81 | * ASE-128-CBC加密模式可以需要16位 82 | * 83 | * @param src 加密内容 84 | * @param key 密钥 85 | * @return 86 | */ 87 | public static String aesEncrypt(String src, String key) throws Exception { 88 | String encodingFormat = "UTF-8"; 89 | String iv = "0102030405060708"; 90 | 91 | Cipher cipher = Cipher.getInstance("AES/CBC/PKCS5Padding"); 92 | byte[] raw = key.getBytes(); 93 | SecretKeySpec secretKeySpec = new SecretKeySpec(raw, "AES"); 94 | IvParameterSpec ivParameterSpec = new IvParameterSpec(iv.getBytes()); 95 | // 使用CBC模式,需要一个向量vi,增加加密算法强度 96 | cipher.init(Cipher.ENCRYPT_MODE, secretKeySpec, ivParameterSpec); 97 | byte[] encrypted = cipher.doFinal(src.getBytes(encodingFormat)); 98 | return new BASE64Encoder().encode(encrypted); 99 | 100 | } 101 | 102 | public static String rsaEncrypt() { 103 | String secKey = "257348aecb5e556c066de214e531faadd1c55d814f9be95fd06d6bff9f4c7a41f831f6394d5a3fd2e3881736d94a02ca919d952872e7d0a50ebfa1769a7a62d512f5f1ca21aec60bc3819a9c3ffca5eca9a0dba6d6f7249b06f5965ecfff3695b54e1c28f3f624750ed39e7de08fc8493242e26dbc4484a01c76f739e135637c"; 104 | return secKey; 105 | } 106 | 107 | public static String parseMillisecone(long millisecond) { 108 | String time = null; 109 | try { 110 | long yushu_day = millisecond % (1000 * 60 * 60 * 24); 111 | long yushu_hour = (millisecond % (1000 * 60 * 60 * 24)) 112 | % (1000 * 60 * 60); 113 | long yushu_minute = millisecond % (1000 * 60 * 60 * 24) 114 | % (1000 * 60 * 60) % (1000 * 60); 115 | @SuppressWarnings("unused") 116 | long yushu_second = millisecond % (1000 * 60 * 60 * 24) 117 | % (1000 * 60 * 60) % (1000 * 60) % 1000; 118 | if (yushu_day == 0) { 119 | return (millisecond / (1000 * 60 * 60 * 24)) + "天"; 120 | } else { 121 | if (yushu_hour == 0) { 122 | return (millisecond / (1000 * 
60 * 60 * 24)) + "天" 123 | + (yushu_day / (1000 * 60 * 60)) + "时"; 124 | } else { 125 | if (yushu_minute == 0) { 126 | return (millisecond / (1000 * 60 * 60 * 24)) + "天" 127 | + (yushu_day / (1000 * 60 * 60)) + "时" 128 | + (yushu_hour / (1000 * 60)) + "分"; 129 | } else { 130 | return (millisecond / (1000 * 60 * 60 * 24)) + "天" 131 | + (yushu_day / (1000 * 60 * 60)) + "时" 132 | + (yushu_hour / (1000 * 60)) + "分" 133 | + (yushu_minute / 1000) + "秒"; 134 | 135 | } 136 | 137 | } 138 | 139 | } 140 | 141 | } catch (Exception e) { 142 | e.printStackTrace(); 143 | } 144 | return time; 145 | } 146 | 147 | /* 148 | * 将时间戳转换为时间 149 | */ 150 | public static String stampToDate(long s) { 151 | String res; 152 | SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 153 | long lt = s; 154 | Date date = new Date(lt); 155 | res = simpleDateFormat.format(date); 156 | return res; 157 | } 158 | 159 | /** 160 | * 将emoji表情替换成* 161 | * 162 | * @param source 163 | * @return 过滤后的字符串 164 | */ 165 | public static String filterEmoji(String source) { 166 | if (StringUtils.isNotBlank(source)) { 167 | return source.replaceAll("[\\ud800\\udc00-\\udbff\\udfff\\ud800-\\udfff]", "*"); 168 | } else { 169 | return source; 170 | } 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/main/java/com/ezio/processor/NetEaseMusicPageProcessor.java: -------------------------------------------------------------------------------- 1 | package com.ezio.processor; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.alibaba.fastjson.JSONObject; 5 | import com.alibaba.fastjson.JSONPath; 6 | import com.ezio.entity.Comment; 7 | import com.ezio.entity.Music; 8 | import com.ezio.pipeline.NetEaseMusicPipeline; 9 | import com.ezio.service.MusicService; 10 | import com.ezio.utils.NetEaseMusicUtils; 11 | 12 | import org.springframework.beans.factory.annotation.Autowired; 13 | import org.springframework.stereotype.Component; 14 | 15 | 
import java.util.ArrayList; 16 | import java.util.Date; 17 | import java.util.List; 18 | 19 | import us.codecraft.webmagic.Page; 20 | import us.codecraft.webmagic.Site; 21 | import us.codecraft.webmagic.Spider; 22 | import us.codecraft.webmagic.downloader.HttpClientDownloader; 23 | import us.codecraft.webmagic.processor.PageProcessor; 24 | import us.codecraft.webmagic.proxy.Proxy; 25 | import us.codecraft.webmagic.proxy.SimpleProxyProvider; 26 | 27 | /** 28 | * Created by Ezio on 2017/6/27. 29 | */ 30 | @Component 31 | public class NetEaseMusicPageProcessor implements PageProcessor { 32 | // 正则表达式\\. \\转义java中的\ \.转义正则中的. 33 | // 主域名 34 | public static final String BASE_URL = "http://music.163.com/"; 35 | // 匹配专辑URL 36 | public static final String ALBUM_URL = "http://music\\.163\\.com/playlist\\?id=\\d+"; 37 | // 匹配歌曲URL 38 | public static final String MUSIC_URL = "http://music\\.163\\.com/song\\?id=\\d+"; 39 | // 初始地址, 褐言喜欢的音乐id 148174530 40 | public static final String START_URL = "http://music.163.com/playlist?id=148174530"; 41 | public static final int ONE_PAGE = 200; 42 | private int timestamp = (int) (new Date().getTime()/1000); 43 | private final String authHeader = authHeader("ZF20179221632tODs6v", "038de086e3b34575a4af7be000f41f89", timestamp); 44 | private Site site = Site.me() 45 | .setDomain("http://music.163.com") 46 | .setSleepTime(1000) 47 | .setRetryTimes(30) 48 | .setCharset("utf-8") 49 | .setTimeOut(30000) 50 | .addHeader("Proxy-Authorization", authHeader) 51 | .setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31"); 52 | 53 | @Override 54 | public Site getSite() { 55 | return site; 56 | } 57 | 58 | @Autowired 59 | MusicService mMusicService; 60 | 61 | @Override 62 | public void process(Page page) { 63 | // 根据URL判断页面类型 64 | if (page.getUrl().regex(ALBUM_URL).match()) { 65 | System.out.println("歌曲总数----->" + 
page.getHtml().xpath("//span[@id='playlist-track-count']/text()").toString()); 66 | // 爬取歌曲URl加入队列 67 | page.addTargetRequests(page.getHtml().xpath("//div[@id=\"song-list-pre-cache\"]").links().regex(MUSIC_URL).all()); 68 | } else { 69 | String url = page.getUrl().toString(); 70 | Music music = new Music(); 71 | // 单独对AJAX请求获取评论数, 使用JSON解析返回结果 72 | String songId = url.substring(url.indexOf("id=") + 3); 73 | int commentCount = getComment(page, songId, 0); 74 | // music 保存到数据库 75 | music.setSongId(songId); 76 | music.setCommentCount(commentCount); 77 | music.setTitle(page.getHtml().xpath("//em[@class='f-ff2']/text()").toString()); 78 | music.setAuthor(page.getHtml().xpath("//p[@class='des s-fc4']/span/a/text()").toString()); 79 | music.setAlbum(page.getHtml().xpath("//p[@class='des s-fc4']/a/text()").toString()); 80 | music.setURL(url); 81 | //page.putField("music", music); 82 | mMusicService.addMusic(music); 83 | } 84 | } 85 | 86 | private int getComment(Page page, String songId, int offset) { 87 | int commentCount; 88 | String s = NetEaseMusicUtils.crawlAjaxUrl(songId, offset); 89 | 90 | if (s.contains("503 Service Temporarily Unavailable")) { 91 | commentCount = -1; 92 | } else { 93 | JSONObject jsonObject = JSON.parseObject(s); 94 | commentCount = (Integer) JSONPath.eval(jsonObject, "$.total"); 95 | for (; offset < commentCount; offset = offset + ONE_PAGE) { 96 | JSONObject obj = JSON.parseObject(NetEaseMusicUtils.crawlAjaxUrl(songId, offset)); 97 | List commentIds = (List) JSONPath.eval(obj, "$.comments.commentId"); 98 | List contents = (List) JSONPath.eval(obj, "$.comments.content"); 99 | List likedCounts = (List) JSONPath.eval(obj, "$.comments.likedCount"); 100 | List nicknames = (List) JSONPath.eval(obj, "$.comments.user.nickname"); 101 | List times = (List) JSONPath.eval(obj, "$.comments.time"); 102 | List comments = new ArrayList<>(); 103 | for (int i = 0; i < contents.size(); i++) { 104 | // 保存到数据库 105 | Comment comment = new Comment(); 106 | 
comment.setCommentId(commentIds.get(i)); 107 | comment.setSongId(songId); 108 | comment.setContent(NetEaseMusicUtils.filterEmoji(contents.get(i))); 109 | comment.setLikedCount(likedCounts.get(i)); 110 | comment.setNickname(nicknames.get(i)); 111 | comment.setTime(NetEaseMusicUtils.stampToDate(times.get(i))); 112 | comments.add(comment); 113 | mMusicService.addComment(comment); 114 | } 115 | 116 | try { 117 | Thread.sleep(1000); 118 | } catch (InterruptedException e) { 119 | e.printStackTrace(); 120 | } 121 | } 122 | } 123 | 124 | return commentCount; 125 | } 126 | 127 | 128 | public void start(NetEaseMusicPageProcessor processor, NetEaseMusicPipeline netEaseMusicPipeline) { 129 | 130 | long start = System.currentTimeMillis(); 131 | final String ip = "forward.xdaili.cn";//这里以正式服务器ip地址为准 132 | final int port = 80;//这里以正式服务器端口地址为准 133 | 134 | //以下订单号,secret参数 须自行改动 135 | 136 | HttpClientDownloader httpClientDownloader = new HttpClientDownloader(); 137 | httpClientDownloader.setProxyProvider(SimpleProxyProvider.from(new Proxy(ip,port))); 138 | 139 | 140 | Spider.create(processor) 141 | .addUrl(START_URL) 142 | .setDownloader(httpClientDownloader) 143 | .thread(5) 144 | // .addPipeline(netEaseMusicPipeline) 145 | .run(); 146 | long end = System.currentTimeMillis(); 147 | System.out.println("爬虫结束,耗时--->" + NetEaseMusicUtils.parseMillisecone(end - start)); 148 | 149 | } 150 | 151 | 152 | /** 153 | * http://www.xdaili.cn/usercenter/order 154 | * 讯代理 买了10W 的 155 | * @param orderno 156 | * @param secret 157 | * @param timestamp 158 | * @return 159 | */ 160 | 161 | public static String authHeader(String orderno, String secret, int timestamp){ 162 | //拼装签名字符串 163 | String planText = String.format("orderno=%s,secret=%s,timestamp=%d", orderno, secret, timestamp); 164 | 165 | //计算签名 166 | String sign = org.apache.commons.codec.digest.DigestUtils.md5Hex(planText).toUpperCase(); 167 | 168 | //拼装请求头Proxy-Authorization的值 169 | String authHeader = 
String.format("sign=%s&orderno=%s×tamp=%d", sign, orderno, timestamp); 170 | return authHeader; 171 | } 172 | 173 | } 174 | -------------------------------------------------------------------------------- /mvnw: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # ---------------------------------------------------------------------------- 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | # ---------------------------------------------------------------------------- 20 | 21 | # ---------------------------------------------------------------------------- 22 | # Maven2 Start Up Batch script 23 | # 24 | # Required ENV vars: 25 | # ------------------ 26 | # JAVA_HOME - location of a JDK home dir 27 | # 28 | # Optional ENV vars 29 | # ----------------- 30 | # M2_HOME - location of maven2's installed home dir 31 | # MAVEN_OPTS - parameters passed to the Java VM when running Maven 32 | # e.g. 
#       to debug Maven itself, use
#       set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
#   MAVEN_SKIP_RC - flag to disable loading of mavenrc files
# ----------------------------------------------------------------------------

# Source the system-wide and per-user mavenrc files unless MAVEN_SKIP_RC is set.
if [ -z "$MAVEN_SKIP_RC" ] ; then

  if [ -f /etc/mavenrc ] ; then
    . /etc/mavenrc
  fi

  if [ -f "$HOME/.mavenrc" ] ; then
    . "$HOME/.mavenrc"
  fi

fi

# OS specific support. $var _must_ be set to either true or false,
# because the flags are later executed as commands (`if $cygwin ; then`).
cygwin=false;
darwin=false;
mingw=false
case "`uname`" in
  CYGWIN*) cygwin=true ;;
  MINGW*) mingw=true;;
  Darwin*) darwin=true
    # Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home
    # See https://developer.apple.com/library/mac/qa/qa1170/_index.html
    if [ -z "$JAVA_HOME" ]; then
      if [ -x "/usr/libexec/java_home" ]; then
        export JAVA_HOME="`/usr/libexec/java_home`"
      else
        export JAVA_HOME="/Library/Java/Home"
      fi
    fi
    ;;
esac

# On Gentoo, ask java-config for the JRE home when JAVA_HOME is still unset.
if [ -z "$JAVA_HOME" ] ; then
  if [ -r /etc/gentoo-release ] ; then
    JAVA_HOME=`java-config --jre-home`
  fi
fi

# Derive M2_HOME from this script's own location when it is not provided.
if [ -z "$M2_HOME" ] ; then
  ## resolve links - $0 may be a link to maven's home
  PRG="$0"

  # need this for relative symlinks
  while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
      PRG="$link"
    else
      PRG="`dirname "$PRG"`/$link"
    fi
  done

  # remember the working directory so it can be restored below
  saveddir=`pwd`

  M2_HOME=`dirname "$PRG"`/..

  # make it fully qualified
  M2_HOME=`cd "$M2_HOME" && pwd`

  cd "$saveddir"
  # echo Using m2 at $M2_HOME
fi

# For Cygwin, ensure paths are in UNIX format before anything is touched
if $cygwin ; then
  [ -n "$M2_HOME" ] &&
    M2_HOME=`cygpath --unix "$M2_HOME"`
  [ -n "$JAVA_HOME" ] &&
    JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
  [ -n "$CLASSPATH" ] &&
    CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
fi

# For MinGW, ensure paths are in UNIX format before anything is touched
if $mingw ; then
  [ -n "$M2_HOME" ] &&
    M2_HOME="`(cd "$M2_HOME"; pwd)`"
  [ -n "$JAVA_HOME" ] &&
    JAVA_HOME="`(cd "$JAVA_HOME"; pwd)`"
  # TODO classpath?
fi

# Last resort for JAVA_HOME: locate javac on the PATH and strip the trailing /bin.
if [ -z "$JAVA_HOME" ]; then
  javaExecutable="`which javac`"
  # the comparison with "no" filters out `which` implementations that print "no javac in ..."
  if [ -n "$javaExecutable" ] && ! [ "`expr \"$javaExecutable\" : '\([^ ]*\)'`" = "no" ]; then
    # readlink(1) is not available as standard on Solaris 10.
    readLink=`which readlink`
    if [ ! `expr "$readLink" : '\([^ ]*\)'` = "no" ]; then
      if $darwin ; then
        javaHome="`dirname \"$javaExecutable\"`"
        javaExecutable="`cd \"$javaHome\" && pwd -P`/javac"
      else
        javaExecutable="`readlink -f \"$javaExecutable\"`"
      fi
      javaHome="`dirname \"$javaExecutable\"`"
      javaHome=`expr "$javaHome" : '\(.*\)/bin'`
      JAVA_HOME="$javaHome"
      export JAVA_HOME
    fi
  fi
fi

# Choose the java executable: from JAVA_HOME when known, otherwise from the PATH.
if [ -z "$JAVACMD" ] ; then
  if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
      # IBM's JDK on AIX uses strange locations for the executables
      JAVACMD="$JAVA_HOME/jre/sh/java"
    else
      JAVACMD="$JAVA_HOME/bin/java"
    fi
  else
    JAVACMD="`which java`"
  fi
fi

# Abort when the chosen java command cannot be executed.
if [ ! -x "$JAVACMD" ] ; then
  echo "Error: JAVA_HOME is not defined correctly." >&2
  echo " We cannot execute $JAVACMD" >&2
  exit 1
fi

if [ -z "$JAVA_HOME" ] ; then
  echo "Warning: JAVA_HOME environment variable is not set."
fi

# Assigned here but not referenced again in this script; the launch below uses WRAPPER_LAUNCHER.
CLASSWORLDS_LAUNCHER=org.codehaus.plexus.classworlds.launcher.Launcher

# traverses directory structure from process work directory to filesystem root
# first directory with .mvn subdirectory is considered project base directory
# $1: directory to start from; prints the base directory found, or $1 itself
# when no .mvn directory exists on the way up; returns 1 when $1 is empty
find_maven_basedir() {

  if [ -z "$1" ]
  then
    echo "Path not specified to find_maven_basedir"
    return 1
  fi

  basedir="$1"
  wdir="$1"
  while [ "$wdir" != '/' ] ; do
    if [ -d "$wdir"/.mvn ] ; then
      basedir=$wdir
      break
    fi
    # workaround for JBEAP-8937 (on Solaris 10/Sparc)
    if [ -d "${wdir}" ]; then
      wdir=`cd "$wdir/.."; pwd`
    fi
    # end of workaround
  done
  echo "${basedir}"
}

# concatenates all lines of a file
# $1: file to read; prints its content with each run of newlines squeezed into
# one space, and prints nothing when the file does not exist
concat_lines() {
  if [ -f "$1" ]; then
    echo "$(tr -s '\n' ' ' < "$1")"
  fi
}

BASE_DIR=`find_maven_basedir "$(pwd)"`
if [ -z "$BASE_DIR" ]; then
  exit 1;
fi

# MAVEN_BASEDIR, when set, takes precedence over the directory that was found.
export MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"}
# prints the resolved project base directory on standard output
echo $MAVEN_PROJECTBASEDIR
# options from .mvn/jvm.config come first, followed by the caller's MAVEN_OPTS
MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS"

# For Cygwin, switch paths to Windows format before running java
if $cygwin; then
  [ -n "$M2_HOME" ] &&
    M2_HOME=`cygpath --path --windows "$M2_HOME"`
  [ -n "$JAVA_HOME" ] &&
    JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"`
  [ -n "$CLASSPATH" ] &&
    CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
  [ -n "$MAVEN_PROJECTBASEDIR" ] &&
    MAVEN_PROJECTBASEDIR=`cygpath --path --windows "$MAVEN_PROJECTBASEDIR"`
fi

WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain

# Replace this shell with the JVM running the wrapper main class.
# MAVEN_OPTS and MAVEN_CONFIG are left unquoted so that each is split into separate arguments.
exec "$JAVACMD" \
  $MAVEN_OPTS \
  -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \
  "-Dmaven.home=${M2_HOME}" "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \
  ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@"