res) {
65 | Document doc = Jsoup.parse(content);
66 | Elements links = doc.getElementsByClass("bd");
67 | for (Element link : links) {
68 | String str = link.toString();
69 | Pattern pattern = Pattern.compile("[.\\s\\S]+?div");
70 | Matcher m = pattern.matcher(str);
71 | while (m.find()) {
72 | String s = m.group();
73 | s = s.replaceAll("<", "");
74 | s = s.replaceAll(">", "");
75 | s = s.replaceAll("/", "");
76 | s = s.replaceAll("p", "");
77 | s = s.replaceAll("div", "");
78 | s = s.replaceAll("\n", "");
79 | res.add(s);
80 | }
81 | }
82 | }
83 | }
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
--------------------------------------------------------------------------------
/src/main/java/com/tianya/util/FileUtils.java:
--------------------------------------------------------------------------------
1 | package com.tianya.util;
2 |
3 | import lombok.extern.slf4j.Slf4j;
4 |
5 | import java.io.BufferedOutputStream;
6 | import java.io.FileOutputStream;
7 | import java.util.List;
8 |
9 | @Slf4j
10 | public class FileUtils {
11 |
12 | private static final String PREFIX = "### ==**";
13 | private static final String SUFFIX = "楼: **==" + "\n";
14 |
15 | /** 写入文件中,转化为PDF文档 */
16 | public static String writeFile(List res, String uuid) {
17 | String path = "/Users/apple/" + uuid + ".md";
18 | try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(path))) {
19 | int cnt = 1;
20 | log.info("开始写入磁盘....uuid = {}", uuid);
21 | for (String s : res) {
22 | // markdown格式
23 | String ss = PREFIX + cnt + SUFFIX + "\n";
24 | bos.write(ss.getBytes());
25 | // 真正数据
26 | bos.write(s.getBytes());
27 | cnt++;
28 | }
29 | log.info("写入磁盘成功...uuid = {}", uuid);
30 | } catch (Exception e) {
31 | log.error("写入文件出错:" + e.getMessage());
32 | }
33 | return path;
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/src/main/java/com/tianya/util/HttpMethod.java:
--------------------------------------------------------------------------------
1 | package com.tianya.util;
2 |
3 | import com.arronlong.httpclientutil.HttpClientUtil;
4 | import com.arronlong.httpclientutil.common.HttpConfig;
5 | import com.arronlong.httpclientutil.common.HttpHeader;
6 | import lombok.extern.slf4j.Slf4j;
7 | import org.apache.http.Header;
8 |
9 | @Slf4j
10 | public class HttpMethod {
11 |
12 | public static String get(String url) {
13 | HttpConfig config = getConfig();
14 | String content = null;
15 | try {
16 | content = HttpClientUtil.get(config.url(url));
17 | } catch (Exception e) {
18 | log.error("http get失败 e = {}", e);
19 | }
20 | return content;
21 | }
22 |
23 | private static HttpConfig getConfig() {
24 | String cookies = "Hm_lvt_bc5755e0609123f78d0e816bf7dee255=1590380369; __auc=eeb9a7fd1724a0e962f9fb718b4; __cid=CN; __guid=469014980; __guid2=469014980; deid=57432c15994418404fc637b4418eb34c; sso=r=1609006141&sid=&wsid=9ECDE10F1335A0C76BE47F3815679F80; user=w=wenber888&id=137776589&f=1; temp=k=751567138&s=&t=1590380376&b=0e281318d4992eae9d4b0486a1687b28&ct=1590380376&et=1592972376; right=web4=n&portal=n; temp4=rm=9d0be7aa8e62f78dfa10caaa528ef2da; u_tip=; vip=751567138%3D0; JSESSIONID=abcYhud_K6Iwv_Iis1ljx; __asc=079a9f731724bd526f10e329f0d; __u_a=v2.3.0; time=ct=1590410572.735; __ptime=1590410572786; Hm_lpvt_bc5755e0609123f78d0e816bf7dee255=1590410573; ty_msg=1590410602941_137776589_0_0_0_0_0_0_0_0_0_0_0; bbs_msg=1590410603184_137776589_0_0_0_0";
25 | String referer = "https://www.baidu.com";
26 | Header[] headers= HttpHeader.custom().cookie(cookies).referer(referer).build();
27 | HttpConfig config = HttpConfig.custom().headers(headers);
28 | return config;
29 | }
30 | }
31 |
32 |
33 |
34 |
35 |
--------------------------------------------------------------------------------
/src/main/java/com/tianya/util/MDUtils.java:
--------------------------------------------------------------------------------
1 | package com.tianya.util;
2 |
3 | import org.springframework.util.CollectionUtils;
4 |
5 | import java.util.Collections;
6 | import java.util.List;
7 |
/**
 * Markdown formatting helpers.
 */
public class MDUtils {

    /** Leading indent prepended to every entry. */
    private static final String INDENT = " " + " ";

    /**
     * Converts each entry to its Markdown form by prepending a two-space indent and
     * appending a line break.
     *
     * <p>The conversion happens <em>in place</em>: the elements of {@code comment} are
     * overwritten and the same list instance is returned. The list must therefore support
     * {@code set}; an unmodifiable list will cause {@link UnsupportedOperationException}.
     * A {@code null} element is rendered as the text {@code "null"} by string concatenation.
     *
     * @param comment entries to convert; may be {@code null} or empty
     * @return an immutable empty list when {@code comment} is {@code null} or empty,
     *         otherwise {@code comment} itself with every element converted
     */
    public static List<String> transferStrToMD(List<String> comment) {
        if (comment == null || comment.isEmpty()) {
            return Collections.emptyList();
        }
        for (int i = 0; i < comment.size(); i++) {
            comment.set(i, INDENT + comment.get(i) + "\n");
        }
        return comment;
    }

}
26 |
--------------------------------------------------------------------------------
/src/main/resources/application.properties:
--------------------------------------------------------------------------------
1 | server.port=8080
2 |
3 | logging.config=classpath:logback.xml
--------------------------------------------------------------------------------
/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 | %date{yyyy-MM-dd HH:mm:ss.SSS} [%thread] [%X{sessionId}] %-5level - [%c java:%L] [%m]%n
12 |
13 |
14 |
15 |
16 |
17 | ${APP_NAME}.log
18 |
19 |
20 |
21 |
22 |
23 |
24 | UTF-8
25 |
26 | %date{yyyy-MM-dd HH:mm:ss.SSS} [%thread] [%X{sessionId}] %-5level - [%c java:%L] [%m]%n
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/src/test/java/com/tianya/house/HouseApplicationTests.java:
--------------------------------------------------------------------------------
1 | package com.tianya.house;
2 |
3 | import org.junit.Test;
4 | import org.junit.runner.RunWith;
5 | import org.springframework.boot.test.context.SpringBootTest;
6 | import org.springframework.test.context.junit4.SpringRunner;
7 |
8 | @RunWith(SpringRunner.class)
9 | @SpringBootTest
10 | public class HouseApplicationTests {
11 |
12 | @Test
13 | public void contextLoads() {
14 | }
15 |
16 | }
17 |
--------------------------------------------------------------------------------
/北望帝都-北京四环房价.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenbochang888/house/cc4c04d042c451f0dd91ea74ba5f3964fb99edf5/北望帝都-北京四环房价.pdf
--------------------------------------------------------------------------------