\\w+)\\s+)*");
304 | for (Element f2 : f2s) {
305 | String text = f2.text().trim();
306 | Matcher m = F2Pattern.matcher(text);
307 | if (m.find()) {
308 | if (m.group("type").equals("編輯")) {
309 | if (post.getAuthor() == null) {
310 | post.setAuthor(m.group("id"));
311 | }
312 | } else if (m.group("type").equals("轉錄者")) {
313 | if (post.getAuthor() == null) {
314 | post.setAuthor(m.group("id"));
315 | }
316 | }
317 | f2.remove();
318 | }
319 | }
320 |
321 | String content = mainContent.wrap("").text();
322 | content = content.replaceAll(PostFooterPattern.pattern(), "")
323 | .replaceAll("※\\s+.*轉錄至看板.*(\\d+:\\d+)*", "")
324 | .trim();
325 |
326 | post.setUpVoteCount(upVoteCount);
327 | post.setDownVoteCount(downVoteCount);
328 | post.setNeutralCount(neutralCount);
329 | post.setContent(content);
330 |
331 | return post;
332 |
333 | }
334 |
335 | /**
336 | * br2nl
337 | * @param html
338 | * @return
339 | */
340 | public static String br2nl(String html) {
341 | if(html == null) {
342 | return html;
343 | }
344 | Document document = Jsoup.parse(html);
345 | document.outputSettings(new Document.OutputSettings().prettyPrint(false));//makes html() preserve linebreaks and spacing
346 | document.select("br").append("\\n");
347 | document.select("p").prepend("\\n\\n");
348 | String s = document.html().replaceAll("\\\\n", "\n");
349 | return Jsoup.clean(s, "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
350 | }
351 |
352 | /**
353 | * getTimeFromPttURL
354 | * @param url
355 | * @return
356 | */
357 | public static Date getTimeFromPttURL(String url) throws Exception {
358 | Matcher m = URLTimePattern.matcher(url);
359 | if (m.find()) {
360 | long timestamp = Long.parseLong(m.group("timestamp"));
361 | return timestamp2Date(timestamp);
362 | }
363 | return null;
364 | }
365 |
366 | /**
367 | * Convert Unix timestamp to Date
368 | * @param unixSeconds
369 | * @return
370 | */
371 | public static Date timestamp2Date(long unixSeconds) {
372 | return new Date(unixSeconds * 1000L);
373 | }
374 |
375 | }
376 |
--------------------------------------------------------------------------------
/src/main/java/crawler/client/PTTClient.java:
--------------------------------------------------------------------------------
1 | package crawler.client;
2 | import java.io.BufferedReader;
3 | import java.io.IOException;
4 | import java.io.InputStream;
5 | import java.io.InputStreamReader;
6 | import java.io.OutputStream;
7 | import java.io.PushbackReader;
8 | import java.net.SocketException;
9 | import java.security.KeyManagementException;
10 | import java.security.NoSuchAlgorithmException;
11 | import java.security.SecureRandom;
12 | import java.security.cert.CertificateException;
13 | import java.security.cert.X509Certificate;
14 | import java.util.ArrayList;
15 | import java.util.Arrays;
16 | import java.util.List;
17 | import java.util.Properties;
18 | import java.util.regex.Matcher;
19 | import java.util.regex.Pattern;
20 |
21 | import javax.net.ssl.HostnameVerifier;
22 | import javax.net.ssl.HttpsURLConnection;
23 | import javax.net.ssl.SSLContext;
24 | import javax.net.ssl.SSLSession;
25 | import javax.net.ssl.X509TrustManager;
26 |
27 | import org.apache.commons.net.telnet.TelnetClient;
28 | import org.apache.log4j.Level;
29 | import org.apache.log4j.Logger;
30 | import org.json.JSONObject;
31 | import org.jsoup.Jsoup;
32 | import org.jsoup.nodes.Document;
33 | import org.jsoup.nodes.Element;
34 |
35 | import com.jcraft.jsch.Channel;
36 | import com.jcraft.jsch.ChannelShell;
37 | import com.jcraft.jsch.JSch;
38 | import com.jcraft.jsch.JSchException;
39 | import com.jcraft.jsch.Session;
40 |
41 | import crawler.base.Entry;
42 | import crawler.base.Post;
43 | import crawler.base.PostAnalysiser;
44 |
45 | public class PTTClient {
46 |
/** Class logger; the static initializer below opens it up to every level. */
private static final Logger log = Logger.getLogger(PTTClient.class);

/** Debug switch: when true, the rendered screen is printed after each read. */
private static final boolean isPrintScreen = false;

/** Debug switch: when true, the raw characters read from the server are printed. */
private static final boolean isPrintSource = false;

static {
    log.setLevel(Level.ALL);
}

/** Transport used to reach the server. */
public enum Protocol {
    Telnet,
    SSH
}

/** Kind of screen the terminal is currently showing. */
public enum Screen {
    /** Main menu. */
    MainMenu,
    /** Article list of a board. */
    Board,
    /** A post opened in the reader. */
    Post,
    /** None of the above could be recognised. */
    Unknown
}
64 |
65 | private static final String UserAgent = "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36";
66 | private static final Pattern VT100ControlPattern = Pattern.compile("\u001B\\[(?[0-9;]*)(?[ABCDHJKmsu])");
67 | private static final int DEFAULT_TIMEOUT = 10 * 1000;
68 |
69 | private static final String MenuHeader = "【主功能表】[\\s\\S]*呼叫器";
70 | private static final String BoardFooter = "文章選讀[\\s\\S]*相關主題[\\s\\S]*找標題/作者[\\s\\S]*進板畫面";
71 | private static final String PostFooter = "瀏覽[\\s\\S]*第[\\s\\S]*頁[\\s\\S]*目前顯示[\\s\\S]*第[\\s\\S]*行[\\s\\S]*離開";
72 |
73 | private static final Pattern ENTRYPATTER_PATTERN = Pattern.compile("[●>][ ]*(?[0-9]+|★[ ]+)[ ](?.)(?[0-9 X]+|爆)(?../..)[ ](?.*?)([\\s□轉]|R:)+(?.*)");
74 | private static final Pattern PROGRESS_PATTERN = Pattern.compile("(?\\d+)%[^\\d]*(?\\d+)~(?\\d+)");
75 |
76 | private static final Pattern AID_PATTERN = Pattern.compile("文章代碼\\(AID\\):\\s*#(?........)");
77 | private static final Pattern URL_PATTERN = Pattern.compile("文章網址:\\s*(?.*?)[\\s\\│]+");
78 |
79 | private static final Pattern URL_VERIFY = Pattern.compile("^(https?)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]");
80 |
81 | public static final int RETV_TIMEOUT = -1;
82 | public static final int RETV_EOF = -2;
83 | public static final int RETV_IOEXCEPTION = -9;
84 |
85 | // Screen
86 | private int posX = -1;
87 | private int posY = -1;
88 | private char[][] screen = null;
89 |
90 | private Protocol protocol = null;
91 | private TelnetClient tc = null;
92 |
93 | private Channel channel = null;
94 | private Session session = null;
95 |
96 | private InputStream is = null;
97 | private OutputStream os = null;
98 | private Thread renderScreenThread = null;
99 |
100 | // Matchers
101 | @SuppressWarnings("unused")
102 | private String beforeStr = null, matchStr = null, afterStr = null;
103 |
/**
 * Creates a client that is not yet connected; the screen buffer and cursor
 * are prepared by {@link #initialize()}.
 */
public PTTClient() {
    this.initialize();
}
107 |
108 | /**
109 | * 初始化
110 | */
111 | public void initialize() {
112 | screen = new char[72][80];
113 | posY = posX = -1;
114 | clearScreen();
115 | }
116 |
117 | /**
118 | * Connect PTT by using protocol
119 | * @param protocol
120 | * @throws SocketException
121 | * @throws IOException
122 | * @throws JSchException
123 | */
124 | public void connect(Protocol protocol) throws SocketException, IOException, JSchException {
125 |
126 | this.protocol = protocol;
127 |
128 | switch (this.protocol) {
129 | case Telnet:
130 | log.info("Connect ptt.cc using telnet");
131 |
132 | tc = new TelnetClient();
133 | tc.connect("ptt.cc");
134 |
135 | is = tc.getInputStream();
136 | os = tc.getOutputStream();
137 |
138 | break;
139 | case SSH:
140 | default:
141 | log.info("Connect ptt.cc using SSH (bbs@ptt.cc)");
142 |
143 | Properties configuration = new Properties();
144 | configuration.put("kex", "diffie-hellman-group1-sha1,"
145 | + "diffie-hellman-group14-sha1,"
146 | + "diffie-hellman-group-exchange-sha1,"
147 | + "diffie-hellman-group-exchange-sha256");
148 | configuration.put("StrictHostKeyChecking", "no");
149 |
150 | session = new JSch().getSession("bbsu", "ptt.cc");
151 | session.setConfig(configuration);
152 | session.connect(10 * 1000); // Timeout 10 seconds
153 | channel = (ChannelShell) session.openChannel("shell");
154 | channel.connect();
155 |
156 | is = channel.getInputStream();
157 | os = channel.getOutputStream();
158 |
159 | break;
160 | }
161 |
162 | renderScreenThread = new Thread(new Runnable() {
163 | @Override
164 | public void run() {
165 | try {
166 | renderScreen();
167 | } catch (IOException e) {
168 | e.printStackTrace();
169 | }
170 | }
171 | });
172 | renderScreenThread.setName("Render the screen from InputStream");
173 | renderScreenThread.setDaemon(true);
174 | renderScreenThread.start();
175 |
176 | }
177 |
178 | /**
179 | * 關閉連線
180 | * @throws IOException
181 | */
182 | public void close() throws IOException {
183 |
184 | if (this.protocol == null) {
185 | return;
186 | }
187 |
188 | switch (this.protocol) {
189 | case Telnet:
190 | default:
191 | if (tc == null) { return; }
192 | log.info("Close the telnet connection.");
193 | tc.disconnect();
194 | break;
195 | case SSH:
196 | if (session == null) { return; }
197 | log.info("Close the SSH connection.");
198 | channel.disconnect();
199 | session.disconnect();
200 | break;
201 | }
202 |
203 | }
204 |
205 | /**
206 | * 登入PTT
207 | * @param username
208 | * @param password
209 | * @param isDup
210 | * @throws Exception
211 | */
212 | public void login(String username, String password, boolean isDup) throws Exception {
213 |
214 | if (this.protocol == PTTClient.Protocol.Telnet) {
215 | send(username + ",\r\n" + password + "\r\n");
216 | } else if (expect("請輸入代號,或以 guest 參觀,或以 new 註冊:") == 0) {
217 | send(username + "\r\n" + password + "\r\n");
218 | } else {
219 | log.error("Login error.");
220 | }
221 |
222 | int midx = expect("密碼不對", "錯誤", "您想刪除其他重複登入的連線嗎?", "請按任意鍵繼續");
223 | if (midx < 0) {
224 | throw new Exception("連線逾時");
225 | } else if (midx < 2) {
226 | throw new Exception("密碼不對喔!請檢查帳號及密碼有無輸入錯誤。");
227 | } else if (midx == 2) {
228 | send(isDup ? "n\r\n" : "y\r\n");
229 | if (expect("請按任意鍵繼續") == 0) {
230 | send("\r\n");
231 | }
232 | } else if (midx == 3) {
233 | send("\r\n");
234 | }
235 |
236 | midx = expect("呼叫器", "您要刪除以上錯誤嘗試的記錄嗎?");
237 | if (midx == 1) {
238 | send("Y\r\n");
239 | midx = expect("呼叫器");
240 | }
241 |
242 | if (midx == 0) {
243 | log.info("登入成功");
244 | } else {
245 | throw new Exception("登入失敗");
246 | }
247 |
248 | }
249 |
250 | /**
251 | * 登出
252 | * @throws IOException
253 | */
254 | public void logout() throws IOException {
255 | send("qqqqqqeee\nY\n");
256 | }
257 |
258 | /**
259 | * 回上一層
260 | * @throws IOException
261 | */
262 | public void quit() throws IOException {
263 | send("q");
264 | }
265 |
266 | /**
267 | * 回到主選單
268 | * @throws IOException
269 | */
270 | public void toMainMenu() throws IOException {
271 | send("qqqqqq");
272 | if (expect("【主功能表】[\\s\\S]*呼叫器") != 0) {
273 | log.warn("無法回到【主功能表】");
274 | } else {
275 | log.info("已回到【主功能表】");
276 | }
277 | }
278 |
279 | /**
280 | * Go to the board by board name (Current entry maybe 置底文)
281 | * @param boardName
282 | * @throws Exception
283 | */
284 | public void toBoard(String boardName) throws Exception {
285 | toMainMenu();
286 | send("s" + boardName + "\r\n$$");
287 | int m1 = expect("看板《" + boardName + "》[\\s\\S]*" + BoardFooter);
288 | if (m1 != 0) {
289 | throw new Exception("Fail to go Board.");
290 | }
291 | }
292 |
293 | /**
294 | * Get the popularity of the board
295 | * @param boardName
296 | * @return
297 | * @throws Exception
298 | */
299 | public int getBoardPopularity(String boardName) throws Exception {
300 | toBoard(boardName);
301 | this.refresh();
302 | if (expect("編號.*日.*期.*作.*者.*文.*章.*標.*題.*人氣:\\d+") == 0) {
303 | Matcher m = Pattern.compile("\\d+").matcher(matchStr);
304 | if (m.find()) {
305 | return Integer.parseInt(m.group());
306 | }
307 | }
308 | return -1;
309 | }
310 |
311 | /**
312 | * Get the current screen
313 | * @return
314 | * @throws IOException
315 | */
316 | public Screen getCurrentScreen(String boardName) throws IOException {
317 | refresh(100);
318 | int matchIndex = expect(
319 | MenuHeader,
320 | "看板《" + boardName + "》[\\s\\S]*" + BoardFooter,
321 | PostFooter
322 | );
323 | if (matchIndex == 0) {
324 | return Screen.MainMenu;
325 | } else if (matchIndex == 1) {
326 | return Screen.Board;
327 | } else if (matchIndex == 2) {
328 | return Screen.Post;
329 | } else {
330 | return Screen.Unknown;
331 | }
332 | }
333 |
334 | /**
335 | * setPlainTextMode
336 | * @throws IOException
337 | */
338 | public void setPlainTextMode(String boardName) throws IOException {
339 | this.refresh();
340 | send("l\\3q");
341 | expect("看板《" + boardName + "》[\\s\\S]*" + BoardFooter);
342 | }
343 |
344 | /**
345 | * Move to up entry
346 | * @return The entry information after moving
347 | * @throws Exception
348 | */
349 | public Entry moveUpEntry(String boardName) throws Exception {
350 | Entry newEntry, oldEntry = getBasicEntryInfo(boardName);
351 | if (oldEntry.number.equals("1")) {
352 | throw new Exception("Aready at the toppest entry.");
353 | }
354 | send("k");
355 | int times = 0;
356 | do {
357 | Thread.sleep(100);
358 | newEntry = getBasicEntryInfo(boardName);
359 | if (++times > 100) {
360 | throw new Exception("Can not move to the up entry.");
361 | }
362 | } while (newEntry.number == oldEntry.number);
363 | return getFullEntryInfo(boardName);
364 | }
365 |
366 | /**
367 | * Move to down entry
368 | * @return The entry information after moving
369 | * @throws Exception
370 | */
371 | public Entry moveDownEntry(String boardName) throws Exception {
372 | Entry newEntry, oldEntry = getBasicEntryInfo(boardName);
373 | if (oldEntry.number.equals("★")) {
374 | throw new Exception("Aready at the downest entry.");
375 | }
376 | send("n");
377 | int times = 0;
378 | do {
379 | Thread.sleep(100);
380 | newEntry = getBasicEntryInfo(boardName);
381 | if (++times > 100) {
382 | throw new Exception("Can not move to the down entry.");
383 | }
384 | } while (newEntry.number == oldEntry.number);
385 | return getFullEntryInfo(boardName);
386 | }
387 |
388 | /**
389 | * Go to the latest post entry
390 | * @throws Exception
391 | */
392 | public Entry toLatestPost(String boardName) throws Exception {
393 | send("$$"); // Skip the welcome of the board & to the latest article
394 | refresh(300);
395 | if (expect("看板《" + boardName + "》[\\s\\S]*" + BoardFooter) != 0) {
396 | throw new Exception("Current screen is not \"Board\"");
397 | }
398 |
399 | Entry oldEntry = getFullEntryInfo(boardName), newEntry = null;
400 | if (!oldEntry.number.equals("★") && !oldEntry.author.equals("-")) {
401 | return oldEntry;
402 | }
403 | for (int times=0; ; times++) {
404 |
405 | try {
406 |
407 | send("k");
408 |
409 | int times2 = 0;
410 | do {
411 | Thread.sleep(100);
412 | newEntry = getFullEntryInfo(boardName);
413 | if (++times2 > 100) {
414 | return newEntry;
415 | }
416 | } while (oldEntry.id == newEntry.id);
417 |
418 | if (!newEntry.number.equals("★") && !newEntry.author.equals("-")) {
419 | break;
420 | }
421 | oldEntry = newEntry;
422 |
423 | } catch (Exception e) {
424 |
425 | }
426 |
427 | if (times >= 100) {
428 | throw new Exception("Can not go to latest post.");
429 | }
430 |
431 | }
432 |
433 | return newEntry;
434 | }
435 |
436 | public Entry getBasicEntryInfo(String boardName) throws Exception {
437 | if (expect("看板《" + boardName + "》[\\s\\S]*" + BoardFooter) == 0) {
438 | Matcher matcher = ENTRYPATTER_PATTERN.matcher(matchStr);
439 | if (matcher.find()) {
440 | String id = null;
441 | String url = null;
442 | String number = matcher.group("id").trim();
443 | String status = matcher.group("status").trim();
444 | String karma = matcher.group("karma").trim();
445 | String date = matcher.group("date").trim();
446 | String author = matcher.group("author").trim();
447 | String title = matcher.group("title").trim();
448 | return new Entry(id, number, status, karma, date, author, title, url);
449 | } else {
450 | throw new Exception("Can not match entry. " + matchStr);
451 | }
452 | } else {
453 | throw new Exception("Screen is not \"Board\"");
454 | }
455 | }
456 |
457 | /**
458 | * getFullEntryInfo
459 | * @return
460 | * @throws Exception
461 | */
462 | public Entry getFullEntryInfo(String boardName) throws Exception {
463 |
464 | boolean isSuccess = false;
465 | int times = 0;
466 | Matcher matcher = null;
467 |
468 | do {
469 |
470 | expect("看板《" + boardName + "》[\\s\\S]*" + BoardFooter);
471 |
472 | matcher = ENTRYPATTER_PATTERN.matcher(matchStr);
473 | if (matcher.find()) {
474 | isSuccess = true;
475 | } else {
476 | log.warn("Faild to match entry. " + matchStr.replaceAll("\\s+", " "));
477 | times++;
478 | }
479 |
480 | if (times >= 5) {
481 | log.error("Faild to match entry. " + matchStr.replaceAll("\\s+", " "));
482 | throw new Exception("Faild to match entry.");
483 | }
484 |
485 | } while (!isSuccess);
486 |
487 | String id = null;
488 | String url = null;
489 | String number = matcher.group("id").trim();
490 | String status = matcher.group("status").trim();
491 | String karma = matcher.group("karma").trim();
492 | String date = matcher.group("date").trim();
493 | String author = matcher.group("author").trim();
494 | String title = matcher.group("title").trim();
495 |
496 | if (!author.equals("-")) {
497 | boolean success = false;
498 | int count = 0;
499 | do {
500 | String[] temp = this.getAID().split("\\t");
501 | if (temp.length > 0 && !temp[0].equals("")) {
502 | id = temp[0];
503 | }
504 | if (temp.length > 1 && !temp[1].equals("")) {
505 | url = temp[1];
506 | }
507 | if (url != null && URL_VERIFY.matcher(url).find()) {
508 | success = true;
509 | }
510 | count++;
511 | } while (!success && count < 5);
512 | }
513 |
514 | Entry entry = new Entry(id, number, status, karma, date, author, title, url);
515 | return entry;
516 | }
517 |
518 | /**
519 | * Get post ID (e.g. 1L4GI8SM)
520 | * @return
521 | * @throws IOException
522 | */
523 | public String getAID() throws IOException {
524 |
525 | String aid = "";
526 | String url = "";
527 |
528 | send("Q");
529 | if (expect("請按任意鍵繼續") != 0) {
530 | return "";
531 | }
532 |
533 | // 文章代碼(AID): #1L4GI8SM
534 | if (expect(AID_PATTERN) == 0) {
535 | Matcher m = AID_PATTERN.matcher(matchStr);
536 | if (m.find()) {
537 | aid = m.group("aid");
538 | }
539 | }
540 | if (expect(URL_PATTERN) == 0) {
541 | Matcher m = URL_PATTERN.matcher(matchStr);
542 | if (m.find()) {
543 | url = m.group("url");
544 | }
545 | }
546 |
547 | send("\n");
548 |
549 | return aid + "\t" + url;
550 | }
551 |
552 | /**
553 | * toPostByNum
554 | * @param postNum
555 | * @throws Exception
556 | */
557 | public Entry toEntryByNum(String boardName, int postNum) throws Exception {
558 | send(Integer.toString(postNum) + "\r\nhq");
559 | this.refresh(200);
560 | if (expect("看板《" + boardName + "》[\\s\\S]*" + BoardFooter) != 0) {
561 | throw new Exception("Current screen is not \"Board\"");
562 | }
563 | return getFullEntryInfo(boardName);
564 | }
565 |
566 | /**
567 | * Go to post by ID
568 | * @param postID
569 | * @return
570 | * @throws Exception
571 | */
572 | public Entry toEntryByID(String boardName, String postID) throws Exception {
573 | log.info("Go to AID: #"+ postID);
574 | send("#" + postID + "\r\nhq");
575 | this.refresh(200);
576 | if (expect("看板《" + boardName + "》[\\s\\S]*" + BoardFooter) != 0) {
577 | throw new Exception("Current screen is not \"Board\"");
578 | }
579 | return getFullEntryInfo(boardName);
580 | }
581 |
582 | /**
583 | * 下載目前游標位置的貼文
584 | * @return
585 | * @throws Exception
586 | */
587 | public String downloadCurrentPost() throws Exception {
588 |
589 | StringBuilder content = new StringBuilder();
590 | int percent = -1;
591 | int fromLine = -1, toLine = -1;
592 | int fromLine_bk = -1, toLine_bk = 0;
593 |
594 | try {
595 |
596 | send("l\f");
597 |
598 | while (true) {
599 |
600 | int midx = expect(PostFooter, "此頁內容會依閱讀者不同", "此文章無內容[\\s\\S]*按任意鍵繼續");
601 | if (midx < 0) {
602 | log.warn("[Skip] Unexpected PostFooter");
603 | break;
604 | } else if (midx == 1) {
605 | log.info("[Skip] 此頁內容會依閱讀者不同");
606 | break;
607 | } else if (midx == 2) {
608 | log.info("Screen: 此文章無內容 [按任意鍵繼續]");
609 | break;
610 | }
611 |
612 | String[] lines = beforeStr.split("\\n");
613 | String footer = matchStr;
614 |
615 | Matcher matcher = PROGRESS_PATTERN.matcher(footer);
616 | if (!matcher.find()) {
617 | throw new Exception("Faild to match footer \"" + footer + "\"");
618 | }
619 | fromLine_bk = fromLine;
620 |
621 | percent = Integer.parseInt(matcher.group("percent"));
622 | fromLine = Integer.parseInt(matcher.group("from"));
623 | toLine = Integer.parseInt(matcher.group("to"));
624 | if (percent != 100 && fromLine == fromLine_bk) {
625 | Thread.sleep(50);
626 | continue;
627 | }
628 |
629 | //log.trace(String.format("%4d ~ %4d\t%3d%%", fromLine, toLine, percent));
630 |
631 | // Append content
632 | int overlapLines = 0;
633 | if (fromLine <= toLine_bk) {
634 | overlapLines = toLine_bk - fromLine + 1;
635 | }
636 | for (int i=overlapLines; i<(lines.length-1); i++) {
637 | content.append(lines[i].trim()).append("\n");
638 | }
639 | toLine_bk = toLine;
640 |
641 | // Next page or 100% break loop
642 | if (percent == 100) {
643 | break;
644 | } else {
645 | send((char) 0x06 + "\f");
646 | }
647 |
648 | }
649 |
650 | send("q\f");
651 |
652 | } catch (IOException e) {
653 | e.printStackTrace();
654 | throw new Exception("下載貼文發生錯誤");
655 | }
656 |
657 | return content.toString();
658 | }
659 |
660 | public String getScreen() {
661 | StringBuilder sb = new StringBuilder(24 * 80);
662 | for (int i=0; i<24; i++) {
663 | for (int j=0; j<80; j++) {
664 | if (screen[i][j] != 0x00) {
665 | sb.append(screen[i][j]);
666 | }
667 | }
668 | sb.append("\n");
669 | }
670 | return sb.toString();
671 | }
672 |
673 | public int expect(Object... patterns) {
674 | return expect(DEFAULT_TIMEOUT, patterns);
675 | }
676 |
677 | public int expect(int timeout, Object... patterns) {
678 | ArrayList list = new ArrayList();
679 | for (Object obj : patterns) {
680 | if (obj instanceof String)
681 | list.add(Pattern.compile((String) obj));
682 | else if (obj instanceof Pattern)
683 | list.add((Pattern) obj);
684 | else {
685 | list.add(Pattern.compile(Pattern.quote(obj.toString())));
686 | }
687 | }
688 | return expect(timeout, list);
689 | }
690 |
691 | public int expect(int timeout, List list) {
692 |
693 | long endTime = System.currentTimeMillis() + (long) timeout;
694 |
695 | while (true) {
696 |
697 | String currentScreen = getScreen();
698 |
699 | for (int i = 0; i < list.size(); i++) {
700 | Matcher m = list.get(i).matcher(currentScreen);
701 | if (m.find()) {
702 | int matchStart = m.start(), matchEnd = m.end();
703 | beforeStr = currentScreen.substring(0, matchStart);
704 | matchStr = m.group();
705 | afterStr = currentScreen.substring(matchEnd);
706 | return i;
707 | }
708 | }
709 |
710 | long waitTime = endTime - System.currentTimeMillis();
711 | if (waitTime <= 0) {
712 | return RETV_TIMEOUT;
713 | }
714 |
715 | try {
716 | Thread.sleep(100);
717 | } catch (InterruptedException e) {
718 | e.printStackTrace();
719 | }
720 |
721 | }
722 |
723 | }
724 |
725 | public void refresh() throws IOException {
726 | refresh(300);
727 | }
728 |
729 | public void refresh(int waitTime) throws IOException {
730 | send("\f");
731 | try {
732 | Thread.sleep(waitTime);
733 | } catch (InterruptedException e) {
734 | e.printStackTrace();
735 | }
736 | }
737 |
738 | /**
739 | *
740 | * @throws KeyManagementException
741 | * @throws NoSuchAlgorithmException
742 | */
743 | public static void enableSSLSocket() throws KeyManagementException, NoSuchAlgorithmException {
744 | HttpsURLConnection.setDefaultHostnameVerifier(new HostnameVerifier() {
745 | @Override
746 | public boolean verify(String hostname, SSLSession session) {
747 | return true;
748 | }
749 | });
750 |
751 | SSLContext context = SSLContext.getInstance("TLS");
752 | context.init(null, new X509TrustManager[]{new X509TrustManager() {
753 | public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException {
754 | }
755 |
756 | public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException {
757 | }
758 |
759 | public X509Certificate[] getAcceptedIssuers() {
760 | return new X509Certificate[0];
761 | }
762 | }}, new SecureRandom());
763 | HttpsURLConnection.setDefaultSSLSocketFactory(context.getSocketFactory());
764 | }
765 |
766 | /**
767 | * Download post by URL (PTT Web Version)
768 | * @param url
769 | * @return
770 | * @throws IOException
771 | */
772 | public static Post downloadPostByURL(String url) throws Exception {
773 | return downloadPostByURL(url, DEFAULT_TIMEOUT);
774 | }
775 |
776 | /**
777 | * Download post by URL (PTT Web Version)
778 | * @param url
779 | * @param timeout
780 | * @return
781 | * @throws Exception
782 | */
783 | public static Post downloadPostByURL(String url, int timeout) throws Exception {
784 |
785 | Document doc = Jsoup.connect(url)
786 | .userAgent(UserAgent)
787 | .timeout(timeout)
788 | .cookie("over18", "1")
789 | .get();
790 |
791 | return PostAnalysiser.parsePost(doc, url);
792 |
793 | }
794 |
795 | /**
796 | * Download post by URL with real time update (PTT Web Version)
797 | * @param url
798 | * @param timeout
799 | * @return
800 | * @throws Exception
801 | */
802 | public static Post downloadPostByURLwithRU(String url, int timeout) throws Exception {
803 |
804 | Document doc = Jsoup.connect(url)
805 | .userAgent(UserAgent)
806 | .timeout(timeout)
807 | .cookie("over18", "1")
808 | .get();
809 |
810 | try {
811 |
812 | Element pe = doc.getElementById("article-polling");
813 | String pollUrl = "http://www.ptt.cc" + pe.attr("data-pollurl");
814 | String longpollurl = "http://www.ptt.cc" + pe.attr("data-longpollurl");
815 |
816 | String longpollJSON = Jsoup.connect(longpollurl)
817 | .userAgent(UserAgent)
818 | .timeout(5000)
819 | .ignoreContentType(true)
820 | .cookie("over18", "1")
821 | .execute()
822 | .body();
823 |
824 | JSONObject obj = new JSONObject(longpollJSON);
825 | String size = obj.get("size").toString();
826 | String sizeSig = obj.get("sig").toString();
827 |
828 | String pollJSON = Jsoup.connect(pollUrl + "&size=" + size + "&size-sig=" + sizeSig)
829 | .userAgent(UserAgent)
830 | .timeout(3000)
831 | .ignoreContentType(true)
832 | .cookie("over18", "1")
833 | .execute()
834 | .body();
835 |
836 | obj = new JSONObject(pollJSON);
837 | String contentHtml = obj.get("contentHtml").toString();
838 |
839 | doc.getElementById("main-content").append(contentHtml);
840 |
841 | } catch (Exception e) {
842 | }
843 |
844 | return PostAnalysiser.parsePost(doc, url);
845 |
846 | }
847 |
848 | private void renderScreen() throws IOException {
849 |
850 | BufferedReader br = new BufferedReader(
851 | new InputStreamReader(is, "UTF-8"));
852 | PushbackReader pr = new PushbackReader(br, 128);
853 |
854 | int nc = 0;
855 | char[] cb = new char[4096];
856 |
857 | while ((nc = pr.read(cb)) != -1) {
858 |
859 | if (isPrintSource) {
860 | System.out.print(new String(cb, 0, nc));
861 | }
862 |
863 | for (int pos = 0; pos < nc; pos++) {
864 |
865 | char c = cb[pos];
866 |
867 | switch (c) {
868 | case 0x08: // BS
869 | if (--posX < 0) {
870 | --posY;
871 | posX = 79;
872 | }
873 | continue;
874 | case 0x0A: // LF
875 | posY++;
876 | break;
877 | case 0x0D: // CR
878 | posX = 0;
879 | break;
880 | case 0x1B: // ESC
881 | int endPos = findEndPosOfVT100Conctrl(cb, nc, pos);
882 | if (endPos == -1) {
883 | pr.unread(cb, pos, nc - pos);
884 | pos = nc;
885 | } else {
886 | String ctrlStr = new String(cb, pos, endPos - pos + 1);
887 | Matcher matcher = VT100ControlPattern.matcher(ctrlStr);
888 | if (matcher.find()) {
889 | String code = matcher.group("code");
890 | String type = matcher.group("type");
891 | processVT100Conctrl(code, type);
892 | } else {
893 | log.error("Unknown VT100 Conctrl");
894 | }
895 | pos = endPos;
896 | }
897 | break;
898 | default:
899 |
900 | if (c < 0x20 || c == 0x7F) {
901 | //System.out.printf("ASCII Conctrl: %c(%x)\n----------\n", ch, (int)(ch));
902 | continue;
903 | }
904 |
905 | if (posX >= 0 && posY >= 0) {
906 |
907 | if (isHalfWidth(c)) {
908 | screen[posY][posX] = c;
909 | } else {
910 | screen[posY][posX] = c;
911 | if (posX < 79) {
912 | screen[posY][++posX] = 0x00;
913 | }
914 | }
915 | posX++;
916 | if (posX >= 80) {
917 | //posY++;
918 | posX = 79;
919 | }
920 |
921 | }
922 | }
923 |
924 | }
925 |
926 | if (isPrintScreen) {
927 | printScreen();
928 | }
929 |
930 | }
931 |
932 | pr.close();
933 | br.close();
934 |
935 | }
936 |
937 | private int findEndPosOfVT100Conctrl(char[] cb, int nc, int pos) {
938 | int endPos = -1;
939 | for (int i = pos + 1; i < nc; i++) {
940 | char ec = cb[i];
941 | if (ec == 'A' || ec == 'B' || ec == 'C' || ec == 'D' || ec == 'H' ||
942 | ec == 'J' || ec == 'K' || ec == 'm' || ec == 's' || ec == 'u') {
943 | endPos = i;
944 | break;
945 | }
946 | }
947 | return endPos;
948 | }
949 |
950 | private void processVT100Conctrl(String code, String type) {
951 |
952 | switch (type) {
953 | case "m":
954 | break;
955 | case "H":
956 | if (code.equals("")) {
957 | // Cursor Home
958 | posY = posX = 0;
959 | } else {
960 | // Cursor to position
961 | final Pattern p = Pattern.compile("(?\\d+);(?\\d+)");
962 | Matcher m = p.matcher(code);
963 | if (m.find()) {
964 | posY = Integer.parseInt(m.group("Y")) - 1;
965 | posX = Integer.parseInt(m.group("X")) - 1;
966 | }
967 | }
968 | break;
969 | case "J":
970 | if (code.equals("2")) {
971 | // Erases the screen with the background colour and moves the cursor to home.
972 | clearScreen();
973 | posY = posX = 0;
974 | } else if (code.equals("1")) {
975 | // Erases the screen from the current line up to the top of the screen.
976 | for (int i=0; i<=posY; i++) {
977 | Arrays.fill(screen[i], ' ');
978 | }
979 | } else if (code.equals("")) {
980 | // Erases the screen from the current line down to the bottom of the screen.
981 | for (int i=posY; i<24; i++) {
982 | Arrays.fill(screen[i], ' ');
983 | }
984 | }
985 | break;
986 | case "K":
987 | if (code.equals("")) {
988 | // Erases from the current cursor position to the end of the current line.
989 | for (int i=posX; i<80; i++) {
990 | screen[posY][i] = ' ';
991 | }
992 | } else if (code.equals("1")) {
993 | // Erases from the current cursor position to the start of the current line.
994 | for (int i=0; i