";
124 | int start = source.indexOf(str, (descStartIndex + startIndexStr.length()));
125 | desc = source.substring(start + str.length(), source.indexOf("<",start+str.length()));
126 | }else{
127 | desc = "";
128 | }
129 | //【照片评论数】
130 | //
29回应
131 | String commentTatolStartIndexStr = "
";
132 | int commentTatolStartIndex = source.indexOf(commentTatolStartIndexStr);
133 | Integer commentTatol = null;
134 | if (commentTatolStartIndex != -1) {
135 | //“3回应”
136 | String s = source.substring(commentTatolStartIndex + commentTatolStartIndexStr.length(), source.indexOf("",commentTatolStartIndex));
137 | commentTatol = Integer.valueOf(s.replace("回应", ""));
138 | }
139 | //【照片】
140 | // http://img3.douban.com/view/photo/thumb/public/p2125663360.jpg
141 | // http://img3.douban.com/view/photo/photo/public/p2109950882.jpg
142 | imageURL = imageURL.replace("photo/m", "photo/l").trim(); //thumb——>photo:缩略图——>大图
143 | desc = desc.replace("\\t\\n","").trim();
144 | if (!map.containsKey(imageURL)) {
145 | BGImage bgImage = new BGImage(desc, imageURL, commentTatol);
146 | map.put(imageURL, bgImage);
147 | }
148 | }
149 |
150 | @Override
151 | public boolean checkBGImage(BGImage bgImage) {
152 | return bgImage.getUrl().indexOf("albumicon") < 0;
153 | }
154 |
155 | @Override
156 | public String getCommentURL(Album album, BGImage image) {
157 | //http://img3.douban.com/view/photo/thumb/public/p2109950882.jpg
158 | //http://movie.douban.com/photos/photo/2109950882/
159 | return "http://movie.douban.com/photos/photo/" + image.getId();
160 | }
161 |
162 | @Override
163 | public String getAlbumDesc(String source) {
164 | return null;
165 | }
166 |
167 | }
168 |
--------------------------------------------------------------------------------
/src/main/java/cn/blackgray/douban/album/download/service/handler/AlbumHandlerFactory.java:
--------------------------------------------------------------------------------
1 | package cn.blackgray.douban.album.download.service.handler;
2 |
3 | import java.util.ArrayList;
4 | import java.util.HashMap;
5 | import java.util.List;
6 | import java.util.Map;
7 | import java.util.Map.Entry;
8 |
9 | import cn.blackgray.douban.album.download.common.Console;
10 | import cn.blackgray.douban.album.download.common.utils.ReflectUtils;
11 | import cn.blackgray.douban.album.download.service.handler.finder.IAlbumURLFinder;
12 | import cn.blackgray.douban.album.download.service.handler.handler.DefaultAlbumHandler;
13 |
14 |
15 | /**
16 | * 相册处理器工厂类,根据不同的消息,生成不同的消息处理器实现
17 | */
18 | public class AlbumHandlerFactory {
19 |
20 | public static final String PACKAGE_FINDER = "cn.blackgray.douban.album.download.service.handler.finder.impl";
21 | public static final String PACKAGE_HANDER = "cn.blackgray.douban.album.download.service.handler.handler";
22 |
23 | public static Map
albumURLFinderMap = new HashMap();
24 | public static Map> albumHandlerClassMap = new HashMap>();
25 |
26 |
27 | static{
28 | //反射获取所有相册地址查询器对象
29 | List> finderClassList = ReflectUtils.getClassWithPackage(PACKAGE_FINDER);
30 | for (Class> finderClass : finderClassList) {
31 | try {
32 | IAlbumURLFinder obj = (IAlbumURLFinder) finderClass.newInstance();
33 | albumURLFinderMap.put(obj.getURLRegex(), obj);
34 | } catch (InstantiationException e) {
35 | e.printStackTrace();
36 | } catch (IllegalAccessException e) {
37 | e.printStackTrace();
38 | }
39 | }
40 | //反射获取所有处理器类
41 | List> handlerClassList = ReflectUtils.getClassWithPackage(PACKAGE_HANDER);
42 | for (Class> handerClass : handlerClassList) {
43 | try {
44 | AlbumHandler obj = (AlbumHandler) handerClass.newInstance();
45 | albumHandlerClassMap.put(obj.getURLRegex(), handerClass);
46 | } catch (InstantiationException e) {
47 | e.printStackTrace();
48 | } catch (IllegalAccessException e) {
49 | e.printStackTrace();
50 | }
51 | }
52 | }
53 |
54 | public static List getHandler(String url) {
55 | return getHandler(url,true);
56 | }
57 |
58 | public static List getHandler(String url, boolean isPrintLog) {
59 | List albumURLList = new ArrayList();
60 | //1.查看是否存在URL查询器,如果有,执行查询,获取相册地址
61 | boolean hasFinder = false;
62 | for (Entry element : albumURLFinderMap.entrySet()) {
63 | if (url.matches(element.getKey())) {
64 | IAlbumURLFinder albumURLFinder = element.getValue();
65 | if (albumURLFinder.getFindFailMsg() != null) {
66 | //提示相册地址解析失败
67 | Console.print("地址:" + url);
68 | Console.print(albumURLFinder.getFindFailMsg());
69 | hasFinder = true;
70 | break;
71 | }else {
72 | //正常获取相册地址
73 | List albumURLs = albumURLFinder.findAlbumURL(url);
74 | for (String u : albumURLs) {
75 | if(isPrintLog){
76 | Console.print("获取相册地址:" + u);
77 | }
78 | }
79 | albumURLList.addAll(albumURLs);
80 | hasFinder = true;
81 | break;
82 | }
83 | }
84 | }
85 | if (hasFinder == false) {
86 | albumURLList.add(url);
87 | if(isPrintLog){
88 | Console.print("获取图片地址:" + url);
89 | }
90 | }
91 | //2.根据相册地址,获取处理器
92 | List handlerList = new ArrayList();
93 | boolean hasHander = false;
94 | for (String albumURL : albumURLList) {
95 | for (Entry> element : albumHandlerClassMap.entrySet()) {
96 | if (element.getKey() != null && albumURL.matches(element.getKey())) {
97 | Class> clazz = element.getValue();
98 | AlbumHandler handler;
99 | try {
100 | handler = (AlbumHandler) clazz.newInstance();
101 | handler.setAlbumURL(albumURL);
102 | handlerList.add(handler);
103 | if(isPrintLog){
104 | Console.print("创建相册处理器:" + clazz.getSimpleName() + " - " + albumURL);
105 | }
106 | hasHander = true;
107 | } catch (IllegalArgumentException e) {
108 | e.printStackTrace();
109 | } catch (SecurityException e) {
110 | e.printStackTrace();
111 | } catch (InstantiationException e) {
112 | e.printStackTrace();
113 | } catch (IllegalAccessException e) {
114 | e.printStackTrace();
115 | }
116 | break;
117 | }
118 | }
119 | if (hasHander == false) {
120 | DefaultAlbumHandler defaultAlbumHandler = new DefaultAlbumHandler();
121 | defaultAlbumHandler.setAlbumURL(albumURL);
122 | handlerList.add(defaultAlbumHandler);
123 | if(isPrintLog){
124 | Console.print("创建默认相册处理器:" + defaultAlbumHandler.getClass().getSimpleName() + " - " + albumURL);
125 | }
126 | }
127 | }
128 | return handlerList;
129 |
130 | }
131 |
132 | public static void main(String[] args) {
133 |
134 | System.out.println("-------------相册列表--------------");
135 | getHandler("http://www.douban.com/people/blackgray/photos/");
136 | System.out.println("-------------相册--------------");
137 | getHandler("http://www.douban.com/photos/album/67952443/");
138 |
139 | System.out.println("-------------小站--------------");
140 | getHandler("http://site.douban.com/108128/widget/photos/7528342/");
141 | getHandler("http://site.douban.com/zheng/widget/photos/17304118/");
142 |
143 | System.out.println("-------------影人首页--------------");
144 | getHandler("http://movie.douban.com/celebrity/1048027/");
145 | System.out.println("-------------影人--------------");
146 | getHandler("http://movie.douban.com/celebrity/1048027/photos/");
147 |
148 | System.out.println("-------------活动首页--------------");
149 | getHandler("http://www.douban.com/online/11127307/");
150 | System.out.println("-------------活动--------------");
151 | getHandler("http://www.douban.com/online/11127307/album/72416214/");
152 |
153 | System.out.println("-------------其他--------------");
154 | getHandler("http://www.baidu.com/");
155 |
156 | }
157 |
158 | }
159 |
--------------------------------------------------------------------------------
/src/main/java/cn/blackgray/douban/album/download/ui/FailFileFrame.form:
--------------------------------------------------------------------------------
1 |
2 |
3 |
117 |
--------------------------------------------------------------------------------
/src/main/java/cn/blackgray/douban/album/download/common/Common.java:
--------------------------------------------------------------------------------
1 | package cn.blackgray.douban.album.download.common;
2 |
3 | import java.awt.Desktop;
4 | import java.io.BufferedReader;
5 | import java.io.IOException;
6 | import java.io.InputStream;
7 | import java.io.InputStreamReader;
8 | import java.io.UnsupportedEncodingException;
9 | import java.net.URI;
10 | import java.net.URLDecoder;
11 | import java.text.SimpleDateFormat;
12 |
13 | import javax.swing.JFrame;
14 | import javax.swing.JOptionPane;
15 |
16 | import cn.blackgray.douban.album.download.common.utils.CommonUtils;
17 |
18 | /**
19 | * Shared constants and global settings.
20 | * @author BlackGray
21 | */
22 | public class Common {
23 |
24 | public static String VERSION = "v20241003";
25 |
26 | public static final String DEFAULT_DOC_NAME = "描述.txt";
27 | public static final String DEFAULT_FAIL_FILE_DOC_NAME = "下载失败图片记录.txt";
28 | public static final String DEFAULT_HTML_NAME = "index.html";
29 | public static final String DEFAULT_HTML_RESOURCE_DIR = "resource";
30 | public static final String DEFAULT_RAW_DIR = "raw";
31 | public static final String DEFAULT_ALBUM_ROOT_PATH_STR = "根目录"; //placeholder for the default album root directory
32 |
33 | //Date/time formatting. NOTE(review): SimpleDateFormat is documented as not synchronized — confirm this shared instance is never used from several threads
34 | public static SimpleDateFormat SIMPLE_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
35 |
36 | //Image download result & status
37 | /** Image download result & status - image already exists */
38 | public static final int IMAGE_DOWNLOAD_STATUS_EXISTS = 0;
39 | /** Image download result & status - download finished */
40 | public static final int IMAGE_DOWNLOAD_STATUS_FINISH = 1;
41 | /** Image download result & status - the image's network resource does not exist */
42 | public static final int IMAGE_DOWNLOAD_STATUS_URL_NOT_EXISTS = 2;
43 | /** Image download result & status - download error, the downloaded file is smaller than the network resource */
44 | public static final int IMAGE_DOWNLOAD_STATUS_DOWNLOAD_FAIL = 3;
45 |
46 | // public static int albumType; //album type - normal, Douban album, Douban online event
47 |
48 | public static final String URL_HELP = "http://www.douban.com/note/206320326/";
49 | public static final String URL_DOUPIC = "http://www.douban.com/group/doupic/";
50 |
51 | public static final long TIME_PROCESS_MIN = 1*60*1000; //boundary time - in milliseconds
52 | public static final long TIME_PROCESS_SLEEP = 60; //sleep time - in seconds
53 | public static final long TIME_ONE_PAGE_INFO_PROCESS_SLEEP = 500; //interval between processing the image info of single pages - in milliseconds
54 |
55 | public static final int PROCESS_UNIT_SIZE = 20; //size of one processing unit
56 |
57 | public static final String CHARTSET_UTF8 = "utf-8";
58 | public static final String CHARTSET_GBK = "gbk";
59 | public static final String CHARTSET_GB2312 = "gb2312";
60 |
61 | public static final int DOWNLOAD_THREAD = 15; //number of download threads
62 |
63 | public static String PATH_DOWNLOAD = "";
64 | public static String PATH_APP = "";
65 | public static boolean IS_UPDATE = false;
66 | public static boolean IS_DOWNLOAD_RAW = false;
67 |
68 | public static final String IMAGE_TYPE = "gif|jpg|png";
69 |
70 | public static final Integer AUTO_DOWNLOAD_FAIL_FILE = 10; //number of automatic re-download attempts for failed files
71 |
72 | public static String HTML_TEMPLATE_PAGE = "";
73 | public static String HTML_TEMPLATE_IMAGE = "";
74 |
75 | public static final String HTML_TAG_IMAGES = "${images}";
76 | public static final String HTML_TAG_IMAGES_TOTAL = "${imagesTotal}";
77 | public static final String HTML_TAG_NAME = "${name}";
78 | public static final String HTML_TAG_URL = "${url}";
79 | public static final String HTML_TAG_TITLE = "${title}";
80 | public static final String HTML_TAG_ALBUM_DESC = "${albumDesc}";
81 | public static final String HTML_TAG_DOWNLOAD_TIME = "${downloadTime}";
82 | public static final String HTML_TAG_CHARSET = "GBK";
83 |
84 |
85 | public static final String HTML_TAG_OWNER = "${owner}";
86 | public static final String HTML_TAG_DESC = "${desc}";
87 | public static final String HTML_TAG_COMMENT_URL = "${commentURL}";
88 | public static final String HTML_TAG_NUMBER = "${num}";
89 | public static final String HTML_TAG_IMAGE = "${image}";
90 | public static final String HTML_TAG_RAW = "${raw}";
91 |
92 |
93 | /*Raw image type - 1 = larger image, 2 = same dimensions, high-quality uncompressed image*/
94 | public static final Integer RAW_TYPE_LARGE = 1;
95 | public static final Integer RAW_TYPE_UNCOMPRESSED = 2;
96 |
97 | static{
98 | //Path of the JAR (code-source location of this class)
99 | String jarPath;
100 | try {
101 | jarPath = URLDecoder.decode(Common.class.getProtectionDomain().getCodeSource().getLocation().getPath(),"utf-8");
102 | //Configuration file path
103 | PATH_DOWNLOAD = jarPath.substring(1, jarPath.lastIndexOf("/")); //NOTE(review): this value is overwritten by the assignment two lines below
104 | //Default the download path to the directory containing the program
105 | PATH_DOWNLOAD = jarPath;
106 | if (CommonUtils.isWindows()) {
107 | //On Windows, strip the leading "/" of the path if present (and cut at the last "/")
108 | if(PATH_DOWNLOAD.startsWith("/")) {
109 | PATH_DOWNLOAD = jarPath.substring(1, jarPath.lastIndexOf("/"));
110 | }
111 | }else {
112 | PATH_DOWNLOAD = jarPath.substring(0, jarPath.lastIndexOf("/"));
113 | }
114 | // //Directory containing the program
115 | PATH_APP = PATH_DOWNLOAD;
116 | } catch (UnsupportedEncodingException e) {
117 | e.printStackTrace();
118 | }
119 |
120 | //Load the HTML page template from the classpath; line terminators are dropped because lines are appended without separators
121 | StringBuffer sb = new StringBuffer();
122 | InputStream inputStream = Common.class.getResourceAsStream("/cn/blackgray/douban/album/download/resources/html/Template.html"); //NOTE(review): getResourceAsStream returns null when the resource is missing; no null check follows
123 | BufferedReader bw; //NOTE(review): the reader (and the underlying stream) is never closed in this block
124 | try {
125 | bw = new BufferedReader(new InputStreamReader(inputStream,"UTF-8"));
126 | String str;
127 | try {
128 | while ((str = bw.readLine()) != null) {
129 | sb.append(str);
130 | }
131 | } catch (IOException e) {
132 | e.printStackTrace();
133 | }
134 | HTML_TEMPLATE_PAGE = sb.toString();
135 |
136 | //Initialise the HTML template of a single image unit. NOTE(review): the literal below looks truncated in this listing (markup missing, string spans several lines) — restore it from the original file
137 | HTML_TEMPLATE_IMAGE = "${owner}${desc}
${num}

";
138 |
139 | } catch (UnsupportedEncodingException e) {
140 | e.printStackTrace();
141 | }
142 | }
143 |
144 | /**
145 | * Opens a page in the system default browser; shows an error dialog when that is not possible.
146 | * @param url address to open
147 | * @param frame parent component passed to the error dialogs
148 | */
149 | public static void openURLWithBrowse(String url, JFrame frame){
150 | //Check whether the current system supports the Java AWT Desktop extension
151 | if(Desktop.isDesktopSupported()){
152 | try {
153 | //Create a URI instance
154 | URI uri = URI.create(url);
155 | //Get the desktop extension of the current system
156 | Desktop desktop = Desktop.getDesktop();
157 | //Check whether the desktop supports the requested action (nothing happens when it does not)
158 | if(desktop.isSupported(Desktop.Action.BROWSE)){
159 | //Open the link with the system default browser
160 | desktop.browse(uri);
161 | }
162 | } catch(java.lang.NullPointerException e){
163 | e.printStackTrace();
164 | } catch (java.io.IOException e) {
165 | e.printStackTrace();
166 | String msg = "无法获取系统默认浏览器,地址:" + url;
167 | JOptionPane.showMessageDialog(frame, msg, "555~",JOptionPane.ERROR_MESSAGE);
168 | }
169 | }else{
170 | String msg = "当前JDK版本过低,无法执行打开操作,地址:" + url;
171 | JOptionPane.showMessageDialog(frame, msg, "555~",JOptionPane.ERROR_MESSAGE);
172 | }
173 | }
174 |
175 | }
176 |
--------------------------------------------------------------------------------
/src/main/java/cn/blackgray/douban/album/download/common/utils/HTMLUtils.java:
--------------------------------------------------------------------------------
1 | package cn.blackgray.douban.album.download.common.utils;
2 |
3 | import java.util.HashMap;
4 | import java.util.Map;
5 | import java.util.Map.Entry;
6 |
7 | /**
8 | * HTML字符转义工具类
9 | * @author BlackGray
10 | */
11 | public class HTMLUtils {
12 |
13 | private static Map htmlSignMap = new HashMap(); //HTML special character table (HTML -> TEXT)
14 | private static Map htmlSignMapReversed = new HashMap(); //HTML special character table (TEXT -> HTML)
15 |
16 | static{
17 | //Initialise the symbol table. NOTE(review): in this listing most keys read the same as their values; the HTML entity names appear to have been decoded during extraction — TODO confirm against the original file
18 | htmlSignMap.put("Α","Α");
19 | htmlSignMap.put("Β", "Β");
20 | htmlSignMap.put("Γ", "Γ");
21 | htmlSignMap.put("Δ", "Δ");
22 | htmlSignMap.put("Ε", "Ε");
23 | htmlSignMap.put("Ζ", "Ζ");
24 | htmlSignMap.put("Η", "Η");
25 | htmlSignMap.put("Θ", "Θ");
26 | htmlSignMap.put("Ι", "Ι");
27 | htmlSignMap.put("Κ", "Κ");
28 | htmlSignMap.put("Λ", "Λ");
29 | htmlSignMap.put("Μ", "Μ");
30 | htmlSignMap.put("Ν", "Ν");
31 | htmlSignMap.put("Ξ", "Ξ");
32 | htmlSignMap.put("Ο", "Ο");
33 | htmlSignMap.put("Ρ", "Ρ");
34 | htmlSignMap.put("Σ", "Σ");
35 | htmlSignMap.put("Τ", "Τ");
36 | htmlSignMap.put("Υ", "Υ");
37 | htmlSignMap.put("Φ", "Φ");
38 | htmlSignMap.put("Χ", "Χ");
39 | htmlSignMap.put("Ψ", "Ψ");
40 | htmlSignMap.put("Ω", "Ω");
41 | htmlSignMap.put("α", "α");
42 | htmlSignMap.put("β", "β");
43 | htmlSignMap.put("γ", "γ");
44 | htmlSignMap.put("δ", "δ");
45 | htmlSignMap.put("ε", "ε");
46 | htmlSignMap.put("ζ", "ζ");
47 | htmlSignMap.put("η", "η");
48 | htmlSignMap.put("θ", "θ");
49 | htmlSignMap.put("ι", "ι");
50 | htmlSignMap.put("κ", "κ");
51 | htmlSignMap.put("λ", "λ");
52 | htmlSignMap.put("μ", "μ");
53 | htmlSignMap.put("ν", "ν");
54 | htmlSignMap.put("ξ", "ξ");
55 | htmlSignMap.put("ο", "ο");
56 | htmlSignMap.put("ρ", "ρ");
57 | htmlSignMap.put("ς", "ς");
58 | htmlSignMap.put("σ", "σ");
59 | htmlSignMap.put("τ", "τ");
60 | htmlSignMap.put("υ", "υ");
61 | htmlSignMap.put("χ", "φ"); //NOTE(review): key and value differ here, unlike the neighbouring entries, and no other lower-case phi/chi entry exists — looks like a chi/phi mix-up, confirm against the original file
62 | htmlSignMap.put("ψ", "ψ");
63 | htmlSignMap.put("ω", "ω");
64 | htmlSignMap.put("ϑ", "ϑ");
65 | htmlSignMap.put("ϒ", "ϒ");
66 | htmlSignMap.put("ϖ", "ϖ");
67 | htmlSignMap.put("•", "•");
68 | htmlSignMap.put("…", "…");
69 | htmlSignMap.put("′", "′");
70 | htmlSignMap.put("″", "″");
71 | htmlSignMap.put("‾", "‾");
72 | htmlSignMap.put("⁄", "⁄");
73 | htmlSignMap.put("℘", "℘");
74 | htmlSignMap.put("ℑ", "ℑ");
75 | htmlSignMap.put("ℜ", "ℜ");
76 | htmlSignMap.put("™", "™");
77 | htmlSignMap.put("ℵ", "ℵ");
78 | htmlSignMap.put("←", "←");
79 | htmlSignMap.put("↑", "↑");
80 | htmlSignMap.put("→", "→");
81 | htmlSignMap.put("↓", "↓");
82 | htmlSignMap.put("↔", "↔");
83 | htmlSignMap.put("↵", "↵");
84 | htmlSignMap.put("⇐", "⇐");
85 | htmlSignMap.put("⇑", "⇑");
86 | htmlSignMap.put("⇒", "⇒");
87 | htmlSignMap.put("⇓", "⇓");
88 | htmlSignMap.put("⇔", "⇔");
89 | htmlSignMap.put("∀", "∀");
90 | htmlSignMap.put("∂", "∂");
91 | htmlSignMap.put("∃", "∃");
92 | htmlSignMap.put("∅", "∅");
93 | htmlSignMap.put("∇", "∇");
94 | htmlSignMap.put("∈", "∈");
95 | htmlSignMap.put("∉", "∉");
96 | htmlSignMap.put("∋", "∋");
97 | htmlSignMap.put("∏", "∏");
98 | htmlSignMap.put("∑", "∑");
99 | htmlSignMap.put("−", "−");
100 | htmlSignMap.put("∗", "∗");
101 | htmlSignMap.put("√", "√");
102 | htmlSignMap.put("∝", "∝");
103 | htmlSignMap.put("∞", "∞");
104 | htmlSignMap.put("∠", "∠");
105 | htmlSignMap.put("∧", "∧");
106 | htmlSignMap.put("∨", "∨");
107 | htmlSignMap.put("∩", "∩");
108 | htmlSignMap.put("∪", "∪");
109 | htmlSignMap.put("∫", "∫");
110 | htmlSignMap.put("∴", "∴");
111 | htmlSignMap.put("∼", "∼");
112 | htmlSignMap.put("≅", "≅");
113 | htmlSignMap.put("≈", "≈");
114 | htmlSignMap.put("≠", "≠");
115 | htmlSignMap.put("≡", "≡");
116 | htmlSignMap.put("≤", "≤");
117 | htmlSignMap.put("≥", "≥");
118 | htmlSignMap.put("⊂", "⊂");
119 | htmlSignMap.put("⊃", "⊃");
120 | htmlSignMap.put("⊄", "⊄");
121 | htmlSignMap.put("⊆", "⊆");
122 | htmlSignMap.put("⊇", "⊇");
123 | htmlSignMap.put("⊕", "⊕");
124 | htmlSignMap.put("⊗", "⊗");
125 | htmlSignMap.put("⊥", "⊥");
126 | htmlSignMap.put("⋅", "⋅");
127 | htmlSignMap.put("⌈", "⌈");
128 | htmlSignMap.put("⌉", "⌉");
129 | htmlSignMap.put("⌊", "⌊");
130 | htmlSignMap.put("⌋", "⌋");
131 | htmlSignMap.put("◊", "◊");
132 | htmlSignMap.put("♠", "♠");
133 | htmlSignMap.put("♣", "♣");
134 | htmlSignMap.put("♥", "♥");
135 | htmlSignMap.put("♦", "♦");
136 | htmlSignMap.put(" ", " ");
137 | htmlSignMap.put("¡", "¡");
138 | htmlSignMap.put("¢", "¢");
139 | htmlSignMap.put("£", "£");
140 | htmlSignMap.put("¤", "¤");
141 | htmlSignMap.put("¥", "¥");
142 | htmlSignMap.put("¦", "¦");
143 | htmlSignMap.put("§", "§");
144 | htmlSignMap.put("¨", "¨");
145 | htmlSignMap.put("©", "©");
146 | htmlSignMap.put("ª", "ª");
147 |
148 | htmlSignMap.put("«", "«");
149 | htmlSignMap.put("¬", "¬");
150 | htmlSignMap.put("®", "®");
151 | htmlSignMap.put("µ", "µ");
152 | htmlSignMap.put("¯", "¯");
153 | htmlSignMap.put("°", "°");
154 | htmlSignMap.put("±", "±");
155 | htmlSignMap.put("²", "²");
156 | htmlSignMap.put("³", "³");
157 | htmlSignMap.put("´", "´");
158 |
159 | htmlSignMap.put("&", "&");
160 | htmlSignMap.put("'", "'");
161 | htmlSignMap.put(""", "\"");
162 |
163 | for (Entry entry : htmlSignMap.entrySet()) { //build the reverse table by swapping key and value of every entry
164 | htmlSignMapReversed.put(entry.getValue(), entry.getKey());
165 | }
166 | htmlSignMapReversed.remove("&"); //the "&" entry is dropped from the reverse table
167 | }
168 |
169 | /**
170 | * 特殊字符转换为HTML使用字符
171 | * @param str
172 | * @return
173 | */
174 | public static String textToHTML(String str){
175 | if (str.contains("&")) {
176 | str.replace("&", "&");
177 | };
178 | for (Entry entry : htmlSignMapReversed.entrySet()) {
179 | if (str.contains(entry.getKey())) {
180 | str = str.replaceAll(entry.getKey(), entry.getValue());
181 | }
182 | }
183 | return str;
184 | }
185 |
186 | /**
187 | * 特殊字符转换为Json使用字符
188 | * @param str
189 | * @return
190 | */
191 | public static String textToJson(String str){
192 | str = htmlToText(str);
193 | str = str.replace("'", "\"");
194 | if (str.endsWith("\\")) {
195 | str += " ";
196 | }
197 | return str;
198 | }
199 |
200 | /**
201 | * HTML特殊字符转换为正常字符
202 | * @param str
203 | * @return
204 | */
205 | public static String htmlToText(String str){
206 | for (Entry element : htmlSignMap.entrySet()) {
207 | if (str.contains(element.getKey())) {
208 | str = str.replace(element.getKey(), element.getValue());
209 | };
210 | }
211 | return str;
212 | }
213 |
214 |
215 | }
216 |
--------------------------------------------------------------------------------
/src/main/java/cn/blackgray/douban/album/download/model/Album.java:
--------------------------------------------------------------------------------
1 | package cn.blackgray.douban.album.download.model;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Date;
5 | import java.util.List;
6 |
7 | import cn.blackgray.douban.album.download.service.download.DownloadProcessing;
8 | import cn.blackgray.douban.album.download.service.handler.AlbumHandler;
9 | import cn.blackgray.douban.album.download.service.handler.PageAnalyzer;
10 |
11 | /**
12 | * Album model.
13 | * @author BlackGray
14 | */
15 | public class Album {
16 |
17 | //1. (*) Images - file name (resolved relatively, no path needed)
18 | //2. (*) Notes - the notes .txt file
19 | //3. (*) Album name - folder name
20 | //4. (*) Album URL - first line of the notes .txt file
21 | //5. Document generation time - program execution time
22 |
23 | // public static final int TYPE_NOMAL = 1;
24 | // public static final int TYPE_DOUBAN_ALBUM = 2;
25 | // public static final int TYPE_DOUBAN_ONLINE = 3;
26 | // public static final int TYPE_DOUBAN_SITE = 4;
27 | // public static final int TYPE_DOUBAN_CELEBRITY = 5;
28 |
29 | private String name; //album name
30 | private String url; //album URL
31 | private Date date; //date
32 | private String path; //storage path
33 | private String charset; //character set
34 | private String desc; //album description
35 | // private int type = TYPE_NOMAL; //album type
36 |
37 | private boolean update = false; //whether this is an update
38 |
39 | private AlbumHandler albumHandler; //album handler
40 |
41 | private boolean isVisibleToSelf = false; //whether visible to the owner only
42 | private boolean isVisibleToFriend = false; //whether visible to friends only
43 | private boolean isPrivateAlbum = false; //whether this is a private album
44 | private long privatePhotoURLAnalyzeTotal = 0; //total number of photo URLs obtained by analysing a private album; used for counting and UI notification
45 |
46 |
47 | //Photo collection
48 | private List photosList = new ArrayList();
49 | //Page collection - each page holds several photos
50 | private List pageURLLsit = new ArrayList();
51 |
52 | public Album() {
53 | super();
54 | }
55 |
56 | // public Album(String url, List photosList, String path, String charset) {
57 | // super();
58 | // this.name = path.substring(path.lastIndexOf("/") + 1);
59 | // this.date = new Date();
60 | // this.path = path;
61 | // this.charset = charset;
62 | // this.photosList = photosList;
63 | // if (url.endsWith("/")) {
64 | // url = url.substring(0,url.lastIndexOf("/"));
65 | // }
66 | // this.url = url;
67 | // }
68 |
69 | // /**
70 | // * Check the album type
71 | // * @param url
72 | // * @return
73 | // */
74 | // public static int checkType(String url) {
75 | // //Douban album download
76 | // if (url.matches(DownloadService.REGEX_DOUBAN_ALBUM)) {
77 | // return TYPE_DOUBAN_ALBUM;
78 | // }
79 | // //Douban online event download
80 | // if (url.matches(DownloadService.REGEX_DOUBAN_ONLINE_ALBUM)) {
81 | // return TYPE_DOUBAN_ONLINE;
82 | // }
83 | // //Douban site
84 | // if (url.matches(DownloadService.REGEX_DOUBAN_SITE) || url.matches(DownloadService.REGEX_DOUBAN_SITE_NEW)) {
85 | // return TYPE_DOUBAN_SITE;
86 | // }
87 | // //Normal download
88 | // return TYPE_NOMAL;
89 | // }
90 |
91 | public AlbumHandler getAlbumHandler() {
92 | return albumHandler;
93 | }
94 |
95 |
96 |
97 | public void setAlbumHandler(AlbumHandler albumHandler) {
98 | this.albumHandler = albumHandler;
99 | }
100 |
101 | public boolean isUpdate() {
102 | return update;
103 | }
104 |
105 | public void setUpdate(boolean update) {
106 | this.update = update;
107 | }
108 |
109 |
110 | // public int getType() {
111 | // return type;
112 | // }
113 | //
114 | // public void setType(int type) {
115 | // this.type = type;
116 | // }
117 |
118 | public String getDesc() {
119 | return desc;
120 | }
121 |
122 | public void setDesc(String desc) {
123 | this.desc = desc;
124 | }
125 |
126 | public String getCharset() {
127 | return charset;
128 | }
129 |
130 |
131 | public void setCharset(String charset) {
132 | this.charset = charset;
133 | }
134 |
135 |
136 | public String getPath() {
137 | return path;
138 | }
139 |
140 |
141 | public void setPath(String path) {
142 | this.path = path;
143 | }
144 |
145 |
146 | public String getName() {
147 | return name;
148 | }
149 |
150 | public String getUrl() {
151 | return url;
152 | }
153 |
154 | public Date getDate() {
155 | return date;
156 | }
157 |
158 | public List getPhotosList() { //returns the internal list itself, not a copy
159 | return photosList;
160 | }
161 |
162 | public void setName(String name) {
163 | this.name = name;
164 | }
165 |
166 | public void setUrl(String url) {
167 | this.url = url;
168 | }
169 |
170 | public void setDate(Date date) {
171 | this.date = date;
172 | }
173 |
174 | public void setPhotosList(List photosList) {
175 | this.photosList = photosList;
176 | }
177 |
178 | public List getPageURLLsit() { //returns the internal list itself, not a copy
179 | return pageURLLsit;
180 | }
181 |
182 | public void setPageURLLsit(List pageURLLsit) {
183 | this.pageURLLsit = pageURLLsit;
184 | }
185 |
186 | public boolean getIsVisibleToSelf() {
187 | return isVisibleToSelf;
188 | }
189 |
190 | public void setIsVisibleToSelf(boolean isVisibleToSelf) {
191 | this.isVisibleToSelf = isVisibleToSelf;
192 | }
193 |
194 |
195 | public boolean getIsVisibleToFriend() {
196 | return isVisibleToFriend;
197 | }
198 |
199 | public void setIsVisibleToFriend(boolean isVisibleToFriend) {
200 | this.isVisibleToFriend = isVisibleToFriend;
201 | }
202 |
203 | public boolean getIsPrivateAlbum() {
204 | return isPrivateAlbum;
205 | }
206 |
207 | public void setIsPrivateAlbum(boolean isPrivateAlbum) {
208 | this.isPrivateAlbum = isPrivateAlbum;
209 | }
210 |
211 |
212 | public long getPrivatePhotoURLAnalyzeTotal() {
213 | return privatePhotoURLAnalyzeTotal;
214 | }
215 |
216 | public void setPrivatePhotoURLAnalyzeTotal(long privatePhotoURLAnalyzeTotal) {
217 | this.privatePhotoURLAnalyzeTotal = privatePhotoURLAnalyzeTotal;
218 | }
219 |
220 | /**
221 | * Downloads this album by passing it to DownloadProcessing.downloadAlbum.
222 | */
223 | public void download(){
224 | //Create the album model
225 | //Download the album according to the model
226 | DownloadProcessing.downloadAlbum(this);
227 |
228 | };
229 |
230 | /**
231 | * Generates the description document.
232 | * Delegates to the album handler's createDescDoc; takes no parameters.
233 | */
234 | public void createDescDoc(){
235 | this.albumHandler.createDescDoc(this);
236 | }
237 |
238 | /**
239 | * Initialises the album: URL, page list, name, description and download date. Requires albumHandler to be set beforehand.
240 | */
241 | public void init(){
242 |
243 | // this.name = path.substring(path.lastIndexOf("/") + 1);
244 | // this.date = new Date();
245 | // this.path = path;
246 | // this.charset = charset;
247 | // this.photosList = photosList;
248 | // if (url.endsWith("/")) {
249 | // url = url.substring(0,url.lastIndexOf("/"));
250 | // }
251 | // this.url = url;
252 |
253 | //[Initialise album information]
254 | //URL
255 | this.setUrl(this.albumHandler.getAlbumURL());
256 | //All pages
257 | List pageURLLsit = PageAnalyzer.findPageURL(albumHandler);
258 | this.setPageURLLsit(pageURLLsit);
259 | //Album name. NOTE(review): findAlbumName() takes no argument — presumably it relies on state left by findPageURL above; verify before reordering
260 | String name = PageAnalyzer.findAlbumName().trim();
261 | this.setName(albumHandler.albumNameProcess(name));
262 | //Album description (left unchanged when the analyser returns null)
263 | String desc = PageAnalyzer.findAlbumDesc(albumHandler);
264 | if (desc != null) {
265 | this.setDesc(desc.trim());
266 | }
267 | //Download date
268 | this.setDate(new Date());
269 |
270 | }
271 |
272 | @Override
273 | public String toString() { //desc and albumHandler are not part of the output
274 | return "Album [name=" + name + ", url=" + url + ", date=" + date
275 | + ", path=" + path + ", charset=" + charset
276 | + ", update=" + update + ", photosList=" + photosList
277 | + ", pageURLLsit=" + pageURLLsit + ", isVisibleToSelf=" + isVisibleToSelf
278 | + ", isVisibleToFriend=" + isVisibleToFriend + ", isPrivateAlbum=" + isPrivateAlbum
279 | + ", privatePhotoURLAnalyzeTotal=" + privatePhotoURLAnalyzeTotal
280 | + "]";
281 | }
282 |
283 | }
284 |
--------------------------------------------------------------------------------
/src/main/java/cn/blackgray/douban/album/download/service/download/DownloadFailManager.java:
--------------------------------------------------------------------------------
1 | package cn.blackgray.douban.album.download.service.download;
2 |
3 | import java.io.BufferedWriter;
4 | import java.io.File;
5 | import java.io.FileWriter;
6 | import java.io.IOException;
7 | import java.util.List;
8 | import java.util.Map;
9 | import java.util.Map.Entry;
10 | import java.util.TreeMap;
11 |
12 | import javax.swing.JProgressBar;
13 |
14 | import cn.blackgray.douban.album.download.common.Common;
15 | import cn.blackgray.douban.album.download.common.Console;
16 | import cn.blackgray.douban.album.download.common.utils.FileUtils;
17 | import cn.blackgray.douban.album.download.ui.MainFrame;
18 |
19 | /**
20 | * 下载失败后处理类
21 | * @author BlackGray
22 | * @createTime 2022-10-23 23:32:56
23 | */
24 | public class DownloadFailManager {
25 |
26 | private static Map failFileMap = new TreeMap(); //下载失败的文件集合Map
27 |
28 |
29 | /**
30 | * 添加下载失败图片
31 | * @return
32 | */
33 | public static void add(String url, String path) {
34 | synchronized (failFileMap) {
35 | if (!failFileMap.containsKey(url)) {
36 | //删除下载不完整图片文件
37 | FileUtils.deleteImageFile(url, path);
38 | failFileMap.put(url,path);
39 | }
40 | }
41 | }
42 |
43 |
44 | /**
45 | * 批量添加下载失败图片
46 | * @param failMap
47 | */
48 | public static void add(Map failMap) {
49 | synchronized (failFileMap) {
50 | failFileMap.putAll(failMap);
51 | }
52 | }
53 |
54 |
55 | /**
56 | * Returns the collection of files whose download failed.
57 | * @return the internal map itself (not a copy); no lock is taken here, unlike in add()
58 | */
59 | public static Map getFailFileMap() {
60 | return failFileMap;
61 | }
62 |
63 | /**
64 | * 获取下载失败的图片数量
65 | * @return
66 | */
67 | public static int getFailSize() {
68 | return failFileMap.size();
69 | }
70 |
71 | /**
72 | * 清除集合中所有记录
73 | * @return
74 | */
75 | public static void clearAll() {
76 | failFileMap.clear();
77 | }
78 |
79 | /**
80 | * 下载未成功下载的图片
81 | * @return true = 下载失败文件成功,false = 下载失败文件未成功(未完全下载)。
82 | */
83 | public static boolean downloadFailFile(){
84 | int num = 1;
85 | int size = DownloadFailManager.getFailSize();
86 | //DownloadThread中下载异常时,将继续向公共Map中添加记录,为防止冲突,新建Map用于异常文件重新下载
87 | Map failFileMapForRetry = new TreeMap();
88 | failFileMapForRetry.putAll(DownloadFailManager.getFailFileMap());
89 |
90 | JProgressBar progressBar = MainFrame.getInstance().progressBar;
91 | progressBar.setMaximum(size);
92 | progressBar.setValue(0);
93 | Console.print("=====================================");
94 | Console.print("下载上次下载失败图片:" + size + "(张)");
95 | Map newFailFileMap = new TreeMap();
96 | for (Entry element : failFileMapForRetry.entrySet()) {
97 | String url = element.getKey();
98 | String path = element.getValue();
99 | try {
100 | //下载
101 | Console.print("下载图片(" + num + "/" + size + "):" + url);
102 | DownloadThread downloadThread = new DownloadThread();
103 | int state = downloadThread.downloadImage(url, path);
104 | if (state == Common.IMAGE_DOWNLOAD_STATUS_DOWNLOAD_FAIL) {
105 | Console.print("失败重下 - 图片下载异常,已下载文件小于网络资源大小,等待再次重试:" + url);
106 | //加入下载异常集合,待重试
107 | FileUtils.deleteImageFile(url, path);
108 | newFailFileMap.put(url, path);
109 | }
110 | } catch (IOException e) {
111 | Console.print("图片下载失败:" + url);
112 | FileUtils.deleteImageFile(url, path);
113 | newFailFileMap.put(url, path);
114 | }
115 | progressBar.setValue(num);
116 | num++;
117 | }
118 | DownloadFailManager.clearAll();
119 | if (newFailFileMap.size() > 0) {
120 | Console.print("【FINISH】成功:" + (size - newFailFileMap.size()) + ",失败" + newFailFileMap.size());
121 | //批量添加下载失败图片
122 | DownloadFailManager.add(newFailFileMap);
123 | return false;
124 | } else {
125 | Console.print("【FINISH】成功:" + size + ",失败" + 0);
126 | return true;
127 | }
128 | }
129 |
130 |
131 | /**
132 | * 生成下载失败的图片记录文档 - 多个相册
133 | *
134 | * 图片下载失败、且重试后仍有失败,点击取消重试后将调用此方法生成记录文档
135 | * 用户可使用其他下载软件下载图片,或用于知晓下载失败内容
136 | *
137 | * 相册更新时将自动重新下载缺失图片,因此不再单独实现记录文档识别与重新下载功能
138 | *
139 | * @param albumPathList 相册目录
140 | */
141 | public static void createAlbumFailFileDoc(List albumPathList, Map failFileMap) {
142 |
143 | Console.print("【正在生成下载失败图片记录文件(下载失败图片记录.txt),请稍等...】");
144 |
145 | //按相册对下载异常文件做分组Map>
146 | Map> failFileMapGroupByAlbumPath = new TreeMap>();
147 | for (String albumPath : albumPathList) {
148 | for (Entry entry : failFileMap.entrySet()) {
149 | String imageUrl = entry.getKey();
150 | String imagePath = entry.getValue();
151 | if(imagePath.startsWith(albumPath)) {
152 | Map tempMap = null;
153 | if(failFileMapGroupByAlbumPath.containsKey(albumPath)) {
154 | tempMap = failFileMapGroupByAlbumPath.get(albumPath);
155 | }else {
156 | tempMap = new TreeMap();
157 | }
158 | tempMap.put(imageUrl, imagePath);
159 | failFileMapGroupByAlbumPath.put(albumPath, tempMap);
160 | }
161 | }
162 |
163 | }
164 |
165 | //生成单个相册文档
166 | for (String albumPath : albumPathList) {
167 | createAlbumFailFileDoc(albumPath, failFileMapGroupByAlbumPath.get(albumPath));
168 | }
169 |
170 | Console.print("【下载失败图片记录文件生成成功】");
171 |
172 | }
173 |
174 |
175 | /**
176 | * 生成下载失败的图片记录文档 - 单个相册
177 | */
178 | private static void createAlbumFailFileDoc(String albumPath, Map failFileMap) {
179 | if(failFileMap != null && failFileMap.size() > 0) {
180 | try {
181 | String path = albumPath + File.separator + Common.DEFAULT_FAIL_FILE_DOC_NAME;
182 | Console.print("开始生成文档:" + path);
183 | BufferedWriter bw = new BufferedWriter(new FileWriter(path));
184 | //输出记录
185 | bw.write("------------------------------------------------------------------");
186 | bw.newLine();
187 | bw.write("下载失败图片URL地址及保存目录");
188 | bw.newLine();
189 | bw.write("------------------------------------------------------------------");
190 | bw.newLine();
191 | for (Entry entry : failFileMap.entrySet()) {
192 | String imageUrl = entry.getKey();
193 | String imagePath = entry.getValue();
194 | bw.write(imageUrl + " → " + imagePath.replace(albumPath, Common.DEFAULT_ALBUM_ROOT_PATH_STR));
195 | bw.newLine();
196 | }
197 | //输出纯地址
198 | bw.newLine();
199 | bw.newLine();
200 | bw.newLine();
201 | bw.write("------------------------------------------------------------------");
202 | bw.newLine();
203 | bw.write("下载失败图片URL地址,若希望使用其他工具下载缺失图片,可复制以下地址");
204 | bw.newLine();
205 | bw.write("------------------------------------------------------------------");
206 | bw.newLine();
207 | for (Entry entry : failFileMap.entrySet()) {
208 | String imageUrl = entry.getKey();
209 | bw.write(imageUrl);
210 | bw.newLine();
211 | }
212 |
213 | bw.flush();
214 | bw.close();
215 | Console.print("文档生成成功");
216 | } catch (IOException e) {
217 | Console.print("文档生成失败");
218 | e.printStackTrace();
219 | }
220 | }
221 | }
222 |
223 | /**
224 | * 根据相册路径删除“下载失败图片记录.txt”文档,更新相册时调用
225 | */
226 | public static void deleteAlbumFailFileDoc(String albumPath) {
227 | File doc = new File(albumPath + File.separator + Common.DEFAULT_FAIL_FILE_DOC_NAME);
228 | if (doc.exists()) {
229 | Console.print("更新相册,删除早期生成的“下载失败图片记录.txt”文档");
230 | doc.delete();
231 | }
232 | }
233 |
234 |
235 |
236 | }
237 |
238 |
239 |
--------------------------------------------------------------------------------
/src/main/java/cn/blackgray/douban/album/download/service/download/DownloadThread.java:
--------------------------------------------------------------------------------
1 | package cn.blackgray.douban.album.download.service.download;
2 |
3 | import java.io.BufferedInputStream;
4 | import java.io.BufferedOutputStream;
5 | import java.io.File;
6 | import java.io.FileNotFoundException;
7 | import java.io.FileOutputStream;
8 | import java.io.IOException;
9 | import java.io.InputStream;
10 | import java.net.HttpURLConnection;
11 | import java.net.MalformedURLException;
12 | import java.net.URL;
13 | import java.util.List;
14 |
15 | import javax.swing.JProgressBar;
16 |
17 | import cn.blackgray.douban.album.download.common.Common;
18 | import cn.blackgray.douban.album.download.common.Console;
19 | import cn.blackgray.douban.album.download.common.utils.LoginUtils;
20 | import cn.blackgray.douban.album.download.common.utils.URLUtils;
21 | import cn.blackgray.douban.album.download.model.Album;
22 |
23 | /**
24 | * 下载线程
25 | * @author BlackGray
26 | */
27 | public class DownloadThread extends Thread{
28 |
29 | private Album album;
30 | private List imageURLList;
31 | private String path; //保存路径
32 | private String url; //正在处理中的图片url
33 | private int imageCount;
34 | private JProgressBar mainProgressBar;
35 |
36 | private BufferedInputStream inputStream;
37 | private BufferedOutputStream outputStream;
38 |
39 | public DownloadThread() {
40 | super();
41 | }
42 |
43 | public DownloadThread(Album album, String threadName, List imageURLList,int imageCount , String path,JProgressBar mainProgressBar) {
44 | this.album = album;
45 | this.imageURLList = imageURLList;
46 | this.path = path.trim();
47 | this.imageCount = imageCount;
48 | this.mainProgressBar = mainProgressBar;
49 | this.setName(threadName);
50 | }
51 |
52 | public void closeStream() throws IOException {
53 | if (inputStream != null) {
54 | inputStream.close();
55 | inputStream = null;
56 | }
57 | if (outputStream != null) {
58 | outputStream.close();
59 | outputStream = null;
60 | }
61 | }
62 |
63 | public String getPath() {
64 | return path;
65 | }
66 |
67 | public String getUrl() {
68 | return url;
69 | }
70 |
71 | public BufferedInputStream getInputStream() {
72 | return inputStream;
73 | }
74 |
75 | public BufferedOutputStream getOutputStream() {
76 | return outputStream;
77 | }
78 |
79 | public Album getAlbum() {
80 | return album;
81 | }
82 |
83 | @Override
84 | public void run() {
85 | while (true) {
86 | int listSize;
87 | synchronized (imageURLList) {
88 | //判断是否有图片需要下载
89 | if (imageURLList.size() != 0) {
90 | url = imageURLList.get(0);
91 | listSize = imageURLList.size() - 1;
92 | imageURLList.remove(url);
93 | }else{
94 | break;
95 | }
96 | }
97 | try {
98 | int state = downloadImage(url, path, album.getIsPrivateAlbum(), LoginUtils.IS_LOGIN);
99 | if (state == Common.IMAGE_DOWNLOAD_STATUS_EXISTS) {
100 | Console.print(this.getName() + " - 图片已存在(" + (imageCount - listSize) + "/" + imageCount + "):" + url);
101 | }
102 | if(state == Common.IMAGE_DOWNLOAD_STATUS_FINISH){
103 | Console.print(this.getName() + " - 图片下载完成(" + (imageCount - listSize) + "/" + imageCount + "):" + url);
104 | }
105 | if(state == Common.IMAGE_DOWNLOAD_STATUS_URL_NOT_EXISTS){
106 | Console.print(this.getName() + " - 图片不存在(" + (imageCount - listSize) + "/" + imageCount + "):" + url);
107 | }
108 | if (state == Common.IMAGE_DOWNLOAD_STATUS_DOWNLOAD_FAIL) {
109 | Console.print(this.getName() + " - 图片下载异常,已下载文件小于网络资源大小(" + (imageCount - listSize) + "/" + imageCount + "):" + url);
110 | //加入下载异常集合,待重试
111 | DownloadFailManager.add(url, path);
112 | }
113 | DownloadManager.updateCount += 1;
114 | } catch (Exception e) {
115 | if (!e.getClass().equals(FileNotFoundException.class)) {
116 | Console.print("图片下载失败:" + url + " - " + e.getMessage());
117 | //删除下载失败图片,并将图片信息加入失败文件集合
118 | DownloadFailManager.add(url, path);
119 | e.printStackTrace();
120 | }else{
121 | Console.print("图片不存在:" + url + " - " + e.getMessage());
122 | e.printStackTrace();
123 | }
124 | }finally{
125 | synchronized (mainProgressBar) {
126 | mainProgressBar.setValue(mainProgressBar.getValue() + 1);
127 | }
128 | }
129 | }
130 | }
131 |
132 | /**
133 | * 下载图片 - 是否私有相册、是否登陆默认值为false
134 | * @param url
135 | * @param filePath
136 | * @return
137 | * @throws MalformedURLException
138 | * @throws FileNotFoundException
139 | * @throws IOException
140 | */
141 | public int downloadImage(String url, String filePath) throws MalformedURLException, FileNotFoundException, IOException{
142 | return downloadImage(url, filePath, false, false);
143 | }
144 |
145 |
146 | /**
147 | * 下载图片
148 | * @param url
149 | * @param filePath
150 | * @throws MalformedURLException
151 | * @throws FileNotFoundException
152 | * @throws IOException
153 | */
154 | public int downloadImage(String url, String filePath, boolean isPrivateAlbum, boolean isLogin) throws MalformedURLException, FileNotFoundException, IOException{
155 |
156 | //私密相册,且已登录,生成请求所需的Cookie信息
157 | String cookieStr = null;
158 | if (isPrivateAlbum && isLogin) {
159 | cookieStr = LoginUtils.getCookiesStr(LoginUtils.CHROME_DRIVER);
160 | }
161 |
162 | //相册下载处理
163 | String fileName = url.substring(url.lastIndexOf('/'));
164 | File file = new File(filePath + File.separatorChar + fileName);
165 | if(file.exists()) {
166 | //如果本地文件已存在,不执行下载,返回已存在标识
167 | return Common.IMAGE_DOWNLOAD_STATUS_EXISTS;
168 | }else {
169 | //如果本地图不存在,执行下载
170 | //判断网络资源是否存在
171 | if (URLUtils.exists(url, cookieStr)) {
172 | //执行下载
173 | //配置网络资源
174 | URL image = new URL(url);
175 | HttpURLConnection conn = (HttpURLConnection) image.openConnection();
176 |
177 | //2016-03-16 如不加referer信息,下载影人相册时,大图监测返回403异常
178 | conn.setRequestProperty("referer", "https://www.douban.com/");
179 | //2024-06-23 新增参数,不加会报403
180 | conn.setRequestProperty("User-Agent", URLUtils.randomUserAgentStr());
181 | //2024-10-02 设置通过selenium模拟登陆后获取的Cookie值
182 | if (cookieStr != null) {
183 | conn.setRequestProperty("Cookie", cookieStr);
184 | }
185 |
186 | conn.setConnectTimeout(10*1000); //设置连接超时
187 | conn.setReadTimeout(10*1000); //设置读取超时
188 | conn.setDoInput(true); //默认为true
189 | conn.connect();
190 | //获取网络资源文件大小
191 | long contentLength = conn.getContentLengthLong();
192 |
193 | InputStream in = conn.getInputStream();
194 | inputStream = new BufferedInputStream(in);
195 | outputStream = new BufferedOutputStream(new FileOutputStream(file));
196 | byte[] data = new byte[2048];
197 | int n = 0;
198 | while ((n = inputStream.read(data)) != -1) {
199 | outputStream.write(data,0,n);
200 | }
201 | outputStream.flush();
202 | in.close();
203 | inputStream.close();
204 | outputStream.close();
205 | conn.disconnect();
206 |
207 | //验证文件大小
208 | if(file.length() < contentLength) {
209 | // 图片下载异常,已下载文件小于网络资源大小
210 | return Common.IMAGE_DOWNLOAD_STATUS_DOWNLOAD_FAIL;
211 | }else {
212 | // 下载完成
213 | return Common.IMAGE_DOWNLOAD_STATUS_FINISH;
214 | }
215 | }else{
216 | return Common.IMAGE_DOWNLOAD_STATUS_URL_NOT_EXISTS;
217 | }
218 | }
219 |
220 | }
221 |
222 | public static void main(String[] args) throws MalformedURLException, FileNotFoundException, IOException {
223 | //免登陆图片下载测试
224 | // WebDriver driver = LoginUtils.onlyInit();
225 | // LoginUtils.CHROME_DRIVER = driver;
226 | // DownloadThread thread = new DownloadThread();
227 | //
228 | // String url = "https://img9.doubanio.com/view/photo/l/public/p881707594.jpg";
229 | // thread.downloadImage(url, "/Users/blackgray/Downloads/selenium/", true, true);
230 |
231 |
232 | //登陆后图片下载测试
233 | LoginUtils.login();
234 | DownloadThread thread = new DownloadThread();
235 |
236 | String url = "https://simg.douban.com/view/photo/m/tKu8-lZS3kZdc5E9Fgcrww/2745512/x2196275895.jpg";
237 | thread.downloadImage(url, "/Users/blackgray/Downloads/selenium/", true, true);
238 | }
239 |
240 | }
--------------------------------------------------------------------------------
/src/main/java/cn/blackgray/douban/album/download/service/handler/handler/OnlineAlbumHandler.java:
--------------------------------------------------------------------------------
1 | package cn.blackgray.douban.album.download.service.handler.handler;
2 |
3 | import java.io.BufferedReader;
4 | import java.io.BufferedWriter;
5 | import java.io.File;
6 | import java.io.FileReader;
7 | import java.io.FileWriter;
8 | import java.io.IOException;
9 | import java.util.ArrayList;
10 | import java.util.Collections;
11 | import java.util.HashMap;
12 | import java.util.List;
13 | import java.util.Map;
14 |
15 | import cn.blackgray.douban.album.download.common.Common;
16 | import cn.blackgray.douban.album.download.common.Console;
17 | import cn.blackgray.douban.album.download.common.utils.URLUtils;
18 | import cn.blackgray.douban.album.download.model.Album;
19 | import cn.blackgray.douban.album.download.model.BGImage;
20 | import cn.blackgray.douban.album.download.service.handler.AlbumHandler;
21 | import cn.blackgray.douban.album.download.service.image.ImageListComparator;
22 |
23 | /**
24 | * 活动相册处理器
25 | */
26 | public class OnlineAlbumHandler extends AlbumHandler {
27 |
28 | public static final int PAGE_SIZE_IMAGES_ONLINE = 90; //活动照片分页大小(一页30张图)
29 | public static final String PAGE_TAG = "start";
30 | public static final String IMAGE_NAME_REGEX = "p\\d+.(" + Common.IMAGE_TYPE + ")";
31 | public static final String ALBUM_URL_REGEX = "(http|https)://www.douban.com/online/\\d+/album/\\d+/";
32 |
33 | @Override
34 | public String getURLRegex() {
35 | return ALBUM_URL_REGEX;
36 | }
37 |
38 | @Override
39 | public String getPageRegex() {
40 | return super.getAlbumURL() + "\\?\\w+=\\d+";
41 | }
42 |
43 | @Override
44 | public boolean removeURLParameter() {
45 | return true;
46 | }
47 |
48 | @Override
49 | public Integer getPageSize() {
50 | return PAGE_SIZE_IMAGES_ONLINE;
51 | }
52 |
53 | @Override
54 | public String getPageTag() {
55 | return PAGE_TAG;
56 | }
57 |
58 | @Override
59 | public String getImageNameRegex() {
60 | return IMAGE_NAME_REGEX;
61 | }
62 |
63 | @Override
64 | public String getRawURL(String imageURL) {
65 | return null;
66 | }
67 |
68 | @Override
69 | public void createBGImage(Album album, String source, String pageURL, String imageURL, Map map) {
70 |
71 | //
82 | String imageId = imageURL.substring(imageURL.lastIndexOf("/p") + 2,imageURL.lastIndexOf("."));
83 | String onlineId = pageURL.substring(pageURL.indexOf("online/") + 7, pageURL.indexOf("/album"));
84 |
85 | //【描述】
86 | String startIndexStr = "", ownerStartIndex));
99 | String ownerURL = "@@@@";
100 | try {
101 | ownerURL = ownerA.substring(0,ownerA.indexOf("/\">"));
102 | } catch (Exception e) {
103 | System.out.println("=====================");
104 | System.out.println(ownerA);
105 | System.out.println(ownerA.indexOf(""));
107 | System.out.println("=====================");
108 | e.printStackTrace();
109 | }
110 | String ownerName = ownerA.substring(ownerA.indexOf(">") + 1);
111 |
112 | //【照片评论数】
113 | String commentTatolStartIndexStr = "";
114 | int commentTatolStartIndex = source.indexOf(commentTatolStartIndexStr);
115 | Integer commentTatol = null;
116 | if (commentTatolStartIndex != -1) {
117 | //“3回应”
118 | String s = source.substring(commentTatolStartIndex + commentTatolStartIndexStr.length(), source.indexOf("",commentTatolStartIndex));
119 | commentTatol = Integer.valueOf(s.replace("回应", ""));
120 | }
121 |
122 |
123 | //【照片】
124 | imageURL = imageURL.replace("thumb", "photo").trim(); //thumb——>photo:缩略图——>大图
125 | desc = desc.replace("\\t\\n","").trim();
126 | if (!map.containsKey(imageURL)) {
127 | BGImage bgImage = new BGImage(desc, imageURL, commentTatol);
128 | bgImage.setOwnerName(ownerName);
129 | bgImage.setOwnerURL(ownerURL);
130 | map.put(imageURL, bgImage);
131 | }
132 | }
133 |
134 | @Override
135 | public boolean checkBGImage(BGImage bgImage) {
136 | return bgImage.getUrl().indexOf("albumicon") < 0;
137 | }
138 |
139 | @Override
140 | public void createDescDoc(Album album) {
141 | List imageList = album.getPhotosList();
142 | Map map = new HashMap();
143 | for (BGImage bgImage : imageList) {
144 | map.put(bgImage.getUrl(), bgImage);
145 | }
146 | List keyList = new ArrayList(map.keySet());
147 | //排序
148 | Collections.sort(keyList,new ImageListComparator());
149 | try {
150 | BufferedWriter bw = new BufferedWriter(new FileWriter(album.getPath() + "/" + Common.DEFAULT_DOC_NAME));
151 | //输出相册地址
152 | bw.write(URLUtils.charset + " " + album.getUrl() + " " +album.getDate().getTime() + " -");
153 | bw.newLine();
154 | //输出照片地址和描述
155 | for (int i = 0; i < keyList.size(); i++) {
156 | BGImage bgImage = map.get(keyList.get(i));
157 | Integer commentTotal = bgImage.getCommentTotal();
158 | String commentTotalStr = commentTotal==null?"-":String.valueOf(commentTotal);
159 | bw.write((i + 1) + " " + keyList.get(i) + " " + commentTotalStr + " " + bgImage.getDesc());
160 | bw.newLine();
161 | //活动相册,输出用户名&主页地址
162 | bw.write(bgImage.getOwnerURL() + " " + bgImage.getOwnerName());
163 | bw.newLine();
164 | }
165 | bw.flush();
166 | bw.close();
167 | Console.print("生成描述文档:成功");
168 | } catch (IOException e) {
169 | Console.print("生成描述文档:失败");
170 | e.printStackTrace();
171 | }
172 | }
173 |
174 |
175 | @Override
176 | public List getBGImageFromDescDoc(File descFile) throws IOException {
177 | List list = new ArrayList();
178 | BufferedReader reader = new BufferedReader(new FileReader(descFile));
179 | BGImage tempBGImage = null;
180 | int line = 0;
181 | String str;
182 | while ((str = reader.readLine()) != null) {
183 | if (line == 0) {
184 | line++;
185 | }else{
186 | //0行为相册&页面信息,奇数行照片信息,偶数行用户信息
187 | if (line%2 == 1) {
188 | String[] info = str.split(" ",4);
189 | //info[0],info[1],info[2],info[3]分别为照片编号、原始URL地址、评论数、照片描述
190 | tempBGImage = new BGImage(info[0],info[1],info[3]);
191 | //评论数
192 | if (!info[2].equals("-")) {
193 | tempBGImage.setCommentTotal(Integer.valueOf(info[2]));
194 | }
195 | line++;
196 | }else{
197 | String[] info = str.split(" ",2);
198 | //info[0],info[1]分别为用户首页URL、用户名
199 | BGImage bgImage = tempBGImage;
200 | bgImage.setOwnerURL(info[0]);
201 | bgImage.setOwnerName(info[1]);
202 | list.add(bgImage);
203 | line++;
204 | }
205 | }
206 | }
207 | reader.close();
208 | return list;
209 | }
210 |
211 | @Override
212 | public String getCommentURL(Album album, BGImage image) {
213 | //评论地址
214 | // http://www.douban.com/online/11127307/album/72416214/
215 | // http://www.douban.com/online/11127307/photo/1573338563/
216 | return album.getUrl().substring(0, album.getUrl().indexOf("/album/")) + "/photo/" + image.getId();
217 | }
218 |
219 | @Override
220 | public String getAlbumDesc(String source) {
221 | return null;
222 | }
223 |
224 | }
225 |
--------------------------------------------------------------------------------
/src/main/java/cn/blackgray/douban/album/download/common/utils/LoginUtils.java:
--------------------------------------------------------------------------------
1 | package cn.blackgray.douban.album.download.common.utils;
2 |
3 | import java.io.File;
4 | import java.util.Set;
5 |
6 | import javax.swing.ImageIcon;
7 | import javax.swing.JLabel;
8 |
9 | import org.openqa.selenium.By;
10 | import org.openqa.selenium.Cookie;
11 | import org.openqa.selenium.OutputType;
12 | import org.openqa.selenium.WebDriver;
13 | import org.openqa.selenium.WebElement;
14 | import org.openqa.selenium.chrome.ChromeDriver;
15 | import org.openqa.selenium.chrome.ChromeOptions;
16 |
17 | import cn.blackgray.douban.album.download.common.Common;
18 | import cn.blackgray.douban.album.download.common.Console;
19 | import cn.blackgray.douban.album.download.ui.LoginQrFrame;
20 | import cn.blackgray.douban.album.download.ui.MainFrame;
21 |
22 | /**
23 | * 登录工具类
24 | *
25 | * @author BlackGray
26 | * @createTime 2024-10-02 03:12:27
27 | */
28 | public class LoginUtils {
29 |
30 | public static WebDriver CHROME_DRIVER = null;
31 | //是否已登录,默认为false
32 | public static boolean IS_LOGIN = false;
33 | //豆瓣登录首页
34 | private static String DOUBAN_LOGIN_URL = "https://accounts.douban.com/passport/login";
35 | //是否正在执行登陆中
36 | public static boolean IS_LOGGING_IN = false;
37 | //chromedirver下载地址
38 | private static String CHROME_DRIVER_DOWNLOAD_URL = "https://googlechromelabs.github.io/chrome-for-testing/";
39 |
40 |
41 |
42 | /**
43 | * 初始化ChromeDriver
44 | */
45 | public static WebDriver initChromeDriver() {
46 | // 获取 chromedirver 的存放位置
47 | // String chromedriverPath = "/Users/blackgray/Downloads/chromedriver-mac-arm64/chromedriver";
48 | String chromedriverPath = Common.PATH_APP + File.separator + "chromedriver";
49 | if (CommonUtils.isWindows()) {
50 | chromedriverPath = chromedriverPath + ".exe";
51 | }
52 |
53 | //检查chromedriver文件是否存在
54 | File chromedriverFile = new File(chromedriverPath);
55 | if (chromedriverFile.exists()) {
56 | //若存在,执行初始化
57 | Console.print("检测chromedriver文件是否存在 - 存在");
58 |
59 | Console.print("加载chromedriver - " + chromedriverPath);
60 | System.getProperties().setProperty("webdriver.chrome.driver", chromedriverPath);
61 |
62 | ChromeOptions chromeOptions = new ChromeOptions();
63 |
64 | chromeOptions.addArguments("--no-sandbox"); // 不使用沙箱
65 | chromeOptions.addArguments("--disable-dev-shm-usage");
66 | // chromeOptions.addArguments("blink-settings=imagesEnabled=false"); //不加载图片
67 | chromeOptions.addArguments("--disable-gpu"); // 禁用GPU
68 | chromeOptions.addArguments("--remote-allow-origins=*");
69 |
70 | // 使用后台打开chrome的方式
71 | chromeOptions.addArguments("--headless");
72 | return new ChromeDriver(chromeOptions);
73 | }else {
74 | //若不存在,提醒下载、打开下载地址
75 | Console.print("检测chromedriver文件是否存在 - 不存在。");
76 | Common.openURLWithBrowse(CHROME_DRIVER_DOWNLOAD_URL, MainFrame.getInstance());
77 | Console.print("已使用默认浏览器打开chromedriver下载界面。");
78 | Console.print("请下载与自己Chrome大版本一致的chromedriver。");
79 | Console.print("下载后拷贝文件至下载器相同目录下,再重新执行登陆操作。");
80 | return null;
81 | }
82 | }
83 |
84 | /**
85 | * 登陆执行
86 | */
87 | public static void login() {
88 |
89 | Console.print("正在准备登陆,请稍等...");
90 |
91 | try {
92 | CHROME_DRIVER = initChromeDriver();
93 | if (CHROME_DRIVER != null) {
94 | // 1.模拟打开登陆页面
95 | String url = DOUBAN_LOGIN_URL;
96 | Console.print("打开登陆页面,URL:" + url);
97 | CHROME_DRIVER.get(url);
98 |
99 | // 2.等5秒钟响应后再操作,不然内容可能还没有返回
100 | Console.print("睡眠3s,等待页面加载完整。");
101 | Thread.sleep(3 * 1000l);
102 | Console.print("打页面标题:" + CHROME_DRIVER.getTitle());
103 | // System.out.println("========================");
104 | // System.out.println(driver.getPageSource());
105 | // System.out.println("========================");
106 | // webDriver.manage().timeouts().implicitlyWait(30, TimeUnit.SECONDS);
107 |
108 | // 3.获取二维码登录切换按钮
109 | WebElement loginTypeTab = CHROME_DRIVER.findElement(By.xpath("/html/body/div[1]/div[2]/div[2]/div/div[1]/div/div[1]/a[1]"));
110 | // 点击TAB,切换至密码登录表单
111 | loginTypeTab.click();
112 | Console.print("睡眠1s。");
113 | Thread.sleep(1 * 1000l);
114 | // Console.print("输出页面源码,检查二维码是否已生成。");
115 | // System.out.println(driver.getPageSource());
116 |
117 | // //4.获取二维码 - 保存到本地
118 | // //存储截图文件的文件夹位置
119 | // String dir = "/Users/blackgray/Downloads/selenium/";
120 | // //文件名
121 | // String imageFileName = "qr.png";
122 | // //文件完整路径
123 | // String imageFileFullName = dir + imageFileName;
124 | // //本地文件转为文件类型
125 | // File imageFile = new File(imageFileFullName);
126 | //
127 | // WebElement qrParent = CHROME_DRIVER.findElement(By.className("account-qr-scan"));
128 | // WebElement qr = qrParent.findElements(By.xpath("./*")).get(0);
129 | //
130 | // //将元素对应的部分截图并转为文件类型
131 | // File eleScreenShotFile = qr.getScreenshotAs(OutputType.FILE);
132 | // //将截图文件复制到本地文件
133 | // FileUtils.copyFile(eleScreenShotFile, imageFile);
134 |
135 | // 4.获取二维码
136 | // 本地文件转为文件类型
137 | WebElement qrParent = CHROME_DRIVER.findElement(By.className("account-qr-scan"));
138 | WebElement qr = qrParent.findElements(By.xpath("./*")).get(0);
139 | // 将元素对应的部分截图并转为文件类型 - selenium截图默认使用了Java临时文件,程序关闭,文件将自动删除
140 | File eleScreenShotFile = qr.getScreenshotAs(OutputType.FILE);
141 | Console.print("获取登陆二维码 - " + eleScreenShotFile.getAbsolutePath());
142 |
143 | // 5.显示二维码
144 | LoginQrFrame frame = new LoginQrFrame();
145 | ImageIcon qrImage = new ImageIcon(eleScreenShotFile.getAbsolutePath());
146 | JLabel lable = new JLabel(qrImage);
147 | lable = new JLabel(qrImage);
148 | frame.setLayout(null);
149 | frame.add(lable);
150 | lable.setBounds(0, 0, frame.getWidth(), frame.getHeight() - 40);
151 |
152 | // 显示界面
153 | MainFrame mainFrame = MainFrame.getInstance();
154 | frame.setLocationRelativeTo(mainFrame);
155 | frame.setLocation(mainFrame.getLocation().x + 400, mainFrame.getLocation().y + 80);
156 | frame.setVisible(true);
157 | frame.revalidate();
158 | frame.setResizable(false);
159 |
160 | Console.print("等待使用豆瓣APP扫描二维码登录。");
161 | //检查是否登陆成功
162 | //设置登录中标识
163 | LoginUtils.IS_LOGGING_IN = true;
164 | new Thread(new Runnable() {
165 | @Override
166 | public void run() {
167 | //定时检查页面,确认是否登陆成功后跳转首页
168 | while (true) {
169 | if (LoginUtils.IS_LOGGING_IN) {
170 | try {
171 | Console.print("检查是否已登录 - 未登录。");
172 | if (!LoginUtils.CHROME_DRIVER.getCurrentUrl().equals(DOUBAN_LOGIN_URL)) {
173 | // 是否已登录标识更新
174 | LoginUtils.IS_LOGIN = true;
175 | Console.print("登陆成功,可开始执行下载。");
176 | //取消登录中标识
177 | LoginUtils.IS_LOGGING_IN = false;
178 | frame.setVisible(false);
179 | //若有录入相册地址,自动开始下载
180 | MainFrame mainFrame = MainFrame.getInstance();
181 | if (mainFrame.albumTextArea.getText().length() > 0) {
182 | mainFrame.downloadBtn.doClick();
183 | }
184 | break;
185 | }
186 | Thread.sleep(1 * 1000);
187 | } catch (InterruptedException e) {
188 | e.printStackTrace();
189 | }
190 | }else {
191 | Console.print("取消登录。");
192 | break;
193 | }
194 | }
195 | }
196 | }).run();
197 | }
198 | } catch (Exception e) {
199 | e.printStackTrace();
200 | }
201 | }
202 |
203 | /**
204 | * 获取Cookie字符串
205 | * @param cookies
206 | * @return
207 | */
208 | public static String getCookiesStr(WebDriver driver) {
209 | Set cookies = driver.manage().getCookies();
210 | String cookieStr = convertCookiesToString(cookies);
211 | // System.out.println("【CookiesStr】" + cookieStr);
212 | return cookieStr;
213 | }
214 |
215 | /**
216 | * Cookie集合转为字符串
217 | * @param cookies
218 | * @return
219 | */
220 | private static String convertCookiesToString(Set cookies) {
221 | StringBuilder cookieBuilder = new StringBuilder();
222 | for (Cookie cookie : cookies) {
223 | if (cookieBuilder.length() > 0) {
224 | cookieBuilder.append("; ");
225 | }
226 | cookieBuilder.append(cookie.getName()).append("=").append(cookie.getValue());
227 | }
228 | return cookieBuilder.toString();
229 | }
230 |
231 | }
232 |
--------------------------------------------------------------------------------
/src/main/java/cn/blackgray/douban/album/download/common/utils/URLUtils.java:
--------------------------------------------------------------------------------
1 | package cn.blackgray.douban.album.download.common.utils;
2 |
3 | import java.io.BufferedReader;
4 | import java.io.IOException;
5 | import java.io.InputStreamReader;
6 | import java.io.UnsupportedEncodingException;
7 | import java.net.HttpURLConnection;
8 | import java.net.MalformedURLException;
9 | import java.net.ProtocolException;
10 | import java.net.URL;
11 | import java.util.Random;
12 |
13 | import org.openqa.selenium.WebDriver;
14 |
15 | import cn.blackgray.douban.album.download.common.Common;
16 | import cn.blackgray.douban.album.download.common.Console;
17 |
18 |
19 | /**
20 | * 网络资源相关工具类
21 | * @author BlackGray
22 | */
23 | public class URLUtils {
24 |
25 |
26 | public static String charset = Common.CHARTSET_UTF8;
27 |
28 | /**
29 | * 获取页面源码
30 | * @param url
31 | * @return
32 | */
33 | public static String readSource(String url){
34 |
35 | StringBuffer sb = new StringBuffer();
36 |
37 | //判断是否已登录
38 | if (LoginUtils.IS_LOGIN) {
39 | //若已登录使用CHROME_DRIVER获取源码
40 | WebDriver driver = LoginUtils.CHROME_DRIVER;
41 | driver.get(url);
42 | Console.print("睡眠2s,等待页面加载完整。");
43 | try {
44 | Thread.sleep(2*1000l);
45 | } catch (InterruptedException e) {
46 | e.printStackTrace();
47 | }
48 |
49 | // System.out.println("获取页面源码。");
50 | String pageSource = driver.getPageSource();
51 |
52 | // System.out.println("输出页面源码。");
53 | // System.out.println(pageSource);
54 |
55 | sb.append(pageSource.toString());
56 |
57 | }else {
58 | //若未登录,使用HttpURLConnection获取源码
59 | //获取页面源码
60 | try {
61 | //代理
62 | // SocketAddress add = new InetSocketAddress("203.66.187.246", 81);
63 | // Proxy p = new Proxy(Proxy.Type.HTTP , add);
64 | // HttpURLConnection connection = (HttpURLConnection) u.openConnection(p);
65 | // String headerKey = "Proxy-Authorization";
66 | // String headerValue = "Basic " + Base64.encode(user+":"+password);
67 | // conn.setRequestProperty(headerKey, headerValue);
68 |
69 | URL u = new URL(url);
70 | HttpURLConnection connection = (HttpURLConnection) u.openConnection();
71 | connection.setRequestProperty("User-Agent", randomUserAgentStr());
72 |
73 | //connection.setRequestProperty("referer", "https://www.douban.com/");
74 |
75 | //默认UTF-8读取
76 | BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream(),charset));
77 | String str;
78 | while ((str = reader.readLine()) != null) {
79 | sb.append(str);
80 | }
81 | reader.close();
82 | } catch (MalformedURLException e) {
83 | e.printStackTrace();
84 | }catch (UnsupportedEncodingException e) {
85 | e.printStackTrace();
86 | } catch (IOException e) {
87 | e.printStackTrace();
88 | }
89 | }
90 |
91 |
92 | String result = sb.toString();
93 | if (result.trim().length() == 0) {
94 | Console.print("源码获取失败:" + url);
95 | return result;
96 | }else{
97 | String charsetCheck = charset;
98 | //判断字符集
99 | if (result.indexOf(Common.CHARTSET_GBK) != -1) {
100 | charsetCheck = Common.CHARTSET_GBK;
101 | }else if(result.indexOf(Common.CHARTSET_GB2312) != -1){
102 | //GB2312
103 | charsetCheck = Common.CHARTSET_GB2312;
104 | }else{
105 | //utf-8
106 | charsetCheck = Common.CHARTSET_UTF8;
107 | }
108 | if (!charsetCheck.equals(charset)) {
109 | Console.print("字符集:" + charset + " -> " + charsetCheck);
110 | charset = charsetCheck;
111 | return readSource(url);
112 | }else{
113 | return result;
114 | }
115 | }
116 |
117 | }
118 |
119 |
120 | /**
121 | * 随机生成user-agent字符串
122 | */
123 | public static String randomUserAgentStr() {
124 |
125 | Random r = new Random();
126 | Integer chromeVesionFirstNum = r.nextInt(14) + 100;
127 | Integer chromeVesionThirdNum = r.nextInt(3800);
128 | Integer chromeVesionFourthNum = r.nextInt(140);
129 |
130 | String[] osTypeArray = new String[] {
131 | "(Windows NT 6.1; WOW64) ",
132 | "(Windows NT 10.0; WOW64) ",
133 | "(X11; Linux x86_64) ",
134 | "(Macintosh; Intel Mac OS X 10_15_7) "
135 | };
136 |
137 | String result = "";
138 | result += "Mozilla/5.0 " ;
139 | result += osTypeArray[r.nextInt(3)] ;
140 | result += "AppleWebKit/537.36 (KHTML, like Gecko) ";
141 | result += "Chrome/" + chromeVesionFirstNum + ".0." + chromeVesionThirdNum + "." + chromeVesionFourthNum + " ";
142 | result += "Safari/537.36";
143 |
144 | return result;
145 | }
146 |
147 | /**
148 | * 判断URL资源是否存在
149 | * @param url
150 | * @return
151 | * @throws MalformedURLException
152 | * @throws ProtocolException
153 | * @throws IOException
154 | */
155 | public static boolean exists(String url) throws MalformedURLException, ProtocolException, IOException{
156 | return exists(url);
157 | }
158 |
159 | /**
160 | * 判断URL资源是否存在
161 | * @param url
162 | * @param cookie字符串
163 | * @return
164 | * @throws MalformedURLException
165 | * @throws ProtocolException
166 | * @throws IOException
167 | */
168 | public static boolean exists(String url, String cookieStr) throws MalformedURLException, ProtocolException, IOException{
169 | URL u = new URL(url);
170 | HttpURLConnection conn = (HttpURLConnection) u.openConnection();
171 | //允许重定向,否则部分图片无法下载
172 | HttpURLConnection.setFollowRedirects(true);
173 | conn.setInstanceFollowRedirects(true);
174 | /* 设置 URL 请求的方法, GET POST HEAD OPTIONS PUT DELETE TRACE 以上方法之一是合法的,具体取决于协议的限制。*/
175 | conn.setRequestMethod("GET");
176 | conn.setRequestProperty("User-Agent", URLUtils.randomUserAgentStr());
177 |
178 | //2016-03-16 如不加referer信息,下载影人相册时,大图监测返回403异常
179 | conn.setRequestProperty("referer", "https://www.douban.com/");
180 |
181 | //2024-10-02 如果有Cookie,设置参数。用于登录后页面访问。
182 | if (cookieStr != null && cookieStr != "") {
183 | conn.setRequestProperty("Cookie", cookieStr);
184 | }
185 |
186 | //=======信息=======
187 | // Map> map = conn.getHeaderFields();
188 | // for (Entry> element : map.entrySet()) {
189 | // System.out.println(element.getKey() + " = " + element.getValue());
190 | // }
191 | // System.out.println("getContentType = " + conn.getContentType());
192 | // System.out.println("getContentLength = " + conn.getContentLength());
193 | // System.out.println("getInstanceFollowRedirects = " + conn.getInstanceFollowRedirects());
194 | // System.out.println("getDefaultUseCaches = " + conn.getDefaultUseCaches());
195 | // System.out.println("getContentEncoding = " + conn.getContentEncoding());
196 | // System.out.println("getExpiration = " + conn.getExpiration());
197 | // System.out.println("getResponseCode = " + conn.getResponseCode());
198 | // System.out.println("getResponseMessage = " + conn.getResponseMessage());
199 | //=======/信息=======
200 | System.out.println("ResponseCode:" + conn.getResponseCode());
201 | if (conn.getResponseCode() == HttpURLConnection.HTTP_OK) {
202 | return true;
203 | }else{
204 | return false;
205 | }
206 | }
207 |
208 |
209 | public static void main(String[] args) throws MalformedURLException, ProtocolException, IOException {
210 | // // Manual check: resource existence
211 | // System.out.println(URLUtils.exists("http://img5.douban.com/view/photo/photo/public/p814563030.jpg"));
212 | // System.out.println("==========");
213 | // System.out.println(URLUtils.exists("http://img5.douban.com/view/photo/large/public/p814563030.jpg"));
214 | // System.out.println("==========");
215 | // System.out.println(URLUtils.exists("http://img5.douban.com/view/photo/large/public/p814563030.jpgx"));
216 |
217 | // Manual check: fetch page source. Only the last call below is active; it prints the source of a user's album index page.
218 | // System.out.println(readSource("http://www.douban.com/photos/album/67952443/"));
219 | // System.out.println(readSource("https://www.douban.com/photos/album/120012756/"));
220 | // System.out.println(readSource("https://movie.douban.com/celebrity/1138320/photos/?start=0"));
221 | // 2023-07-05: after a Douban redesign the "user album index" page requires login; the program has no permission and cannot fetch the album list.
222 | System.out.println(readSource("https://www.douban.com/people/BlackGray/photos"));
223 |
224 | // try {
225 | // System.out.println(URLUtils.exists("https://img1.doubanio.com/view/photo/raw/public/p2321685527.jpg"));
226 | // } catch (MalformedURLException e) {
227 | // // TODO Auto-generated catch block
228 | // e.printStackTrace();
229 | // } catch (ProtocolException e) {
230 | // // TODO Auto-generated catch block
231 | // e.printStackTrace();
232 | // } catch (IOException e) {
233 | // // TODO Auto-generated catch block
234 | // e.printStackTrace();
235 | // }
236 |
237 | // Former default user-agent
238 | // System.out.println("Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36");
239 | // Randomly generated user-agent
240 | // System.out.println(randomUserAgentStr());
241 |
242 | }
243 |
244 | }
245 |
--------------------------------------------------------------------------------
/src/main/java/cn/blackgray/douban/album/download/service/handler/handler/UserAlbumHandler.java:
--------------------------------------------------------------------------------
1 | package cn.blackgray.douban.album.download.service.handler.handler;
2 |
3 | import java.util.Map;
4 | import java.util.regex.Matcher;
5 | import java.util.regex.Pattern;
6 |
7 | import cn.blackgray.douban.album.download.common.Common;
8 | import cn.blackgray.douban.album.download.common.Console;
9 | import cn.blackgray.douban.album.download.common.utils.URLUtils;
10 | import cn.blackgray.douban.album.download.model.Album;
11 | import cn.blackgray.douban.album.download.model.BGImage;
12 | import cn.blackgray.douban.album.download.service.handler.AlbumHandler;
13 |
14 | /**
15 | * 个人相册处理器
16 | */
17 | public class UserAlbumHandler extends AlbumHandler {
18 |
19 | //照片分页大小(一页18张图)
20 | public static final int PAGE_SIZE_IMAGES = 18;
21 | public static final String PAGE_TAG = "m_start";
22 |
23 | //若为私密相册,照片前缀为x,公共相册照片前缀为p
24 | public static final String IMAGE_NAME_REGEX = "(x|p)\\d+.(" + Common.IMAGE_TYPE + ")";
25 | public static final String ALBUM_URL_REGEX = "(http|https)://www.douban.com/photos/album/\\d+/";
26 |
27 | @Override
28 | public String getURLRegex() {
29 | return ALBUM_URL_REGEX;
30 | }
31 |
32 | @Override
33 | public String getPageRegex() {
34 | return super.getAlbumURL() + "\\?\\w+=\\d+";
35 | }
36 |
37 | @Override
38 | public boolean removeURLParameter() {
39 | return true;
40 | }
41 |
42 | @Override
43 | public Integer getPageSize() {
44 | return PAGE_SIZE_IMAGES;
45 | }
46 |
47 | @Override
48 | public String getPageTag() {
49 | return PAGE_TAG;
50 | }
51 |
52 | @Override
53 | public String getImageNameRegex() {
54 | return IMAGE_NAME_REGEX;
55 | }
56 |
57 | @Override
58 | public boolean hasRaw(Album album) {
59 | if (album.getIsPrivateAlbum()) {
60 | //私密相册没有原始大图,不做判断下载
61 | return false;
62 | }else {
63 | return true;
64 | }
65 | }
66 |
67 | @Override
68 | public String getRawURL(String imageURL) {
69 | //最初 - 没有原始图 - 已失效
70 | //http://img3.douban.com/view/photo/photo/public/p1932887553.jpg - 小图
71 | //http://img3.douban.com/view/photo/large/public/p1932887553.jpg - 大图
72 |
73 | //2017-11-06之前
74 | //小图
75 | //https://img1.doubanio.com/view/photo/m/public/p2504126549.webp
76 | //大图
77 | //https://img3.doubanio.com/view/photo/l/public/p2504126600.webp
78 | //原始图
79 | //https://img3.doubanio.com/view/photo/large/public/p2504126600.jpg
80 |
81 | //2017-11-06
82 | //原始图地址有变,地址中photo/large变为photo/raw
83 |
84 | //2024-10-02
85 | //检查豆瓣,发现页面中已经无法访问原始图,点击“查看大图”,显示的图与此前"l"类型图片大小一致。
86 | //但通过程序仍能下载原始大图,地址中photo/l,改为photo/raw。
87 | //若在界面中访问原始大图,需手动调整地址,地址中photo/l,改为photo/large。
88 | //很迷惑的操作,不理解。豆瓣内部有问题?
89 |
90 | return imageURL.replace("photo/l", "photo/raw").trim();
91 | }
92 |
93 | @Override
94 | public void createBGImage(Album album, String source, String pageURL, String imageURL, Map map) {
95 | // Builds the BGImage entry for one photo URL found on an album page and stores it in map, keyed by the final image URL (steps 1-4 below).
96 | // [1] If the album is not yet flagged as private, check whether it is
97 | if (!album.getIsPrivateAlbum()) {
98 | // Decide by the image-name prefix: x means private, p means public
99 | String imageNameRegexToPrivate = "x\\d+.(" + Common.IMAGE_TYPE + ")";
100 | String imageName = imageURL.substring(imageURL.lastIndexOf("/") + 1);
101 | if (imageName.matches(imageNameRegexToPrivate)) {
102 | // Private album
103 | album.setIsPrivateAlbum(true);
104 | }
105 | }
106 |
107 |
108 |
109 | // [2] Collect the photo description and comment count
110 | // Extract the photo ID: the text between the last "/x" or "/p" and the last "."
111 | String imageId;
112 | if (album.getIsPrivateAlbum()) {
113 | // Private-album handling
114 | imageId = imageURL.substring(imageURL.lastIndexOf("/x") + 2,imageURL.lastIndexOf("."));
115 | }else {
116 | // Public-album handling
117 | imageId = imageURL.substring(imageURL.lastIndexOf("/p") + 2,imageURL.lastIndexOf("."));
118 | }
119 | // Photo description. NOTE(review): the next statement looks truncated in this copy (numbering jumps from 120 to 125, so the declarations of desc and descStartIndex are not visible) - restore from the repository before editing.
120 | String descStartIndexStr = "",descStartIndex));
125 | }else{
126 | desc = "";
127 | }
128 | // Photo comment count. NOTE(review): the marker literals below are empty in this copy, presumably lost in extraction - confirm against the repository.
129 | String commentTatolStartIndexStr = "";
130 | int commentTatolStartIndex = source.indexOf(commentTatolStartIndexStr);
131 | Integer commentTatol = null;
132 | if (commentTatolStartIndex != -1) {
133 | // The extracted text looks like "3回应" (3 replies); the suffix is removed before parsing the number
134 | String s = source.substring(commentTatolStartIndex + commentTatolStartIndexStr.length(), source.indexOf("",commentTatolStartIndex));
135 | commentTatol = Integer.valueOf(s.replace("回应", ""));
136 | }
137 |
138 |
139 | // [3] Determine the photo URL
140 | if (album.getIsPrivateAlbum()) {
141 | // Private-album handling
142 | // For a private album every photo URL is fetched from its own page, which is slow, so detailed progress is logged
143 | Console.print("---------------------------------------");
144 | album.setPrivatePhotoURLAnalyzeTotal(album.getPrivatePhotoURLAnalyzeTotal() + 1);
145 | Console.print("获取私密相册照片地址开始 - " + album.getPrivatePhotoURLAnalyzeTotal() + " - " + pageURL);
146 |
147 | // Build the URL of the photo's own page from the photo ID
148 | String photoPageURL = "https://www.douban.com/photos/photo/" + imageId;
149 | Console.print("获取私密相册照片页面源码 - " + photoPageURL);
150 | String privatePhotoPageSource = URLUtils.readSource(photoPageURL);
151 |
152 | // Extract the photo URL; null means no usable URL was found
153 | imageURL = getPrivatePhotoURL(privatePhotoPageSource);
154 | if (imageURL != null) {
155 | Console.print("获取私密照片URL成功 - " + imageURL);
156 | }else{
157 | Console.print("获取私密照片URL失败 - 无有效照片URL");
158 | }
159 |
160 | }else {
161 | // Public-album handling
162 | /**
163 | * Note added 2024-10-02
164 | * Personal albums are either public or private.
165 | * Public-album thumbnail URL example: https://img3.doubanio.com/view/photo/m/public/p881668782.jpg
166 | * Private-album thumbnail URL example: https://simg.douban.com/view/photo/m/-urE8H6nKPAUXOxX_PVPWA/2745512/x2196276230.jpg
167 | *
168 | * Public image names start with p, private ones with x.
169 | *
170 | * Private large-image URL example: https://simg.douban.com/view/photo/l/6NrUwD0fG4RjxvP7FjkzLA/2745512/x2196276230.jpg
171 | * The URL contains a randomly generated string such as "6fph28J_c7MrPu9_LQT0Pw" that differs for every image.
172 | * That string is visible only on the photo detail page, not on the list page.
173 | * Therefore the large-image URL cannot be derived by rewriting a fixed substring.
174 | *
175 | * Private raw-image example: https://simg.douban.com/view/photo/l/6NrUwD0fG4RjxvP7FjkzLA/2745512/x2196276230.jpg
176 | * Same as the large-image URL? Related to old images having no RAW original? To be checked against other examples.
177 | * On Douban the raw image can no longer be opened from the page; "view large image" shows the same size as the former "l" type.
178 | * The program can nevertheless still download the raw image.
179 | */
180 | // Derive the large-image URL for the collection. This does not work for private albums.
181 | // m -> l: thumbnail -> large image
182 | imageURL = imageURL.replace("photo/m", "photo/l").trim();
183 | }
184 |
185 | // [4] Add the photo to the collection
186 | if (imageURL != null) {
187 | if (!map.containsKey(imageURL)) {
188 | desc = desc.replace("\\t\\n","").trim();
189 | map.put(imageURL, new BGImage(desc, imageURL, commentTatol));
190 | }else{
191 | // URL already present: re-insert it with a "※" prefix on the description (marks the album cover), keeping an earlier comment count when the current one is null
192 | BGImage bgImage = map.get(imageURL);
193 | if (bgImage.getCommentTotal()!=null && commentTatol==null) {
194 | commentTatol = bgImage.getCommentTotal();
195 | }
196 | map.put(imageURL, new BGImage("※" + bgImage.getDesc(), imageURL, commentTatol));
197 | }
198 | }
199 |
200 | }
201 |
202 |
203 | @Override
204 | public boolean checkBGImage(BGImage bgImage) {
205 | return bgImage.getUrl().indexOf("albumicon") < 0;
206 | }
207 |
208 | @Override
209 | public String getCommentURL(Album album, BGImage image) {
210 | //http://www.douban.com/photos/album/67952443/
211 | //http://www.douban.com/photos/photo/1560777504/
212 | return "http://www.douban.com/photos/photo/" + image.getId();
213 | }
214 |
215 | @Override
216 | public String getAlbumDesc(String source) {
217 | String startTag = "data-desc=\"";
218 | if (source.indexOf(startTag) != -1) {
219 | int startIndex = source.indexOf(startTag) + startTag.length();
220 | String desc = source.substring(startIndex,source.indexOf("\"", startIndex)).replace("\\t\\n","").trim();
221 | if (desc.indexOf("【") != -1) {
222 | desc = desc.substring(desc.lastIndexOf("】") + 1);
223 | }
224 | return desc;
225 | }else{
226 | return null;
227 | }
228 | }
229 |
230 |
231 | /**
232 | * Extracts a private-album photo URL from the source of the photo's own page; returns the first regex match inside the cut-out fragment, or null when nothing matches.
233 | */
234 | private String getPrivatePhotoURL(String source) {
235 |
236 | // Example page source:
237 | //view-source:https://www.douban.com/photos/photo/2196276230/
238 | // Photo URL to extract:
239 | //https://simg.douban.com/view/photo/l/6NrUwD0fG4RjxvP7FjkzLA/2745512/x2196276230.jpg
240 |
241 | // Cut out the fragment that contains the photo URL. NOTE(review): the end-marker literal on the endIndex line is empty in this copy, presumably lost in extraction - confirm against the repository.
242 | int beginIndex = source.indexOf("image-show-inner");
243 | int endIndex = source.indexOf("", beginIndex);
244 | source = source.substring(beginIndex, endIndex);
245 |
246 | // Find the photo URL: http(s) URL ending in one of the extensions from Common.IMAGE_TYPE
247 | String regex = "(http|https)://(\\w|\\s|\\.|-|_|/)+[\\.](" + Common.IMAGE_TYPE + ")";
248 | Pattern p = Pattern.compile(regex);
249 | Matcher m = p.matcher(source);
250 | while (m.find()) {
251 | return m.group().trim();
252 | }
253 |
254 | return null;
255 | }
256 |
257 |
258 | }
259 |
--------------------------------------------------------------------------------
/src/main/java/cn/blackgray/douban/album/download/service/download/DownloadProcessing.java:
--------------------------------------------------------------------------------
1 | package cn.blackgray.douban.album.download.service.download;
2 |
3 | import java.io.File;
4 | import java.io.FileNotFoundException;
5 | import java.io.IOException;
6 | import java.util.ArrayList;
7 | import java.util.HashMap;
8 | import java.util.HashSet;
9 | import java.util.List;
10 | import java.util.Map;
11 | import java.util.Map.Entry;
12 | import java.util.Set;
13 |
14 | import javax.swing.JLabel;
15 | import javax.swing.JProgressBar;
16 |
17 | import cn.blackgray.douban.album.download.common.Common;
18 | import cn.blackgray.douban.album.download.common.Console;
19 | import cn.blackgray.douban.album.download.common.utils.FileUtils;
20 | import cn.blackgray.douban.album.download.model.Album;
21 | import cn.blackgray.douban.album.download.model.BGImage;
22 | import cn.blackgray.douban.album.download.service.handler.AlbumHandler;
23 | import cn.blackgray.douban.album.download.service.handler.PageAnalyzer;
24 | import cn.blackgray.douban.album.download.ui.MainFrame;
25 |
26 | /**
27 | * 下载处理工具类
28 | * @author BlackGray
29 | */
30 | public class DownloadProcessing {
31 |
32 | private static MainFrame mainFrame = MainFrame.getInstance();
33 |
34 | private static JProgressBar processUnitProgressBar = mainFrame.processUnitProgressBar; //处理单元进度条
35 | private static JLabel processUnitCountLabel = mainFrame.processUnitCountLabel; //处理单元进度条
36 |
37 |
38 |
39 | /**
40 | * 根据相册模型下载相册
41 | * @throws FileNotFoundException
42 | * @throws IOException
43 | */
44 | public static void downloadAlbum(Album album){
45 |
46 | //单相册下载时间
47 | long albumDownloadTime = System.currentTimeMillis();
48 | //更新统计
49 | int updateCount = 0;
50 |
51 | Map imageMap = new HashMap(); //【单相册全照片集合】
52 |
53 | //【启动处理单元】
54 | //处理单元总数
55 | int processUnitMax = new Double(Math.ceil((double)album.getPageURLLsit().size()/Common.PROCESS_UNIT_SIZE)).intValue();
56 | int processUnitNumber = 0; //处理单元计数
57 | processUnitProgressBar.setMaximum(processUnitMax);
58 | processUnitProgressBar.setValue(0);
59 | processUnitCountLabel.setText("0/" + processUnitMax + " ");
60 | for (int j = 0; j < processUnitMax; j++) {
61 | //处理单元耗时
62 | long processUnitTime = System.currentTimeMillis();
63 | //取出规定条数记录,执行处理单元
64 | List pageURLList = new ArrayList();
65 | int start = processUnitNumber * Common.PROCESS_UNIT_SIZE;
66 | int end = start + Common.PROCESS_UNIT_SIZE;
67 | if (end > album.getPageURLLsit().size()) {
68 | end = album.getPageURLLsit().size();
69 | }
70 | for (int k = start; k < end; k++) {
71 | pageURLList.add(album.getPageURLLsit().get(k));
72 | }
73 | //处理 - 返回更新信息
74 | updateCount += processUnit(processUnitNumber, album, imageMap, pageURLList);
75 |
76 | //处理单元计数+1
77 | processUnitNumber++;
78 | processUnitProgressBar.setValue(j + 1);
79 | processUnitCountLabel.setText((j + 1) + "/" + processUnitMax + " ");
80 | //【判断启动休眠】
81 | //处理单元数大于1,并且不是最后一次处理才执行休眠判断
82 | if (processUnitMax > 1 && (j + 1) != processUnitMax) {
83 | long t = System.currentTimeMillis() - processUnitTime;
84 | Console.print("处理单元耗时:" + (t/1000) + "s");
85 | if (t < Common.TIME_PROCESS_MIN) {
86 | Console.print("短时间访问页面次数过多,启动休眠~");
87 | Console.print("( ̄ε(# ̄)☆╰╮o( ̄皿 ̄///)");
88 | long c = Common.TIME_PROCESS_SLEEP;
89 | while (true) {
90 | if (c <= 0) {
91 | Console.print("[]~( ̄▽ ̄)~* ");
92 | break;
93 | }
94 | try {
95 | Thread.sleep(1000);
96 | Console.print("休眠倒计时:" + c + "\t (#°Д°)\"");
97 | } catch (InterruptedException e) {
98 | e.printStackTrace();
99 | }
100 | c--;
101 | }
102 | }
103 | }
104 | }
105 | //单相册处理完成,map赋值,提供后续操作使用
106 | album.setPhotosList(new ArrayList(imageMap.values()));
107 |
108 | //后续操作
109 | if (album.getPhotosList().size() != 0) {
110 | //【生成描述文件】
111 | album.createDescDoc();
112 | //【输出统计信息】
113 | Console.print("相册下载完成 - " + album.getName());
114 | Console.print(" 数量:" + album.getPhotosList().size());
115 | if (album.isUpdate()) {
116 | Console.print(" 新增:" + updateCount + "(张)");
117 | }
118 | Console.print(" 单相册耗时:" + (System.currentTimeMillis() - albumDownloadTime)/1000 + "s");
119 | }else{
120 | if (album.getIsVisibleToSelf()) {
121 | Console.print("提示:此相册已被设为“仅自己可见”,抱歉无法访问,无法下载。");
122 | Console.print("提示:若是自身相册,或自己账号具有访问权限,可尝试登陆后下载。");
123 | }else if(album.getIsVisibleToFriend()){
124 | Console.print("提示:此相册设置为仅朋友可见。你还不是他/她的朋友,所以无法访问。");
125 | Console.print("提示:若已是朋友,可尝试登陆后下载。");
126 | }else {
127 | Console.print("提示:失败或页面无图片,取消下载。");
128 | Console.print("提示:可能触发豆瓣反爬虫机制被禁IP,数小时后或第二天可恢复。");
129 | Console.print("提示:切勿打开多个软件同时下载,勿短时间内下载大量图片,容易被禁。");
130 | }
131 | }
132 |
133 | }
134 |
135 |
136 | /**
137 | * 处理单元
138 | * @param album
139 | * @param imageMap
140 | * @param pageURLList
141 | * @param processUnitMax
142 | * @param processUnitNumber
143 | * @return 图片更新数
144 | */
145 | private static int processUnit(int processUnitNumber, Album album, Map imageMap,List pageURLList){
146 | int update = 0;
147 | //【信息获取】
148 | Console.print("处理单元[" + processUnitNumber + "]:启动信息获取");
149 | Set imageURLSet = infoProcess(album, imageMap, pageURLList);
150 | //【下载图片】
151 | if(imageURLSet.size() == 0) {
152 | Console.print("处理单元[" + processUnitNumber + "]:页面未检测到图片");
153 | }else {
154 | //【创建目录】
155 | if(processUnitNumber == 0) {
156 | FileUtils.createDir(album);
157 | }
158 |
159 | Console.print("处理单元[" + processUnitNumber + "]:开始下载:" + album.getName() + "(" + imageURLSet.size() + "张)");
160 | update = DownloadManager.downloadImage(album, new ArrayList(imageURLSet),album.getPath());
161 |
162 | //【下载大图】
163 | AlbumHandler albumHandler = album.getAlbumHandler();
164 | if (Common.IS_DOWNLOAD_RAW && albumHandler.hasRaw(album)) {
165 | Console.print("处理单元[" + processUnitNumber + "]:检测并下载大图");
166 | //创建目录
167 | String path = album.getPath() + File.separatorChar + "raw";
168 | File file = new File(path);
169 | if (!file.exists()) {
170 | file.mkdir();
171 | }
172 | //【获取地址】
173 | //小站大图
174 | List list = new ArrayList();
175 | for (String url : imageURLSet) {
176 | list.add(albumHandler.getRawURL(url));
177 | }
178 | //执行下载
179 | update += DownloadManager.downloadImage(album, list,path);
180 | }
181 | }
182 | return update;
183 | }
184 |
185 |
186 | /**
187 | * 照片信息处理
188 | * @param imageMap
189 | * @param pageURLList
190 | * @param processUnitMax
191 | * @param processUnitNumber
192 | * @return
193 | */
194 | private static Set infoProcess(Album album, Map imageMap,List pageURLList){
195 |
196 | Set imageURLSet = new HashSet(); //需要下载的图片 - 单处理单元
197 |
198 | for (int i = 0; i < pageURLList.size(); i++) {
199 | Console.print("分析页面(" + (i + 1) + "/" + pageURLList.size() + "):" + pageURLList.get(i));
200 | //查询单页所有照片地址和描述,可能会出现错误,如果出错,尝试重新分析一次
201 | Map map = new HashMap();
202 | try {
203 | map = PageAnalyzer.findImageURLAndDesc(album, pageURLList.get(i));
204 | } catch (Exception e) {
205 | try {
206 | map = PageAnalyzer.findImageURLAndDesc(album, pageURLList.get(i));
207 | } catch (Exception e2) {
208 | Console.print("页面分析错误,下载失败:" + pageURLList.get(i));
209 | }
210 | e.printStackTrace();
211 | }
212 |
213 | //保存照片地址和描述信息
214 | for (Entry entry : map.entrySet()) {
215 | if (!imageMap.containsKey(entry.getKey())) {
216 | imageMap.put(entry.getKey(), entry.getValue());
217 | imageURLSet.add(entry.getKey());
218 | }else{
219 | //※每个页面都有相册封面照片的链接,扫描照片地址时,有可能每次都对这张照片进行处理
220 | //封面照片可能在任意一页中,获取描述信息时,除了所在页可以得到描述外,其他页的描述都为空
221 | //这里判断每页图片是否存在交集,存在,说明该图是封面照片
222 | //根据图片是否有描述,可以确定图片是否在该页
223 | if (imageMap.get(entry.getKey()).getDesc().equals("")) {
224 | //如果之前添加的这张图没有描述,并且当前图有描述,设置描述信息,并添加首页图片标识,否则不执行任何操作
225 | if (entry.getValue().getDesc().equals("")) {
226 | //如果不加photo.getValue().equals("")判断,可能出现的情况:
227 | //1 false ""
228 | //2 false "" -> ※
229 | //3 true "DESC" -> ※
230 | BGImage image = imageMap.get(entry.getKey());
231 | image.setDesc("※" + entry.getValue().getDesc());
232 | imageMap.put(entry.getKey(), image);
233 | }else{
234 | imageMap.put(entry.getKey(), entry.getValue());
235 | }
236 | }else {
237 | //如果之前添加的图有描述,添加首页图片标识
238 | BGImage bgImage = imageMap.get(entry.getKey());
239 | String desc = ("※" + bgImage.getDesc()).replaceAll("※+", "※");
240 | if (desc.equals("※")) {
241 | bgImage = entry.getValue();
242 | bgImage.setDesc("※" + bgImage.getDesc());
243 | }else{
244 | bgImage.setDesc(desc);
245 | }
246 | imageMap.put(entry.getKey(), bgImage);
247 | };
248 | };
249 | }
250 |
251 | try {
252 | //睡眠一定时间
253 | Thread.sleep(Common.TIME_ONE_PAGE_INFO_PROCESS_SLEEP);
254 | } catch (InterruptedException e) {
255 | e.printStackTrace();
256 | }
257 | }
258 | return imageURLSet;
259 | }
260 |
261 |
262 |
263 |
264 | }
265 |
--------------------------------------------------------------------------------
/src/main/java/cn/blackgray/douban/album/download/service/handler/handler/SitePublicAlbumHandler.java:
--------------------------------------------------------------------------------
1 | package cn.blackgray.douban.album.download.service.handler.handler;
2 |
3 | import java.io.BufferedReader;
4 | import java.io.BufferedWriter;
5 | import java.io.File;
6 | import java.io.FileReader;
7 | import java.io.FileWriter;
8 | import java.io.IOException;
9 | import java.util.ArrayList;
10 | import java.util.Collections;
11 | import java.util.HashMap;
12 | import java.util.List;
13 | import java.util.Map;
14 |
15 | import cn.blackgray.douban.album.download.common.Common;
16 | import cn.blackgray.douban.album.download.common.Console;
17 | import cn.blackgray.douban.album.download.common.utils.URLUtils;
18 | import cn.blackgray.douban.album.download.model.Album;
19 | import cn.blackgray.douban.album.download.model.BGImage;
20 | import cn.blackgray.douban.album.download.service.handler.AlbumHandler;
21 | import cn.blackgray.douban.album.download.service.image.ImageListComparator;
22 |
23 | /**
24 | * 小站公共相册处理器
25 | */
26 | public class SitePublicAlbumHandler extends AlbumHandler {
27 |
28 | // http://site.douban.com/127530/widget/public_album/7152314/
29 | // http://site.douban.com/127530/widget/public_album/7152314/?start=30
30 |
31 | public static final int PAGE_SIZE_IMAGES_SITE = 30; //小站公共相册照片分页大小(一页30张图)
32 | public static final String PAGE_TAG = "start";
33 | public static final String IMAGE_NAME_REGEX = "p\\d+.(" + Common.IMAGE_TYPE + ")";
34 | public static final String ALBUM_URL_REGEX = "(http|https)://site.douban.com/\\d+/widget/public_album/\\d+/";
35 |
36 | @Override
37 | public String getURLRegex() {
38 | return ALBUM_URL_REGEX;
39 | }
40 |
41 | @Override
42 | public String getPageRegex() {
43 | // /127530/widget/public_album/7152314/?start=30
44 | return "/widget/public_album/\\d+/\\?\\w+=\\d+";
45 | }
46 |
47 | @Override
48 | public boolean removeURLParameter() {
49 | return true;
50 | }
51 |
52 | @Override
53 | public Integer getPageSize() {
54 | return PAGE_SIZE_IMAGES_SITE;
55 | }
56 |
57 | @Override
58 | public String getPageTag() {
59 | return PAGE_TAG;
60 | }
61 |
62 | @Override
63 | public String getImageNameRegex() {
64 | return IMAGE_NAME_REGEX;
65 | }
66 |
67 |
68 | @Override
69 | public boolean hasRaw(Album album) {
70 | return false;
71 | }
72 |
73 | @Override
74 | public String getRawURL(String imageURL) {
75 | return null;
76 | }
77 |
78 | @Override
79 | public void createBGImage(Album album, String source, String pageURL, String imageURL, Map map) {
80 | // Builds the BGImage (description, comment count, owner URL and name) for one photo of a site public album and stores it in map. NOTE(review): the sample-markup comment below lost its HTML in this copy.
81 | //
82 | //
83 | //

84 | //
85 | //
86 | //
周庄
87 | // 来自
劝君莫打鸟
88 | //
1回应
89 | //
90 | //
91 | //
92 |
93 | String imageId = imageURL.substring(imageURL.lastIndexOf("/p") + 2,imageURL.lastIndexOf("."));
94 | String siteAlbumId = pageURL.substring(pageURL.indexOf("public_album/") + 13, pageURL.lastIndexOf("/"));
95 | // [Description] NOTE(review): numbering jumps from 96 to 108 here, so the code that declares desc and descStartIndex is missing from this copy - restore from the repository before editing.
96 | // 1回应
108 | String commentTatolStartIndexStr = pageURL.substring(0,pageURL.indexOf(siteAlbumId)) + siteAlbumId + "/photo/" + imageId + "/#comments\">";
109 | int commentTatolStartIndex = source.indexOf(commentTatolStartIndexStr);
110 | Integer commentTatol = null;
111 | if (commentTatolStartIndex != -1) {
112 | // The extracted text looks like "3回应" (3 replies); the suffix is removed before parsing the number
113 | String s = source.substring(commentTatolStartIndex + commentTatolStartIndexStr.length(), source.indexOf("",commentTatolStartIndex));
114 | commentTatol = Integer.valueOf(s.replace("回应", ""));
115 | }
116 | // [Owner home page URL and name] NOTE(review): lines 121-122 and 129 are missing from this copy, so ownerA and ownerStartIndex have no visible declaration.
117 | String ownerURL = null;
118 | String ownerName = null;
119 | if (descStartIndex != -1) {
120 | String ownerStartStr = "来自 ", ownerStartIndex));
123 | try {
124 | ownerURL = ownerA.substring(0,ownerA.indexOf("/\">"));
125 | } catch (Exception e) {
126 | System.out.println("=====================");
127 | System.out.println(ownerA);
128 | System.out.println(ownerA.indexOf(""));
130 | System.out.println("=====================");
131 | e.printStackTrace();
132 | }
133 | ownerName = ownerA.substring(ownerA.indexOf(">") + 1);
134 | }
135 | // [Photo]
136 | //http://img3.douban.com/view/photo/thumb/public/p1748477871.jpg
137 | imageURL = imageURL.replace("thumb", "photo").trim(); // thumb -> photo: thumbnail -> large image
138 | desc = desc.replace("\\t\\n","").trim();
139 | if (!map.containsKey(imageURL) || (map.containsKey(imageURL) && (map.get(imageURL).getOwnerURL()==null && ownerURL!=null))) {
140 | BGImage bgImage = new BGImage(desc, imageURL, commentTatol);
141 | bgImage.setOwnerURL(ownerURL);
142 | bgImage.setOwnerName(ownerName);
143 | map.put(imageURL, bgImage);
144 | }
145 |
146 | }
147 |
148 |
149 | @Override
150 | public boolean checkBGImage(BGImage bgImage) {
151 | return bgImage.getUrl().indexOf("albumicon") < 0;
152 | }
153 |
154 | @Override
155 | public String getCommentURL(Album album, BGImage image) {
156 | // http://site.douban.com/127530/widget/public_album/7152314/
157 | // http://site.douban.com/127530/widget/public_album/7152314/photo/1781015215/
158 | return album.getUrl() + "photo/" + image.getId();
159 | }
160 |
161 | @Override
162 | public String getAlbumDesc(String source) {
163 | String startTag = "data-desc=\"";
164 | if (source.indexOf(startTag) != -1) {
165 | int startIndex = source.indexOf(startTag) + startTag.length();
166 | String desc = source.substring(startIndex,source.indexOf("\"", startIndex)).replace("\\t\\n","").trim();
167 | if (desc.indexOf("【") != -1) {
168 | desc = desc.substring(desc.lastIndexOf("】") + 1);
169 | }
170 | return desc;
171 | }else{
172 | return null;
173 | }
174 | }
175 |
176 | @Override
177 | public void createDescDoc(Album album) {
178 | List imageList = album.getPhotosList();
179 | Map map = new HashMap();
180 | for (BGImage bgImage : imageList) {
181 | map.put(bgImage.getUrl(), bgImage);
182 | }
183 | List keyList = new ArrayList(map.keySet());
184 | //排序
185 | Collections.sort(keyList,new ImageListComparator());
186 | try {
187 | BufferedWriter bw = new BufferedWriter(new FileWriter(album.getPath() + "/" + Common.DEFAULT_DOC_NAME));
188 | //输出相册地址
189 | bw.write(URLUtils.charset + " " + album.getUrl() + " " +album.getDate().getTime() + " -");
190 | bw.newLine();
191 | //输出照片地址和描述
192 | for (int i = 0; i < keyList.size(); i++) {
193 | BGImage bgImage = map.get(keyList.get(i));
194 | Integer commentTotal = bgImage.getCommentTotal();
195 | String commentTotalStr = commentTotal==null?"-":String.valueOf(commentTotal);
196 | bw.write((i + 1) + " " + keyList.get(i) + " " + commentTotalStr + " " + bgImage.getDesc());
197 | bw.newLine();
198 | //活动相册,输出用户名&主页地址
199 | bw.write(bgImage.getOwnerURL() + " " + bgImage.getOwnerName());
200 | bw.newLine();
201 | }
202 | bw.flush();
203 | bw.close();
204 | Console.print("生成描述文档:成功");
205 | } catch (IOException e) {
206 | Console.print("生成描述文档:失败");
207 | e.printStackTrace();
208 | }
209 | }
210 |
211 |
212 | @Override
213 | public List getBGImageFromDescDoc(File descFile) throws IOException {
214 | List list = new ArrayList();
215 | BufferedReader reader = new BufferedReader(new FileReader(descFile));
216 | BGImage tempBGImage = null;
217 | int line = 0;
218 | String str;
219 | while ((str = reader.readLine()) != null) {
220 | if (line == 0) {
221 | line++;
222 | }else{
223 | //0行为相册&页面信息,奇数行照片信息,偶数行用户信息
224 | if (line%2 == 1) {
225 | String[] info = str.split(" ",4);
226 | //info[0],info[1],info[2],info[3]分别为照片编号、原始URL地址、评论数、照片描述
227 | tempBGImage = new BGImage(info[0],info[1],info[3]);
228 | //评论数
229 | if (!info[2].equals("-")) {
230 | tempBGImage.setCommentTotal(Integer.valueOf(info[2]));
231 | }
232 | line++;
233 | }else{
234 | String[] info = str.split(" ",2);
235 | //info[0],info[1]分别为用户首页URL、用户名
236 | BGImage bgImage = tempBGImage;
237 | bgImage.setOwnerURL(info[0]);
238 | bgImage.setOwnerName(info[1]);
239 | list.add(bgImage);
240 | line++;
241 | }
242 | }
243 | }
244 | reader.close();
245 | return list;
246 | }
247 |
248 | }
249 |
250 |
--------------------------------------------------------------------------------