├── .gitignore ├── README.md ├── pom.xml └── src ├── main ├── java │ └── com │ │ └── github │ │ └── cptahck │ │ └── commons │ │ └── staticizer │ │ ├── constants │ │ └── StaticizerConstants.java │ │ ├── core │ │ ├── HtmlPage.java │ │ ├── JBrowserDriverStaticizer.java │ │ ├── StaticizerClient.java │ │ └── StaticizerClientFactory.java │ │ ├── exception │ │ ├── ApplicationException.java │ │ ├── AssertHelper.java │ │ └── StaticizerException.java │ │ └── utils │ │ ├── FileHelper.java │ │ └── JbrowserdriverHelper.java └── resources │ └── log4j.properties └── test ├── java └── com │ └── github │ └── cpthack │ └── commons │ └── staticizer │ └── StaticizerTest.java └── resources └── .gitignore /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | /logs/ 3 | /.project 4 | /.classpath 5 | /.settings/ 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # java-staticizer 2 | 3 | 基于无界面浏览器jbrowserdriver实现的静态化组件,支持将AJAX页面静态化,并支持AngularJS及Vue.js等主流JS实现的页面静态化。API简洁,使用方便。 4 | 5 | ## 使用示例: 6 | 7 | > StaticizerTest.java代码引用 8 | 9 | ``` 10 | long startTime = System.currentTimeMillis(); 11 | String url = "http://guangzhou.jianzhimao.com";// 待静态化的目标地址 12 | String baseSavePath = "/Users/cptahck/workspace/jianzhimao/java-staticizer/src/test/resources/static-file/";// 静态化文件保存地址 13 | 14 | StaticizerClient client = StaticizerClientFactory.getClient(); 15 | logger.info("获取StaticizerClient操作对象>>>" + client); 16 | 17 | HtmlPage htmlPage = null; 18 | for (int i = 1; i < 2; i++) { 19 | 20 | logger.debug("准备静态化第[" + i + "]个文件."); 21 | for (int j = 0; j < 100; j++) { 22 | htmlPage = client.getPageSource(url); 23 | logger.info("请求目标地址并获取HtmlPage对象:" + j); 24 | logger.info("请求[" + url + "]后返回的状态,statusCode = [" + htmlPage.getStatusCode() + "]"); 25 | logger.info("请求[" + url + "]后返回的内容大小,size = [" + 
htmlPage.getPageSource().length() + "]"); 26 | } 27 | 28 | // logger.info("请求[" + url + "]后返回的内容,如下:\n" + htmlPage.getPageSource()); 29 | 30 | logger.info("成功设置本次静态文件保存地址及文件名"); 31 | htmlPage.setBaseSavePath(baseSavePath).asHtml("test".concat(i + ".html")); 32 | } 33 | logger.debug("请求耗时:" + (System.currentTimeMillis() - startTime) + " ms"); 34 | 35 | LinkedHashMap requestHeaders = new LinkedHashMap(); 36 | requestHeaders.put("User-Agent", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) 37 | Chrome/58.0.3029.81 Mobile Safari/537.36"); 38 | requestHeaders.put("Cookie", "ipcity=guangzhou; isw=1; isp=1; ism=1; UM_distinctid=15b991d8b504-0d54f3feabfe08-143d655c-13c680-15b991d8b51109; 39 | gr_user_id=759e16e4-e3f9-41c2-a693-e2ee6773bf6f; m_location_info=%2C%E5%B9%BF%E5%B7%9E%2C; CNZZDATA1254075128=1655583782-1492924156-%7C1492950101; 40 | Hm_lvt_c48dcbb8f7a6cb176845ad3439189ed0=1492924831; JSESSIONID=00C3088AC63C798A72FA9C6FF2C21FB0-n2.www; 1=189ADF1D3B328F58FD5BBABAEFA022F9; 41 | user_account=18826410953; user_pwd=UVAxQTVYN2oxc1k9; user_type=2; user_autologin=1; jobView_showType=2; jobView_p=1; 42 | Hm_lvt_447f87add4dbd73deca17a45d8536dbd=1492922502,1493304116; Hm_lpvt_447f87add4dbd73deca17a45d8536dbd=1493304191"); 43 | 44 | htmlPage = client.getPageSource(url, requestHeaders); 45 | //logger.info("请求到的内容:" + htmlPage.getPageSource()); 46 | 47 | ``` 48 | 49 | ## 具体参考: 50 | 51 | - 简单测试类参考:[StaticizerTest.java](https://github.com/cpthack/java-staticizer/blob/master/src/test/java/com/github/cpthack/commons/staticizer/StaticizerTest.java) 52 | - spring实现参考:[spring boot实现](https://github.com/cpthack/spring-staticizer-service) 53 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | com.github.cpthack.commons 5 | java-staticizer 6 | 0.0.1-SNAPSHOT 7 | 8 | 9 | 10 | log4j 11 | log4j 12 | 1.2.17 
13 | 14 | 15 | org.slf4j 16 | slf4j-api 17 | 1.7.24 18 | 19 | 20 | org.slf4j 21 | slf4j-log4j12 22 | 1.7.21 23 | 24 | 25 | org.apache.commons 26 | commons-lang3 27 | 3.4 28 | 29 | 30 | com.google.guava 31 | guava 32 | 21.0 33 | 34 | 35 | 36 | com.machinepublishers 37 | jbrowserdriver 38 | 0.17.7 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | maven-compiler-plugin 49 | 50 | 1.8 51 | 1.8 52 | 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /src/main/java/com/github/cptahck/commons/staticizer/constants/StaticizerConstants.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013-2020, cpthack 成佩涛 (cpt@jianzhimao.com). 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.cptahck.commons.staticizer.constants; 17 | 18 | import java.nio.charset.Charset; 19 | import java.util.LinkedHashMap; 20 | 21 | import com.google.common.base.Charsets; 22 | 23 | /** 24 | * StaticizerConstants.java
25 | * 26 | *
27 |  * Constants shared by the staticizer: default save directory, default charset and default mobile request headers.
28 |  *
29 | * 30 | * @author cpthack cpt@jianzhimao.com 31 | * @date Apr 28, 2017 9:06:37 AM 32 | * @since JDK 1.7 33 | */ 34 | public class StaticizerConstants { 35 | /** 36 | * Default directory in which staticized files are stored. 37 | */ 38 | public final static String BASE_SAVE_PATH = "/cpthack/staticizer/file/"; 39 | 40 | /** 41 | * Default charset used when reading and writing files (UTF-8). 42 | */ 43 | public final static Charset DEFAULT_CHARTSETS = Charsets.UTF_8; 44 | 45 | /** 46 | * Default request headers for mobile requests; the static initializer below registers a single User-Agent entry. 47 | */ 48 | public final static LinkedHashMap MOBILE_REQUEST_HEADERS = new LinkedHashMap(); 49 | static { 50 | MOBILE_REQUEST_HEADERS.put("User-Agent", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Mobile Safari/537.36"); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/com/github/cptahck/commons/staticizer/core/HtmlPage.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013-2020, cpthack 成佩涛 (cpt@jianzhimao.com). 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.cptahck.commons.staticizer.core; 17 | 18 | import java.io.File; 19 | 20 | import com.github.cptahck.commons.staticizer.constants.StaticizerConstants; 21 | import com.github.cptahck.commons.staticizer.exception.AssertHelper; 22 | import com.github.cptahck.commons.staticizer.utils.FileHelper; 23 | 24 | /** 25 | * HtmlPage.java
26 | * 27 | *
28 |  * Result of staticizing a web page: carries the request status code and the rendered source, and can write that source to an HTML file.
29 |  *
30 | * 31 | * @author cpthack cpt@jianzhimao.com 32 | * @date Apr 27, 2017 11:47:47 PM 33 | * @since JDK 1.7 34 | */ 35 | public class HtmlPage { 36 | 37 | /** 38 | * Status code of the page request. 39 | */ 40 | private int statusCode; 41 | 42 | /** 43 | * Page content after staticizing. 44 | */ 45 | private String pageSource; 46 | 47 | /** 48 | * Base directory in which the staticized page is saved, e.g. "/cpthack/temp/file/". 49 | */ 50 | private String baseSavePath; 51 | 52 | /** 53 | * Sets the base directory in which the staticized page is saved and returns this object so calls can be chained. 54 | */ 55 | public HtmlPage setBaseSavePath(String baseSavePath) { 56 | this.baseSavePath = baseSavePath; 57 | return this; 58 | } 59 | /** Writes pageSource to fileName under baseSavePath (StaticizerConstants.BASE_SAVE_PATH when none was set) and returns whatever FileHelper.toHtml returns; AssertHelper.notBlank rejects a blank pageSource first. */ 60 | public File asHtml(String fileName) { 61 | AssertHelper.notBlank(pageSource, "The pageSource is Not Allow Null."); 62 | if (null == baseSavePath) { 63 | baseSavePath = StaticizerConstants.BASE_SAVE_PATH; 64 | } 65 | File htmlFile = FileHelper.newFile(baseSavePath, fileName); 66 | htmlFile = FileHelper.toHtml(htmlFile, pageSource); 67 | return htmlFile; 68 | } 69 | 70 | public int getStatusCode() { 71 | return statusCode; 72 | } 73 | 74 | public void setStatusCode(int statusCode) { 75 | this.statusCode = statusCode; 76 | } 77 | 78 | public String getPageSource() { 79 | return pageSource; 80 | } 81 | 82 | public void setPageSource(String pageSource) { 83 | this.pageSource = pageSource; 84 | } 85 | 86 | } 87 | -------------------------------------------------------------------------------- /src/main/java/com/github/cptahck/commons/staticizer/core/JBrowserDriverStaticizer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013-2020, cpthack 成佩涛 (cpt@jianzhimao.com). 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.cptahck.commons.staticizer.core; 17 | 18 | import java.util.LinkedHashMap; 19 | 20 | import com.github.cptahck.commons.staticizer.utils.JbrowserdriverHelper; 21 | import com.machinepublishers.jbrowserdriver.JBrowserDriver; 22 | 23 | /** 24 | * JBrowserDriverStaticizer.java
25 | * 26 | *
27 |  * StaticizerClient implementation backed by JBrowserDriver.
28 |  *
29 | * 30 | * @author cpthack cpt@jianzhimao.com 31 | * @date Apr 28, 2017 10:55:01 AM 32 | * @since JDK 1.8 33 | */ 34 | public class JBrowserDriverStaticizer implements StaticizerClient { 35 | 36 | /** Loads the URL through JbrowserdriverHelper.getMobileUrl and converts the driver state into an HtmlPage. */ 37 | @Override 38 | public HtmlPage getMobilePageSource(String url) { 39 | JBrowserDriver driver = JbrowserdriverHelper.getMobileUrl(url); 40 | return toHtmlPage(driver); 41 | } 42 | 43 | /** Loads the URL without custom request headers by passing null headers to the two-argument overload. */ 44 | @Override 45 | public HtmlPage getPageSource(String url) { 46 | return getPageSource(url, null); 47 | } 48 | 49 | /** Loads the URL through JbrowserdriverHelper.getUrl with the given request headers and converts the driver state into an HtmlPage. */ 50 | @Override 51 | public HtmlPage getPageSource(String url, LinkedHashMap requestHeaders) { 52 | JBrowserDriver driver = JbrowserdriverHelper.getUrl(url, requestHeaders); 53 | return toHtmlPage(driver); 54 | } 55 | 56 | /** Copies the driver's status code and page source into a new HtmlPage and always quits the driver afterwards. */ 57 | private HtmlPage toHtmlPage(JBrowserDriver driver) { 58 | try { 59 | HtmlPage htmlPage = new HtmlPage(); 60 | htmlPage.setStatusCode(driver.getStatusCode()); 61 | htmlPage.setPageSource(driver.getPageSource()); 62 | return htmlPage; 63 | } 64 | finally { 65 | /* Close the browser even when reading the page fails, so the driver is not leaked. Allows this thread to terminate. */ 66 | driver.quit(); 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/main/java/com/github/cptahck/commons/staticizer/core/StaticizerClient.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013-2020, cpthack 成佩涛 (cpt@jianzhimao.com). 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | package com.github.cptahck.commons.staticizer.core; 17 | 18 | import java.util.LinkedHashMap; 19 | 20 | /** 21 | * StaticizerClient.java
22 | * 23 | *
24 |  * Client interface for staticizing pages; declares the operations that fetch a URL and return it as an HtmlPage.
25 |  *
26 | * 27 | * @author cpthack cpt@jianzhimao.com 28 | * @date Apr 27, 2017 10:37:55 PM 29 | * @since JDK 1.7 30 | */ 31 | public interface StaticizerClient { 32 | 33 | /** 34 | * 35 | * getMobilePageSource
36 | *
37 | * 38 | * Gets the static page content of the given URL using the default request headers.
39 | * Intended for sites that are only reachable by mobile clients; the same result can be obtained by calling getPageSource(String url, LinkedHashMap 40 | * requestHeaders) with custom request headers. 41 | * 42 | * @author cpthack cpt@jianzhimao.com 43 | * @param url target URL 44 | * @return HtmlPage 45 | * 46 | */ 47 | HtmlPage getMobilePageSource(String url); 48 | 49 | /** 50 | * 51 | * getPageSource
52 | *
53 | * 54 | * Gets the static page content of the given URL using the default request headers.
55 | * 56 | * @author cpthack cpt@jianzhimao.com 57 | * @param url 58 | * target URL 59 | * @return HtmlPage 60 | * 61 | */ 62 | HtmlPage getPageSource(String url); 63 | 64 | /** 65 | * 66 | * getPageSource
67 | *
68 | * 69 | * Gets the static page content of the given URL using custom request headers.
70 | * 71 | * @author cpthack cpt@jianzhimao.com 72 | * @param url 73 | * target URL 74 | * @param requestHeaders 75 | * request headers to send 76 | * @return HtmlPage 77 | * 78 | */ 79 | HtmlPage getPageSource(String url, LinkedHashMap requestHeaders); 80 | 81 | } 82 | -------------------------------------------------------------------------------- /src/main/java/com/github/cptahck/commons/staticizer/core/StaticizerClientFactory.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013-2020, cpthack 成佩涛 (cpt@jianzhimao.com). 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.cptahck.commons.staticizer.core; 17 | 18 | /** 19 | * StaticizerClientFactory.java
20 | * 21 | *
22 |  * Factory that hands out the StaticizerClient instance.
23 |  *
24 | * 25 | * @author cpthack cpt@jianzhimao.com 26 | * @date Apr 28, 2017 2:59:49 PM 27 | * @since JDK 1.7 28 | */ 29 | public class StaticizerClientFactory { 30 | /** Single client instance created when the class is initialized; it is a JBrowserDriverStaticizer. */ 31 | private final static StaticizerClient client = new JBrowserDriverStaticizer(); 32 | /** Returns the same client instance on every call. */ 33 | public static StaticizerClient getClient() { 34 | return client; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/com/github/cptahck/commons/staticizer/exception/ApplicationException.java: -------------------------------------------------------------------------------- 1 | package com.github.cptahck.commons.staticizer.exception; 2 | 3 | /** 4 | * Custom application-level unchecked exception. 5 | * 6 | * @author cpthack cpt@jianzhimao.com 7 | * @date Jul 23, 2016 4:36:16 PM
8 | * @version 9 | * @since JDK 1.7 10 | */ 11 | public class ApplicationException extends RuntimeException { 12 | 13 | private static final long serialVersionUID = -9084561727097703075L; 14 | 15 | protected String code; 16 | 17 | protected String message; 18 | 19 | public ApplicationException() { 20 | super(); 21 | } 22 | 23 | /** 24 | * @param message 25 | */ 26 | public ApplicationException(String message) { 27 | super(message); 28 | this.message = message; 29 | } 30 | 31 | public ApplicationException(String code, String message){ 32 | super(code + " : " + message); 33 | this.code = code; 34 | this.message = message; 35 | } 36 | 37 | 38 | /** 39 | * @param t 40 | */ 41 | public ApplicationException(Throwable t) { 42 | super(t); 43 | } 44 | 45 | /** 46 | * @see java.lang.Throwable#fillInStackTrace() 47 | */ 48 | @Override 49 | public synchronized Throwable fillInStackTrace() { 50 | // return super.fillInStackTrace(); 51 | // 为了提高性能,不记录堆栈信息 52 | return null; 53 | } 54 | 55 | public String code() { 56 | return code; 57 | } 58 | 59 | public String message() { 60 | return message; 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/com/github/cptahck/commons/staticizer/exception/AssertHelper.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013-2020, cpthack 成佩涛 (cpt@jianzhimao.com). 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.cptahck.commons.staticizer.exception; 17 | 18 | import org.apache.commons.lang3.StringUtils; 19 | 20 | /** 21 | * AssertHelper.java
22 | * Precondition helpers that throw StaticizerException with the supplied message when a check fails.
23 | * 24 | * @author cpthack cpt@jianzhimao.com 25 | * @date 2017-04-14 15:32:48 26 | * @since JDK 1.7 27 | */ 28 | public class AssertHelper { 29 | 30 | /** 31 | * 32 | * notNull
33 | * 34 | * Throws a StaticizerException carrying "message" when "object" is null.
35 | * 36 | * @author cpthack cpt@jianzhimao.com 37 | * @param object value that must not be null 38 | * @param message message of the exception thrown on failure 39 | * @throws StaticizerException when object is null 40 | * 41 | */ 42 | public static void notNull(Object object, String message) { 43 | if (null == object) { 44 | throw new StaticizerException(message); 45 | } 46 | } 47 | 48 | /** 49 | * 50 | * notBlank
51 | * 52 | * Throws a StaticizerException carrying "message" when the CharSequence is 53 | * whitespace, empty ("") or null, as decided by StringUtils.isBlank.
54 | * 55 | *
56 | 	 * AssertHelper.notBlank(null)      = throw new StaticizerException(message)
57 | 	 * AssertHelper.notBlank("")        = throw new StaticizerException(message)
58 | 	 * AssertHelper.notBlank(" ")       = throw new StaticizerException(message)
59 | 	 * AssertHelper.notBlank("bob")     = true
60 | 	 * AssertHelper.notBlank("  bob  ") = true
61 | 	 *
62 | * 63 | * @author cpthack cpt@jianzhimao.com 64 | * @param cs text that must not be blank 65 | * @param message message of the exception thrown on failure 66 | * @return true when cs is not blank 67 | * 68 | */ 69 | public static boolean notBlank(CharSequence cs, String message) { 70 | if (StringUtils.isBlank(cs)) { 71 | throw new StaticizerException(message); 72 | } 73 | return true; 74 | } 75 | 76 | /** 77 | * 78 | * isTrue
79 | * 80 | * Throws a StaticizerException carrying "message" when "isTrue" is false.
81 | * 82 | * @author cpthack cpt@jianzhimao.com 83 | * @param isTrue condition that must hold 84 | * @param message message of the exception thrown on failure 85 | * @throws StaticizerException when isTrue is false 86 | * 87 | */ 88 | public static void isTrue(boolean isTrue, String message) { 89 | if (!isTrue) { 90 | throw new StaticizerException(message); 91 | } 92 | } 93 | 94 | } 95 | -------------------------------------------------------------------------------- /src/main/java/com/github/cptahck/commons/staticizer/exception/StaticizerException.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013-2020, cpthack 成佩涛 (cpt@jianzhimao.com). 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.cptahck.commons.staticizer.exception; 17 | 18 | /** 19 | * StaticizerException.java
20 | * Unchecked exception thrown by the staticizer; it extends ApplicationException and adds no state of its own.
21 | * 22 | * @author cpthack cpt@jianzhimao.com 23 | * @date 2017-04-13 20:36:23 24 | * @since JDK 1.7 25 | */ 26 | public class StaticizerException extends ApplicationException { 27 | 28 | private static final long serialVersionUID = -873631143382257801L; 29 | /** Wraps the given cause by delegating to ApplicationException(Throwable). */ 30 | public StaticizerException(Throwable t) { 31 | super(t); 32 | } 33 | /** Creates the exception with the given message by delegating to ApplicationException(String). */ 34 | public StaticizerException(String message) { 35 | super(message); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/com/github/cptahck/commons/staticizer/utils/FileHelper.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013-2020, cpthack 成佩涛 (cpt@jianzhimao.com). 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.cptahck.commons.staticizer.utils; 17 | 18 | import java.io.File; 19 | import java.io.IOException; 20 | 21 | import org.slf4j.Logger; 22 | 23 | import com.github.cptahck.commons.staticizer.constants.StaticizerConstants; 24 | import com.github.cptahck.commons.staticizer.exception.AssertHelper; 25 | import com.google.common.io.Files; 26 | 27 | /** 28 | * FileHelper.java
29 | * 30 | *
 31 |  * 文件操作辅助工具类
 32 |  * 
33 | * 34 | * @author cpthack cpt@jianzhimao.com 35 | * @date Apr 28, 2017 12:22:50 AM 36 | * @since JDK 1.7 37 | */ 38 | public class FileHelper { 39 | 40 | private static Logger logger = org.slf4j.LoggerFactory.getLogger(FileHelper.class); 41 | 42 | /** 43 | * 44 | * newFile
45 | *
46 | * 47 | * 创建文件
48 | * 49 | * @author cpthack cpt@jianzhimao.com 50 | * @param fileSavePath 51 | * @param fileName 52 | * @return File 53 | * 54 | */ 55 | public static File newFile(String fileSavePath, String fileName) { 56 | File file = new File(fileSavePath); 57 | if (!file.exists()) { 58 | file.mkdirs(); 59 | logger.warn("检测到静态文件目录不存在,初始化目录.filePath = [" + fileSavePath + "]"); 60 | } 61 | file = new File(fileSavePath + fileName); 62 | if (file.exists()) { 63 | logger.debug("文件已经存在,直接返回.fileName = [" + fileName + "]"); 64 | return file; 65 | } 66 | try { 67 | file.createNewFile(); 68 | logger.debug("创建静态文件成功.FileName = [" + fileSavePath + fileName + "]"); 69 | } 70 | catch (Exception e) { 71 | logger.error("创建静态文件失败,file = [" + fileSavePath + fileName + "]", e); 72 | } 73 | return file; 74 | } 75 | 76 | /** 77 | * 78 | * toHtml
79 | *
80 | * 81 | * 根据内容生成html文件
82 | * 83 | * @author cpthack cpt@jianzhimao.com 84 | * @param file 85 | * @param content 86 | * @return File 87 | * 88 | */ 89 | public static File toHtml(File writeFile, String content) { 90 | AssertHelper.isTrue(writeFile != null && writeFile.exists(), "生成HTML文件失败,写入的文件writeFile对象不能为空"); 91 | try { 92 | Files.write(content, writeFile, StaticizerConstants.DEFAULT_CHARTSETS); 93 | logger.info("生成静态文件成功.FilePath = " + writeFile.getAbsolutePath() + "]"); 94 | return writeFile; 95 | } 96 | catch (Exception e) { 97 | logger.error("生成HTML文件失败", e); 98 | } 99 | return null; 100 | } 101 | 102 | } 103 | -------------------------------------------------------------------------------- /src/main/java/com/github/cptahck/commons/staticizer/utils/JbrowserdriverHelper.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013-2020, cpthack 成佩涛 (cpt@jianzhimao.com). 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.github.cptahck.commons.staticizer.utils; 17 | 18 | import java.util.LinkedHashMap; 19 | 20 | import com.github.cptahck.commons.staticizer.constants.StaticizerConstants; 21 | import com.github.cptahck.commons.staticizer.exception.AssertHelper; 22 | import com.machinepublishers.jbrowserdriver.JBrowserDriver; 23 | import com.machinepublishers.jbrowserdriver.RequestHeaders; 24 | import com.machinepublishers.jbrowserdriver.Settings; 25 | import com.machinepublishers.jbrowserdriver.UserAgent; 26 | 27 | /** 28 | * JbrowserdriverHelper.java
29 | * 30 | *
31 |  * Helper that wraps JBrowserDriver: builds configured drivers and loads URLs with them.
32 |  *
33 | * 34 | * @author cpthack cpt@jianzhimao.com 35 | * @date Apr 28, 2017 11:29:39 AM 36 | * @since JDK 1.8 37 | */ 38 | public class JbrowserdriverHelper { 39 | /** Builds a driver configured with StaticizerConstants.MOBILE_REQUEST_HEADERS. */ 40 | public static JBrowserDriver getMobileBrowserDriver() { 41 | return getBrowserDriver(StaticizerConstants.MOBILE_REQUEST_HEADERS); 42 | } 43 | /** Builds a new JBrowserDriver with the settings below; requestHeaders are applied only when non-null. */ 44 | public static JBrowserDriver getBrowserDriver(LinkedHashMap requestHeaders) { 45 | JBrowserDriver driver = null; 46 | Settings.Builder builder = Settings.builder().processes(100)// number of JBrowserDriver instances that may run at the same time 47 | .blockAds(true)// block requests to ad/spam servers 48 | .quickRender(true)// exclude page images and binary data from rendering to speed it up 49 | .loggerLevel(null)// turn off all log output for performance 50 | .hostnameVerification(false)// disable hostname verification for performance 51 | .cache(true)// enable caching, similar to a real browser's cache 52 | .cacheEntries(10 * 10000)// number of cache entries 53 | .cacheEntrySize(20 * 1024 * 1024)// cache entry size, 20 MB 54 | .maxRouteConnections(20)// maximum concurrent connections (per process) to a specific host+proxy combination 55 | .maxConnections(20)// maximum number of concurrent connections (per process) 56 | // .userAgent(UserAgent.CHROME) 57 | ; 58 | 59 | if (requestHeaders != null) { 60 | builder = builder.requestHeaders(new RequestHeaders(requestHeaders)); 61 | } 62 | 63 | driver = new JBrowserDriver(builder.build()); 64 | AssertHelper.notNull(driver, "初始化JBrowserDriver失败."); 65 | return driver; 66 | } 67 | /** Builds a mobile driver and loads url with it; the returned driver is left open. */ 68 | public static JBrowserDriver getMobileUrl(String url) { 69 | JBrowserDriver driver = getMobileBrowserDriver(); 70 | return getUrl(driver, url); 71 | } 72 | /** Builds a driver with the given request headers and loads url with it; the returned driver is left open. */ 73 | public static JBrowserDriver getUrl(String url, LinkedHashMap requestHeaders) { 74 | JBrowserDriver driver = getBrowserDriver(requestHeaders); 75 | return getUrl(driver, url); 76 | } 77 | /** Loads url in the given driver and returns that driver. NOTE(review): the driver is not quit here if driver.get throws - confirm whether cleanup is needed on that path. */ 78 | private static JBrowserDriver getUrl(JBrowserDriver driver, String url) { 79 | driver.get(url); 80 | return driver; 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 |
log4j.rootLogger=WARN,error-log,console-log 2 | log4j.logger.com.github=DEBUG,debug-log 3 | 4 | 5 | log4j.appender.error-log=org.apache.log4j.DailyRollingFileAppender 6 | log4j.appender.error-log.File=logs/all_error.log 7 | log4j.appender.error-log.Threshold=ERROR 8 | log4j.appender.error-log.layout=org.apache.log4j.PatternLayout 9 | log4j.appender.error-log.layout.ConversionPattern=[%5p %t %d{yyyy-MM-dd HH:mm:ss,SSS} --%r(ms)]%n[%l]%n %m %n%n 10 | log4j.appender.error-log.DatePattern=yyyy-MM-dd'.log' 11 | 12 | log4j.appender.console-log=org.apache.log4j.ConsoleAppender 13 | log4j.appender.console-log.layout=org.apache.log4j.PatternLayout 14 | log4j.appender.console-log.layout.ConversionPattern=[%5p %t %d{yyyy-MM-dd HH:mm:ss,SSS} --%r(ms)]%n[%l]%n %m %n%n 15 | 16 | log4j.appender.debug-log=org.apache.log4j.DailyRollingFileAppender 17 | log4j.appender.debug-log.File=logs/jiuwei_debug.log 18 | log4j.appender.debug-log.Threshold=DEBUG 19 | log4j.appender.debug-log.layout=org.apache.log4j.PatternLayout 20 | log4j.appender.debug-log.layout.ConversionPattern=[%5p %t %d{yyyy-MM-dd HH:mm:ss,SSS} --%r(ms)]%n[%l]%n %m %n%n 21 | log4j.appender.debug-log.DatePattern=yyyy-MM-dd'.log' 22 | -------------------------------------------------------------------------------- /src/test/java/com/github/cpthack/commons/staticizer/StaticizerTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2013-2020, cpthack 成佩涛 (cpt@jianzhimao.com). 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.github.cpthack.commons.staticizer; 17 | 18 | import java.util.LinkedHashMap; 19 | 20 | import org.slf4j.Logger; 21 | import org.slf4j.LoggerFactory; 22 | 23 | import com.github.cptahck.commons.staticizer.core.HtmlPage; 24 | import com.github.cptahck.commons.staticizer.core.StaticizerClient; 25 | import com.github.cptahck.commons.staticizer.core.StaticizerClientFactory; 26 | 27 | /** 28 | * StaticizerTest.java
29 | * 30 | *
31 |  * Manual test driver for StaticizerClient.
32 |  *
33 | * 34 | * @author cpthack cpt@jianzhimao.com 35 | * @date Apr 28, 2017 3:09:57 PM 36 | * @since JDK 1.8 37 | */ 38 | public class StaticizerTest { 39 | private static Logger logger = LoggerFactory.getLogger(StaticizerTest.class); 40 | /** Fetches the target URL, saves it as test1.html under baseSavePath, then fetches it again with custom request headers. NOTE(review): the Cookie header below appears to embed real account/session values - confirm and consider moving them out of source. */ 41 | public static void main(String[] args) { 42 | long startTime = System.currentTimeMillis(); 43 | String url = "https://www.jianzhimao.com/ctrlindex/enterprise.html";// target URL to staticize 44 | String baseSavePath = "/Users/cptahck/workspace/jianzhimao/java-staticizer/src/test/resources/static-file/";// directory where the static file is saved 45 | 46 | StaticizerClient client = StaticizerClientFactory.getClient(); 47 | logger.info("获取StaticizerClient操作对象>>>" + client); 48 | 49 | HtmlPage htmlPage = null; 50 | for (int i = 1; i < 2; i++) { 51 | 52 | logger.debug("准备静态化第[" + i + "]个文件."); 53 | for (int j = 0; j < 1; j++) { 54 | htmlPage = client.getPageSource(url); 55 | logger.info("请求目标地址并获取HtmlPage对象:" + j); 56 | logger.info("请求[" + url + "]后返回的状态,statusCode = [" + htmlPage.getStatusCode() + "]"); 57 | logger.info("请求[" + url + "]后返回的内容大小,size = [" + htmlPage.getPageSource().length() + "]"); 58 | } 59 | 60 | // logger.info("请求[" + url + "]后返回的内容,如下:\n" + htmlPage.getPageSource()); 61 | 62 | logger.info("成功设置本次静态文件保存地址及文件名"); 63 | htmlPage.setBaseSavePath(baseSavePath).asHtml("test".concat(i + ".html")); 64 | } 65 | logger.debug("请求耗时:" + (System.currentTimeMillis() - startTime) + " ms"); 66 | 67 | LinkedHashMap requestHeaders = new LinkedHashMap(); 68 | requestHeaders.put("User-Agent", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Mobile Safari/537.36"); 69 | requestHeaders.put("Cookie", "ipcity=guangzhou; isw=1; isp=1; ism=1; UM_distinctid=15b991d8b504-0d54f3feabfe08-143d655c-13c680-15b991d8b51109; gr_user_id=759e16e4-e3f9-41c2-a693-e2ee6773bf6f; m_location_info=%2C%E5%B9%BF%E5%B7%9E%2C; CNZZDATA1254075128=1655583782-1492924156-%7C1492950101; Hm_lvt_c48dcbb8f7a6cb176845ad3439189ed0=1492924831; 
JSESSIONID=00C3088AC63C798A72FA9C6FF2C21FB0-n2.www; 1=189ADF1D3B328F58FD5BBABAEFA022F9; user_account=18826410953; user_pwd=UVAxQTVYN2oxc1k9; user_type=2; user_autologin=1; jobView_showType=2; jobView_p=1; Hm_lvt_447f87add4dbd73deca17a45d8536dbd=1492922502,1493304116; Hm_lpvt_447f87add4dbd73deca17a45d8536dbd=1493304191"); 70 | htmlPage = client.getPageSource(url, requestHeaders); 71 | //logger.info("请求到的内容:" + htmlPage.getPageSource()); 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- /src/test/resources/.gitignore: -------------------------------------------------------------------------------- 1 | /static-file/ 2 | --------------------------------------------------------------------------------