├── .classpath ├── .gitignore ├── .project ├── .settings ├── org.eclipse.core.resources.prefs ├── org.eclipse.jdt.core.prefs └── org.eclipse.m2e.core.prefs ├── README.md ├── lib ├── bouncycastle.provider-1.38-jdk15.jar ├── commons-cli-1.2.jar ├── commons-codec-1.9.jar ├── commons-collections-3.2.1.jar ├── commons-digester3-3.2.jar ├── commons-io-2.4.jar ├── commons-lang-2.4.jar ├── commons-logging-1.1.1.jar ├── gson-2.2.4.jar ├── guava-15.0.jar ├── jackson-core-asl-1.9.13.jar ├── jackson-mapper-asl-1.9.13.jar ├── javacsv-2.0.jar ├── jsoup.jar ├── junit-4.4.jar ├── odps-mapred-bridge-0.14.0-rc1.jar ├── odps-mapred-local-0.14.0-rc1.jar ├── odps-sdk-commons-0.14.0-rc1.jar ├── odps-sdk-core-0.14.0-rc1.jar ├── odps-sdk-lot-0.14.0-rc1.jar ├── odps-sdk-mapred-0.14.0-rc1.jar ├── odps-sdk-udf-0.14.0-rc1.jar ├── protobuf-java-2.4.1.jar └── velocity-1.7.jar ├── pom.xml └── src ├── main ├── java │ └── chaitin │ │ ├── phishing │ │ ├── HtmlParser.java │ │ ├── PhishingDetector.java │ │ ├── PhishingFeature.java │ │ ├── PhishingMapper.java │ │ ├── ScoreContent.java │ │ ├── ScoreDomain.java │ │ ├── ScoreForm.java │ │ └── ScoreTitle.java │ │ ├── utils │ │ ├── Base64.java │ │ ├── ChaitinRecord.java │ │ ├── Decoder.java │ │ ├── Gao.java │ │ ├── Multipart.java │ │ ├── Pair.java │ │ ├── ParseUrl.java │ │ ├── QueryString.java │ │ └── Unquote.java │ │ └── webshell │ │ ├── WebshellDetector.java │ │ ├── WebshellMapper.java │ │ ├── WebshellTokenizer.java │ │ └── parser │ │ ├── AspScore.java │ │ ├── AspTokenizer.rl │ │ ├── PhpScore.java │ │ └── PhpScore.rl └── resources │ ├── META-INF │ └── base.mapred.xml │ └── credential.properties └── test └── java └── chaitin └── test └── phishing └── ScoreDomainTest.java /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | temp 3 | warehouse 4 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | webshell 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.jdt.core.javanature 21 | org.eclipse.m2e.core.maven2Nature 22 | 23 | 24 | -------------------------------------------------------------------------------- /.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | encoding//src/main/java=UTF-8 3 | encoding//src/main/resources=UTF-8 4 | encoding//src/test/java=UTF-8 5 | encoding//src/test/resources=UTF-8 6 | -------------------------------------------------------------------------------- /.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7 4 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve 5 | org.eclipse.jdt.core.compiler.compliance=1.7 6 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate 7 | org.eclipse.jdt.core.compiler.debug.localVariable=generate 8 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate 9 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 10 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 11 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 12 | org.eclipse.jdt.core.compiler.source=1.7 13 | 
-------------------------------------------------------------------------------- /.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/README.md -------------------------------------------------------------------------------- /lib/bouncycastle.provider-1.38-jdk15.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/bouncycastle.provider-1.38-jdk15.jar -------------------------------------------------------------------------------- /lib/commons-cli-1.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/commons-cli-1.2.jar -------------------------------------------------------------------------------- /lib/commons-codec-1.9.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/commons-codec-1.9.jar -------------------------------------------------------------------------------- /lib/commons-collections-3.2.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/commons-collections-3.2.1.jar -------------------------------------------------------------------------------- 
/lib/commons-digester3-3.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/commons-digester3-3.2.jar -------------------------------------------------------------------------------- /lib/commons-io-2.4.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/commons-io-2.4.jar -------------------------------------------------------------------------------- /lib/commons-lang-2.4.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/commons-lang-2.4.jar -------------------------------------------------------------------------------- /lib/commons-logging-1.1.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/commons-logging-1.1.1.jar -------------------------------------------------------------------------------- /lib/gson-2.2.4.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/gson-2.2.4.jar -------------------------------------------------------------------------------- /lib/guava-15.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/guava-15.0.jar -------------------------------------------------------------------------------- /lib/jackson-core-asl-1.9.13.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/jackson-core-asl-1.9.13.jar -------------------------------------------------------------------------------- /lib/jackson-mapper-asl-1.9.13.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/jackson-mapper-asl-1.9.13.jar -------------------------------------------------------------------------------- /lib/javacsv-2.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/javacsv-2.0.jar -------------------------------------------------------------------------------- /lib/jsoup.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/jsoup.jar -------------------------------------------------------------------------------- /lib/junit-4.4.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/junit-4.4.jar -------------------------------------------------------------------------------- /lib/odps-mapred-bridge-0.14.0-rc1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/odps-mapred-bridge-0.14.0-rc1.jar -------------------------------------------------------------------------------- /lib/odps-mapred-local-0.14.0-rc1.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/odps-mapred-local-0.14.0-rc1.jar -------------------------------------------------------------------------------- /lib/odps-sdk-commons-0.14.0-rc1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/odps-sdk-commons-0.14.0-rc1.jar -------------------------------------------------------------------------------- /lib/odps-sdk-core-0.14.0-rc1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/odps-sdk-core-0.14.0-rc1.jar -------------------------------------------------------------------------------- /lib/odps-sdk-lot-0.14.0-rc1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/odps-sdk-lot-0.14.0-rc1.jar -------------------------------------------------------------------------------- /lib/odps-sdk-mapred-0.14.0-rc1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/odps-sdk-mapred-0.14.0-rc1.jar -------------------------------------------------------------------------------- /lib/odps-sdk-udf-0.14.0-rc1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/odps-sdk-udf-0.14.0-rc1.jar -------------------------------------------------------------------------------- /lib/protobuf-java-2.4.1.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/protobuf-java-2.4.1.jar -------------------------------------------------------------------------------- /lib/velocity-1.7.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/velocity-1.7.jar -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | chaitin 5 | webshell 6 | 0.0.1-SNAPSHOT 7 | jar 8 | 9 | 10 | 11 | 12 | org.apache.maven.plugins 13 | maven-compiler-plugin 14 | 2.3.2 15 | 16 | 1.6 17 | 1.6 18 | 1.6 19 | UTF-8 20 | 21 | 22 | 23 | org.apache.maven.plugins 24 | maven-resources-plugin 25 | 2.4 26 | 27 | UTF-8 28 | 29 | 30 | 31 | org.apache.maven.plugins 32 | maven-assembly-plugin 33 | 34 | 35 | jar-with-dependencies 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | com.alibaba.external 45 | bouncycastle.provider 46 | 1.38-jdk15 47 | system 48 | ${project.basedir}/lib/bouncycastle.provider-1.38-jdk15.jar 49 | 50 | 51 | commons-cli 52 | commons-cli 53 | 1.2 54 | system 55 | ${project.basedir}/lib/commons-cli-1.2.jar 56 | 57 | 58 | commons-codec 59 | commons-codec 60 | 1.9 61 | system 62 | ${project.basedir}/lib/commons-codec-1.9.jar 63 | 64 | 65 | commons-collections 66 | commons-collections 67 | 3.2.1 68 | system 69 | ${project.basedir}/lib/commons-collections-3.2.1.jar 70 | 71 | 72 | commons-io 73 | commons-io 74 | 2.4 75 | system 76 | ${project.basedir}/lib/commons-io-2.4.jar 77 | 78 | 79 | commons-lang 80 | commons-lang 81 | 2.4 82 | system 83 | ${project.basedir}/lib/commons-lang-2.4.jar 84 | 85 | 86 | commons-logging 87 | commons-logging 88 | 1.1.1 89 | system 90 | ${project.basedir}/lib/commons-logging-1.1.1.jar 91 | 92 | 93 | com.google.code.gson 94 | gson 95 | 2.2.4 96 | system 97 | 
${project.basedir}/lib/gson-2.2.4.jar 98 | 99 | 100 | com.google.guava 101 | guava 102 | 15.0 103 | system 104 | ${project.basedir}/lib/guava-15.0.jar 105 | 106 | 107 | org.codehaus.jackson 108 | jackson-core-asl 109 | 1.9.13 110 | system 111 | ${project.basedir}/lib/jackson-core-asl-1.9.13.jar 112 | 113 | 114 | org.codehaus.jackson 115 | jackson-mapper-asl 116 | 1.9.13 117 | system 118 | ${project.basedir}/lib/jackson-mapper-asl-1.9.13.jar 119 | 120 | 121 | net.sourceforge.javacsv 122 | javacsv 123 | 2.0 124 | system 125 | ${project.basedir}/lib/javacsv-2.0.jar 126 | 127 | 128 | com.aliyun.odps 129 | odps-mapred-bridge 130 | 0.14.0-rc1 131 | system 132 | ${project.basedir}/lib/odps-mapred-bridge-0.14.0-rc1.jar 133 | 134 | 135 | com.aliyun.odps 136 | odps-mapred-local 137 | 0.14.0-rc1 138 | system 139 | ${project.basedir}/lib/odps-mapred-local-0.14.0-rc1.jar 140 | 141 | 142 | com.aliyun.odps 143 | odps-sdk-commons 144 | 0.14.0-rc1 145 | system 146 | ${project.basedir}/lib/odps-sdk-commons-0.14.0-rc1.jar 147 | 148 | 149 | com.aliyun.odps 150 | odps-sdk-core 151 | 0.14.0-rc1 152 | system 153 | ${project.basedir}/lib/odps-sdk-core-0.14.0-rc1.jar 154 | 155 | 156 | com.aliyun.odps 157 | odps-sdk-lot 158 | 0.14.0-rc1 159 | system 160 | ${project.basedir}/lib/odps-sdk-lot-0.14.0-rc1.jar 161 | 162 | 163 | com.aliyun.odps 164 | odps-sdk-mapred 165 | 0.14.0-rc1 166 | system 167 | ${project.basedir}/lib/odps-sdk-mapred-0.14.0-rc1.jar 168 | 169 | 170 | com.aliyun.odps 171 | odps-sdk-udf 172 | 0.14.0-rc1 173 | system 174 | ${project.basedir}/lib/odps-sdk-udf-0.14.0-rc1.jar 175 | 176 | 177 | com.google.protobuf 178 | protobuf-java 179 | 2.4.1 180 | system 181 | ${project.basedir}/lib/protobuf-java-2.4.1.jar 182 | 183 | 184 | org.apache.velocity 185 | velocity 186 | 1.7 187 | system 188 | ${project.basedir}/lib/velocity-1.7.jar 189 | 190 | 191 | org.apache.commons 192 | commons-digester3 193 | 3.2 194 | 195 | 196 | 197 | junit 198 | junit 199 | 4.4 200 | test 201 | 202 | 203 | 
204 | -------------------------------------------------------------------------------- /src/main/java/chaitin/phishing/HtmlParser.java: -------------------------------------------------------------------------------- 1 | package chaitin.phishing; 2 | 3 | import org.jsoup.Jsoup; 4 | import org.jsoup.nodes.Document; 5 | import org.jsoup.nodes.Element; 6 | import org.jsoup.select.Elements; 7 | 8 | public class HtmlParser { 9 | public static class Form { 10 | public static class Input { 11 | public String type; 12 | public String name; 13 | public String id; 14 | public String placeholder; 15 | } 16 | public String action; 17 | public String text; 18 | public Input[] input; 19 | } 20 | 21 | public static class Link { 22 | public String rel; 23 | public String href; 24 | } 25 | 26 | public static class A { 27 | public String href; 28 | } 29 | 30 | String html; 31 | Document doc; 32 | public String text; 33 | public String title; 34 | public Form form[]; 35 | public Link link[]; 36 | public A a[]; 37 | 38 | public HtmlParser (String _html) { 39 | html = _html; 40 | parse(); 41 | } 42 | 43 | Boolean parse () { 44 | doc = Jsoup.parse(html); 45 | text = doc.text(); 46 | Elements es = doc.getElementsByTag("title"); 47 | if (es.size() > 0) { 48 | title = es.first().text(); 49 | } else { 50 | title = ""; 51 | } 52 | es = doc.getElementsByTag("form"); 53 | form = new Form[es.size()]; 54 | for (int i = 0; i < es.size(); ++i) { 55 | Element e = es.get(i); 56 | Form f = new Form(); 57 | f.action = e.attr("action").toLowerCase(); 58 | f.text = e.text(); 59 | Elements es1 = e.getElementsByTag("input"); 60 | f.input = new Form.Input[es1.size()]; 61 | for (int j = 0; j < es1.size(); ++j) { 62 | Form.Input input = new Form.Input(); 63 | Element e1 = es1.get(j); 64 | input.type = e1.attr("type").toLowerCase(); 65 | input.name = e1.attr("name"); 66 | input.id = e1.attr("id"); 67 | input.placeholder = e1.attr("placeholder"); 68 | f.input[j] = input; 69 | } 70 | form[i] = f; 71 | } 72 | 73 
| es = doc.getElementsByTag("link"); 74 | link = new Link[es.size()]; 75 | for (int i = 0; i < es.size(); ++i) { 76 | link[i] = new Link(); 77 | Element e = es.get(i); 78 | link[i].rel = e.attr("rel").toLowerCase(); 79 | link[i].href = e.attr("href").toLowerCase(); 80 | } 81 | 82 | es = doc.getElementsByTag("a"); 83 | a = new A[es.size()]; 84 | for (int i = 0; i < es.size(); ++i) { 85 | a[i] = new A(); 86 | Element e = es.get(i); 87 | a[i].href = e.attr("href").toLowerCase(); 88 | } 89 | 90 | return true; 91 | } 92 | 93 | public static void main(String[] args) { 94 | System.out.println("text: " + new HtmlParser("testasdbody

h1

div
").text); 95 | System.out.println(new HtmlParser("test<img />asd").title); 96 | System.out.println(new HtmlParser("test").title); 97 | System.out.println(new HtmlParser("<TITLE>TEST哈哈哈test2").title); 98 | System.out.println(new HtmlParser("test1test2").form.length); 99 | System.out.println(new HtmlParser("
").form.length); 100 | System.out.println(new HtmlParser("
").form[0].action); 101 | System.out.println("id: " + new HtmlParser("
").form[0].input[0].id); 102 | System.out.println("type: " + new HtmlParser("
").form[0].input[0].type); 103 | 104 | HtmlParser h = new HtmlParser(" "); 105 | System.out.println(h.link.length); 106 | System.out.println("href: " + h.link[0].href); 107 | System.out.println(h.link[0].rel); 108 | System.out.println(h.link[1].href); 109 | System.out.println(h.link[1].rel); 110 | 111 | h = new HtmlParser(" "); 112 | System.out.println(h.a.length); 113 | System.out.println("href: " + h.a[0].href); 114 | System.out.println("href: " + h.a[1].href); 115 | System.out.println("href: " + h.a[2].href); 116 | 117 | return; 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /src/main/java/chaitin/phishing/PhishingDetector.java: -------------------------------------------------------------------------------- 1 | package chaitin.phishing; 2 | 3 | import java.net.MalformedURLException; 4 | import java.net.URL; 5 | 6 | public class PhishingDetector { 7 | 8 | URL url; 9 | HtmlParser html; 10 | 11 | PhishingDetector (String url, String html) { 12 | try { 13 | this.url = new URL(url); 14 | } catch (MalformedURLException e) { 15 | // TODO Auto-generated catch block 16 | try { 17 | this.url = new URL("http://chaitin-monster.com/"); 18 | } catch (MalformedURLException e1) { 19 | } 20 | } 21 | this.html = new HtmlParser(html); 22 | } 23 | 24 | static double threshold = 7.999; 25 | 26 | double detect() { 27 | double score = 0.0; 28 | if (ScoreDomain.is_white(url.getHost())) { 29 | return score; 30 | } 31 | score += ScoreDomain.score(url.getHost()); 32 | score += ScoreTitle.score(html.title); 33 | score += ScoreContent.score(html.text); 34 | score += ScoreForm.score(html.form); 35 | return score; 36 | } 37 | 38 | public static Boolean is_phishing(String url, String html) { 39 | return new PhishingDetector(url, html).detect() >= threshold; 40 | } 41 | 42 | 43 | public static void main(String[] args) { 44 | System.out.println(is_phishing("http://www.baidu.com.a", "[官]欢迎访问全国信用在线申请中心")); 45 | 
package chaitin.phishing;

import java.net.Inet4Address;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.regex.Pattern;

/**
 * Computes a fixed-length feature vector for one (URL, HTML) pair.
 *
 * Each implemented feature method returns -1, 0 or 1; methods that are still
 * stubs return a constant. NOTE(review): the code reads as if 1 means
 * "suspicious" and -1 means "benign" (e.g. an IP-literal host yields 1) --
 * confirm against whatever consumes {@link #featureVector}.
 *
 * All state is static: every call to {@link #init(String, String)} overwrites
 * the URL, the parsed page and the vector of the previous call.
 */
public class PhishingFeature {

    // Slot indices into featureVector.
    public static final int has_ip = 0;
    public static final int long_url = 1;
    public static final int short_service = 2;
    public static final int has_at = 3;
    public static final int double_slash_redirect = 4;
    public static final int pref_suf = 5;
    public static final int has_sub_domain = 6;
    public static final int ssl_state = 7;
    public static final int long_domain = 8;
    public static final int favicon = 9;
    public static final int port = 10;
    public static final int https_token = 11;
    public static final int req_url = 12;
    public static final int url_of_anchor = 13;
    public static final int tag_links = 14;
    public static final int SFH = 15;
    public static final int submit_to_email = 16;
    public static final int abnormal_url = 17;
    public static final int redirect = 18;
    public static final int mouseover = 19;
    public static final int right_click = 20;
    public static final int popup = 21;
    public static final int iframe = 22;
    public static final int domain_age = 23;
    public static final int dns_record = 24;
    public static final int traffic = 25;
    public static final int page_rank = 26;
    public static final int google_index = 27;
    public static final int links_to_page = 28;
    public static final int stats_report = 29;
    public static final int target = 30;

    public static int[] featureVector = new int[31];

    public static URL url;
    public static HtmlParser html;

    /**
     * Loads a page and fills {@link #featureVector}.
     *
     * Both arguments are lower-cased before use. A URL that cannot be parsed
     * is replaced by {@code http://baidu.com/}, so the URL-based features are
     * then computed for that placeholder. The host is printed to stdout.
     *
     * @param u page URL; {@code null} is treated as the empty string
     * @param h page HTML; {@code null} is treated as the empty string
     */
    public static void init(String u, String h) {
        u = (u == null) ? "" : u.toLowerCase();
        h = (h == null) ? "" : h.toLowerCase();
        try {
            url = new URL(u);
        } catch (MalformedURLException e) {
            try {
                url = new URL("http://baidu.com/");
            } catch (MalformedURLException e1) {
                // Unreachable: the fallback literal is a well-formed URL.
            }
        }
        html = new HtmlParser(h);

        System.out.println(url.getHost());
        getFeatures();
    }

    /**
     * Tells whether a host string is a numeric IPv4 literal.
     *
     * Accepts one to four dot-separated parts, each decimal or hexadecimal
     * with a {@code 0x}/{@code 0X} prefix. Every part but the last must fit in
     * one byte; the last part fills the remaining bytes of the 32-bit address
     * (so {@code 0x58CCCA62} and {@code 0x58.0xCA.0x62} are accepted).
     * Signs, empty parts and out-of-range values are rejected.
     *
     * @param ip host string to test; {@code null} yields {@code false}
     * @return {@code true} if the string is a numeric IPv4 literal
     */
    public static boolean isIP(String ip) {
        if (ip == null || ip.length() == 0) {
            return false;
        }
        // Limit -1 keeps trailing empty parts, so "1.2.3." is rejected.
        String[] parts = ip.toLowerCase().split("\\.", -1);
        if (parts.length > 4) {
            return false;
        }
        // Number of bytes left for the last part: 4, 3, 2 or 1.
        long lastMax = 0xFFFFFFFFL >>> (8 * (parts.length - 1));
        for (int i = 0; i < parts.length; i++) {
            String part = parts[i];
            int base = 10;
            if (part.length() > 2 && part.startsWith("0x")) {
                part = part.substring(2);
                base = 16;
            }
            // Long.parseLong would accept a leading sign; an address part may not have one.
            if (part.length() == 0 || part.charAt(0) == '+' || part.charAt(0) == '-') {
                return false;
            }
            long value;
            try {
                value = Long.parseLong(part, base);
            } catch (NumberFormatException e) {
                return false;
            }
            long max = (i == parts.length - 1) ? lastMax : 0xFFL;
            if (value > max) {
                return false;
            }
        }
        return true;
    }

    /** @return 1 if the URL host is a numeric IP literal, otherwise -1 */
    public static int has_ip_feature() {
        return isIP(url.getHost()) ? 1 : -1;
    }

    /** @return -1 for URLs shorter than 54 chars, 0 for 54..75, 1 above 75 */
    public static int long_url_feature() {
        int len = url.toString().length();
        if (len < 54) {
            return -1;
        }
        if (len <= 75) {
            return 0;
        }
        return 1;
    }

    // TODO: more precise
    /** @return 1 if the whole URL is at most 9 characters long, otherwise -1 */
    public static int short_service_feature() {
        return url.toString().length() <= 9 ? 1 : -1;
    }

    /** @return 1 if the URL contains '@', otherwise -1 */
    public static int has_at_feature() {
        return url.toString().indexOf('@') != -1 ? 1 : -1;
    }

    /** @return 1 if the last "//" in the URL starts after index 7, otherwise -1 */
    public static int double_slash_redirect_feature() {
        return url.toString().lastIndexOf("//") > 7 ? 1 : -1;
    }

    /**
     * @return 1 if the host contains '-', otherwise -1. Only the host is
     *         inspected, so a hyphen in the path or query does not count.
     */
    public static int pref_suf_feature() {
        return url.getHost().indexOf('-') != -1 ? 1 : -1;
    }

    /**
     * Classifies the host by its number of dots. Only the host is inspected,
     * so dots in the path or query (e.g. "index.html") do not count.
     *
     * @return -1 for exactly one dot, 0 for two, 1 otherwise
     */
    public static int has_sub_domain_feature() {
        String host = url.getHost();
        int dots = 0;
        for (int i = 0; i < host.length(); i++) {
            if (host.charAt(i) == '.') {
                dots++;
            }
        }
        if (dots == 1) {
            return -1;
        }
        if (dots == 2) {
            return 0;
        }
        return 1;
    }

    // Need to consider remain time
    /** @return -1 if the URL scheme is https, otherwise 1 */
    public static int ssl_state_feature() {
        return "https".equals(url.getProtocol().toLowerCase()) ? -1 : 1;
    }

    // Need to solve
    /** Stub: always -1. */
    public static int long_domain_feature() {
        return -1;
    }

    /**
     * @return 1 if some link tag whose rel contains "icon" has an href that
     *         parses as an absolute URL, otherwise 0
     */
    public static int favicon_feature() {
        for (int i = 0; i < html.link.length; i++) {
            if (html.link[i].rel.indexOf("icon") != -1) {
                try {
                    // Constructed only to validate; relative hrefs throw and are skipped.
                    new URL(html.link[i].href);
                    return 1;
                } catch (MalformedURLException e) {
                    // Not an absolute URL: keep looking at the remaining link tags.
                }
            }
        }
        return 0;
    }

    // Need to solve
    /** Stub: always -1. */
    public static int port_feature() {
        return -1;
    }

    /** @return 1 if the host contains the text "https", otherwise -1 */
    public static int https_token_feature() {
        return url.getHost().indexOf("https") != -1 ? 1 : -1;
    }

    // This feature may be important
    // Need to solve
    /** Stub: always 0. */
    public static int req_url_feature() {
        return 0;
    }

    /**
     * Share of anchors whose href contains '#' or "javascript:".
     *
     * @return -1 below 0.22, 0 for 0.22..0.61, 1 above 0.61. A page without
     *         any anchor counts as share 0 and yields -1 (previously the
     *         0/0 division produced NaN, which fell through to 1).
     */
    public static int url_of_anchor_feature() {
        int total = html.a.length;
        if (total == 0) {
            return -1;
        }
        int dead = 0;
        for (int i = 0; i < total; i++) {
            String href = html.a[i].href;
            if (href.indexOf('#') != -1 || href.indexOf("javascript:") != -1) {
                dead++;
            }
        }
        double p = (double) dead / total;
        if (p < 0.22) {
            return -1;
        }
        if (p <= 0.61) {
            return 0;
        }
        return 1;
    }

    // The features below are not implemented yet; each one returns 0.

    public static int tag_links_feature() {
        return 0;
    }

    public static int SFH_feature() {
        return 0;
    }

    public static int submit_to_email_feature() {
        return 0;
    }

    public static int abnormal_url_feature() {
        return 0;
    }

    public static int redirect_feature() {
        return 0;
    }

    public static int mouseover_feature() {
        return 0;
    }

    public static int right_click_feature() {
        return 0;
    }

    public static int popup_feature() {
        return 0;
    }

    public static int iframe_feature() {
        return 0;
    }

    public static int domain_age_feature() {
        return 0;
    }

    public static int dns_record_feature() {
        return 0;
    }

    public static int traffic_feature() {
        return 0;
    }

    public static int page_rank_feature() {
        return 0;
    }

    public static int google_index_feature() {
        return 0;
    }

    public static int links_to_page_feature() {
        return 0;
    }

    public static int stats_report_feature() {
        return 0;
    }

    public static int target_feature() {
        return 0;
    }

    /**
     * Evaluates every feature against the current {@link #url} and
     * {@link #html} and stores the results in {@link #featureVector}.
     *
     * @return always {@code true}
     */
    public static boolean getFeatures() {
        featureVector[has_ip] = has_ip_feature();
        featureVector[long_url] = long_url_feature();
        featureVector[short_service] = short_service_feature();
        featureVector[has_at] = has_at_feature();
        featureVector[double_slash_redirect] = double_slash_redirect_feature();
        featureVector[pref_suf] = pref_suf_feature();
        featureVector[has_sub_domain] = has_sub_domain_feature();
        featureVector[ssl_state] = ssl_state_feature();
        featureVector[long_domain] = long_domain_feature();
        featureVector[favicon] = favicon_feature();
        featureVector[port] = port_feature();
        featureVector[https_token] = https_token_feature();
        featureVector[req_url] = req_url_feature();
        featureVector[url_of_anchor] = url_of_anchor_feature();
        featureVector[tag_links] = tag_links_feature();
        featureVector[SFH] = SFH_feature();
        featureVector[submit_to_email] = submit_to_email_feature();
        featureVector[abnormal_url] = abnormal_url_feature();
        featureVector[redirect] = redirect_feature();
        featureVector[mouseover] = mouseover_feature();
        featureVector[right_click] = right_click_feature();
        featureVector[popup] = popup_feature();
        featureVector[iframe] = iframe_feature();
        featureVector[domain_age] = domain_age_feature();
        featureVector[dns_record] = dns_record_feature();
        featureVector[traffic] = traffic_feature();
        featureVector[page_rank] = page_rank_feature();
        featureVector[google_index] = google_index_feature();
        featureVector[links_to_page] = links_to_page_feature();
        featureVector[stats_report] = stats_report_feature();
        featureVector[target] = target_feature();

        return true;
    }

    /** Prints the feature vector to stdout, values concatenated without separators. */
    public static void show() {
        for (int i = 0; i < featureVector.length; i++) {
            System.out.print(featureVector[i]);
        }
    }

    /** Self-check of {@link #isIP(String)}; prints the outcome to stdout. */
    public static void test() {
        boolean res = true;
        res &= (isIP("1.1.1.1"));
        res &= (isIP("0x58.0xCC.0xCA.0x62"));
        res &= (isIP("0x58.0xCA.0x62"));
        res &= (isIP("0x58CCCA62"));
        res &= (isIP("0x58CA62"));
        res &= (isIP("0x58CA6"));
        res &= (!isIP("baidu.com"));
        res &= (!isIP("xxx.123.abc"));
        // Shapes the looser parser used to accept.
        res &= (!isIP("1.2.3.4.5"));
        res &= (!isIP("1.2.3."));
        res &= (!isIP("-1.2.3.4"));
        res &= (!isIP("256.1.1.1"));

        if (res) {
            System.out.println("\nIs ip test succ");
        } else {
            System.out.println("\nIs ip test fail");
        }

        if (res) {
            System.out.println("\n==TEST PASSED==");
        } else {
            System.out.println("\n==TEST FAILED==");
        }

    }

    /** Manual demo: prints the empty vector, loads a sample page, then runs {@link #test()}. */
    public static void main(String[] args) {

        for (int i = 0; i < featureVector.length; i++) {
            System.out.print(featureVector[i]);
        }
        System.out.print("");

        PhishingFeature.init("https://baidu.com",
                "<html><head><link rel=\"shortcut icon\" href=\"favicon.ico\" /> </head><link rel=\"icon\" href=\"animated_favicon.gif\" type=\"image/gif\" /> </html>");
        System.out.println(PhishingFeature.url.getProtocol());

        try {
            // "test.t" has no scheme, so this is expected to take the catch branch.
            URL uu = new URL("test.t");
            System.out.println(uu.getProtocol());
        } catch (MalformedURLException e) {
            System.out.println("dododo");
        }

        test();
    }

}
import java.util.LinkedList; 4 | import java.util.List; 5 | import java.util.regex.Pattern; 6 | 7 | import chaitin.utils.Pair; 8 | 9 | public class ScoreContent { 10 | static List<Pair<Pattern, Double>> sensitive_word; 11 | static List<Pair<Pattern, Double>> sensitive_word_once; 12 | 13 | static { 14 | sensitive_word = new LinkedList<Pair<Pattern, Double>>(); 15 | sensitive_word.add(new Pair<Pattern, Double>(Pattern.compile(".*欢迎光临京东商城.*"), 10.0)); 16 | sensitive_word.add(new Pair<Pattern, Double>(Pattern.compile(".*欢迎登录(华为|苹果|QQ|qq|腾讯|腾讯qq|腾讯QQ|百度|京东|淘宝|支付宝|美团|微信|新浪微博)帐号.*"), 8.0)); 17 | sensitive_word.add(new Pair<Pattern, Double>(Pattern.compile(".*已经发放.{0,20}奖金.*"), 10.0)); 18 | sensitive_word.add(new Pair<Pattern, Double>(Pattern.compile(".*Forgot Apple ID or password.*"), 10.0)); 19 | sensitive_word.add(new Pair<Pattern, Double>(Pattern.compile(".*没有Apple ID.{10}现在创建一个.*"), 10.0)); 20 | sensitive_word.add(new Pair<Pattern, Double>(Pattern.compile(".*礼品.*"), 1.0)); 21 | sensitive_word.add(new Pair<Pattern, Double>(Pattern.compile(".*奖金.*"), 1.0)); 22 | sensitive_word.add(new Pair<Pattern, Double>(Pattern.compile(".*中奖.*"), 1.0)); 23 | sensitive_word.add(new Pair<Pattern, Double>(Pattern.compile(".*抽奖.*"), 1.0)); 24 | sensitive_word.add(new Pair<Pattern, Double>(Pattern.compile(".*赚钱.*"), 1.0)); 25 | sensitive_word.add(new Pair<Pattern, Double>(Pattern.compile(".*额度.*"), 1.0)); 26 | sensitive_word.add(new Pair<Pattern, Double>(Pattern.compile(".*免费.*"), 1.0)); 27 | sensitive_word.add(new Pair<Pattern, Double>(Pattern.compile(".*发放.*"), 1.0)); 28 | sensitive_word.add(new Pair<Pattern, Double>(Pattern.compile(".*下发.*"), 1.0)); 29 | sensitive_word.add(new Pair<Pattern, Double>(Pattern.compile(".*登陆.*"), 1.0)); 30 | sensitive_word.add(new Pair<Pattern, Double>(Pattern.compile(".*登录.*"), 1.0)); 31 | sensitive_word_once = new LinkedList<Pair<Pattern, Double>>(); 32 | } 33 | 34 | public static double score(String text) { 35 | double result = 0.0; 36 | for 
(Pair<Pattern, Double> p: sensitive_word) { 37 | if (p.first.matcher(text).matches()) { 38 | result += p.second; 39 | } 40 | } 41 | return result; 42 | } 43 | 44 | public static void main(String[] args) { 45 | System.out.println(score("已经发放 17332305 元奖金")); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/chaitin/phishing/ScoreDomain.java: -------------------------------------------------------------------------------- 1 | package chaitin.phishing; 2 | 3 | public class ScoreDomain { 4 | 5 | static String[] whilte_list; 6 | 7 | static { 8 | whilte_list = new String[] { 9 | "email-qq.com", "58.com","jiwu.com","xiaomi.com","cnzz.com","tuicool.com","saic.gov.cn","chinanews.com","23wx.com","ifanr.com","gao7.com","quanmin.tv","vancl.com","cjn.cn","microsoftonline.com","yiqifa.com","appgame.com","mtime.com","suning.com","pinterest.com","12306.cn","cnhubei.com","worktile.com","pcbeta.com","jd.hk","quora.com","hujiang.com","21cn.com","chinaunix.net","lenovo.com.cn","dianping.com","19lou.com","house365.com","btbbt.cc","fuliba.net","onlinedown.net","caixin.com","google.com.tw","v2ex.com","10jqka.com.cn","dell.com","ynepb.gov.cn","ceconline.com","miaopai.com","18183.com","yesky.com","120ask.com","pps.tv","2345.com","xvideos.com","huaban.com","etao.com","yaolan.com","gucheng.com","nga.cn","alicdn.com","gamersky.com","2cto.com","guokr.com","taoche.com","instagram.com","liansuo.com","xjtour.com","microsoft.com","eastday.com","booking.com","sina.com","google.com.hk","github.com","hupu.com","nuomi.com","3158.cn","youth.cn","ebay.com","longzhu.com","baike.com","u17.com","yougou.com","dytt8.net","biquge.la","ecitic.com","twitter.com","ccb.com","ngacn.cc","autohome.com.cn","dy2018.com","caijing.com.cn","weibo.cn","sogou.com","zbj.com","bttiantang.com","cyol.com","eqxiu.com","alibaba-inc.com","51yes.com","yixun.com","people.com.cn","gongchang.com","mafengwo.cn","ximalaya.com","cmbc.com.cn","takungpao.com","dmzj.com","kuwo
.cn","spdb.com.cn","zcool.com.cn","tudou.com","mp4ba.com","ikea.com","zhihu.com","jandan.net","jxnews.com.cn","workercn.cn","zuanke8.com","blogspot.com","aliexpress.com","csair.com","aiweibang.com","cnblogs.com","cgbchina.com.cn","xywy.com","nipic.com","ih5.cn","msn.com","google.com","126.com","51credit.com","huanqiu.com","51job.com","jrj.com.cn","tgfcer.com","chooseauto.com.cn","17track.net","weather.com.cn","doubleclick.net","doc88.com","258.com","muchong.com","huim.com","91jm.com","92lux.cn","114so.cn","pptv.com","pchome.net","chinabyte.com","g-fox.cn","chinadaily.com.cn","xiamp4.com","huawei.com","ccidnet.com","amap.com","xdf.cn","chouti.com","lofter.com","meizu.com","duomai.com","ctrip.com","mozilla.org","25pp.com","mgtv.com","95516.com","renren.com","amazon.co.jp","yahoo.co.jp","cnnic.cn","goldcarpet.cn","xiu.com","guancha.cn","17k.com","nih.gov","tumblr.com","10086.cn","winshang.com","ali213.net","loldytt.com","sobaidupan.com","huihui.cn","gusuwang.com","xiami.com","cs.com.cn","juooo.com","chinaso.com","cnr.cn","ftchinese.com","egou.com","bilibili.com","ctfile.com","dahe.cn","mama.cn","cpta.com.cn","google.com.sg","meishichina.com","kdslife.com","ci123.com","alipay.com","k618.cn","oschina.net","epwk.com","amazon.com","cnmo.com","36kr.com","lashou.com","alimama.com","cntv.cn","yinyuetai.com","gmw.cn","acfun.tv","cisco.com","github.io","cnbeta.com","3dmgame.com","122.gov.cn","kaixin001.com","xunlei.com","dragonparking.com","segmentfault.com","mydrivers.com","blizzard.cn","beva.com","17173.com","sdo.com","zol.com.cn","smzdm.com","steamcommunity.com","kugou.com","rednet.cn","scol.com.cn","taobao.com","alibaba.com","jiemian.com","11467.com","adobe.com","cncn.org.cn","7k7k.com","techweb.com.cn","fanli.com","tibet.cn","bankofchina.com","iconfont.cn","3dwwwgame.com","dbw.cn","icbc.com.cn","amazon.co.uk","55haitao.com","anjuke.com","xinjunshi.com","mydigit.cn","oeeee.com","jiayuan.com","sq.cn","p5w.net","avmo.pw","pixiv.net","niuche.com","qidian.com","vmall.com","6pm.
com","skycn.com","wtoip.com","xiaomi.cn","sznews.com","zhibo8.cc","dgtle.com","cyzone.cn","fudan.edu.cn","saraba1st.com","jumei.com","jiameng.com","xitek.com","tmall.com","qianzhan.com","xinhuanet.com","pcgames.com.cn","chinaz.com","stockstar.com","voc.com.cn","iqiyi.com","lesports.com","sinaimg.cn","ed2000.com","bitauto.com","znds.com","alexa.cn","xcar.com.cn","reddit.com","salesforce.com","xueqiu.com","paypal.com","youku.com","lagou.com","focus.cn","joyme.com","southcn.com","haosou.com","cnki.net","a9vg.com","bigccq.cn","dix3.com","hjenglish.com","steampowered.com","07073.com","to8to.com","gome.com.cn","4399.com","cr173.com","wangtu.com","365jia.cn","fengniao.com","amazon.de","lianjia.com","w3school.com.cn","douban.com","baiducontent.com","viidii.info","yhd.com","huomaotv.cn","pingan.com","51.la","wanfangdata.com.cn","imdb.com","tianya.cn","yinxiang.com","thepaper.cn","123cha.com","baomihua.com","dangdang.com","fobshanghai.com","qunar.com","2mnd56.com","jjwxc.net","alitrip.com","ycwb.com","boc.cn","ebrun.com","tower.im","kuaidi100.com","stackexchange.com","youboy.com","tmall.hk","5dcar.com","zhanqi.tv","battlenet.com.cn","facebook.com","umeng.com","linkedin.com","amazon.fr","pc6.com","apple.com","51cto.com","eastmoney.com","huya.com","gh0089.com","douyu.com","wordpress.com","dygang.com","zaobao.com","3987.com","uuu9.com","iiyi.com","ems.com.cn","google.co.jp","yxdown.com","haitao.com","newsmth.net","hc360.com","pconline.com.cn","sina.cn","189.cn","dotamax.com","sonhoo.com","gfan.com","bjrcb.com","duowan.com","yunpan.cn","jb51.net","pcbaby.com.cn","miercn.com","runoob.com","miui.com","soku.com","admaimai.com","chiphell.com","cn163.net","sanguosha.com","toutiao.com","ku6.com","meituan.com","globaltimes.cn","kdnet.net","chsi.com.cn","pcauto.com.cn","makepolo.com","zybang.com","dilidili.com","ttmeiju.com","hao123.com","engadget.com","baidu.com","baidu.com.cn","ifeng.com","google.cn","sohu.com","netease.com","weibo.com","wish.com","cdstm.cn","familydoctor.com.cn","163.
com","jianshu.com","ooopic.com","cmbchina.com","jiyoujia.com","zhaopin.com","icloud.com","51sole.com","mi.com","ynet.com","stackoverflow.com","17ok.com","6vhao.com","51auto.com","youtube.com","enet.com.cn","zealer.com","docin.com","godaddy.com","force.com","ithome.com","firefoxchina.cn","liepin.com","uc.cn","iteye.com","tgbus.com","quanjing.com","chengdu.cn","wikipedia.org","178.com","eol.cn","verycd.com","zjol.com.cn","atpanel.com","ganji.com","sourceforge.net","eelly.com","99114.com","btso.pw","btime.com","icolor.com.cn","tuniu.com","58pic.com","oracle.com","wenkang.cn","52pojie.cn","cqnews.net","39yst.com","panda.tv","jqw.com","zimuzu.tv","g4d7.com","babyschool.com.cn","hexun.com","babytree.com","linktech.cn","yahoo.com","iciba.com","t66y.com","ea3w.com","qyer.com","duba.com","netcoc.com","10010.com","sina.com.cn","flyertea.com","youdao.com","qinqinbaby.com","qq.com","jmw.com.cn","chekb.com","weiyun.com","yjbys.com","dxy.cn","kafan.cn","aizhan.com","amazon.cn","cankaoxiaoxi.com","sf-express.com","jd.com","ccb.com.cn","qingdaonews.com","go108.com.cn","wtoutiao.com","kaola.com","1905.com","dwnews.com","cctv.com","cri.cn" 10 | }; 11 | } 12 | 13 | static Boolean is_white (String host) { 14 | for (String s: whilte_list) { 15 | if (s.length() == host.length()) { 16 | if (host.equals(s)) { 17 | return true; 18 | } 19 | } else if (s.length() < host.length()) { 20 | if (host.endsWith("." + s)) { 21 | return true; 22 | } 23 | } 24 | } 25 | return false; 26 | } 27 | 28 | public static double score (String host) { 29 | double result = 0.0; 30 | host = host.toLowerCase(); 31 | if (!is_white(host)) { 32 | if (host.length() > 20) { 33 | result += (host.length() - 20) * 0.1; 34 | } 35 | for (String s: whilte_list) { 36 | int p = host.indexOf(s); 37 | if (p > 1 && (host.charAt(p - 1) == '.' 
|| host.charAt(p - 1) == '-')) { 38 | return (double)s.length(); 39 | } 40 | } 41 | host = host.replace("0", "o"); 42 | host = host.replace("l", "1"); 43 | //host = host.replace("-", ""); 44 | for (String s: whilte_list) { 45 | s = s.replace("0", "o"); 46 | s = s.replace("l", "1"); 47 | int p = host.indexOf(s); 48 | if (p == 0 || (p > 1 && (host.charAt(p - 1) == '.' || host.charAt(p - 1) == '-'))) { 49 | return (double)s.length() + 2; 50 | } 51 | } 52 | } 53 | return result; 54 | } 55 | 56 | public static void main(String[] args) { 57 | System.out.println("t".endsWith("t")); 58 | System.out.println("qq.com: " + score("qq.com")); 59 | System.out.println("www.qq.com: " + score("www.qq.com")); 60 | System.out.println("www.q-q.com: " + score("www.q-q.com")); 61 | System.out.println("www.a-qq.com: " + score("www.a-qq.com")); 62 | System.out.println("www.qq.com.monster.com: " + score("www.qq.com.monster.com")); 63 | System.out.println("10086.cn: " + score("10086.cn")); 64 | System.out.println("l0086.cn: " + score("l0086.cn")); 65 | System.out.println("monster.com: " + score("monster.com")); 66 | System.out.println("xagfjd.com: " + score("xagfjd.com")); 67 | return; 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/main/java/chaitin/phishing/ScoreForm.java: -------------------------------------------------------------------------------- 1 | package chaitin.phishing; 2 | 3 | import java.util.LinkedList; 4 | import java.util.List; 5 | import java.util.regex.Pattern; 6 | 7 | import chaitin.phishing.HtmlParser.Form; 8 | import chaitin.phishing.HtmlParser.Form.Input; 9 | import chaitin.utils.Pair; 10 | 11 | public class ScoreForm { 12 | 13 | static List<Pair<Pattern, Double>> sensitive_action; 14 | static List<Pair<Pattern, Double>> sensitive_form_word; 15 | 16 | static { 17 | sensitive_action = new LinkedList<Pair<Pattern, Double>>(); 18 | sensitive_action.add(new Pair<Pattern, Double>(Pattern.compile(".*fuck.*"), 5.0)); 
19 | sensitive_action.add(new Pair<Pattern, Double>(Pattern.compile(".*diaoyu.*"), 5.0)); 20 | sensitive_action.add(new Pair<Pattern, Double>(Pattern.compile(".*\\.asp.*"), 2.0)); 21 | sensitive_form_word = new LinkedList<Pair<Pattern, Double>>(); 22 | sensitive_form_word.add(new Pair<Pattern, Double>(Pattern.compile(".*身份证.*"), 3.0)); 23 | sensitive_form_word.add(new Pair<Pattern, Double>(Pattern.compile(".*额度.*"), 3.0)); 24 | sensitive_form_word.add(new Pair<Pattern, Double>(Pattern.compile(".*手机号.*"), 1.0)); 25 | } 26 | 27 | public static double scoce_action(String action) { 28 | double result = 0.0; 29 | for (Pair<Pattern, Double> p: sensitive_action) { 30 | if (p.first.matcher(action).matches() && p.second > result) { 31 | result = p.second; 32 | } 33 | } 34 | return result; 35 | } 36 | 37 | public static double score_form(Form form) { 38 | double result = 0.0; 39 | for (Input input: form.input) { 40 | if (input.type == "password") { 41 | result += 3.0; 42 | break; 43 | } 44 | } 45 | for (Pair<Pattern, Double> p: sensitive_form_word) { 46 | if (p.first.matcher(form.text).matches() && p.second > result) { 47 | result = p.second; 48 | } 49 | } 50 | result += scoce_action(form.action); 51 | return result; 52 | } 53 | 54 | public static double score(Form[] forms) { 55 | double result = 0.0; 56 | for (Form form: forms) { 57 | double t = score_form(form); 58 | if (t > result) { 59 | result = t; 60 | } 61 | } 62 | return result; 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/chaitin/phishing/ScoreTitle.java: -------------------------------------------------------------------------------- 1 | package chaitin.phishing; 2 | 3 | import java.util.LinkedList; 4 | import java.util.List; 5 | 6 | import chaitin.utils.Pair; 7 | 8 | public class ScoreTitle { 9 | static List<Pair<String, Double>> sensitive_word; 10 | 11 | static { 12 | sensitive_word = new LinkedList<Pair<String, Double>>(); 13 | 
sensitive_word.add(new Pair<String, Double>("中国平安官网-中国平安保险(集团)股份有限公司-保险,银行,投资", 8.0)); 14 | sensitive_word.add(new Pair<String, Double>("淘宝网 - 淘!我喜欢", 8.0)); 15 | sensitive_word.add(new Pair<String, Double>("京东商城-京东商城官方网站!", 8.0)); 16 | sensitive_word.add(new Pair<String, Double>("京东-欢迎登录", 8.0)); 17 | sensitive_word.add(new Pair<String, Double>("京东(JD.COM)-综合网购首选-正品低价、品质保障、配送及时、轻松购物!", 8.0)); 18 | sensitive_word.add(new Pair<String, Double>("中国移动官方网站", 8.0)); 19 | sensitive_word.add(new Pair<String, Double>("中国联通网上营业厅", 8.0)); 20 | sensitive_word.add(new Pair<String, Double>("苏宁易购(Suning) -综合网上购物商城,正品行货,全国联保,货到付款!", 8.0)); 21 | sensitive_word.add(new Pair<String, Double>("用户登录 - 苏宁易购", 8.0)); 22 | sensitive_word.add(new Pair<String, Double>("上天猫,就够了", 8.0)); 23 | sensitive_word.add(new Pair<String, Double>("网上超市1号店,省力省钱省时间", 8.0)); 24 | sensitive_word.add(new Pair<String, Double>("1号店登录", 8.0)); 25 | sensitive_word.add(new Pair<String, Double>("亚马逊-网上购物商城:要网购, 就来Z.cn!", 8.0)); 26 | sensitive_word.add(new Pair<String, Double>("Amazon 登录", 8.0)); 27 | sensitive_word.add(new Pair<String, Double>("唯品会(原Vipshop.com)特卖会:一家专门做特卖的网站_确保正品_确保低价_货到付款", 8.0)); 28 | sensitive_word.add(new Pair<String, Double>("唯品会网站登录", 8.0)); 29 | sensitive_word.add(new Pair<String, Double>("美丽说—白领的全球导购", 8.0)); 30 | sensitive_word.add(new Pair<String, Double>("登录 - 美丽说", 8.0)); 31 | sensitive_word.add(new Pair<String, Double>("易迅网-专业的电脑、数码家电、手机、汽车用品、鞋服百货网上数码大卖场 - 易迅网", 8.0)); 32 | sensitive_word.add(new Pair<String, Double>("国美在线(GOME)-综合网购商城,正品低价、品质保障、快速送达、安心服务!", 8.0)); 33 | sensitive_word.add(new Pair<String, Double>("用户登录-国美在线", 8.0)); 34 | sensitive_word.add(new Pair<String, Double>("聚美优品 - 【极速免税店 品牌防伪码】正品化妆品团购网站BJ,千万用户推荐,拆封30天无条件退货!", 8.0)); 35 | sensitive_word.add(new Pair<String, Double>("登录聚美", 8.0)); 36 | sensitive_word.add(new Pair<String, Double>("华为商城官网-提供华为手机(", 8.0)); 37 | sensitive_word.add(new Pair<String, Double>("欢迎访问中国建设银行网站", 8.0)); 38 | sensitive_word.add(new 
Pair<String, Double>("中国建设银行 个人客户网上银行", 8.0)); 39 | sensitive_word.add(new Pair<String, Double>("交通银行 - 交银金融网", 8.0)); 40 | sensitive_word.add(new Pair<String, Double>("一网通主页 -- 招商银行官方网站", 8.0)); 41 | sensitive_word.add(new Pair<String, Double>("中国银行全球门户网站", 8.0)); 42 | sensitive_word.add(new Pair<String, Double>("中国工商银行中国网站", 8.0)); 43 | sensitive_word.add(new Pair<String, Double>("中国农业银行", 8.0)); 44 | sensitive_word.add(new Pair<String, Double>("首页 - 广发银行", 8.0)); 45 | sensitive_word.add(new Pair<String, Double>("中国邮政储蓄银行", 8.0)); 46 | sensitive_word.add(new Pair<String, Double>("登录 - 当当网", 8.0)); 47 | sensitive_word.add(new Pair<String, Double>("当当—网上购物中心:图书、母婴、美妆、家居、数码、家电、服装、鞋包等,正品低价,货到付款", 8.0)); 48 | sensitive_word.add(new Pair<String, Double>("126网易免费邮--你的专业电子邮局", 8.0)); 49 | sensitive_word.add(new Pair<String, Double>("163网易免费邮--中文邮箱第一品牌", 8.0)); 50 | sensitive_word.add(new Pair<String, Double>("企业邮箱领航者|163企业邮箱-网易企业邮箱-外贸企业邮箱-中文企业邮箱首选品牌解决方案", 8.0)); 51 | sensitive_word.add(new Pair<String, Double>("登录QQ邮箱", 8.0)); 52 | sensitive_word.add(new Pair<String, Double>("QQ空间-分享生活,留住感动", 8.0)); 53 | sensitive_word.add(new Pair<String, Double>("微博-随时随地发现新鲜事", 8.0)); 54 | sensitive_word.add(new Pair<String, Double>("登录 - 支付宝", 8.0)); 55 | sensitive_word.add(new Pair<String, Double>("登录 | 美团网", 8.0)); 56 | sensitive_word.add(new Pair<String, Double>("[官]欢迎访问全国信用在线申请中心", 15.0)); 57 | sensitive_word.add(new Pair<String, Double>("登录", 2.0)); 58 | sensitive_word.add(new Pair<String, Double>("登陆", 3.0)); 59 | sensitive_word.add(new Pair<String, Double>("中奖", 2.0)); 60 | sensitive_word.add(new Pair<String, Double>("抽奖", 2.0)); 61 | sensitive_word.add(new Pair<String, Double>("信用", 2.0)); 62 | } 63 | 64 | public static double score(String title) { 65 | double result = 0.0; 66 | for (Pair<String, Double> p: sensitive_word) { 67 | if (title.indexOf(p.first) != -1 && p.second > result) { 68 | result = p.second; 69 | } 70 | } 71 | return result; 72 | } 73 | 74 | public static 
/**
 * Lenient Base64 helpers: locate the longest run of Base64 characters in a
 * byte array and decode as much of a Base64 string as possible.
 */
public class Base64 {

    /** Value returned by {@link #alpha} for the padding character '='. */
    private static final byte PAD = (byte) 0xFE;
    /** Value returned by {@link #alpha} for bytes outside the Base64 alphabet. */
    private static final byte INVALID = (byte) 0xFF;

    /** Placeholder: always returns an empty list. */
    List<byte[]> decode(byte[] input) {
        List<byte[]> result = new LinkedList<byte[]>();
        return result;
    }

    /**
     * Maps one Base64 character to its 6-bit value.
     *
     * @param c input byte
     * @return 0..63 for alphabet characters, (byte) 0xFE for '=',
     *         (byte) 0xFF for anything else
     */
    public static byte alpha(byte c) {
        if (c >= 'A' && c <= 'Z') {
            return (byte) (c - 'A');
        } else if (c >= 'a' && c <= 'z') {
            return (byte) (c - 'a' + 26);
        } else if (c >= '0' && c <= '9') {
            return (byte) (c - '0' + 52);
        } else if (c == '+') {
            return (byte) (62);
        } else if (c == '/') {
            return (byte) (63);
        } else if (c == '=') {
            return PAD;
        } else {
            return INVALID;
        }
    }

    /** Returns true for Base64 alphabet characters and for the padding '='. */
    public static boolean is_alpha(byte c) {
        return alpha(c) != INVALID;
    }

    /**
     * Returns the longest contiguous run of bytes accepted by
     * {@link #is_alpha}; the first such run wins on ties.
     *
     * @param input bytes to scan; returned as-is when empty
     * @return a copy of the longest run, empty when no byte qualifies
     */
    public static byte[] longest_sub_base64(byte[] input) {
        if (input.length == 0) {
            return input;
        }
        int bestEnd = 0;   // exclusive end index of the best run so far
        int bestLen = 0;
        int run = 0;       // length of the run ending at the current index
        for (int i = 0; i < input.length; ++i) {
            run = is_alpha(input[i]) ? run + 1 : 0;
            if (run > bestLen) {
                bestLen = run;
                bestEnd = i + 1;
            }
        }
        return Arrays.copyOfRange(input, bestEnd - bestLen, bestEnd);
    }

    /**
     * Decodes Base64 leniently, four characters at a time, stopping at the
     * first byte that cannot be decoded.
     *
     * When a group is cut short by '=' or by the end of the input in its third
     * or fourth position, the bits left over from the previous character are
     * still written as one extra byte (low bits zero). A group whose first or
     * second character is '=' or outside the alphabet ends decoding without
     * output; previously the '=' sentinel's low bits were emitted as data.
     *
     * @param input Base64 text as bytes
     * @return the decoded bytes
     */
    public static byte[] decode_base64(byte[] input) {
        ByteArrayOutputStream output = new ByteArrayOutputStream();
        for (int i = 0; i + 1 < input.length; i += 4) {
            byte v0 = alpha(input[i]);
            byte v1 = alpha(input[i + 1]);
            // Both sentinels are negative as bytes; real values are 0..63.
            if (v0 < 0 || v1 < 0) {
                break;
            }
            output.write((v0 << 2) | (v1 >> 4));

            byte v2 = i + 2 < input.length ? alpha(input[i + 2]) : PAD;
            if (v2 == INVALID) {
                break;
            }
            if (v2 == PAD) {
                output.write((v1 & 0x0F) << 4);
                break;
            }
            output.write(((v1 & 0x0F) << 4) | (v2 >> 2));

            byte v3 = i + 3 < input.length ? alpha(input[i + 3]) : PAD;
            if (v3 == INVALID) {
                break;
            }
            if (v3 == PAD) {
                output.write((v2 & 0x03) << 6);
                break;
            }
            output.write(((v2 & 0x03) << 6) | v3);
        }
        return output.toByteArray();
    }

    /** Prints the result of decoding one sample and of one longest-run search. */
    public static void main(String[] args) {
        System.out.println(new String(decode_base64("YWJjZA0==".getBytes())));
        System.out.println(new String(longest_sub_base64("!@#$test---123456".getBytes())));
    }
}
public class ChaitinRecord implements Record { 9 | 10 | Object[] obj; 11 | 12 | public ChaitinRecord(Object[] obj) { 13 | this.obj = obj; 14 | } 15 | 16 | @Override 17 | public Object get(int arg0) { 18 | return obj[arg0]; 19 | } 20 | 21 | @Override 22 | public Object get(String arg0) { 23 | // TODO Auto-generated method stub 24 | return null; 25 | } 26 | 27 | @Override 28 | public Long getBigint(int arg0) { 29 | // TODO Auto-generated method stub 30 | return null; 31 | } 32 | 33 | @Override 34 | public Long getBigint(String arg0) { 35 | // TODO Auto-generated method stub 36 | return null; 37 | } 38 | 39 | @Override 40 | public Boolean getBoolean(int arg0) { 41 | // TODO Auto-generated method stub 42 | return null; 43 | } 44 | 45 | @Override 46 | public Boolean getBoolean(String arg0) { 47 | // TODO Auto-generated method stub 48 | return null; 49 | } 50 | 51 | @Override 52 | public byte[] getBytes(int arg0) { 53 | // TODO Auto-generated method stub 54 | return null; 55 | } 56 | 57 | @Override 58 | public byte[] getBytes(String arg0) { 59 | // TODO Auto-generated method stub 60 | return null; 61 | } 62 | 63 | @Override 64 | public int getColumnCount() { 65 | // TODO Auto-generated method stub 66 | return 0; 67 | } 68 | 69 | @Override 70 | public Column[] getColumns() { 71 | // TODO Auto-generated method stub 72 | return null; 73 | } 74 | 75 | @Override 76 | public Date getDatetime(int arg0) { 77 | // TODO Auto-generated method stub 78 | return null; 79 | } 80 | 81 | @Override 82 | public Date getDatetime(String arg0) { 83 | // TODO Auto-generated method stub 84 | return null; 85 | } 86 | 87 | @Override 88 | public Double getDouble(int arg0) { 89 | // TODO Auto-generated method stub 90 | return null; 91 | } 92 | 93 | @Override 94 | public Double getDouble(String arg0) { 95 | // TODO Auto-generated method stub 96 | return null; 97 | } 98 | 99 | @Override 100 | public String getString(int arg0) { 101 | // TODO Auto-generated method stub 102 | return null; 103 | } 104 | 
105 | @Override 106 | public String getString(String arg0) { 107 | // TODO Auto-generated method stub 108 | return null; 109 | } 110 | 111 | @Override 112 | public void set(Object[] arg0) { 113 | obj = arg0; 114 | 115 | } 116 | 117 | @Override 118 | public void set(int arg0, Object arg1) { 119 | obj[arg0] = arg1; 120 | 121 | } 122 | 123 | @Override 124 | public void set(String arg0, Object arg1) { 125 | // TODO Auto-generated method stub 126 | 127 | } 128 | 129 | @Override 130 | public void setBigint(int arg0, Long arg1) { 131 | // TODO Auto-generated method stub 132 | 133 | } 134 | 135 | @Override 136 | public void setBigint(String arg0, Long arg1) { 137 | // TODO Auto-generated method stub 138 | 139 | } 140 | 141 | @Override 142 | public void setBoolean(int arg0, Boolean arg1) { 143 | obj[arg0] = arg1; 144 | 145 | } 146 | 147 | @Override 148 | public void setBoolean(String arg0, Boolean arg1) { 149 | // TODO Auto-generated method stub 150 | 151 | } 152 | 153 | @Override 154 | public void setDatetime(int arg0, Date arg1) { 155 | // TODO Auto-generated method stub 156 | 157 | } 158 | 159 | @Override 160 | public void setDatetime(String arg0, Date arg1) { 161 | // TODO Auto-generated method stub 162 | 163 | } 164 | 165 | @Override 166 | public void setDouble(int arg0, Double arg1) { 167 | // TODO Auto-generated method stub 168 | 169 | } 170 | 171 | @Override 172 | public void setDouble(String arg0, Double arg1) { 173 | // TODO Auto-generated method stub 174 | 175 | } 176 | 177 | @Override 178 | public void setString(int arg0, String arg1) { 179 | obj[arg0] = arg1; 180 | } 181 | 182 | @Override 183 | public void setString(String arg0, String arg1) { 184 | // TODO Auto-generated method stub 185 | 186 | } 187 | 188 | @Override 189 | public void setString(int arg0, byte[] arg1) { 190 | // TODO Auto-generated method stub 191 | 192 | } 193 | 194 | @Override 195 | public void setString(String arg0, byte[] arg1) { 196 | // TODO Auto-generated method stub 197 | 198 | } 199 | 
200 | @Override 201 | public Object[] toArray() { 202 | // TODO Auto-generated method stub 203 | return obj; 204 | } 205 | 206 | } 207 | -------------------------------------------------------------------------------- /src/main/java/chaitin/utils/Decoder.java: -------------------------------------------------------------------------------- 1 | package chaitin.utils; 2 | 3 | import java.util.LinkedList; 4 | import java.util.List; 5 | 6 | public class Decoder { 7 | List<byte[]> decode(byte[] input) { 8 | List<byte[]> result = new LinkedList<byte[]>(); 9 | return result; 10 | } 11 | } -------------------------------------------------------------------------------- /src/main/java/chaitin/utils/Gao.java: -------------------------------------------------------------------------------- 1 | package chaitin.utils; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.FileInputStream; 5 | import java.io.FileWriter; 6 | import java.io.IOException; 7 | import java.io.InputStreamReader; 8 | 9 | import com.aliyun.odps.data.Record; 10 | import com.aliyun.odps.mapred.Mapper; 11 | import com.google.gson.Gson; 12 | 13 | import chaitin.webshell.WebshellDetector; 14 | import chaitin.webshell.WebshellMapper; 15 | import chaitin.webshell.parser.AspScore; 16 | import chaitin.webshell.parser.PhpScore; 17 | 18 | public class Gao { 19 | 20 | static String output_file_name = "/tmp/out.txt"; 21 | static int count = 0; 22 | 23 | public static void map_file(String input_file_name, Mapper mapper) throws IOException { 24 | //* 25 | new FileWriter(output_file_name).close(); 26 | BufferedReader br = new BufferedReader(new InputStreamReader( 27 | new FileInputStream(input_file_name))); 28 | String line; 29 | Gson gson = new Gson(); 30 | while ((line = br.readLine()) != null) { 31 | String[] item = gson.fromJson(line, String[].class); 32 | ChaitinRecord record = new ChaitinRecord(item); 33 | mapper.map(0, record, null); 34 | //break; 35 | } 36 | br.close(); 37 | //*/ 38 | } 39 | 40 | public 
static void dump(Object[] item) throws IOException { 41 | //* 42 | Gson gson = new Gson(); 43 | String result = gson.toJson(item); 44 | System.out.println(result); 45 | FileWriter fw = new FileWriter(output_file_name, true); 46 | fw.write(result + "\n"); 47 | fw.close(); 48 | //System.exit(0); 49 | count += 1; 50 | //*/ 51 | } 52 | 53 | public static void main(String[] args) throws Exception { 54 | WebshellMapper mapper = new WebshellMapper(); 55 | 56 | map_file("/Users/Monster/Documents/webshell.in.json", mapper); 57 | System.out.println("count: " + count); 58 | System.out.println("precision: " + (mapper._tp/(mapper._tp + mapper._fp))); 59 | System.out.println("recall: " + (mapper._tp/(mapper._tp + mapper._fn))); 60 | 61 | boolean b = WebshellDetector.isWebshell("", "1\u003d%40eval%2F%2A%CE%D2%C8%A5%C4%E3%C2%EE%C1%CB%B8%F4%B1%DA%2A%2F%01%28%24%7B%27%5FP%27.%27OST%27%7D%5Bz9%5D%2F%2A%CE%D2%C8%A5%C4%E3%C2%EE%C1%CB%B8%F4%B1%DA%2A%2F%01%28%24%7B%27%5FPOS%27.%27T%27%7D%5Bz0%5D%29%29%3B\u0026z0\u003dNTk1NTQ2O0Bpbmlfc2V0KCJkaXNwbGF5X2Vycm9ycyIsIjA"); 62 | System.out.println(b); 63 | } 64 | 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/chaitin/utils/Multipart.java: -------------------------------------------------------------------------------- 1 | package chaitin.utils; 2 | 3 | import java.util.LinkedList; 4 | import java.util.List; 5 | 6 | public class Multipart extends Decoder { 7 | @Override 8 | List<byte[]> decode(byte[] input) { 9 | List<byte[]> result = new LinkedList<byte[]>(); 10 | return result; 11 | } 12 | /* 13 | static List<Pair<byte[], byte[]>> multipart(byte[] input) { 14 | List<Pair<byte[], byte[]>> result = new LinkedList<Pair<byte[], byte[]>>(); 15 | int pos = Multipart.accept(input, 0, "--".getBytes()); 16 | if (pos == 0) { 17 | return result; 18 | } 19 | int pos_boundary = pos; 20 | while (pos < input.length && input[pos] != '\r' && input[pos] != '\n') { 21 | pos += 1; 22 | } 23 | byte[] 
boundary = Arrays.copyOfRange(input, pos_boundary, pos);
        return result;
    }

    static int accept(byte[] input, int pos, byte[] sub) {
        int i = 0;
        for (i = 0; i < sub.length && pos + i < input.length && sub[i] == input[pos + i]; ++i) {
            ;
        }
        return i == sub.length ? pos + i : 0;
    }

    static int accept_line(byte[] input, int pos) {

    }*/
}
--------------------------------------------------------------------------------
/src/main/java/chaitin/utils/Pair.java:
--------------------------------------------------------------------------------
package chaitin.utils;

/**
 * Mutable two-element tuple with public fields.
 *
 * @param <T1> type of {@link #first}
 * @param <T2> type of {@link #second}
 */
public class Pair<T1, T2> {

    public T1 first;
    public T2 second;


    /** Creates a pair with both fields {@code null}. */
    public Pair() {
        super();
    }

    /** Creates a pair holding the given values. */
    public Pair(T1 first, T2 second) {
        super();
        this.first = first;
        this.second = second;
    }
}
--------------------------------------------------------------------------------
/src/main/java/chaitin/utils/ParseUrl.java:
--------------------------------------------------------------------------------
package chaitin.utils;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;

public class ParseUrl extends Decoder {
    /** Components of a parsed URL, each kept as raw bytes. */
    public static class Url {
        public byte [] protocol;
        public byte [] host;
        public int port;
        public byte [] path;
        public byte [] query;
    }

    @Override
    List<byte[]> decode(byte[] input) {
        List<byte[]> result = new LinkedList<byte[]>();
        return result;
    }

    /**
     * Splits a URL into protocol, host, port, path and query.
     *
     * <p>If the text is not a well-formed URL the stack trace is printed and
     * the returned object keeps its default field values (null arrays, port 0).
     * A URL without a query part yields an empty {@code query} array.
     *
     * @param input URL text as bytes, decoded with the platform default charset
     * @return the parsed components; never {@code null}
     */
    public static Url parse_url(byte[] input) {
        Url url = new Url();
        try {
            URL _url = new URL(new String(input));
            url.protocol = _url.getProtocol().getBytes();
            url.host = _url.getHost().getBytes();
            url.port = _url.getPort();
            url.path = _url.getPath().getBytes();
            // URL.getQuery() returns null when the URL has no '?' part; calling
            // getBytes() on it directly threw NullPointerException.
            String query = _url.getQuery();
            url.query = (query == null) ? new byte[0] : query.getBytes();
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
        return url;
    }
}
--------------------------------------------------------------------------------
/src/main/java/chaitin/utils/QueryString.java:
--------------------------------------------------------------------------------
package chaitin.utils;

import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;

public class QueryString extends Decoder{

    @Override
    List<byte[]> decode(byte[] input) {
        List<byte[]> result = new LinkedList<byte[]>();
        return result;
    }

    /**
     * Parses {@code key=value&key=value...} bytes into percent-decoded pairs.
     *
     * <p>A key ends at the first '=' or '&amp;'; a value, present only after
     * '=', ends at the next '&amp;'. Key and value are each passed through
     * {@link Unquote#unquote(byte[])}. Pairs whose key and value are both empty
     * are dropped; a key without '=' yields an empty value.
     *
     * @param input raw query-string bytes
     * @return the pairs in input order
     */
    public static List<Pair<byte[], byte[]>> query_string(byte[] input) {
        List<Pair<byte[], byte[]>> result = new LinkedList<Pair<byte[], byte[]>>();
        int pos = 0;
        while (pos < input.length) {
            int pos_key_start = pos;
            while (pos < input.length
                    && input[pos] != '='
                    && input[pos] != '&') {
                pos += 1;
            }
            int pos_key_end = pos;

            // Both stay 0 when there is no '=', giving an empty value slice.
            int pos_value_start = 0;
            int pos_value_end = 0;

            if (pos < input.length
                    && input[pos] == '=') {
                pos += 1;
                pos_value_start = pos;
                while (pos < input.length
                        && input[pos] != '&') {
                    pos += 1;
                }
                pos_value_end = pos;
            }

            // Step over the pair separator, if any.
            if (pos < input.length && input[pos] == '&') {
                pos += 1;
            }

            byte[] key = Unquote.unquote(Arrays.copyOfRange(input, pos_key_start, pos_key_end));
            byte[] value = Unquote.unquote(Arrays.copyOfRange(input, pos_value_start, pos_value_end));
            if (key.length != 0 || value.length != 0) {
                result.add(new Pair<byte[], byte[]>(key, value));
            }
        }
        return result;
    }
}
--------------------------------------------------------------------------------
/src/main/java/chaitin/utils/Unquote.java:
--------------------------------------------------------------------------------
package chaitin.utils;

import java.io.ByteArrayOutputStream;
import java.util.LinkedList;
import java.util.List;

public class Unquote extends Decoder {
    @Override
    List<byte[]> decode(byte[] input) {
        List<byte[]> result = new LinkedList<byte[]>();
        return result;
    }

    /**
     * Percent-decodes a byte string.
     *
     * <ul>
     *   <li>{@code %XX} becomes the byte with hex value XX;</li>
     *   <li>{@code %uXXXX} / {@code %UXXXX} becomes the low byte, preceded by the
     *       high byte unless the first two hex digits are both '0';</li>
     *   <li>{@code +} becomes a space;</li>
     *   <li>anything else, including an incomplete escape, is copied unchanged.</li>
     * </ul>
     *
     * @param input encoded bytes
     * @return decoded bytes
     */
    public static byte[] unquote(byte[] input) {
        ByteArrayOutputStream output = new ByteArrayOutputStream();
        int pos = 0;
        while (pos < input.length) {
            if (pos + 2 < input.length
                    && input[pos] == '%'
                    && isxdigit(input[pos + 1])
                    && isxdigit(input[pos + 2])) {
                output.write(Unquote.ord(input[pos + 1], input[pos + 2]));
                pos += 3;
            } else if (pos + 5 < input.length
                    && input[pos] == '%'
                    && (input[pos + 1] == 'u' || input[pos + 1] == 'U' )
                    && isxdigit(input[pos + 2])
                    && isxdigit(input[pos + 3])
                    && isxdigit(input[pos + 4])
                    && isxdigit(input[pos + 5])) {
                // The high byte is emitted only when its two digits are not "00".
                if (input[pos + 2] != '0' || input[pos + 3] != '0') {
                    output.write(Unquote.ord(input[pos + 2], input[pos + 3]));
                }
                output.write(Unquote.ord(input[pos + 4], input[pos + 5]));
                pos += 6;
            } else if (input[pos] == '+') {
                output.write(' ');
                pos += 1;
            } else {
                output.write(input[pos]);
                pos += 1;
            }
        }
        return output.toByteArray();
    }

    /** Returns whether {@code b} is an ASCII hexadecimal digit. */
    static boolean isxdigit(byte b) {
        return (b >= '0' && b <= '9')
            || (b >= 'a' && b <= 'f')
            || (b >= 'A' && b <= 'F');
    }

    /**
     * Combines two ASCII hex digits into one byte: {@code a} is the high
     * nibble, {@code b} the low nibble. For a non-decimal character the value
     * is its low four bits plus 9, which maps 'a'..'f' and 'A'..'F' to 10..15.
     */
    static byte ord(byte a, byte b) {
        if (a >= '0' && a <= '9') {
            a = (byte)(a & 0x0F);
        } else {
            a = (byte)((a & 0x0F) + 9);
        }
        if (b >= '0' && b <= '9') {
            b = (byte)(b & 0x0F);
        } else {
            b = (byte)((b & 0x0F) + 9);
        }
        return (byte)((a << 4) | b);
    }
}
--------------------------------------------------------------------------------
/src/main/java/chaitin/webshell/WebshellDetector.java:
-------------------------------------------------------------------------------- 1 | package chaitin.webshell; 2 | 3 | import java.util.HashMap; 4 | import java.util.List; 5 | import java.util.Map; 6 | 7 | import chaitin.utils.Base64; 8 | import chaitin.utils.Pair; 9 | import chaitin.utils.QueryString; 10 | import chaitin.utils.Unquote; 11 | import chaitin.webshell.parser.AspScore; 12 | import chaitin.webshell.parser.PhpScore; 13 | 14 | public class WebshellDetector { 15 | 16 | static double thres_hold = 2.0; 17 | 18 | static Map<String, Double> sensitive_key = new HashMap<String, Double>() { 19 | private static final long serialVersionUID = 97314362015453784L; 20 | 21 | { 22 | put("z", 1.0); 23 | put("z0", 1.5); 24 | put("z1", 1.0); 25 | put("z2", 1.0); 26 | put("caidao", 1.8); 27 | put("mb", 0.8); 28 | put("hk715", 1.2); 29 | put("xise", 1.5); 30 | put("diaosi", 1.0); 31 | }}; 32 | 33 | 34 | public static double scorePhp_one(byte[] payload) { 35 | double score = new PhpScore(payload).score(); 36 | byte[] payload_base64 = Base64.decode_base64(payload); 37 | double score_base64_decoded = new PhpScore(payload_base64).score(); 38 | if (score_base64_decoded > 0.8) { 39 | score_base64_decoded += 0.3; 40 | } 41 | return score > score_base64_decoded ? score : score_base64_decoded; 42 | } 43 | 44 | public static double scorePhp(byte[] payload) { 45 | String s = new String(payload); 46 | double score = scorePhp_one(payload); 47 | /*if (s.indexOf('"') + 1 < payload.length) { 48 | double tscore = scorePhp_one(s.substring(s.indexOf('"') + 1).getBytes()); 49 | score = score > tscore ? score : tscore; 50 | } 51 | if (s.indexOf('\'') + 1 < payload.length) { 52 | double tscore = scorePhp_one(s.substring(s.indexOf('\'') + 1).getBytes()); 53 | score = score > tscore ? 
score : tscore; 54 | }*/ 55 | return score; 56 | } 57 | 58 | public static double scoreAsp(byte[] payload) { 59 | double score = new AspScore(payload).score(); 60 | byte[] payload_base64 = Base64.decode_base64(payload); 61 | byte[] payload_without_op = AspScore.filter_strop(payload); 62 | double score_without_op = new AspScore(payload_without_op).score(); 63 | double score_base64_decoded = new AspScore(payload_base64).score(); 64 | if (score_base64_decoded > 0.8) { 65 | score_base64_decoded += 0.3; 66 | } 67 | score = score > score_base64_decoded ? score : score_base64_decoded; 68 | return score; 69 | } 70 | 71 | public static double score_key(byte[] payload) { 72 | String s = new String(payload).toLowerCase(); 73 | if (sensitive_key.containsKey(s)) { 74 | return sensitive_key.get(s); 75 | } 76 | double score = 0.0; 77 | for (char c: s.toCharArray()) { 78 | if (!Character.isLetter(c) 79 | && !Character.isDigit(c) 80 | && c != '_' 81 | && c != '$') { 82 | score -= 0.5; 83 | } 84 | } 85 | return score; 86 | } 87 | 88 | public static double score(byte[] payload) { 89 | //return WebshellTokenizer.scoreTokens(new String(payload)); 90 | double score_php = scorePhp(payload); 91 | double score_asp = scoreAsp(payload); 92 | return score_php > score_asp ? 
score_php : score_asp; 93 | } 94 | 95 | public static boolean isWebshell(String uri, String data) { 96 | 97 | List<Pair<byte[], byte[]>> plist = QueryString.query_string(data.getBytes()); 98 | 99 | for (Pair<byte[], byte[]> p : plist) { 100 | byte[] key = Unquote.unquote(p.first); 101 | byte[] value = Unquote.unquote(p.second); 102 | 103 | if (score(key) >= thres_hold) { 104 | return true; 105 | } 106 | if (score_key(key) + score(value) >= thres_hold) { 107 | return true; 108 | } 109 | } 110 | /* 111 | uri = uri.substring(uri.indexOf('?') + 1); 112 | 113 | if (uri.length() > 1) { 114 | plist = QueryString.query_string(uri.getBytes()); 115 | for (Pair<byte[], byte[]> p : plist) { 116 | 117 | byte[] key = Unquote.unquote(p.first); 118 | byte[] value = Unquote.unquote(p.second); 119 | 120 | if (score(key) >= thres_hold) { 121 | return true; 122 | } 123 | if (score(value) >= thres_hold) { 124 | return true; 125 | } 126 | } 127 | }*/ 128 | return false; 129 | } 130 | 131 | public static void main(String[] args) throws Exception { 132 | // List<Pair<byte[], byte[]>> plist = QueryString.query_string("/44b676ed1a4a6ea7ba0918cf05093f1d/f9a1e3cd54ace2b54024e1b21a7637ab?_timestamp_=rc_time_grit_hour_one".getBytes()); 133 | 134 | //String s = "ysh=execute(\"response.clear:response.write(\"\"jinlaile\"\"):response.end\")"; 135 | //System.out.println(isWebshell("", s) == true); 136 | 137 | //s = 
"xiaoliang=Execute(\"Execute(\"\"On+Error+Resume+Next:Function+bd%28byVal+s%29%3AFor+i%3D1+To+Len%28s%29+Step+2%3Ac%3DMid%28s%2Ci%2C2%29%3AIf+IsNumeric%28Mid%28s%2Ci%2C1%29%29+Then%3AExecute%28%22%22%22%22bd%3Dbd%26chr%28%26H%22%22%22%22%26c%26%22%22%22%22%29%22%22%22%22%29%3AElse%3AExecute%28%22%22%22%22bd%3Dbd%26chr%28%26H%22%22%22%22%26c%26Mid%28s%2Ci%2B2%2C2%29%26%22%22%22%22%29%22%22%22%22%29%3Ai%3Di%2B2%3AEnd+If%22%22%26chr%2810%29%26%22%22Next%3AEnd+Function:Response.Write(\"\"\"\"#onewordbackdoor->|\"\"\"\"):Execute(\"\"\"\"On+Error+Resume+Next:\"\"\"\"%26bd(\"\"\"\"44696D20533A533D5365727665722E4D61707061746828222E2229266368722839293A53455420433D4372656174654F626A6563742822536372697074696E672E46696C6553797374656D4F626A656374"; 138 | //System.out.println(isWebshell("", s) == true); 139 | 140 | //s = "sd=Execute++++++++++++++++++++++++++++++(\"++++++++++++++++++++++++++++++Execute++++++++++++++++++++++++++++++(\"\"++++++++++:Function+bd%28byVal+s%29%3AFor+i%3D1+To+Len%28s%29+Step+2%3Ac%3DMid%28s%2Ci%2C2%29%3AIf+IsNumeric%28Mid%28s%2Ci%2C1%29%29+Then%3AExecute%28%22%22%22%22bd%3Dbd%26chr%28%26H%22%22%22%22%26c%26%22%22%22%22%29%22%22%22%22%29%3AElse%3AExecute%28%22%22%22%22bd%3Dbd%26chr%28%26H%22%22%22%22%26c%26Mid%28s%2Ci%2B2%2C2%29%26%22%22%22%22%29%22%22%22%22%29%3Ai%3Di%2B2%3AEnd+If%22%22%26chr%2810%29%26%22%22Next%3AEnd+Function:Response.Write(\"\"\"\"->|\"\"\"\"):++++++++"; 141 | //System.out.println(isWebshell("", s) == true); 142 | 143 | //s = 
"cmd=%40eval%2F%2A%CE%D2%C8%A5%C4%E3%C2%EE%C1%CB%B8%F4%B1%DA%2A%2F%01%28%24%7B%27%5FP%27.%27OST%27%7D%5Bz9%5D%2F%2A%CE%D2%C8%A5%C4%E3%C2%EE%C1%CB%B8%F4%B1%DA%2A%2F%01%28%24%7B%27%5FPOS%27.%27T%27%7D%5Bz0%5D%29%29%3B&z0=Nzc0MTEwO0Bpbmlfc2V0KCJkaXNwbGF5X2Vycm9ycyIsIjAiKTtAc2V0X3RpbWVfbGltaXQoMCk7QHNldF9tYWdpY19xdW90ZXNfcnVudGltZSgwKTtlY2hvKCItPnwiKTs7ZnVuY3Rpb24gc2V0X3dyaXRlYWJsZSgkZmlsZV9uYW1lKXtpZihAY2htb2QoJGZpbGVfbmFtZSxiYXNlX2NvbnZlcnQoYmFzZTY0X2RlY29kZSgkX1BPU1RbIngyIl0pLDgsMTApKSl7ZWNobyAiMSI7fWVsc2V7ZWNobyAiLTEiO319c2V0X3dyaXRlYWJsZShiYXNlNjRfZGVjb2RlKCRfUE9TVFsieDEiXSkpO2VjaG8oInw8LSIpOztkaWUoKTs%3D&x1=RDovd2Vic2l0ZXMveGluc2p6LmNvbS9wdWJsaWNfaHRtbC9uZXdzLzA0MDM0MjY0Lmh0bWw%3D&x2=MDY2Ng%3D%3D&z9=BaSE64%5FdEcOdE"; 144 | //System.out.println(isWebshell("", s) == true); 145 | 146 | //String s = "z0=NTYwNjQ4O0Bpbmlfc2V0KCJkaXNwbGF5X2Vycm9ycyIsIjAiKTtAc2V0X3RpbWVfbGltaXQoMCk7QHNldF9tYWdpY19xdW90ZXNfcnVudGltZSgwKTtlY2hvKCItPnwiKTs7ZWNobyBAZndyaXRlKGZvcGVuKGJhc2U2NF9kZWNvZGUoJF9QT1NUWyJ6MSJdKSwidyIpLGJhc2U2NF9kZWNvZGUoJF9QT1NUWyJ6MiJdKSk%"; 147 | //System.out.println(isWebshell("", s) == true); 148 | /* 149 | BufferedReader br = new BufferedReader(new InputStreamReader( 150 | new FileInputStream("/tmp/z0.post_data"))); 151 | int c = 0; 152 | for (String line = br.readLine(); line != null; line = br.readLine()) { 153 | if (!isWebshell("", line)) { 154 | System.out.println(line); 155 | c += 1; 156 | if (c > 20) { 157 | break; 158 | } 159 | } 160 | } 161 | br.close();*/ 162 | } 163 | 164 | } 165 | -------------------------------------------------------------------------------- /src/main/java/chaitin/webshell/WebshellMapper.java: -------------------------------------------------------------------------------- 1 | package chaitin.webshell; 2 | import java.io.IOException; 3 | import com.aliyun.odps.data.Record; 4 | import com.aliyun.odps.mapred.Mapper; 5 | 6 | import chaitin.utils.Gao; 7 | 8 | public class WebshellMapper implements Mapper { 9 | 10 | public double _tp, 
_fp, _fn; 11 | 12 | public void setup(TaskContext context) throws IOException { 13 | 14 | } 15 | 16 | 17 | public void map(long recordNum, Record record, TaskContext context) throws IOException { 18 | String id = (String) record.get(0); 19 | String uri = (String) record.get(1); 20 | String data = (String) record.get(2); 21 | Boolean result = WebshellDetector.isWebshell(uri, data); 22 | if (context != null) { 23 | if (result) { 24 | Record result_record = context.createOutputRecord(); 25 | result_record.set("id", id); 26 | context.write(result_record); 27 | } 28 | } else { 29 | if (!record.get(3).toString().equals("true") && !record.get(3).toString().equals("false")) { 30 | System.out.println("err! "); 31 | System.exit(0); 32 | } 33 | if (result.toString().equals("true") && record.get(3).toString().equals("true")) { 34 | _tp += 1; 35 | } 36 | if (result.toString().equals("true") && record.get(3).toString().equals("false")) { 37 | _fp += 1; 38 | } 39 | if (result.toString().equals("false") && record.get(3).toString().equals("true")) { 40 | _fn += 1; 41 | } 42 | 43 | if (!result.toString().equals(record.get(3))) { 44 | Gao.dump(record.toArray()); 45 | } 46 | } 47 | } 48 | 49 | public void cleanup(TaskContext context) throws IOException { 50 | 51 | } 52 | 53 | 54 | public static void main(String[] args) throws Exception { 55 | 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/chaitin/webshell/WebshellTokenizer.java: -------------------------------------------------------------------------------- 1 | package chaitin.webshell; 2 | 3 | import java.util.HashMap; 4 | import java.util.HashSet; 5 | import java.util.Iterator; 6 | import java.util.Map; 7 | import java.util.Map.Entry; 8 | 9 | import chaitin.utils.Base64; 10 | 11 | public class WebshellTokenizer { 12 | 13 | // no _ 14 | private static final String separators = "[`~!@#$%^&*()+=\\-{}\\[\\]|\\:;\"<>',.?/\\\\ \n\r\t]"; 15 | 16 | private static final 
HashMap<String, Integer> tokenScore = new HashMap<String, Integer>() { 17 | private static final long serialVersionUID = 5401942617951940220L; 18 | { 19 | //put("assert", 1); 20 | //put("allowstaticmethodaccess", 1); 21 | //put("array", 1); 22 | put("array_map", 2); 23 | put("base64_decode", 2); 24 | put("catch", 1); 25 | put("chr", 1); 26 | put("create_function", 1); 27 | put("display_errors", 2); 28 | put("echo", 1); 29 | put("encoding", 2); 30 | put("eval", 2); 31 | put("exception", 2); 32 | put("execute", 1); 33 | put("exit", 1); 34 | put("frombase64string", 2); 35 | put("getencoding", 2); 36 | put("getinputstream", 2); 37 | put("getrequest", 2); 38 | put("getwriter", 2); 39 | put("gzinflate", 2); 40 | put("ini_set", 2); 41 | put("isnumeri", 2); 42 | put("md5", 1); 43 | put("phpinfo", 2); 44 | put("preg_replace", 2); 45 | put("println", 2); 46 | put("response", 1); 47 | put("servletactioncontext", 1); 48 | put("streamconnector", 1); 49 | put("system", 1); 50 | put("write", 1); 51 | put("methodaccessor", 1); 52 | put("_cookie", 1); 53 | put("_get", 1); 54 | put("_post", 1); 55 | put("_request", 1); 56 | put("_server", 1); 57 | }}; 58 | 59 | private static final HashMap<String, Integer> dangerousTokenScore = new HashMap<String, Integer>() { 60 | private static final long serialVersionUID = 4464369449273331205L; 61 | { 62 | put("assert", 1); 63 | put("allowstaticmethodaccess", 1); 64 | put("base64_decode", 2); 65 | put("create_function", 1); 66 | put("display_errors", 2); 67 | put("eval", 2); 68 | put("frombase64string", 2); 69 | put("getencoding", 2); 70 | put("getinputstream", 2); 71 | put("getrequest", 2); 72 | put("getwriter", 2); 73 | put("gzinflate", 2); 74 | put("ini_set", 2); 75 | put("isnumeri", 2); 76 | put("preg_replace", 2); 77 | put("response", 1); 78 | put("servletactioncontext", 1); 79 | put("streamconnector", 1); 80 | //put("try", 1); 81 | put("methodaccessor", 1); 82 | put("_cookie", 2); 83 | put("_get", 2); 84 | put("_post", 2); 85 | 
put("_request", 2); 86 | put("_server", 2); 87 | }}; 88 | public static int scoreTokens(String input) { 89 | 90 | HashSet<String> used = new HashSet<String>(); 91 | used.clear(); 92 | 93 | int score = 0; 94 | 95 | String[] tokens = input.split(separators); 96 | 97 | for (int i = 0; i < tokens.length; i++) { 98 | tokens[i] = tokens[i].toLowerCase(); 99 | if (!used.contains(tokens[i]) && tokenScore.containsKey(tokens[i])) { 100 | score += (Integer)tokenScore.get(tokens[i]); 101 | used.add(tokens[i]); 102 | } 103 | } 104 | 105 | // Next is merge score. 106 | int mergeScore = 0; 107 | String mergeString = String.join("", tokens); 108 | 109 | Iterator<Entry<String, Integer>> iter = dangerousTokenScore.entrySet().iterator(); 110 | while (iter.hasNext()) { 111 | Map.Entry entry = (Map.Entry) iter.next(); 112 | String key = (String)entry.getKey(); 113 | int val = (Integer)entry.getValue(); 114 | if (mergeString.indexOf(key) >= 0) { 115 | mergeScore += val; 116 | } 117 | } 118 | return Math.max(score, mergeScore); 119 | } 120 | 121 | public static int webShellScore(String input) { 122 | String decodedInput = new String(Base64.decode_base64(input.getBytes())); 123 | return Math.max(scoreTokens(input), scoreTokens(decodedInput)); 124 | } 125 | 126 | public static void main(String[] args) { 127 | 128 | String s = "2`3~4$1!5@6#7$8%9^8&7*6(5)4+2=1-2{3}4[5]6|7:8;9\"8<7>6'5,4.3?2/1\\0 1\n2\r3\t4"; 129 | String s1 = "mb=Response.Write(\"------>|\");var err:Exception;try{+++++++++++++++++++++++++eval++++++++++++++++++++(System.Text.Encoding.GetEncoding(936).GetString(System.Convert.FromBase64String(\"UmVzcG9uc2UuV3JpdGUoImhhb3JlbmdlLmNvbVFRMzE3Mjc1NzM4Iik7\")),\"unsafe\");}catch(err){Response.Write(\"ERROR:// \"+err.essage);}Response.Write(\"|<----\");Response.End();"; 130 | String s2 = 
"wysiwyg=1&subject=2014-3-12-783070+new+balance+sneakers+e.mq.txf.jsp&message=Now+that+the+wind+fairy+emperor+say%2C+equal+opportunity+for+everyone+to.+As+long+as+you+have+fairy+crystal%2C%5Burl%3Dhttp%3A%2F%2Fnewbalancesneakers1.snack.ws%2F%5Dnew+balance+sneakers%5B%2Furl%5D%2C+you+can+switch+to+the+best+training+resources%2C+or+top+fairy.+For+a+time+the+square+the+atmosphere+again+warm+up%2C%5Burl%3Dhttp%3A%2F%2Flouisvuittoniphone5case.snack.ws%2F%5DLouis+vuitton+iPhone+5+Case%5B%2Furl%5D%2C+many+have+a+large+number+of+fairy+crystal+of+a+person+already+in+the+itch+for+a+try%2C%5Burl%3Dhttp%3A%2F%2Fgucciipadminicase.snack.ws%2F%5DGucci+iPad+Mini+Case%5B%2Furl%5D%2C+some+of+them+even+start+together%2C+ready+to+be+fairy+crystal+together+to+buy+top+class+fairy.+That+the+wind+fairy+emperor+in+response+to+the+following+is+nodded+with+satisfaction%2C+and+continued%3A+%22please+one+thousand+one+hundred+to+participate+in+the+small+area+of+the+elite+stage......%22+With+that+wind+fairy+emperor+voice+down%2C+one+thousand+one+hundred+have+the+jade+big+Luo+Xian+went+on+stage.+Although+it+is+one+thousand+one+hundred+people%2C+but+in+the+huge+platform%2C+still+appear+some+empty.+The+thistle+Hun+stood+on+the+platform+of+God+consciousness+and+sweep%2C+she+soon+went+up%2C+she+did+not+see+brother+leaves.+Visible+brother+not+jade%2C+should+leave+Hong+Yuxian+city.+Before+she+was+to+protect+his%2C+just+know+ye+silent+leave+Hong+Yuxian+city+is+already+a+few+days.+%22Now+please+Dragon+River+emperor+said+into+the+small+area+rule%2C+at+the+same+time+about+what+people+can+not+enter+the+small+area.%22+That+the+wind+fairy+emperor+finished%2C+immediately+stand+aside.+As+one+of+the+Great+Dragon+River+four%2C%5Burl%3Dhttp%3A%2F%2Fsnk.to%2FmcB8%5Dnew+balance+tilbud%5B%2Furl%5D%2C+looks+very+ordinary%2C+or+even+a+lost+in+the+crowd+which+are+not+to+be+found+in+middle-aged+men.+He+walked+out%2C+smiled%2C+looked+very+nice.+But+here+all+know%2C+the+dragon+river+is+one+of+the+four+emperor%2C+he+be+a+sla
p+in+the+face+can+crush+dozens+of+sin+city%2C+and+even+a+domain+could+not+help+his+toss.+The+great+dragon+river+walk+out+just+smiled%2C+square+it+seems+everyone+felt+the+gentle+temperament+of+the+great+dragon+river.+%22Into+the+small+area+is+not+easy%2C+we+need+ten+fairy+emperor+also+open+void+gap%2C+it+can+barely+go%2C%5Burl%3Dhttp%3A%2F%2Fsnk.to%2FmGgY%5DGucci+iPad+Mini+Case%5B%2Furl%5D%2C+so+limited.+To+be+fair%2C+there+are+people+carrying+space+world%2C+please+stand+up%2C+otherwise+it+will+confiscate+directly+lose+access+to+qualified+jade.%22+Dragon+River%2C+words+like+hammers+generally+play+in+some+carry+small+world%2C+or+carry+the+rest+space+to+cheat+people.+Although+the+Dragon+River+emperor+said+very+tactful%2C%5Burl%3Dhttp%3A%2F%2Fnewbalancetilbud.snack.ws%2F%5Dnew+balance+tilbud%5B%2Furl%5D%2C+but+everyone+knows+the+meaning+of+dragon+river.+%28second+also+sent%2C+thank+gold+teeth+2013%2C+I+1230%2C+skdavid%2C+dust+tear+a+few+friends+continuous+million+dollars+ticket%21%29+%28to+be+continued...+The+first+seven+eight+four+chapters%29+into+small+areas+of+full+text+updates%2C+TXT+download%2C+as+in+the+novel+the+great+Knight+http%3A%2F%2Fwww.xs74.com%2F+dragon+river+in+table+one+thousand+one+hundred+big+Luo+Xian+swept+past%2C+then+calmly+said%3A+%22now+please+carry+space+world+people+hand+over+world+space+of+their+own%2C%5Burl%3Dhttp%3A%2F%2Fnewbalance576sko.snack.ws%2F%5Dnew+balance+576+Sko%5B%2Furl%5D%2C+also+line+up+through+the+artifact+under+the+door......%22+Unequal+Dragon+River+emperor+will+finish%2C+seven+or+eight+big+Luo+Xian+has+active+flying+down%2C%5Burl%3Dhttp%3A%2F%2Fsnk.to%2FmE0p%5DBurberry+iPhone+5s+Case%5B%2Furl%5D%2C+will+own+small+world+to+his+martial+art+in+people+or+acquaintance.+Love+Xian+Chun+Qu%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Fwww.988good.com%2Fbbs%2Fforum.php%3Fmod%3Dviewthread%26tid%3D288017%26fromuid%3D52970%5Dhttp%3A%2F%2Fwww.988good.com%2Fbbs%2Fforum.php%3Fmod%3Dviewthread%26tid%3D288017%26fromuid%3D52970%5B%2Furl%5D%0D%0A++%0D%0A
+++%5Burl%3Dhttp%3A%2F%2Fwww.fadianzhan.com%2Fbbs%2Fhome.php%3Fmod%3Dspace%26uid%3D22628%26do%3Dblog%26quickforward%3D1%26id%3D245050%5Dhttp%3A%2F%2Fwww.fadianzhan.com%2Fbbs%2Fhome.php%3Fmod%3Dspace%26uid%3D22628%26do%3Dblog%26quickforward%3D1%26id%3D245050%5B%2Furl%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Fwww.hongniangxiehui.com%2Fforum.php%3Fmod%3Dviewthread%26tid%3D180733%5Dhttp%3A%2F%2Fwww.hongniangxiehui.com%2Fforum.php%3Fmod%3Dviewthread%26tid%3D180733%5B%2Furl%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Fwww.cuplf.com%2Fthread-14818-1-1.html%5Dhttp%3A%2F%2Fwww.cuplf.com%2Fthread-14818-1-1.html%5B%2Furl%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Fwww.xiaoxuexiao.com%2Fforum.php%3Fmod%3Dviewthread%26tid%3D1749007%26fromuid%3D127389%5Dhttp%3A%2F%2Fwww.xiaoxuexiao.com%2Fforum.php%3Fmod%3Dviewthread%26tid%3D1749007%26fromuid%3D127389%5B%2Furl%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Figirl.chinatimes.com%2Fforum.php%3Fmod%3Dviewthread%26tid%3D61649%26extra%3D%5Dhttp%3A%2F%2Figirl.chinatimes.com%2Fforum.php%3Fmod%3Dviewthread%26tid%3D61649%26extra%3D%5B%2Furl%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Fbbxyky.tk%2Fthread-154997-1-1.html%5Dhttp%3A%2F%2Fbbxyky.tk%2Fthread-154997-1-1.html%5B%2Furl%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Fmoshanghua.66rt.com%2Fviewthread.php%3Ftid%3D10070%26extra%3D%5Dhttp%3A%2F%2Fmoshanghua.66rt.com%2Fviewthread.php%3Ftid%3D10070%26extra%3D%5B%2Furl%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Fn.800tuan.com%2Fbbs%2Fforum.php%3Fmod%3Dviewthread%26tid%3D76418%5Dhttp%3A%2F%2Fn.800tuan.com%2Fbbs%2Fforum.php%3Fmod%3Dviewthread%26tid%3D76418%5B%2Furl%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Fbbs1.imotor.com%2Fviewthread.php%3Ftid%3D276%26extra%3D%5Dhttp%3A%2F%2Fbbs1.imotor.com%2Fviewthread.php%3Ftid%3D276%26extra%3D%5B%2Furl%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Fforums.webcrow.jp%2Fforum.php%3Fmod%3Dviewthread%26tid%3D2120301%5Dhttp%3A%2F%2Fforums.webcrow.jp%2Fforum.php%3Fmod%3Dviewthread%26tid%3D2120301%5B%2Furl%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2
Fsuofeiya520.com%2Fbbs%2Fforum.php%3Fmod%3Dviewthread%26tid%3D1010284%5Dhttp%3A%2F%2Fsuofeiya520.com%2Fbbs%2Fforum.php%3Fmod%3Dviewthread%26tid%3D1010284%5B%2Furl%5D&save=&formhash=f50c6624&sortid=1&uploadalbum=-2&allownoticeauthor=1&addfeed=1&usesig=1&newalbum=%C7%EB%CA%E4%C8%EB%CF%E0%B2%E1%C3%FB%B3%C6&posttime=1394591880"; 131 | String[] r = s2.split(separators); 132 | for (int i = 0; i< r.length; i++) { 133 | if (tokenScore.containsKey(r[i]) && (Integer)tokenScore.get(r[i]) > 0) { 134 | System.out.println("[" + r[i] + "]"); 135 | } 136 | } 137 | 138 | System.out.println(1 + (Integer)tokenScore.get("eval")); 139 | try { 140 | //System.out.println(new String(Base64.getDecoder().decode("NzU3MjIyO0Bpbmlfc2V0KCJkaXNwbGF5X2Vycm9ycyIsIjAiKTtAc2V0X3RpbWVfbGltaXQoMCk7QHNldF9tYWdpY19xdW90ZXNfcnVudGltZSgwKTtlY2hvKCItPnwiKTs7ZWNobyBAZndyaXRlKGZvcGVuKGJhc2U2NF9kZWNvZGUoJF9QT1NUWyJ6MSJdKSwidyIpLGJhc2fjkdsalfjas8*&(*2NF9kZWNvZGUoJF9QT1NUWyJ6MiJdKSk/IjEiOiIwIjtlY2hvKCJ8PC0iKTs7ZGllKCk7"))); 141 | } catch (Exception e) { 142 | System.out.println("error"); 143 | } 144 | 145 | System.out.println("AbcDEF".toLowerCase()); 146 | String ts = ""; 147 | System.out.println(webShellScore(ts)); 148 | 149 | String ss[] = new String[]{"abc", "123", "*(*()"}; 150 | System.out.println(String.join("", ss)); 151 | 152 | Iterator iter = tokenScore.entrySet().iterator(); 153 | while (iter.hasNext()) { 154 | Map.Entry entry = (Map.Entry) iter.next(); 155 | String key = (String)entry.getKey(); 156 | int val = (Integer)entry.getValue(); 157 | System.out.println(key + " " + val); 158 | } 159 | 160 | ts = "array_map(\"ass\".\"ert\",array(\"ev\".\"Al(\\\"\\\\\\$xx%3D\\\\\\\"Ba\".\"SE6\".\"4_dEc\""; 161 | System.out.println(scoreTokens(ts)); 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /src/main/java/chaitin/webshell/parser/AspScore.java: -------------------------------------------------------------------------------- 1 | package 
chaitin.webshell.parser;

import java.util.*;

import chaitin.utils.Pair;


/**
 * Keyword-based scorer for ASP/VBScript-like payloads. The payload is scanned
 * for identifier-like runs and each run found in {@link #keyword} contributes
 * its weight, up to a per-keyword repeat limit.
 *
 * <p>An instance holds scan state; {@link #score()} is meant to be called once
 * per instance.
 */
public class AspScore {

    byte[] data;
    double score;
    boolean black;
    boolean white;
    int count_token;
    int count_par;
    int count_bracket;
    int count_brace;

    /** {@code ps}: start of the current identifier run, or -1; {@code p}: scan position. */
    int ps, p;

    /** How many times each keyword has been counted so far in this payload. */
    Map<String, Integer> existed_key = new HashMap<String, Integer>();

    /** Keyword -> (weight, maximum number of occurrences that are counted). */
    static Map<String, Pair<Double, Integer>> keyword = new HashMap<String, Pair<Double, Integer>>() {
        private static final long serialVersionUID = 8287097453483845897L;

        {
            put("eval", new Pair<Double, Integer>(1.2, 1));
            put("execute", new Pair<Double, Integer>(1.2, 1));
            put("request", new Pair<Double, Integer>(0.9, 1));
            put("response", new Pair<Double, Integer>(0.9, 1));
            put("exception", new Pair<Double, Integer>(0.8, 1));
            put("chr", new Pair<Double, Integer>(0.4, 4));
            put("write", new Pair<Double, Integer>(0.4, 1));
            put("function", new Pair<Double, Integer>(0.3, 1));
            put("server", new Pair<Double, Integer>(0.2, 1));
            put("settimeout", new Pair<Double, Integer>(0.5, 1));
            put("replace", new Pair<Double, Integer>(0.4, 1));
            put("len", new Pair<Double, Integer>(0.5, 1));
            //put("if", new Pair<Double, Integer>(0.2, 1));
            //put("else", new Pair<Double, Integer>(0.2, 1));
            put("on", new Pair<Double, Integer>(0.2, 1));
            put("error", new Pair<Double, Integer>(0.3, 1));
            put("resume", new Pair<Double, Integer>(0.6, 1));
            put("next", new Pair<Double, Integer>(0.3, 1));
            put("isnumeric", new Pair<Double, Integer>(0.8, 1));
            put("_memberaccess", new Pair<Double, Integer>(-1.0, 1));
            put("setaccessible", new Pair<Double, Integer>(-1.0, 1));
            put("getdeclaredfield", new Pair<Double, Integer>(-1.0, 1));
            put("allowstaticmethodaccess", new Pair<Double, Integer>(-1.0, 1));
        }};


    /** Creates a scorer over {@code data}; the array is referenced, not copied. */
    public AspScore(byte[] data) {
        this.data = data;
        ps = -1;
        p = 0;
        score = 0.0;
    }

    /**
     * Weight of the identifier run {@code data[ps, p)}: 0 if it is not a
     * keyword or its repeat limit is already reached, otherwise its weight
     * (and its occurrence count is incremented).
     */
    double keyword_score() {
        // Locale.ROOT keeps keyword matching independent of the JVM default
        // locale (Turkish 'I' lower-casing would miss e.g. "isnumeric").
        String s = new String(data, ps, p - ps).toLowerCase(Locale.ROOT);
        //System.out.println(s);

        Pair<Double, Integer> entry = keyword.get(s);
        if (entry == null) {
            return 0.0;
        }
        int seen = existed_key.getOrDefault(s, 0);
        if (seen >= entry.second) {
            return 0.0;
        }
        existed_key.put(s, seen + 1);
        return entry.first;
    }

    /** Whether {@code b} can be part of an identifier run: letter, digit or '_'. */
    boolean alphabet(byte b) {
        return Character.isLetter(b) || Character.isDigit(b) || b == '_';
    }

    /** Returns {@code input} with every '"' and '&amp;' character removed. */
    public static byte[] filter_strop (byte[] input) {
        String s = new String(input);
        s = s.replace("\"", "");
        s = s.replace("&", "");
        return s.getBytes();
    }

    /**
     * Scans the payload and returns the accumulated score. A payload starting
     * with '[' or '{' is penalised by 3 before scanning.
     */
    public double score() {
        if (data.length > 0 && (data[0] == '[' || data[0] == '{')) {
            score -= 3;
        }
        for (; p < data.length; p += 1) {
            boolean wordByte = alphabet(data[p]);
            if (wordByte && ps == -1) {
                // an identifier run begins here
                ps = p;
            } else if (!wordByte && ps != -1) {
                // the run ended just before p
                score += keyword_score();
                ps = -1;
            }
        }
        // A run still open at end of input is scored too.
        if (ps != -1) {
            score += keyword_score();
        }
        return score;
    }

    public static void main(String[] args) {
        System.out.println(new AspScore("a(b(c".getBytes()).score());
        System.out.println(new AspScore("eval($_GET['a']);".getBytes()).score());
        System.out.println(new AspScore("560648;@ini_set(\"display_errors\",\"0\");@set_time_limit(0);@set_magic_quotes_runtime(0);echo(\"->|\");;echo @fwrite(fopen(base64_decode($_POST[\"z1\"]),\"w\"),base64_decode($_POST[\"z2\"]))".getBytes()).score());

    }
}
--------------------------------------------------------------------------------
/src/main/java/chaitin/webshell/parser/AspTokenizer.rl:
--------------------------------------------------------------------------------
// -*- coding: utf-8 -*-
%%{
    machine php_lexer;
4 | 5 | ws = '\r' | '\n' | ' '; 6 | 7 | integer = ('+'|'-')?[0-9]+; 8 | 9 | main := 10 | |* 11 | integer => { emit(PhpTokenType.Integer, 0.1); }; 12 | ws; 13 | *| 14 | ; 15 | 16 | }%% 17 | 18 | package chaitin.webshell.parser; 19 | 20 | import java.util.*; 21 | 22 | 23 | public class PhpTokenizer { 24 | public enum PhpTokenType { 25 | Integer 26 | } 27 | 28 | public class PhpToken { 29 | public byte[] data; 30 | public PhpTokenType type; 31 | public double wight; 32 | 33 | public PhpToken(byte[] data, PhpTokenType type, double wight) { 34 | this.data = data; 35 | this.type = type; 36 | this.wight = wight; 37 | } 38 | 39 | @Override 40 | public String toString() { 41 | return new String(data) + wight; 42 | } 43 | } 44 | 45 | List<PhpToken> tokens; 46 | byte[] data; 47 | 48 | int p, pe, eof, te, ts, cs, act; 49 | 50 | %% write data; 51 | 52 | public PhpTokenizer(byte[] data) { 53 | tokens = new LinkedList<PhpToken>(); 54 | this.data = data; 55 | 56 | p = 0; 57 | pe = data.length; 58 | eof = pe; 59 | 60 | %% write init; 61 | } 62 | 63 | public void emit(PhpTokenType type, double weight) { 64 | tokens.add(new PhpToken(Arrays.copyOfRange(data, ts, te), type, weight)); 65 | } 66 | 67 | 68 | public Boolean tokenize() { 69 | %% write exec; 70 | 71 | System.out.println(tokens); 72 | return p == pe; 73 | } 74 | 75 | public static void main(String[] args) { 76 | new PhpTokenizer("123 tést = -100".getBytes()).tokenize(); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/main/java/chaitin/webshell/parser/PhpScore.java: -------------------------------------------------------------------------------- 1 | 2 | // line 1 "PhpScore.rl" 3 | // -*- coding: utf-8 -*- 4 | 5 | // line 54 "PhpScore.rl" 6 | 7 | 8 | package chaitin.webshell.parser; 9 | 10 | 11 | import chaitin.utils.Base64; 12 | import java.util.Arrays; 13 | import java.util.Map; 14 | import java.util.HashMap; 15 | import java.util.Set; 16 | import java.util.HashSet; 17 
| 18 | 19 | 20 | /* Scores a byte payload by lexing it with the Ragel-generated php_lexer machine and summing per-token weights. */ public class PhpScore { 21 | 22 | byte[] data; 23 | boolean black; 24 | boolean white; 25 | int count_token; 26 | int count_var; 27 | int count_svar; 28 | int count_cmt; 29 | int count_par; 30 | int count_bracket; 31 | int count_brace; 32 | 33 | /* keys of `keyword` already credited on this instance; each key is counted at most once */ Set<String> existed_key = new HashSet<String>(); 34 | 35 | /* Weight added to the score when a label token or string literal equals the key exactly; positive entries raise the score, negative entries lower it. */ static Map<String, Double> keyword = new HashMap<String, Double>() { 36 | private static final long serialVersionUID = 6899997024892413801L; 37 | { 38 | 39 | put("PHP_SELF", 1.6); 40 | put("_GET", 1.2); 41 | put("_POST", 1.6); 42 | put("_COOKIE", 1.5); 43 | put("_FILE", 1.2); put("_FILES", 1.2); /* "_FILES" is the actual PHP upload superglobal; "_FILE" is kept for backward compatibility */ 44 | put("_ENV", 1.2); 45 | put("_SESSION", 1.5); 46 | put("_REQUEST", 1.5); 47 | put("_SERVER", 1.6); 48 | put("array_map", 1.4); 49 | put("assert", 1.3); 50 | put("array_slice", 1.5); 51 | put("base64_decode", 2.0); 52 | put("base_convert", 1.7); 53 | put("edoced_46esab", 2.0); 54 | put("call_user_func", 1.0); 55 | put("call_user_func_array", 1.0); 56 | put("chr", 0.4); 57 | put("create_function", 0.8); 58 | put("curl_exec", 1.4); 59 | put("curl_multi_exec", 1.4); 60 | put("dirname", 1.0); 61 | put("echo", 1.0); 62 | put("error_reporting", 1.2); 63 | put("eval", 1.0); 64 | put("exec", 1.0); 65 | put("exit", 0.5); 66 | put("file_put_contents", 1.2); 67 | put("gzdecode", 1.2); 68 | put("implode", 1.5); 69 | //put("include", 0.9); 70 | put("include_once", 1.6); 71 | put("ini_set", 1.1); 72 | put("isset", 1.1); 73 | put("movefile", 1.2); 74 | put("ob_start", 1.5); 75 | put("parse_str", 1.5); 76 | put("passthru", 1.8); 77 | put("pcntl_exec", 1.7); 78 | put("phpinfo", 1.9); 79 | put("preg_replace", 1.5); /* was misspelled "prege_replace", which is not a PHP function name */ 80 | put("proc_open", 1.7); 81 | //put("require", 0.9); 82 | put("require_once", 1.6); 83 | put("session_start", 1.5); 84 | put("set_magic_quotes_runtime", 1.0); 85 | put("set_time_limit", 1.4); 86 | put("shell_exec", 1.4); 87 | put("str_rot13", 1.2); 88 | put("strrev", 1.1); 89 | put("system", 0.5); 90 | 91 | /* NOTE(review): keyword_score()/string_score() look keys up case-sensitively, so the all-lowercase negative keys below only match lowercase input - confirm that is intended */ 92 | put("_memberaccess", -10.0); 93 | put("allowstaticmethodaccess", 
-10.0); 94 | put("alert", -1.8); 95 | put("document", -1.8); 96 | put("fromcharcode", -10.0); 97 | put("getdeclaredfield", -10.0); 98 | put("parseint", -5.0); 99 | put("println", -4.0); 100 | put("prototype", -5.0); 101 | put("setaccessible", -10.0); 102 | put("string", -1.0); 103 | put("tostring", -4.0); 104 | }}; 105 | 106 | 107 | // line 108 "PhpScore.java" 108 | private static byte[] init__php_lexer_actions_0() 109 | { 110 | return new byte [] { 111 | 0, 1, 1, 1, 2, 1, 3, 1, 6, 1, 7, 1, 112 | 8, 1, 9, 1, 10, 1, 11, 1, 12, 1, 13, 1, 113 | 14, 1, 15, 1, 16, 1, 17, 1, 18, 1, 19, 1, 114 | 20, 1, 21, 1, 22, 1, 24, 1, 25, 1, 26, 1, 115 | 27, 1, 28, 1, 29, 1, 30, 1, 31, 1, 32, 1, 116 | 33, 1, 34, 2, 0, 23, 2, 3, 4, 2, 3, 5 117 | }; 118 | } 119 | 120 | private static final byte _php_lexer_actions[] = init__php_lexer_actions_0(); 121 | 122 | 123 | private static short[] init__php_lexer_key_offsets_0() 124 | { 125 | return new short [] { 126 | 0, 2, 2, 4, 6, 6, 17, 18, 19, 20, 21, 24, 127 | 26, 27, 28, 29, 30, 31, 35, 36, 37, 38, 39, 40, 128 | 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 129 | 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 130 | 66, 68, 74, 75, 76, 77, 114, 116, 117, 119, 121, 123, 131 | 134, 137, 137, 144, 148, 150, 155, 161, 163, 164, 171, 178, 132 | 185 133 | }; 134 | } 135 | 136 | private static final short _php_lexer_key_offsets[] = init__php_lexer_key_offsets_0(); 137 | 138 | 139 | private static char[] init__php_lexer_trans_keys_0() 140 | { 141 | return new char [] { 142 | 34, 92, 10, 13, 39, 92, 97, 98, 100, 102, 105, 111, 143 | 114, 115, 117, 0, 32, 114, 114, 97, 121, 41, 0, 32, 144 | 105, 111, 110, 97, 114, 111, 108, 41, 101, 0, 32, 97, 145 | 110, 111, 117, 98, 108, 101, 108, 111, 97, 116, 110, 98, 146 | 106, 101, 99, 101, 97, 108, 116, 114, 105, 110, 103, 110, 147 | 115, 101, 42, 47, 43, 45, 48, 57, 48, 57, 65, 70, 148 | 97, 102, 104, 112, 96, 33, 34, 35, 36, 39, 40, 41, 149 | 44, 47, 48, 60, 66, 79, 91, 93, 94, 96, 98, 111, 150 | 123, 
125, 0, 32, 37, 38, 42, 46, 49, 57, 58, 64, 151 | 65, 90, 95, 122, 124, 126, 0, 32, 61, 34, 92, 10, 152 | 13, 39, 92, 97, 98, 100, 102, 105, 111, 114, 115, 117, 153 | 0, 32, 42, 47, 61, 46, 69, 88, 101, 120, 48, 57, 154 | 69, 101, 48, 57, 48, 57, 46, 69, 101, 48, 57, 48, 155 | 57, 65, 70, 97, 102, 37, 63, 112, 95, 48, 57, 65, 156 | 90, 97, 122, 95, 48, 57, 65, 90, 97, 122, 95, 48, 157 | 57, 65, 90, 97, 122, 96, 0 158 | }; 159 | } 160 | 161 | private static final char _php_lexer_trans_keys[] = init__php_lexer_trans_keys_0(); 162 | 163 | 164 | private static byte[] init__php_lexer_single_lengths_0() 165 | { 166 | return new byte [] { 167 | 2, 0, 2, 2, 0, 9, 1, 1, 1, 1, 1, 2, 168 | 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 169 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 170 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 171 | 0, 0, 1, 1, 1, 21, 0, 1, 2, 2, 2, 9, 172 | 3, 0, 5, 2, 0, 3, 0, 2, 1, 1, 1, 1, 173 | 1 174 | }; 175 | } 176 | 177 | private static final byte _php_lexer_single_lengths[] = init__php_lexer_single_lengths_0(); 178 | 179 | 180 | private static byte[] init__php_lexer_range_lengths_0() 181 | { 182 | return new byte [] { 183 | 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 184 | 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 185 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 186 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 187 | 1, 3, 0, 0, 0, 8, 1, 0, 0, 0, 0, 1, 188 | 0, 0, 1, 1, 1, 1, 3, 0, 0, 3, 3, 3, 189 | 0 190 | }; 191 | } 192 | 193 | private static final byte _php_lexer_range_lengths[] = init__php_lexer_range_lengths_0(); 194 | 195 | 196 | private static short[] init__php_lexer_index_offsets_0() 197 | { 198 | return new short [] { 199 | 0, 3, 4, 7, 10, 11, 22, 24, 26, 28, 30, 33, 200 | 36, 38, 40, 42, 44, 46, 50, 52, 54, 56, 58, 60, 201 | 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 202 | 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 203 | 111, 113, 117, 119, 121, 123, 153, 155, 157, 160, 163, 166, 204 | 177, 181, 182, 189, 193, 195, 200, 204, 207, 209, 214, 219, 205 | 224 206 | }; 207 | } 
208 | 209 | private static final short _php_lexer_index_offsets[] = init__php_lexer_index_offsets_0(); 210 | 211 | 212 | private static byte[] init__php_lexer_indicies_0() 213 | { 214 | return new byte [] { 215 | 2, 3, 1, 1, 6, 6, 5, 2, 8, 7, 7, 11, 216 | 12, 13, 14, 15, 16, 17, 18, 19, 10, 9, 20, 9, 217 | 21, 9, 22, 9, 23, 9, 24, 23, 9, 25, 26, 9, 218 | 27, 9, 28, 9, 22, 9, 29, 9, 30, 9, 24, 31, 219 | 23, 9, 32, 9, 23, 9, 33, 9, 34, 9, 35, 9, 220 | 36, 9, 23, 9, 37, 9, 38, 9, 39, 9, 23, 9, 221 | 39, 9, 40, 9, 41, 9, 42, 9, 39, 9, 43, 9, 222 | 44, 9, 23, 9, 45, 9, 46, 9, 47, 9, 48, 9, 223 | 23, 9, 49, 9, 50, 9, 39, 9, 53, 52, 54, 52, 224 | 56, 56, 55, 57, 55, 58, 58, 58, 55, 60, 59, 61, 225 | 59, 62, 0, 64, 65, 66, 67, 69, 70, 71, 68, 72, 226 | 73, 75, 77, 78, 79, 81, 68, 82, 77, 78, 83, 84, 227 | 63, 68, 64, 74, 68, 76, 76, 68, 80, 63, 85, 87, 228 | 86, 2, 3, 1, 6, 6, 5, 2, 8, 7, 11, 12, 229 | 13, 14, 15, 16, 17, 18, 19, 10, 89, 52, 5, 87, 230 | 86, 90, 92, 93, 94, 93, 94, 74, 91, 93, 93, 92, 231 | 91, 57, 91, 92, 93, 93, 74, 91, 58, 58, 58, 91, 232 | 95, 96, 86, 98, 97, 76, 76, 76, 76, 99, 76, 76, 233 | 76, 76, 99, 76, 76, 76, 76, 99, 88, 100, 0 234 | }; 235 | } 236 | 237 | private static final byte _php_lexer_indicies[] = init__php_lexer_indicies_0(); 238 | 239 | 240 | private static byte[] init__php_lexer_trans_targs_0() 241 | { 242 | return new byte [] { 243 | 53, 0, 53, 1, 53, 2, 53, 3, 4, 53, 5, 6, 244 | 11, 20, 25, 29, 30, 34, 37, 42, 7, 8, 9, 10, 245 | 53, 12, 15, 13, 14, 16, 17, 18, 19, 21, 22, 23, 246 | 24, 26, 27, 28, 31, 32, 33, 35, 36, 38, 39, 40, 247 | 41, 43, 44, 53, 45, 46, 61, 53, 48, 64, 66, 53, 248 | 51, 53, 53, 54, 55, 56, 57, 53, 53, 58, 59, 53, 249 | 60, 62, 65, 67, 69, 70, 71, 53, 53, 53, 72, 53, 250 | 53, 53, 53, 53, 53, 53, 53, 53, 63, 47, 49, 53, 251 | 68, 53, 50, 53, 52 252 | }; 253 | } 254 | 255 | private static final byte _php_lexer_trans_targs[] = init__php_lexer_trans_targs_0(); 256 | 257 | 258 | private static byte[] 
init__php_lexer_trans_actions_0() 259 | { 260 | return new byte [] { 261 | 59, 0, 7, 0, 61, 0, 11, 0, 0, 53, 0, 0, 262 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 263 | 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 264 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 265 | 0, 0, 0, 57, 0, 0, 0, 51, 0, 0, 0, 55, 266 | 0, 27, 9, 0, 0, 5, 69, 33, 31, 5, 5, 13, 267 | 66, 5, 5, 0, 0, 0, 0, 15, 35, 17, 5, 19, 268 | 21, 47, 45, 23, 49, 41, 63, 39, 5, 0, 0, 29, 269 | 5, 43, 0, 37, 0 270 | }; 271 | } 272 | 273 | private static final byte _php_lexer_trans_actions[] = init__php_lexer_trans_actions_0(); 274 | 275 | 276 | private static byte[] init__php_lexer_to_state_actions_0() 277 | { 278 | return new byte [] { 279 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 280 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 281 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 282 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 283 | 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 284 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 285 | 0 286 | }; 287 | } 288 | 289 | private static final byte _php_lexer_to_state_actions[] = init__php_lexer_to_state_actions_0(); 290 | 291 | 292 | private static byte[] init__php_lexer_from_state_actions_0() 293 | { 294 | return new byte [] { 295 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 296 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 297 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 298 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 299 | 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 300 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 301 | 0 302 | }; 303 | } 304 | 305 | private static final byte _php_lexer_from_state_actions[] = init__php_lexer_from_state_actions_0(); 306 | 307 | 308 | private static short[] init__php_lexer_eof_trans_0() 309 | { 310 | return new short [] { 311 | 1, 1, 5, 1, 1, 10, 10, 10, 10, 10, 10, 10, 312 | 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 313 | 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 314 | 10, 10, 10, 10, 10, 10, 10, 10, 10, 52, 52, 56, 315 | 56, 56, 60, 60, 1, 0, 86, 87, 89, 89, 89, 90, 316 | 87, 91, 92, 92, 92, 92, 92, 87, 98, 100, 100, 100, 
317 | 89 318 | }; 319 | } 320 | 321 | private static final short _php_lexer_eof_trans[] = init__php_lexer_eof_trans_0(); 322 | 323 | 324 | static final int php_lexer_start = 53; 325 | static final int php_lexer_first_final = 53; 326 | static final int php_lexer_error = -1; 327 | 328 | static final int php_lexer_en_main = 53; 329 | 330 | 331 | // line 155 "PhpScore.rl" 332 | 333 | int p, pe, eof; 334 | int te, ts, cs, act; 335 | 336 | 337 | public PhpScore(byte[] data) { 338 | this.data = data; 339 | 340 | black = false; 341 | white = false; 342 | count_cmt = 0; 343 | count_var = 0; 344 | count_svar = 0; 345 | count_par = 0; 346 | count_token = 0; 347 | count_bracket = 0; 348 | count_brace = 0; 349 | 350 | } 351 | 352 | double string_score() { 353 | byte[] sb = Arrays.copyOfRange(data, ts + 1, te - 1); 354 | String s = new String(sb); 355 | //System.out.println(s); 356 | if (keyword.containsKey(s) && !existed_key.contains(s)) { 357 | existed_key.add(s); 358 | return keyword.get(s); 359 | } 360 | if (s.length() >= 10) { 361 | byte[] bsb = Base64.decode_base64(Base64.longest_sub_base64(sb)); 362 | double s1 = new PhpScore(bsb).score(); 363 | double s2 = new PhpScore(sb).score(); 364 | s1 = s1 > s2 ? s1 : s2; 365 | s1 = s1 > 0 ? s1 : 0; 366 | return s1; 367 | } 368 | return 0.0; 369 | } 370 | 371 | double keyword_score() { 372 | byte[] sb = Arrays.copyOfRange(data, ts, te); 373 | String s = new String(sb); 374 | //System.out.println(s); 375 | if (keyword.containsKey(s) && !existed_key.contains(s)) { 376 | existed_key.add(s); 377 | return keyword.get(s); 378 | } 379 | byte[] bsb = Base64.decode_base64(Base64.longest_sub_base64(sb)); 380 | double s1 = new PhpScore(bsb).score(); 381 | return s1 > 0.0 ? 
s1 : 0.0; 382 | } 383 | 384 | 385 | double tokenize() { 386 | p = 0; 387 | pe = data.length; 388 | eof = pe; 389 | double score = 0.0; 390 | 391 | // line 392 "PhpScore.java" 392 | { 393 | cs = php_lexer_start; 394 | ts = -1; 395 | te = -1; 396 | act = 0; 397 | } 398 | 399 | // line 214 "PhpScore.rl" 400 | 401 | // line 402 "PhpScore.java" 402 | { 403 | int _klen; 404 | int _trans = 0; 405 | int _acts; 406 | int _nacts; 407 | int _keys; 408 | int _goto_targ = 0; 409 | 410 | _goto: while (true) { 411 | switch ( _goto_targ ) { 412 | case 0: 413 | if ( p == pe ) { 414 | _goto_targ = 4; 415 | continue _goto; 416 | } 417 | case 1: 418 | _acts = _php_lexer_from_state_actions[cs]; 419 | _nacts = (int) _php_lexer_actions[_acts++]; 420 | while ( _nacts-- > 0 ) { 421 | switch ( _php_lexer_actions[_acts++] ) { 422 | case 2: 423 | // line 1 "NONE" 424 | {ts = p;} 425 | break; 426 | // line 427 "PhpScore.java" 427 | } 428 | } 429 | 430 | _match: do { 431 | _keys = _php_lexer_key_offsets[cs]; 432 | _trans = _php_lexer_index_offsets[cs]; 433 | _klen = _php_lexer_single_lengths[cs]; 434 | if ( _klen > 0 ) { 435 | int _lower = _keys; 436 | int _mid; 437 | int _upper = _keys + _klen - 1; 438 | while (true) { 439 | if ( _upper < _lower ) 440 | break; 441 | 442 | _mid = _lower + ((_upper-_lower) >> 1); 443 | if ( data[p] < _php_lexer_trans_keys[_mid] ) 444 | _upper = _mid - 1; 445 | else if ( data[p] > _php_lexer_trans_keys[_mid] ) 446 | _lower = _mid + 1; 447 | else { 448 | _trans += (_mid - _keys); 449 | break _match; 450 | } 451 | } 452 | _keys += _klen; 453 | _trans += _klen; 454 | } 455 | 456 | _klen = _php_lexer_range_lengths[cs]; 457 | if ( _klen > 0 ) { 458 | int _lower = _keys; 459 | int _mid; 460 | int _upper = _keys + (_klen<<1) - 2; 461 | while (true) { 462 | if ( _upper < _lower ) 463 | break; 464 | 465 | _mid = _lower + (((_upper-_lower) >> 1) & ~1); 466 | if ( data[p] < _php_lexer_trans_keys[_mid] ) 467 | _upper = _mid - 2; 468 | else if ( data[p] > 
_php_lexer_trans_keys[_mid+1] ) 469 | _lower = _mid + 2; 470 | else { 471 | _trans += ((_mid - _keys)>>1); 472 | break _match; 473 | } 474 | } 475 | _trans += _klen; 476 | } 477 | } while (false); 478 | 479 | _trans = _php_lexer_indicies[_trans]; 480 | case 3: 481 | cs = _php_lexer_trans_targs[_trans]; 482 | 483 | if ( _php_lexer_trans_actions[_trans] != 0 ) { 484 | _acts = _php_lexer_trans_actions[_trans]; 485 | _nacts = (int) _php_lexer_actions[_acts++]; 486 | while ( _nacts-- > 0 ) 487 | { 488 | switch ( _php_lexer_actions[_acts++] ) 489 | { 490 | case 0: 491 | // line 23 "PhpScore.rl" 492 | { count_cmt += 1; } 493 | break; 494 | case 3: 495 | // line 1 "NONE" 496 | {te = p+1;} 497 | break; 498 | case 4: 499 | // line 48 "PhpScore.rl" 500 | {act = 17;} 501 | break; 502 | case 5: 503 | // line 51 "PhpScore.rl" 504 | {act = 20;} 505 | break; 506 | case 6: 507 | // line 32 "PhpScore.rl" 508 | {te = p+1;{ score += string_score(); }} 509 | break; 510 | case 7: 511 | // line 33 "PhpScore.rl" 512 | {te = p+1;{ score += 0.5 + string_score(); }} 513 | break; 514 | case 8: 515 | // line 34 "PhpScore.rl" 516 | {te = p+1;} 517 | break; 518 | case 9: 519 | // line 36 "PhpScore.rl" 520 | {te = p+1;{ if (count_par > 0) { count_par -= 1; } else { black = true; } }} 521 | break; 522 | case 10: 523 | // line 37 "PhpScore.rl" 524 | {te = p+1;{ count_bracket += 1; }} 525 | break; 526 | case 11: 527 | // line 38 "PhpScore.rl" 528 | {te = p+1;{ if (count_bracket > 0) { count_bracket -= 1; } else { black = true; } }} 529 | break; 530 | case 12: 531 | // line 39 "PhpScore.rl" 532 | {te = p+1;{ count_brace += 1; }} 533 | break; 534 | case 13: 535 | // line 40 "PhpScore.rl" 536 | {te = p+1;{ if (count_brace > 0) { count_brace -= 1; } else { black = true; } }} 537 | break; 538 | case 14: 539 | // line 41 "PhpScore.rl" 540 | {te = p+1;{ /*score += 0.1;*/ }} 541 | break; 542 | case 15: 543 | // line 44 "PhpScore.rl" 544 | {te = p+1;{ score += 1.5; }} 545 | break; 546 | case 16: 547 | // 
line 45 "PhpScore.rl" 548 | {te = p+1;{ score -= 999.0; }} 549 | break; 550 | case 17: 551 | // line 47 "PhpScore.rl" 552 | {te = p+1;{ score -= 999.0; }} 553 | break; 554 | case 18: 555 | // line 48 "PhpScore.rl" 556 | {te = p+1;} 557 | break; 558 | case 19: 559 | // line 49 "PhpScore.rl" 560 | {te = p+1;{ /*score += 0.1;*/ }} 561 | break; 562 | case 20: 563 | // line 51 "PhpScore.rl" 564 | {te = p+1;{ score -= 1; }} 565 | break; 566 | case 21: 567 | // line 30 "PhpScore.rl" 568 | {te = p;p--;{ score += keyword_score(); }} 569 | break; 570 | case 22: 571 | // line 31 "PhpScore.rl" 572 | {te = p;p--;} 573 | break; 574 | case 23: 575 | // line 34 "PhpScore.rl" 576 | {te = p;p--;} 577 | break; 578 | case 24: 579 | // line 35 "PhpScore.rl" 580 | {te = p;p--;{ count_par += 1; }} 581 | break; 582 | case 25: 583 | // line 46 "PhpScore.rl" 584 | {te = p;p--;{ score -= 999.0; }} 585 | break; 586 | case 26: 587 | // line 48 "PhpScore.rl" 588 | {te = p;p--;} 589 | break; 590 | case 27: 591 | // line 50 "PhpScore.rl" 592 | {te = p;p--;} 593 | break; 594 | case 28: 595 | // line 51 "PhpScore.rl" 596 | {te = p;p--;{ score -= 1; }} 597 | break; 598 | case 29: 599 | // line 31 "PhpScore.rl" 600 | {{p = ((te))-1;}} 601 | break; 602 | case 30: 603 | // line 35 "PhpScore.rl" 604 | {{p = ((te))-1;}{ count_par += 1; }} 605 | break; 606 | case 31: 607 | // line 46 "PhpScore.rl" 608 | {{p = ((te))-1;}{ score -= 999.0; }} 609 | break; 610 | case 32: 611 | // line 48 "PhpScore.rl" 612 | {{p = ((te))-1;}} 613 | break; 614 | case 33: 615 | // line 51 "PhpScore.rl" 616 | {{p = ((te))-1;}{ score -= 1; }} 617 | break; 618 | case 34: 619 | // line 1 "NONE" 620 | { switch( act ) { 621 | case 20: 622 | {{p = ((te))-1;} score -= 1; } 623 | break; 624 | default: 625 | {{p = ((te))-1;}} 626 | break; 627 | } 628 | } 629 | break; 630 | // line 631 "PhpScore.java" 631 | } 632 | } 633 | } 634 | 635 | case 2: 636 | _acts = _php_lexer_to_state_actions[cs]; 637 | _nacts = (int) _php_lexer_actions[_acts++]; 
638 | while ( _nacts-- > 0 ) { 639 | switch ( _php_lexer_actions[_acts++] ) { 640 | case 1: 641 | // line 1 "NONE" 642 | {ts = -1;} 643 | break; 644 | // line 645 "PhpScore.java" 645 | } 646 | } 647 | 648 | if ( ++p != pe ) { 649 | _goto_targ = 1; 650 | continue _goto; 651 | } 652 | case 4: 653 | if ( p == eof ) 654 | { 655 | if ( _php_lexer_eof_trans[cs] > 0 ) { 656 | _trans = _php_lexer_eof_trans[cs] - 1; 657 | _goto_targ = 3; 658 | continue _goto; 659 | } 660 | } 661 | 662 | case 5: 663 | } 664 | break; } 665 | } 666 | 667 | // line 215 "PhpScore.rl" 668 | return score; 669 | } 670 | 671 | public double score() { 672 | double score = 0.0; 673 | if (data.length > 0 && (data[0] == '[' || data[0] == '{')) { 674 | score -= 3; 675 | } 676 | 677 | score += tokenize(); 678 | 679 | score += count_cmt > 0 ? 1.0 : 0; 680 | score += count_svar * 0.5; 681 | score += count_var > 0 ? 0.6 : 0; 682 | black |= p != pe; 683 | //if (black) { 684 | // score = 0.0; 685 | //} 686 | return score; 687 | } 688 | 689 | public static void main(String[] args) { 690 | System.out.println(new PhpScore("a(b(c".getBytes()).score()); 691 | System.out.println(new PhpScore("eval($_GET['a']);".getBytes()).score()); 692 | System.out.println(new PhpScore("560648;@ini_set(\"display_errors\",\"0\");@set_time_limit(0);@set_magic_quotes_runtime(0);echo(\"->|\");;echo @fwrite(fopen(base64_decode($_POST[\"z1\"]),\"w\"),base64_decode($_POST[\"z2\"]))".getBytes()).score()); 693 | 694 | } 695 | } 696 | -------------------------------------------------------------------------------- /src/main/java/chaitin/webshell/parser/PhpScore.rl: -------------------------------------------------------------------------------- 1 | // -*- coding: utf-8 -*- 2 | %%{ 3 | machine php_lexer; 4 | 5 | ws = '\r' | '\n' | ' ' | '\t' | (0x00 .. 0x1F); 6 | label = ('_' | alpha) ('_' | alnum)*; 7 | number = ('0' 8 | ('x'i xdigit+) 9 | | ('b'i ('0' .. '1')) 10 | | ('o'i ('0' .. '7'))) 11 | | (digit+ ('.' digit*)? 
('e'i ('+' | '-') digit+)? 12 | ); 13 | string = ( 14 | ( "'" ( ( any - '\\' - "'" ) | ( '\\' any ) )* "'" ) | 15 | ( '"' ( ( any - '\\' - '"' ) | ( '\\' any ) )* '"' ) 16 | ); 17 | backtick_string = '`' (any - '`') '`'; 18 | comment = ( 19 | ( 20 | ( "//" | '#' ) ( any - '\r' - '\n')* ( '\r' | '\n' ) 21 | ) | 22 | ( 23 | ("/*" ( ( any - '*' ) | ( '*' ( any - '/' ) ) )* "*/") % { count_cmt += 1; } 24 | ) 25 | ); 26 | value_cast = '(' ws* ('int' | 'real' | 'double' | 'float' | 'string' | 'binary' | 'array' | 'object' | 'boolean' | 'bool' | 'unset') ws* ')'; 27 | 28 | 29 | main := |* 30 | label => { score += keyword_score(); }; 31 | number; 32 | string => { score += string_score(); }; 33 | backtick_string => { score += 0.5 + string_score(); }; 34 | comment; 35 | '(' => { count_par += 1; }; 36 | ')' => { if (count_par > 0) { count_par -= 1; } else { black = true; } }; 37 | '[' => { count_bracket += 1; }; 38 | ']' => { if (count_bracket > 0) { count_bracket -= 1; } else { black = true; } }; 39 | '{' => { count_brace += 1; }; 40 | '}' => { if (count_brace > 0) { count_brace -= 1; } else { black = true; } }; 41 | ('.=' | '+=' | '-=' | '*=' | '/=' | '!=') => { /*score += 0.1;*/ }; 42 | #'$' label => { count_var += 1; }; 43 | #'${' label '}' => { count_svar += 1; }; 44 | value_cast => { score += 1.5; }; 45 | '<?php' => { score -= 999.0; }; 46 | '<?' => { score -= 999.0; }; 47 | '<%' => { score -= 999.0; }; 48 | ('=' | ',' | '%' | '+' | '-' | '*' | '/' | ';' | '?' | ':' | '!' | '.' 
| '&' | '|' | '^' | '~' | '<' | '>' | '@'); 49 | '$' => { /*score += 0.1;*/ }; 50 | ws+; 51 | any => { score -= 1; }; 52 | *|; 53 | 54 | }%% 55 | 56 | package chaitin.webshell.parser; 57 | 58 | 59 | import chaitin.utils.Base64; 60 | import java.util.Arrays; 61 | import java.util.Map; 62 | import java.util.HashMap; 63 | import java.util.Set; 64 | import java.util.HashSet; 65 | 66 | 67 | 68 | /* Scores a byte payload by lexing it with the php_lexer machine defined above and summing per-token weights. */ public class PhpScore { 69 | 70 | byte[] data; 71 | boolean black; 72 | boolean white; 73 | int count_token; 74 | int count_var; 75 | int count_svar; 76 | int count_cmt; 77 | int count_par; 78 | int count_bracket; 79 | int count_brace; 80 | 81 | /* keys of `keyword` already credited on this instance; each key is counted at most once */ Set<String> existed_key = new HashSet<String>(); 82 | 83 | /* Weight added to the score when a label token or string literal equals the key exactly; positive entries raise the score, negative entries lower it. */ static Map<String, Double> keyword = new HashMap<String, Double>() { 84 | private static final long serialVersionUID = 6899997024892413801L; 85 | { 86 | 87 | put("PHP_SELF", 1.6); 88 | put("_GET", 1.2); 89 | put("_POST", 1.6); 90 | put("_COOKIE", 1.5); 91 | put("_FILE", 1.2); put("_FILES", 1.2); /* "_FILES" is the actual PHP upload superglobal; "_FILE" is kept for backward compatibility */ 92 | put("_ENV", 1.2); 93 | put("_SESSION", 1.5); 94 | put("_REQUEST", 1.5); 95 | put("_SERVER", 1.6); 96 | put("array_map", 1.4); 97 | put("assert", 1.3); 98 | put("array_slice", 1.5); 99 | put("base64_decode", 2.0); 100 | put("base_convert", 1.7); 101 | put("edoced_46esab", 2.0); 102 | put("call_user_func", 1.0); 103 | put("call_user_func_array", 1.0); 104 | put("chr", 0.4); 105 | put("create_function", 0.8); 106 | put("curl_exec", 1.4); 107 | put("curl_multi_exec", 1.4); 108 | put("dirname", 1.0); 109 | put("echo", 1.0); 110 | put("error_reporting", 1.2); 111 | put("eval", 1.0); 112 | put("exec", 1.0); 113 | put("exit", 0.5); 114 | put("file_put_contents", 1.2); 115 | put("gzdecode", 1.2); 116 | put("implode", 1.5); 117 | //put("include", 0.9); 118 | put("include_once", 1.6); 119 | put("ini_set", 1.1); 120 | put("isset", 1.1); 121 | put("movefile", 1.2); 122 | put("ob_start", 1.5); 123 | put("parse_str", 1.5); 124 | put("passthru", 1.8); 125 | put("pcntl_exec", 1.7); 126 | put("phpinfo", 1.9); 127 | 
put("preg_replace", 1.5); /* was misspelled "prege_replace", which is not a PHP function name */ 128 | put("proc_open", 1.7); 129 | //put("require", 0.9); 130 | put("require_once", 1.6); 131 | put("session_start", 1.5); 132 | put("set_magic_quotes_runtime", 1.0); 133 | put("set_time_limit", 1.4); 134 | put("shell_exec", 1.4); 135 | put("str_rot13", 1.2); 136 | put("strrev", 1.1); 137 | put("system", 0.5); 138 | 139 | /* NOTE(review): keyword_score()/string_score() look keys up case-sensitively, so the all-lowercase negative keys below only match lowercase input - confirm that is intended */ 140 | put("_memberaccess", -10.0); 141 | put("allowstaticmethodaccess", -10.0); 142 | put("alert", -1.8); 143 | put("document", -1.8); 144 | put("fromcharcode", -10.0); 145 | put("getdeclaredfield", -10.0); 146 | put("parseint", -5.0); 147 | put("println", -4.0); 148 | put("prototype", -5.0); 149 | put("setaccessible", -10.0); 150 | put("string", -1.0); 151 | put("tostring", -4.0); 152 | }}; 153 | 154 | %% write data; 155 | 156 | int p, pe, eof; 157 | int te, ts, cs, act; 158 | 159 | 160 | /* Wraps the payload to score and clears all flags and counters. */ public PhpScore(byte[] data) { 161 | this.data = data; 162 | 163 | black = false; 164 | white = false; 165 | count_cmt = 0; 166 | count_var = 0; 167 | count_svar = 0; 168 | count_par = 0; 169 | count_token = 0; 170 | count_bracket = 0; 171 | count_brace = 0; 172 | 173 | } 174 | 175 | /* Scores the quoted literal data[ts+1..te-1): its keyword weight on first sight; otherwise, for contents of 10 or more chars, the larger of the nested scores of the raw contents and of their longest base64 run decoded, floored at 0. */ double string_score() { 176 | byte[] sb = Arrays.copyOfRange(data, ts + 1, te - 1); 177 | String s = new String(sb); 178 | //System.out.println(s); 179 | if (keyword.containsKey(s) && !existed_key.contains(s)) { 180 | existed_key.add(s); 181 | return keyword.get(s); 182 | } 183 | if (s.length() >= 10) { 184 | byte[] bsb = Base64.decode_base64(Base64.longest_sub_base64(sb)); 185 | double s1 = new PhpScore(bsb).score(); 186 | double s2 = new PhpScore(sb).score(); 187 | s1 = s1 > s2 ? s1 : s2; 188 | s1 = s1 > 0 ? 
s1 : 0; 189 | return s1; 190 | } 191 | return 0.0; 192 | } 193 | 194 | double keyword_score() { 195 | byte[] sb = Arrays.copyOfRange(data, ts, te); 196 | String s = new String(sb); 197 | //System.out.println(s); 198 | if (keyword.containsKey(s) && !existed_key.contains(s)) { 199 | existed_key.add(s); 200 | return keyword.get(s); 201 | } 202 | byte[] bsb = Base64.decode_base64(Base64.longest_sub_base64(sb)); 203 | double s1 = new PhpScore(bsb).score(); 204 | return s1 > 0.0 ? s1 : 0.0; 205 | } 206 | 207 | 208 | double tokenize() { 209 | p = 0; 210 | pe = data.length; 211 | eof = pe; 212 | double score = 0.0; 213 | %% write init; 214 | %% write exec; 215 | return score; 216 | } 217 | 218 | public double score() { 219 | double score = 0.0; 220 | if (data.length > 0 && (data[0] == '[' || data[0] == '{')) { 221 | score -= 3; 222 | } 223 | 224 | score += tokenize(); 225 | 226 | score += count_cmt > 0 ? 1.0 : 0; 227 | score += count_svar * 0.5; 228 | score += count_var > 0 ? 0.6 : 0; 229 | black |= p != pe; 230 | //if (black) { 231 | // score = 0.0; 232 | //} 233 | return score; 234 | } 235 | 236 | public static void main(String[] args) { 237 | System.out.println(new PhpScore("a(b(c".getBytes()).score()); 238 | System.out.println(new PhpScore("eval($_GET['a']);".getBytes()).score()); 239 | System.out.println(new PhpScore("560648;@ini_set(\"display_errors\",\"0\");@set_time_limit(0);@set_magic_quotes_runtime(0);echo(\"->|\");;echo @fwrite(fopen(base64_decode($_POST[\"z1\"]),\"w\"),base64_decode($_POST[\"z2\"]))".getBytes()).score()); 240 | 241 | } 242 | } 243 | -------------------------------------------------------------------------------- /src/main/resources/META-INF/base.mapred.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <mapred> 3 | <!-- profile --> 4 | <baseId>1</baseId> 5 | <projectId>1</projectId> 6 | <resourceName>webshell</resourceName> 7 | <idePath>workflow/myfolder</idePath> 
8 | 9 | <!-- classes --> 10 | <jobLauncher>chaitin.webshell.JobLauncher</jobLauncher> 11 | <mapper>chaitin.webshell.MyMapper</mapper> 12 | <reducer>chaitin.webshell.MyReducer</reducer> 13 | <combiner>chaitin.webshell.MyCombiner</combiner> 14 | 15 | <!--task--> 16 | <mapOutputKey>word:string</mapOutputKey> 17 | <mapOutputValue>cnt:bigint</mapOutputValue> 18 | <!-- 19 | <partitionColumns>col1,col2</partitionColumns> 20 | <outputKeySortColumns>col1,col2</outputKeySortColumns> 21 | <outputKeySortOrders>ASC,DESC</outputKeySortOrders> 22 | <outputGroupingColumns>col1,col2</outputGroupingColumns> 23 | <numReduceTask>8</numReduceTask> 24 | <memoryForMapTask>2048</memoryForMapTask> 25 | <memoryForReduceTask>2048</memoryForReduceTask> 26 | --> 27 | 28 | <!-- tables --> 29 | <inputTables> 30 | <table> 31 | <name>wordcount_in</name> 32 | <partitions> 33 | <partition>dt={yyyymmdd}</partition> 34 | <partition>dt={yyyymmdd-1}</partition> 35 | </partitions> 36 | </table> 37 | </inputTables> 38 | <outputTable> 39 | <name>wordcount_out</name> 40 | <partition>dt={yyyymmdd}</partition> 41 | </outputTable> 42 | </mapred> 43 | -------------------------------------------------------------------------------- /src/main/resources/credential.properties: -------------------------------------------------------------------------------- 1 | base_id=1 2 | project_id=1 3 | token=xxx -------------------------------------------------------------------------------- /src/test/java/chaitin/test/phishing/ScoreDomainTest.java: -------------------------------------------------------------------------------- 1 | package chaitin.test.phishing; 2 | 3 | public class ScoreDomainTest { 4 | 5 | } 6 | --------------------------------------------------------------------------------