├── .classpath
├── .gitignore
├── .project
├── .settings
├── org.eclipse.core.resources.prefs
├── org.eclipse.jdt.core.prefs
└── org.eclipse.m2e.core.prefs
├── README.md
├── lib
├── bouncycastle.provider-1.38-jdk15.jar
├── commons-cli-1.2.jar
├── commons-codec-1.9.jar
├── commons-collections-3.2.1.jar
├── commons-digester3-3.2.jar
├── commons-io-2.4.jar
├── commons-lang-2.4.jar
├── commons-logging-1.1.1.jar
├── gson-2.2.4.jar
├── guava-15.0.jar
├── jackson-core-asl-1.9.13.jar
├── jackson-mapper-asl-1.9.13.jar
├── javacsv-2.0.jar
├── jsoup.jar
├── junit-4.4.jar
├── odps-mapred-bridge-0.14.0-rc1.jar
├── odps-mapred-local-0.14.0-rc1.jar
├── odps-sdk-commons-0.14.0-rc1.jar
├── odps-sdk-core-0.14.0-rc1.jar
├── odps-sdk-lot-0.14.0-rc1.jar
├── odps-sdk-mapred-0.14.0-rc1.jar
├── odps-sdk-udf-0.14.0-rc1.jar
├── protobuf-java-2.4.1.jar
└── velocity-1.7.jar
├── pom.xml
└── src
├── main
├── java
│ └── chaitin
│ │ ├── phishing
│ │ ├── HtmlParser.java
│ │ ├── PhishingDetector.java
│ │ ├── PhishingFeature.java
│ │ ├── PhishingMapper.java
│ │ ├── ScoreContent.java
│ │ ├── ScoreDomain.java
│ │ ├── ScoreForm.java
│ │ └── ScoreTitle.java
│ │ ├── utils
│ │ ├── Base64.java
│ │ ├── ChaitinRecord.java
│ │ ├── Decoder.java
│ │ ├── Gao.java
│ │ ├── Multipart.java
│ │ ├── Pair.java
│ │ ├── ParseUrl.java
│ │ ├── QueryString.java
│ │ └── Unquote.java
│ │ └── webshell
│ │ ├── WebshellDetector.java
│ │ ├── WebshellMapper.java
│ │ ├── WebshellTokenizer.java
│ │ └── parser
│ │ ├── AspScore.java
│ │ ├── AspTokenizer.rl
│ │ ├── PhpScore.java
│ │ └── PhpScore.rl
└── resources
│ ├── META-INF
│ └── base.mapred.xml
│ └── credential.properties
└── test
└── java
└── chaitin
└── test
└── phishing
└── ScoreDomainTest.java
/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | temp
3 | warehouse
4 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | webshell
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 | org.eclipse.m2e.core.maven2Builder
15 |
16 |
17 |
18 |
19 |
20 | org.eclipse.jdt.core.javanature
21 | org.eclipse.m2e.core.maven2Nature
22 |
23 |
24 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding//src/main/java=UTF-8
3 | encoding//src/main/resources=UTF-8
4 | encoding//src/test/java=UTF-8
5 | encoding//src/test/resources=UTF-8
6 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
4 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
5 | org.eclipse.jdt.core.compiler.compliance=1.7
6 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
7 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
8 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
9 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
10 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
11 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
12 | org.eclipse.jdt.core.compiler.source=1.7
13 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/README.md
--------------------------------------------------------------------------------
/lib/bouncycastle.provider-1.38-jdk15.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/bouncycastle.provider-1.38-jdk15.jar
--------------------------------------------------------------------------------
/lib/commons-cli-1.2.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/commons-cli-1.2.jar
--------------------------------------------------------------------------------
/lib/commons-codec-1.9.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/commons-codec-1.9.jar
--------------------------------------------------------------------------------
/lib/commons-collections-3.2.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/commons-collections-3.2.1.jar
--------------------------------------------------------------------------------
/lib/commons-digester3-3.2.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/commons-digester3-3.2.jar
--------------------------------------------------------------------------------
/lib/commons-io-2.4.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/commons-io-2.4.jar
--------------------------------------------------------------------------------
/lib/commons-lang-2.4.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/commons-lang-2.4.jar
--------------------------------------------------------------------------------
/lib/commons-logging-1.1.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/commons-logging-1.1.1.jar
--------------------------------------------------------------------------------
/lib/gson-2.2.4.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/gson-2.2.4.jar
--------------------------------------------------------------------------------
/lib/guava-15.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/guava-15.0.jar
--------------------------------------------------------------------------------
/lib/jackson-core-asl-1.9.13.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/jackson-core-asl-1.9.13.jar
--------------------------------------------------------------------------------
/lib/jackson-mapper-asl-1.9.13.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/jackson-mapper-asl-1.9.13.jar
--------------------------------------------------------------------------------
/lib/javacsv-2.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/javacsv-2.0.jar
--------------------------------------------------------------------------------
/lib/jsoup.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/jsoup.jar
--------------------------------------------------------------------------------
/lib/junit-4.4.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/junit-4.4.jar
--------------------------------------------------------------------------------
/lib/odps-mapred-bridge-0.14.0-rc1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/odps-mapred-bridge-0.14.0-rc1.jar
--------------------------------------------------------------------------------
/lib/odps-mapred-local-0.14.0-rc1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/odps-mapred-local-0.14.0-rc1.jar
--------------------------------------------------------------------------------
/lib/odps-sdk-commons-0.14.0-rc1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/odps-sdk-commons-0.14.0-rc1.jar
--------------------------------------------------------------------------------
/lib/odps-sdk-core-0.14.0-rc1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/odps-sdk-core-0.14.0-rc1.jar
--------------------------------------------------------------------------------
/lib/odps-sdk-lot-0.14.0-rc1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/odps-sdk-lot-0.14.0-rc1.jar
--------------------------------------------------------------------------------
/lib/odps-sdk-mapred-0.14.0-rc1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/odps-sdk-mapred-0.14.0-rc1.jar
--------------------------------------------------------------------------------
/lib/odps-sdk-udf-0.14.0-rc1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/odps-sdk-udf-0.14.0-rc1.jar
--------------------------------------------------------------------------------
/lib/protobuf-java-2.4.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/protobuf-java-2.4.1.jar
--------------------------------------------------------------------------------
/lib/velocity-1.7.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monstersb/detect_webshell/284fb6fdeffc5e88b3d8120c640ad95b8a8b448b/lib/velocity-1.7.jar
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 | chaitin
5 | webshell
6 | 0.0.1-SNAPSHOT
7 | jar
8 |
9 |
10 |
11 |
12 | org.apache.maven.plugins
13 | maven-compiler-plugin
14 | 2.3.2
15 |
16 | 1.6
17 | 1.6
18 | 1.6
19 | UTF-8
20 |
21 |
22 |
23 | org.apache.maven.plugins
24 | maven-resources-plugin
25 | 2.4
26 |
27 | UTF-8
28 |
29 |
30 |
31 | org.apache.maven.plugins
32 | maven-assembly-plugin
33 |
34 |
35 | jar-with-dependencies
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 | com.alibaba.external
45 | bouncycastle.provider
46 | 1.38-jdk15
47 | system
48 | ${project.basedir}/lib/bouncycastle.provider-1.38-jdk15.jar
49 |
50 |
51 | commons-cli
52 | commons-cli
53 | 1.2
54 | system
55 | ${project.basedir}/lib/commons-cli-1.2.jar
56 |
57 |
58 | commons-codec
59 | commons-codec
60 | 1.9
61 | system
62 | ${project.basedir}/lib/commons-codec-1.9.jar
63 |
64 |
65 | commons-collections
66 | commons-collections
67 | 3.2.1
68 | system
69 | ${project.basedir}/lib/commons-collections-3.2.1.jar
70 |
71 |
72 | commons-io
73 | commons-io
74 | 2.4
75 | system
76 | ${project.basedir}/lib/commons-io-2.4.jar
77 |
78 |
79 | commons-lang
80 | commons-lang
81 | 2.4
82 | system
83 | ${project.basedir}/lib/commons-lang-2.4.jar
84 |
85 |
86 | commons-logging
87 | commons-logging
88 | 1.1.1
89 | system
90 | ${project.basedir}/lib/commons-logging-1.1.1.jar
91 |
92 |
93 | com.google.code.gson
94 | gson
95 | 2.2.4
96 | system
97 | ${project.basedir}/lib/gson-2.2.4.jar
98 |
99 |
100 | com.google.guava
101 | guava
102 | 15.0
103 | system
104 | ${project.basedir}/lib/guava-15.0.jar
105 |
106 |
107 | org.codehaus.jackson
108 | jackson-core-asl
109 | 1.9.13
110 | system
111 | ${project.basedir}/lib/jackson-core-asl-1.9.13.jar
112 |
113 |
114 | org.codehaus.jackson
115 | jackson-mapper-asl
116 | 1.9.13
117 | system
118 | ${project.basedir}/lib/jackson-mapper-asl-1.9.13.jar
119 |
120 |
121 | net.sourceforge.javacsv
122 | javacsv
123 | 2.0
124 | system
125 | ${project.basedir}/lib/javacsv-2.0.jar
126 |
127 |
128 | com.aliyun.odps
129 | odps-mapred-bridge
130 | 0.14.0-rc1
131 | system
132 | ${project.basedir}/lib/odps-mapred-bridge-0.14.0-rc1.jar
133 |
134 |
135 | com.aliyun.odps
136 | odps-mapred-local
137 | 0.14.0-rc1
138 | system
139 | ${project.basedir}/lib/odps-mapred-local-0.14.0-rc1.jar
140 |
141 |
142 | com.aliyun.odps
143 | odps-sdk-commons
144 | 0.14.0-rc1
145 | system
146 | ${project.basedir}/lib/odps-sdk-commons-0.14.0-rc1.jar
147 |
148 |
149 | com.aliyun.odps
150 | odps-sdk-core
151 | 0.14.0-rc1
152 | system
153 | ${project.basedir}/lib/odps-sdk-core-0.14.0-rc1.jar
154 |
155 |
156 | com.aliyun.odps
157 | odps-sdk-lot
158 | 0.14.0-rc1
159 | system
160 | ${project.basedir}/lib/odps-sdk-lot-0.14.0-rc1.jar
161 |
162 |
163 | com.aliyun.odps
164 | odps-sdk-mapred
165 | 0.14.0-rc1
166 | system
167 | ${project.basedir}/lib/odps-sdk-mapred-0.14.0-rc1.jar
168 |
169 |
170 | com.aliyun.odps
171 | odps-sdk-udf
172 | 0.14.0-rc1
173 | system
174 | ${project.basedir}/lib/odps-sdk-udf-0.14.0-rc1.jar
175 |
176 |
177 | com.google.protobuf
178 | protobuf-java
179 | 2.4.1
180 | system
181 | ${project.basedir}/lib/protobuf-java-2.4.1.jar
182 |
183 |
184 | org.apache.velocity
185 | velocity
186 | 1.7
187 | system
188 | ${project.basedir}/lib/velocity-1.7.jar
189 |
190 |
191 | org.apache.commons
192 | commons-digester3
193 | 3.2
194 |
195 |
196 |
197 | junit
198 | junit
199 | 4.4
200 | test
201 |
202 |
203 |
204 |
--------------------------------------------------------------------------------
/src/main/java/chaitin/phishing/HtmlParser.java:
--------------------------------------------------------------------------------
1 | package chaitin.phishing;
2 |
3 | import org.jsoup.Jsoup;
4 | import org.jsoup.nodes.Document;
5 | import org.jsoup.nodes.Element;
6 | import org.jsoup.select.Elements;
7 |
8 | public class HtmlParser {
9 | public static class Form {
10 | public static class Input {
11 | public String type;
12 | public String name;
13 | public String id;
14 | public String placeholder;
15 | }
16 | public String action;
17 | public String text;
18 | public Input[] input;
19 | }
20 |
21 | public static class Link {
22 | public String rel;
23 | public String href;
24 | }
25 |
26 | public static class A {
27 | public String href;
28 | }
29 |
30 | String html;
31 | Document doc;
32 | public String text;
33 | public String title;
34 | public Form form[];
35 | public Link link[];
36 | public A a[];
37 |
38 | public HtmlParser (String _html) {
39 | html = _html;
40 | parse();
41 | }
42 |
43 | Boolean parse () {
44 | doc = Jsoup.parse(html);
45 | text = doc.text();
46 | Elements es = doc.getElementsByTag("title");
47 | if (es.size() > 0) {
48 | title = es.first().text();
49 | } else {
50 | title = "";
51 | }
52 | es = doc.getElementsByTag("form");
53 | form = new Form[es.size()];
54 | for (int i = 0; i < es.size(); ++i) {
55 | Element e = es.get(i);
56 | Form f = new Form();
57 | f.action = e.attr("action").toLowerCase();
58 | f.text = e.text();
59 | Elements es1 = e.getElementsByTag("input");
60 | f.input = new Form.Input[es1.size()];
61 | for (int j = 0; j < es1.size(); ++j) {
62 | Form.Input input = new Form.Input();
63 | Element e1 = es1.get(j);
64 | input.type = e1.attr("type").toLowerCase();
65 | input.name = e1.attr("name");
66 | input.id = e1.attr("id");
67 | input.placeholder = e1.attr("placeholder");
68 | f.input[j] = input;
69 | }
70 | form[i] = f;
71 | }
72 |
73 | es = doc.getElementsByTag("link");
74 | link = new Link[es.size()];
75 | for (int i = 0; i < es.size(); ++i) {
76 | link[i] = new Link();
77 | Element e = es.get(i);
78 | link[i].rel = e.attr("rel").toLowerCase();
79 | link[i].href = e.attr("href").toLowerCase();
80 | }
81 |
82 | es = doc.getElementsByTag("a");
83 | a = new A[es.size()];
84 | for (int i = 0; i < es.size(); ++i) {
85 | a[i] = new A();
86 | Element e = es.get(i);
87 | a[i].href = e.attr("href").toLowerCase();
88 | }
89 |
90 | return true;
91 | }
92 |
93 | public static void main(String[] args) {
94 | System.out.println("text: " + new HtmlParser("
testasdbodyh1
div
").text);
95 | System.out.println(new HtmlParser("test
asd").title);
96 | System.out.println(new HtmlParser("test").title);
97 | System.out.println(new HtmlParser("TEST哈哈哈test2").title);
98 | System.out.println(new HtmlParser("test1test2").form.length);
99 | System.out.println(new HtmlParser("").form.length);
100 | System.out.println(new HtmlParser("").form[0].action);
101 | System.out.println("id: " + new HtmlParser("").form[0].input[0].id);
102 | System.out.println("type: " + new HtmlParser("").form[0].input[0].type);
103 |
104 | HtmlParser h = new HtmlParser(" ");
105 | System.out.println(h.link.length);
106 | System.out.println("href: " + h.link[0].href);
107 | System.out.println(h.link[0].rel);
108 | System.out.println(h.link[1].href);
109 | System.out.println(h.link[1].rel);
110 |
111 | h = new HtmlParser(" ");
112 | System.out.println(h.a.length);
113 | System.out.println("href: " + h.a[0].href);
114 | System.out.println("href: " + h.a[1].href);
115 | System.out.println("href: " + h.a[2].href);
116 |
117 | return;
118 | }
119 | }
120 |
--------------------------------------------------------------------------------
/src/main/java/chaitin/phishing/PhishingDetector.java:
--------------------------------------------------------------------------------
1 | package chaitin.phishing;
2 |
3 | import java.net.MalformedURLException;
4 | import java.net.URL;
5 |
6 | public class PhishingDetector {
7 |
8 | URL url;
9 | HtmlParser html;
10 |
11 | PhishingDetector (String url, String html) {
12 | try {
13 | this.url = new URL(url);
14 | } catch (MalformedURLException e) {
15 | // TODO Auto-generated catch block
16 | try {
17 | this.url = new URL("http://chaitin-monster.com/");
18 | } catch (MalformedURLException e1) {
19 | }
20 | }
21 | this.html = new HtmlParser(html);
22 | }
23 |
24 | static double threshold = 7.999;
25 |
26 | double detect() {
27 | double score = 0.0;
28 | if (ScoreDomain.is_white(url.getHost())) {
29 | return score;
30 | }
31 | score += ScoreDomain.score(url.getHost());
32 | score += ScoreTitle.score(html.title);
33 | score += ScoreContent.score(html.text);
34 | score += ScoreForm.score(html.form);
35 | return score;
36 | }
37 |
38 | public static Boolean is_phishing(String url, String html) {
39 | return new PhishingDetector(url, html).detect() >= threshold;
40 | }
41 |
42 |
43 | public static void main(String[] args) {
44 | System.out.println(is_phishing("http://www.baidu.com.a", "[官]欢迎访问全国信用在线申请中心"));
45 | System.out.println(is_phishing("", " [官]欢迎访问全国信用在线申请中心 "));
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/chaitin/phishing/PhishingFeature.java:
--------------------------------------------------------------------------------
1 | package chaitin.phishing;
2 |
3 | import java.net.Inet4Address;
4 | import java.net.InetAddress;
5 | import java.net.MalformedURLException;
6 | import java.net.URL;
7 | import java.net.UnknownHostException;
8 | import java.util.regex.Pattern;
9 |
10 | public class PhishingFeature {
11 |
12 | public static final int has_ip = 0;
13 | public static final int long_url = 1;
14 | public static final int short_service = 2;
15 | public static final int has_at = 3;
16 | public static final int double_slash_redirect = 4;
17 | public static final int pref_suf = 5;
18 | public static final int has_sub_domain = 6;
19 | public static final int ssl_state = 7;
20 | public static final int long_domain = 8;
21 | public static final int favicon = 9;
22 | public static final int port = 10;
23 | public static final int https_token = 11;
24 | public static final int req_url = 12;
25 | public static final int url_of_anchor = 13;
26 | public static final int tag_links = 14;
27 | public static final int SFH = 15;
28 | public static final int submit_to_email = 16;
29 | public static final int abnormal_url = 17;
30 | public static final int redirect = 18;
31 | public static final int mouseover = 19;
32 | public static final int right_click = 20;
33 | public static final int popup = 21;
34 | public static final int iframe = 22;
35 | public static final int domain_age = 23;
36 | public static final int dns_record = 24;
37 | public static final int traffic = 25;
38 | public static final int page_rank = 26;
39 | public static final int google_index = 27;
40 | public static final int links_to_page = 28;
41 | public static final int stats_report = 29;
42 | public static final int target = 30;
43 |
44 | public static int[] featureVector = new int[31];
45 |
46 | public static URL url;
47 | public static HtmlParser html;
48 |
49 | public static void init(String u, String h) {
50 | u = u.toLowerCase();
51 | h = h.toLowerCase();
52 | try {
53 | url = new URL(u);
54 | } catch (MalformedURLException e) {
55 | try {
56 | url = new URL("http://baidu.com/");
57 | } catch (MalformedURLException e1) {
58 |
59 | }
60 | }
61 | html = new HtmlParser(h);
62 |
63 | System.out.println(url.getHost());
64 | getFeatures();
65 | }
66 |
67 | public static boolean isIP(String ip) {
68 | // 0x and 0X
69 | ip = ip.toLowerCase();
70 | String[] as = ip.split("\\.");
71 | for (int i = 0; i < as.length; i++) {
72 | int base = 10;
73 | if (as[i].length() > 2 && as[i].indexOf("0x") == 0) {
74 | as[i] = as[i].substring(2);
75 | base = 16;
76 | }
77 | try {
78 | Long.parseLong(as[i], base);
79 | } catch (Exception e) {
80 | return false;
81 | }
82 | }
83 | return true;
84 | }
85 |
86 | // should be more precise ?
87 | public static int has_ip_feature() {
88 | String addr = url.getHost();
89 | if (isIP(addr)) {
90 | return 1;
91 | }
92 | return -1;
93 | }
94 |
95 | public static int long_url_feature() {
96 | int len = url.toString().length();
97 | if (len < 54) {
98 | return -1;
99 | } else if (54 <= len && len <= 75) {
100 | return 0;
101 | } else {
102 | return 1;
103 | }
104 | }
105 |
106 | // TODO: more precise
107 | public static int short_service_feature() {
108 | int len = url.toString().length();
109 | if (len <= 9) {
110 | return 1;
111 | }
112 | return -1;
113 | }
114 |
115 | public static int has_at_feature() {
116 | if (url.toString().indexOf('@') != -1) {
117 | return 1;
118 | }
119 | return -1;
120 | }
121 |
122 | public static int double_slash_redirect_feature() {
123 | if (url.toString().lastIndexOf("//") > 7) {
124 | return 1;
125 | }
126 | return -1;
127 | }
128 |
129 | public static int pref_suf_feature() {
130 | if (url.toString().indexOf("-") != -1) {
131 | return 1;
132 | }
133 | return -1;
134 | }
135 |
136 | public static int has_sub_domain_feature() {
137 | int cnt = 0;
138 | String u = url.toString();
139 | for (int i = 0; i < u.length(); i++) {
140 | if (u.charAt(i) == '.') {
141 | cnt++;
142 | }
143 | }
144 | if (cnt == 1) {
145 | return -1;
146 | } else if (cnt == 2) {
147 | return 0;
148 | } else {
149 | return 1;
150 | }
151 | }
152 |
153 | // Need to consider remain time
154 | public static int ssl_state_feature() {
155 | if (!"https".equals(url.getProtocol().toLowerCase())) {
156 | return 1;
157 | }
158 | return -1;
159 | }
160 |
161 | // Need to solve
162 | public static int long_domain_feature() {
163 |
164 | return -1;
165 | }
166 |
167 | public static int favicon_feature() {
168 | for (int i = 0; i < html.link.length; i++) {
169 | if (html.link[i].rel.indexOf("icon") != -1) {
170 | try {
171 | URL tu = new URL(html.link[i].href);
172 | return 1;
173 | } catch (MalformedURLException e) {
174 |
175 | }
176 | }
177 | }
178 | return 0;
179 | }
180 |
181 | // Need to solve
182 | public static int port_feature() {
183 |
184 | return -1;
185 | }
186 |
187 | public static int https_token_feature() {
188 | if (url.getHost().indexOf("https") != -1) {
189 | return 1;
190 | }
191 | return -1;
192 | }
193 |
194 | // This feature may be important
195 | // Need to solve
196 | public static int req_url_feature() {
197 |
198 | return 0;
199 | }
200 |
201 | public static int url_of_anchor_feature() {
202 | double anchor = 0;
203 | double tot = 0;
204 | for (int i = 0; i < html.a.length; i++) {
205 | if (html.a[i].href.indexOf('#') != -1 || html.a[i].href.indexOf("javascript:") != -1) {
206 | anchor += 1.0;
207 | }
208 | tot += 1.0;
209 | }
210 | double p = anchor / tot;
211 | if (p < 0.22) {
212 | return -1;
213 | } else if (p >= 0.22 && p <= 0.61) {
214 | return 0;
215 | } else {
216 | return 1;
217 | }
218 | }
219 |
220 | public static int tag_links_feature() {
221 |
222 | return 0;
223 | }
224 |
225 | public static int SFH_feature() {
226 |
227 | return 0;
228 | }
229 |
230 | public static int submit_to_email_feature() {
231 |
232 | return 0;
233 | }
234 |
235 | public static int abnormal_url_feature() {
236 |
237 | return 0;
238 | }
239 |
240 | public static int redirect_feature() {
241 |
242 | return 0;
243 | }
244 |
245 | public static int mouseover_feature() {
246 |
247 | return 0;
248 | }
249 |
250 | public static int right_click_feature() {
251 |
252 | return 0;
253 | }
254 |
255 | public static int popup_feature() {
256 |
257 | return 0;
258 | }
259 |
260 | public static int iframe_feature() {
261 |
262 | return 0;
263 | }
264 |
265 | public static int domain_age_feature() {
266 |
267 | return 0;
268 | }
269 |
270 | public static int dns_record_feature() {
271 |
272 | return 0;
273 | }
274 |
275 | public static int traffic_feature() {
276 |
277 | return 0;
278 | }
279 |
280 | public static int page_rank_feature() {
281 |
282 | return 0;
283 | }
284 |
285 | public static int google_index_feature() {
286 |
287 | return 0;
288 | }
289 |
290 | public static int links_to_page_feature() {
291 |
292 | return 0;
293 | }
294 |
295 | public static int stats_report_feature() {
296 |
297 | return 0;
298 | }
299 |
300 | public static int target_feature() {
301 |
302 | return 0;
303 | }
304 |
305 | public static boolean getFeatures() {
306 |
307 | featureVector[has_ip] = has_ip_feature();
308 | featureVector[long_url] = long_url_feature();
309 | featureVector[short_service] = short_service_feature();
310 | featureVector[has_at] = has_at_feature();
311 | featureVector[double_slash_redirect] = double_slash_redirect_feature();
312 | featureVector[pref_suf] = pref_suf_feature();
313 | featureVector[has_sub_domain] = has_sub_domain_feature();
314 | featureVector[ssl_state] = ssl_state_feature();
315 | featureVector[long_domain] = long_domain_feature();
316 | featureVector[favicon] = favicon_feature();
317 | featureVector[port] = port_feature();
318 | featureVector[https_token] = https_token_feature();
319 | featureVector[req_url] = req_url_feature();
320 | featureVector[url_of_anchor] = url_of_anchor_feature();
321 | featureVector[tag_links] = tag_links_feature();
322 | featureVector[SFH] = SFH_feature();
323 | featureVector[submit_to_email] = submit_to_email_feature();
324 | featureVector[abnormal_url] = abnormal_url_feature();
325 | featureVector[redirect] = redirect_feature();
326 | featureVector[mouseover] = mouseover_feature();
327 | featureVector[right_click] = right_click_feature();
328 | featureVector[popup] = popup_feature();
329 | featureVector[iframe] = iframe_feature();
330 | featureVector[domain_age] = domain_age_feature();
331 | featureVector[dns_record] = dns_record_feature();
332 | featureVector[traffic] = traffic_feature();
333 | featureVector[page_rank] = page_rank_feature();
334 | featureVector[google_index] = google_index_feature();
335 | featureVector[links_to_page] = links_to_page_feature();
336 | featureVector[stats_report] = stats_report_feature();
337 | featureVector[target] = target_feature();
338 |
339 | return true;
340 | }
341 |
342 | public static void show() {
343 | for (int i = 0; i < featureVector.length; i++) {
344 | System.out.print(featureVector[i]);
345 | }
346 | }
347 |
348 | public static void test() {
349 | boolean res = true;
350 | res &= (isIP("1.1.1.1"));
351 | res &= (isIP("0x58.0xCC.0xCA.0x62"));
352 | res &= (isIP("0x58.0xCA.0x62"));
353 | res &= (isIP("0x58CCCA62"));
354 | res &= (isIP("0x58CA62"));
355 | res &= (isIP("0x58CA6"));
356 | res &= (!isIP("baidu.com"));
357 | res &= (!isIP("xxx.123.abc"));
358 |
359 | if (res) {
360 | System.out.println("\nIs ip test succ");
361 | } else {
362 | System.out.println("\nIs ip test fail");
363 | }
364 |
365 | if (res) {
366 | System.out.println("\n==TEST PASSED==");
367 | } else {
368 | System.out.println("\n==TEST FAILED==");
369 | }
370 |
371 | }
372 |
373 | public static void main(String[] args) {
374 |
375 | for (int i = 0; i < featureVector.length; i++) {
376 | System.out.print(featureVector[i]);
377 | }
378 | System.out.print("");
379 |
380 | PhishingFeature.init("https://baidu.com",
381 | " ");
382 | System.out.println(PhishingFeature.url.getProtocol());
383 |
384 | try {
385 | URL uu = new URL("test.t");
386 | System.out.println(uu.getProtocol());
387 | } catch (MalformedURLException e) {
388 | System.out.println("dododo");
389 | }
390 |
391 | test();
392 | }
393 |
394 | }
395 |
--------------------------------------------------------------------------------
/src/main/java/chaitin/phishing/PhishingMapper.java:
--------------------------------------------------------------------------------
1 | package chaitin.phishing;
2 |
3 | import java.io.IOException;
4 | import java.net.MalformedURLException;
5 | import java.net.URL;
6 | import java.util.LinkedList;
7 | import java.util.List;
8 | import java.util.regex.Pattern;
9 |
10 | import com.aliyun.odps.data.Record;
11 | import com.aliyun.odps.mapred.Mapper;
12 |
13 | import chaitin.utils.Pair;
14 |
15 |
16 | public class PhishingMapper implements Mapper {
17 |
18 | public void setup(TaskContext context) throws IOException {
19 | }
20 |
21 | public void map(long recordNum, Record record, TaskContext context) throws IOException {
22 | String url = (String) record.get(0);
23 | String html = (String) record.get(1);
24 | if (PhishingDetector.is_phishing(url, html)) {
25 | Record result = context.createOutputRecord();
26 | result.set("url", url);
27 | context.write(result);
28 | }
29 | }
30 |
31 | public void cleanup(TaskContext context) throws IOException {
32 |
33 | }
34 |
35 | }
36 |
--------------------------------------------------------------------------------
/src/main/java/chaitin/phishing/ScoreContent.java:
--------------------------------------------------------------------------------
1 | package chaitin.phishing;
2 |
3 | import java.util.LinkedList;
4 | import java.util.List;
5 | import java.util.regex.Pattern;
6 |
7 | import chaitin.utils.Pair;
8 |
9 | public class ScoreContent {
10 | static List> sensitive_word;
11 | static List> sensitive_word_once;
12 |
13 | static {
14 | sensitive_word = new LinkedList>();
15 | sensitive_word.add(new Pair(Pattern.compile(".*欢迎光临京东商城.*"), 10.0));
16 | sensitive_word.add(new Pair(Pattern.compile(".*欢迎登录(华为|苹果|QQ|qq|腾讯|腾讯qq|腾讯QQ|百度|京东|淘宝|支付宝|美团|微信|新浪微博)帐号.*"), 8.0));
17 | sensitive_word.add(new Pair(Pattern.compile(".*已经发放.{0,20}奖金.*"), 10.0));
18 | sensitive_word.add(new Pair(Pattern.compile(".*Forgot Apple ID or password.*"), 10.0));
19 | sensitive_word.add(new Pair(Pattern.compile(".*没有Apple ID.{10}现在创建一个.*"), 10.0));
20 | sensitive_word.add(new Pair(Pattern.compile(".*礼品.*"), 1.0));
21 | sensitive_word.add(new Pair(Pattern.compile(".*奖金.*"), 1.0));
22 | sensitive_word.add(new Pair(Pattern.compile(".*中奖.*"), 1.0));
23 | sensitive_word.add(new Pair(Pattern.compile(".*抽奖.*"), 1.0));
24 | sensitive_word.add(new Pair(Pattern.compile(".*赚钱.*"), 1.0));
25 | sensitive_word.add(new Pair(Pattern.compile(".*额度.*"), 1.0));
26 | sensitive_word.add(new Pair(Pattern.compile(".*免费.*"), 1.0));
27 | sensitive_word.add(new Pair(Pattern.compile(".*发放.*"), 1.0));
28 | sensitive_word.add(new Pair(Pattern.compile(".*下发.*"), 1.0));
29 | sensitive_word.add(new Pair(Pattern.compile(".*登陆.*"), 1.0));
30 | sensitive_word.add(new Pair(Pattern.compile(".*登录.*"), 1.0));
31 | sensitive_word_once = new LinkedList>();
32 | }
33 |
34 | public static double score(String text) {
35 | double result = 0.0;
36 | for (Pair p: sensitive_word) {
37 | if (p.first.matcher(text).matches()) {
38 | result += p.second;
39 | }
40 | }
41 | return result;
42 | }
43 |
44 | public static void main(String[] args) {
45 | System.out.println(score("已经发放 17332305 元奖金"));
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/chaitin/phishing/ScoreDomain.java:
--------------------------------------------------------------------------------
1 | package chaitin.phishing;
2 |
/**
 * Scores a host name by how strongly it imitates a white-listed domain.
 *
 * White-listed hosts (an entry itself or any sub-domain of it) score 0. Other
 * hosts get a small length penalty, or - when a white-listed domain appears
 * inside the host after a '.' or '-' separator, or appears after the 0/o and
 * l/1 look-alike substitution - a score derived from the length of the
 * imitated domain.
 */
public class ScoreDomain {

    /** Known-good registrable domains (lower case). The misspelt name is kept for compatibility. */
    static String[] whilte_list;

    static {
        whilte_list = new String[] {
            "email-qq.com", "58.com", "jiwu.com", "xiaomi.com", "cnzz.com", "tuicool.com", "saic.gov.cn", "chinanews.com",
            "23wx.com", "ifanr.com", "gao7.com", "quanmin.tv", "vancl.com", "cjn.cn", "microsoftonline.com", "yiqifa.com",
            "appgame.com", "mtime.com", "suning.com", "pinterest.com", "12306.cn", "cnhubei.com", "worktile.com", "pcbeta.com",
            "jd.hk", "quora.com", "hujiang.com", "21cn.com", "chinaunix.net", "lenovo.com.cn", "dianping.com", "19lou.com",
            "house365.com", "btbbt.cc", "fuliba.net", "onlinedown.net", "caixin.com", "google.com.tw", "v2ex.com", "10jqka.com.cn",
            "dell.com", "ynepb.gov.cn", "ceconline.com", "miaopai.com", "18183.com", "yesky.com", "120ask.com", "pps.tv",
            "2345.com", "xvideos.com", "huaban.com", "etao.com", "yaolan.com", "gucheng.com", "nga.cn", "alicdn.com",
            "gamersky.com", "2cto.com", "guokr.com", "taoche.com", "instagram.com", "liansuo.com", "xjtour.com", "microsoft.com",
            "eastday.com", "booking.com", "sina.com", "google.com.hk", "github.com", "hupu.com", "nuomi.com", "3158.cn",
            "youth.cn", "ebay.com", "longzhu.com", "baike.com", "u17.com", "yougou.com", "dytt8.net", "biquge.la",
            "ecitic.com", "twitter.com", "ccb.com", "ngacn.cc", "autohome.com.cn", "dy2018.com", "caijing.com.cn", "weibo.cn",
            "sogou.com", "zbj.com", "bttiantang.com", "cyol.com", "eqxiu.com", "alibaba-inc.com", "51yes.com", "yixun.com",
            "people.com.cn", "gongchang.com", "mafengwo.cn", "ximalaya.com", "cmbc.com.cn", "takungpao.com", "dmzj.com", "kuwo.cn",
            "spdb.com.cn", "zcool.com.cn", "tudou.com", "mp4ba.com", "ikea.com", "zhihu.com", "jandan.net", "jxnews.com.cn",
            "workercn.cn", "zuanke8.com", "blogspot.com", "aliexpress.com", "csair.com", "aiweibang.com", "cnblogs.com", "cgbchina.com.cn",
            "xywy.com", "nipic.com", "ih5.cn", "msn.com", "google.com", "126.com", "51credit.com", "huanqiu.com",
            "51job.com", "jrj.com.cn", "tgfcer.com", "chooseauto.com.cn", "17track.net", "weather.com.cn", "doubleclick.net", "doc88.com",
            "258.com", "muchong.com", "huim.com", "91jm.com", "92lux.cn", "114so.cn", "pptv.com", "pchome.net",
            "chinabyte.com", "g-fox.cn", "chinadaily.com.cn", "xiamp4.com", "huawei.com", "ccidnet.com", "amap.com", "xdf.cn",
            "chouti.com", "lofter.com", "meizu.com", "duomai.com", "ctrip.com", "mozilla.org", "25pp.com", "mgtv.com",
            "95516.com", "renren.com", "amazon.co.jp", "yahoo.co.jp", "cnnic.cn", "goldcarpet.cn", "xiu.com", "guancha.cn",
            "17k.com", "nih.gov", "tumblr.com", "10086.cn", "winshang.com", "ali213.net", "loldytt.com", "sobaidupan.com",
            "huihui.cn", "gusuwang.com", "xiami.com", "cs.com.cn", "juooo.com", "chinaso.com", "cnr.cn", "ftchinese.com",
            "egou.com", "bilibili.com", "ctfile.com", "dahe.cn", "mama.cn", "cpta.com.cn", "google.com.sg", "meishichina.com",
            "kdslife.com", "ci123.com", "alipay.com", "k618.cn", "oschina.net", "epwk.com", "amazon.com", "cnmo.com",
            "36kr.com", "lashou.com", "alimama.com", "cntv.cn", "yinyuetai.com", "gmw.cn", "acfun.tv", "cisco.com",
            "github.io", "cnbeta.com", "3dmgame.com", "122.gov.cn", "kaixin001.com", "xunlei.com", "dragonparking.com", "segmentfault.com",
            "mydrivers.com", "blizzard.cn", "beva.com", "17173.com", "sdo.com", "zol.com.cn", "smzdm.com", "steamcommunity.com",
            "kugou.com", "rednet.cn", "scol.com.cn", "taobao.com", "alibaba.com", "jiemian.com", "11467.com", "adobe.com",
            "cncn.org.cn", "7k7k.com", "techweb.com.cn", "fanli.com", "tibet.cn", "bankofchina.com", "iconfont.cn", "3dwwwgame.com",
            "dbw.cn", "icbc.com.cn", "amazon.co.uk", "55haitao.com", "anjuke.com", "xinjunshi.com", "mydigit.cn", "oeeee.com",
            "jiayuan.com", "sq.cn", "p5w.net", "avmo.pw", "pixiv.net", "niuche.com", "qidian.com", "vmall.com",
            "6pm.com", "skycn.com", "wtoip.com", "xiaomi.cn", "sznews.com", "zhibo8.cc", "dgtle.com", "cyzone.cn",
            "fudan.edu.cn", "saraba1st.com", "jumei.com", "jiameng.com", "xitek.com", "tmall.com", "qianzhan.com", "xinhuanet.com",
            "pcgames.com.cn", "chinaz.com", "stockstar.com", "voc.com.cn", "iqiyi.com", "lesports.com", "sinaimg.cn", "ed2000.com",
            "bitauto.com", "znds.com", "alexa.cn", "xcar.com.cn", "reddit.com", "salesforce.com", "xueqiu.com", "paypal.com",
            "youku.com", "lagou.com", "focus.cn", "joyme.com", "southcn.com", "haosou.com", "cnki.net", "a9vg.com",
            "bigccq.cn", "dix3.com", "hjenglish.com", "steampowered.com", "07073.com", "to8to.com", "gome.com.cn", "4399.com",
            "cr173.com", "wangtu.com", "365jia.cn", "fengniao.com", "amazon.de", "lianjia.com", "w3school.com.cn", "douban.com",
            "baiducontent.com", "viidii.info", "yhd.com", "huomaotv.cn", "pingan.com", "51.la", "wanfangdata.com.cn", "imdb.com",
            "tianya.cn", "yinxiang.com", "thepaper.cn", "123cha.com", "baomihua.com", "dangdang.com", "fobshanghai.com", "qunar.com",
            "2mnd56.com", "jjwxc.net", "alitrip.com", "ycwb.com", "boc.cn", "ebrun.com", "tower.im", "kuaidi100.com",
            "stackexchange.com", "youboy.com", "tmall.hk", "5dcar.com", "zhanqi.tv", "battlenet.com.cn", "facebook.com", "umeng.com",
            "linkedin.com", "amazon.fr", "pc6.com", "apple.com", "51cto.com", "eastmoney.com", "huya.com", "gh0089.com",
            "douyu.com", "wordpress.com", "dygang.com", "zaobao.com", "3987.com", "uuu9.com", "iiyi.com", "ems.com.cn",
            "google.co.jp", "yxdown.com", "haitao.com", "newsmth.net", "hc360.com", "pconline.com.cn", "sina.cn", "189.cn",
            "dotamax.com", "sonhoo.com", "gfan.com", "bjrcb.com", "duowan.com", "yunpan.cn", "jb51.net", "pcbaby.com.cn",
            "miercn.com", "runoob.com", "miui.com", "soku.com", "admaimai.com", "chiphell.com", "cn163.net", "sanguosha.com",
            "toutiao.com", "ku6.com", "meituan.com", "globaltimes.cn", "kdnet.net", "chsi.com.cn", "pcauto.com.cn", "makepolo.com",
            "zybang.com", "dilidili.com", "ttmeiju.com", "hao123.com", "engadget.com", "baidu.com", "baidu.com.cn", "ifeng.com",
            "google.cn", "sohu.com", "netease.com", "weibo.com", "wish.com", "cdstm.cn", "familydoctor.com.cn", "163.com",
            "jianshu.com", "ooopic.com", "cmbchina.com", "jiyoujia.com", "zhaopin.com", "icloud.com", "51sole.com", "mi.com",
            "ynet.com", "stackoverflow.com", "17ok.com", "6vhao.com", "51auto.com", "youtube.com", "enet.com.cn", "zealer.com",
            "docin.com", "godaddy.com", "force.com", "ithome.com", "firefoxchina.cn", "liepin.com", "uc.cn", "iteye.com",
            "tgbus.com", "quanjing.com", "chengdu.cn", "wikipedia.org", "178.com", "eol.cn", "verycd.com", "zjol.com.cn",
            "atpanel.com", "ganji.com", "sourceforge.net", "eelly.com", "99114.com", "btso.pw", "btime.com", "icolor.com.cn",
            "tuniu.com", "58pic.com", "oracle.com", "wenkang.cn", "52pojie.cn", "cqnews.net", "39yst.com", "panda.tv",
            "jqw.com", "zimuzu.tv", "g4d7.com", "babyschool.com.cn", "hexun.com", "babytree.com", "linktech.cn", "yahoo.com",
            "iciba.com", "t66y.com", "ea3w.com", "qyer.com", "duba.com", "netcoc.com", "10010.com", "sina.com.cn",
            "flyertea.com", "youdao.com", "qinqinbaby.com", "qq.com", "jmw.com.cn", "chekb.com", "weiyun.com", "yjbys.com",
            "dxy.cn", "kafan.cn", "aizhan.com", "amazon.cn", "cankaoxiaoxi.com", "sf-express.com", "jd.com", "ccb.com.cn",
            "qingdaonews.com", "go108.com.cn", "wtoutiao.com", "kaola.com", "1905.com", "dwnews.com", "cctv.com", "cri.cn"
        };
    }

    /**
     * Tells whether {@code host} is a white-listed domain or a sub-domain of one.
     *
     * @param host lower-case host name
     * @return {@code true} when {@code host} equals an entry or ends with {@code "." + entry}
     */
    static Boolean is_white (String host) {
        for (String s : whilte_list) {
            if (host.equals(s) || host.endsWith("." + s)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Looks at every occurrence of {@code domain} inside {@code host} (not only
     * the first one) and reports whether one of them starts right after a '.'
     * or '-' separator, or - when {@code allowAtStart} is set - at index 0.
     */
    private static boolean occursAtLabelStart(String host, String domain, boolean allowAtStart) {
        if (domain.isEmpty()) {
            return false;
        }
        for (int p = host.indexOf(domain); p >= 0; p = host.indexOf(domain, p + 1)) {
            if (p == 0) {
                if (allowAtStart) {
                    return true;
                }
            } else {
                char before = host.charAt(p - 1);
                if (before == '.' || before == '-') {
                    return true;
                }
            }
        }
        return false;
    }

    /** Maps the look-alike characters used by imitators onto one canonical form ('0' to 'o', 'l' to '1'). */
    private static String foldLookAlikes(String s) {
        return s.replace("0", "o").replace("l", "1");
    }

    /**
     * Computes the imitation score of a host name.
     *
     * @param host host name in any case; {@code null} scores 0.0
     * @return 0.0 for white-listed hosts; the length of the first white-listed
     *         domain embedded after a separator; that length plus 2 when the
     *         match only appears after look-alike folding; otherwise 0.1 per
     *         character beyond 20
     */
    public static double score (String host) {
        double result = 0.0;
        if (host == null) {
            return result;
        }
        // Locale.ROOT keeps the mapping stable (e.g. 'I' -> 'i') whatever the JVM default locale is.
        host = host.toLowerCase(java.util.Locale.ROOT);
        if (!is_white(host)) {
            if (host.length() > 20) {
                result += (host.length() - 20) * 0.1;
            }
            // A trusted domain used as a leading label of some other domain, e.g. "www.qq.com.evil.com".
            for (String s : whilte_list) {
                if (occursAtLabelStart(host, s, false)) {
                    return (double) s.length();
                }
            }
            // Same check after folding look-alike characters, e.g. "l0086.cn" imitating "10086.cn".
            host = foldLookAlikes(host);
            for (String s : whilte_list) {
                s = foldLookAlikes(s);
                if (occursAtLabelStart(host, s, true)) {
                    return (double) s.length() + 2;
                }
            }
        }
        return result;
    }

    public static void main(String[] args) {
        System.out.println("t".endsWith("t"));
        System.out.println("qq.com: " + score("qq.com"));
        System.out.println("www.qq.com: " + score("www.qq.com"));
        System.out.println("www.q-q.com: " + score("www.q-q.com"));
        System.out.println("www.a-qq.com: " + score("www.a-qq.com"));
        System.out.println("www.qq.com.monster.com: " + score("www.qq.com.monster.com"));
        System.out.println("10086.cn: " + score("10086.cn"));
        System.out.println("l0086.cn: " + score("l0086.cn"));
        System.out.println("monster.com: " + score("monster.com"));
        System.out.println("xagfjd.com: " + score("xagfjd.com"));
        return;
    }
}
70 |
--------------------------------------------------------------------------------
/src/main/java/chaitin/phishing/ScoreForm.java:
--------------------------------------------------------------------------------
1 | package chaitin.phishing;
2 |
3 | import java.util.LinkedList;
4 | import java.util.List;
5 | import java.util.regex.Pattern;
6 |
7 | import chaitin.phishing.HtmlParser.Form;
8 | import chaitin.phishing.HtmlParser.Form.Input;
9 | import chaitin.utils.Pair;
10 |
11 | public class ScoreForm {
12 |
13 | static List> sensitive_action;
14 | static List> sensitive_form_word;
15 |
16 | static {
17 | sensitive_action = new LinkedList>();
18 | sensitive_action.add(new Pair(Pattern.compile(".*fuck.*"), 5.0));
19 | sensitive_action.add(new Pair(Pattern.compile(".*diaoyu.*"), 5.0));
20 | sensitive_action.add(new Pair(Pattern.compile(".*\\.asp.*"), 2.0));
21 | sensitive_form_word = new LinkedList>();
22 | sensitive_form_word.add(new Pair(Pattern.compile(".*身份证.*"), 3.0));
23 | sensitive_form_word.add(new Pair(Pattern.compile(".*额度.*"), 3.0));
24 | sensitive_form_word.add(new Pair(Pattern.compile(".*手机号.*"), 1.0));
25 | }
26 |
27 | public static double scoce_action(String action) {
28 | double result = 0.0;
29 | for (Pair p: sensitive_action) {
30 | if (p.first.matcher(action).matches() && p.second > result) {
31 | result = p.second;
32 | }
33 | }
34 | return result;
35 | }
36 |
37 | public static double score_form(Form form) {
38 | double result = 0.0;
39 | for (Input input: form.input) {
40 | if (input.type == "password") {
41 | result += 3.0;
42 | break;
43 | }
44 | }
45 | for (Pair p: sensitive_form_word) {
46 | if (p.first.matcher(form.text).matches() && p.second > result) {
47 | result = p.second;
48 | }
49 | }
50 | result += scoce_action(form.action);
51 | return result;
52 | }
53 |
54 | public static double score(Form[] forms) {
55 | double result = 0.0;
56 | for (Form form: forms) {
57 | double t = score_form(form);
58 | if (t > result) {
59 | result = t;
60 | }
61 | }
62 | return result;
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/src/main/java/chaitin/phishing/ScoreTitle.java:
--------------------------------------------------------------------------------
1 | package chaitin.phishing;
2 |
3 | import java.util.LinkedList;
4 | import java.util.List;
5 |
6 | import chaitin.utils.Pair;
7 |
8 | public class ScoreTitle {
9 | static List> sensitive_word;
10 |
11 | static {
12 | sensitive_word = new LinkedList>();
13 | sensitive_word.add(new Pair("中国平安官网-中国平安保险(集团)股份有限公司-保险,银行,投资", 8.0));
14 | sensitive_word.add(new Pair("淘宝网 - 淘!我喜欢", 8.0));
15 | sensitive_word.add(new Pair("京东商城-京东商城官方网站!", 8.0));
16 | sensitive_word.add(new Pair("京东-欢迎登录", 8.0));
17 | sensitive_word.add(new Pair("京东(JD.COM)-综合网购首选-正品低价、品质保障、配送及时、轻松购物!", 8.0));
18 | sensitive_word.add(new Pair("中国移动官方网站", 8.0));
19 | sensitive_word.add(new Pair("中国联通网上营业厅", 8.0));
20 | sensitive_word.add(new Pair("苏宁易购(Suning) -综合网上购物商城,正品行货,全国联保,货到付款!", 8.0));
21 | sensitive_word.add(new Pair("用户登录 - 苏宁易购", 8.0));
22 | sensitive_word.add(new Pair("上天猫,就够了", 8.0));
23 | sensitive_word.add(new Pair("网上超市1号店,省力省钱省时间", 8.0));
24 | sensitive_word.add(new Pair("1号店登录", 8.0));
25 | sensitive_word.add(new Pair("亚马逊-网上购物商城:要网购, 就来Z.cn!", 8.0));
26 | sensitive_word.add(new Pair("Amazon 登录", 8.0));
27 | sensitive_word.add(new Pair("唯品会(原Vipshop.com)特卖会:一家专门做特卖的网站_确保正品_确保低价_货到付款", 8.0));
28 | sensitive_word.add(new Pair("唯品会网站登录", 8.0));
29 | sensitive_word.add(new Pair("美丽说—白领的全球导购", 8.0));
30 | sensitive_word.add(new Pair("登录 - 美丽说", 8.0));
31 | sensitive_word.add(new Pair("易迅网-专业的电脑、数码家电、手机、汽车用品、鞋服百货网上数码大卖场 - 易迅网", 8.0));
32 | sensitive_word.add(new Pair("国美在线(GOME)-综合网购商城,正品低价、品质保障、快速送达、安心服务!", 8.0));
33 | sensitive_word.add(new Pair("用户登录-国美在线", 8.0));
34 | sensitive_word.add(new Pair("聚美优品 - 【极速免税店 品牌防伪码】正品化妆品团购网站BJ,千万用户推荐,拆封30天无条件退货!", 8.0));
35 | sensitive_word.add(new Pair("登录聚美", 8.0));
36 | sensitive_word.add(new Pair("华为商城官网-提供华为手机(", 8.0));
37 | sensitive_word.add(new Pair("欢迎访问中国建设银行网站", 8.0));
38 | sensitive_word.add(new Pair("中国建设银行 个人客户网上银行", 8.0));
39 | sensitive_word.add(new Pair("交通银行 - 交银金融网", 8.0));
40 | sensitive_word.add(new Pair("一网通主页 -- 招商银行官方网站", 8.0));
41 | sensitive_word.add(new Pair("中国银行全球门户网站", 8.0));
42 | sensitive_word.add(new Pair("中国工商银行中国网站", 8.0));
43 | sensitive_word.add(new Pair("中国农业银行", 8.0));
44 | sensitive_word.add(new Pair("首页 - 广发银行", 8.0));
45 | sensitive_word.add(new Pair("中国邮政储蓄银行", 8.0));
46 | sensitive_word.add(new Pair("登录 - 当当网", 8.0));
47 | sensitive_word.add(new Pair("当当—网上购物中心:图书、母婴、美妆、家居、数码、家电、服装、鞋包等,正品低价,货到付款", 8.0));
48 | sensitive_word.add(new Pair("126网易免费邮--你的专业电子邮局", 8.0));
49 | sensitive_word.add(new Pair("163网易免费邮--中文邮箱第一品牌", 8.0));
50 | sensitive_word.add(new Pair("企业邮箱领航者|163企业邮箱-网易企业邮箱-外贸企业邮箱-中文企业邮箱首选品牌解决方案", 8.0));
51 | sensitive_word.add(new Pair("登录QQ邮箱", 8.0));
52 | sensitive_word.add(new Pair("QQ空间-分享生活,留住感动", 8.0));
53 | sensitive_word.add(new Pair("微博-随时随地发现新鲜事", 8.0));
54 | sensitive_word.add(new Pair("登录 - 支付宝", 8.0));
55 | sensitive_word.add(new Pair("登录 | 美团网", 8.0));
56 | sensitive_word.add(new Pair("[官]欢迎访问全国信用在线申请中心", 15.0));
57 | sensitive_word.add(new Pair("登录", 2.0));
58 | sensitive_word.add(new Pair("登陆", 3.0));
59 | sensitive_word.add(new Pair("中奖", 2.0));
60 | sensitive_word.add(new Pair("抽奖", 2.0));
61 | sensitive_word.add(new Pair("信用", 2.0));
62 | }
63 |
64 | public static double score(String title) {
65 | double result = 0.0;
66 | for (Pair p: sensitive_word) {
67 | if (title.indexOf(p.first) != -1 && p.second > result) {
68 | result = p.second;
69 | }
70 | }
71 | return result;
72 | }
73 |
74 | public static void main(String[] args) {
75 | System.out.println(score("[官]欢迎访问全国信用在线申请中心"));
76 | System.out.println(score(" [官]欢迎访问全国信用在线申请中心 "));
77 | }
78 | }
79 |
--------------------------------------------------------------------------------
/src/main/java/chaitin/utils/Base64.java:
--------------------------------------------------------------------------------
1 | package chaitin.utils;
2 |
3 | import java.io.ByteArrayOutputStream;
4 | import java.util.Arrays;
5 | import java.util.LinkedList;
6 | import java.util.List;
7 |
/**
 * Lenient Base64 helpers for byte input: alphabet lookup, longest
 * Base64-looking run extraction, and a decoder that stops quietly at the first
 * padding or non-alphabet byte instead of throwing.
 */
public class Base64 {

    /**
     * Placeholder that always returns a new empty list.
     * NOTE(review): the list's element type is not recoverable from this file, so the raw type is kept.
     */
    @SuppressWarnings("rawtypes")
    List decode(byte[] input) {
        List result = new LinkedList();
        return result;
    }

    /**
     * Maps a Base64 character to its 6-bit value.
     *
     * @return 0..63 for alphabet characters, {@code (byte) 0xFE} for the padding
     *         character '=', {@code (byte) 0xFF} for anything else
     */
    public static byte alpha(byte c) {
        if (Character.isUpperCase(c)) {
            return (byte) (c - 'A');
        } else if (Character.isLowerCase(c)) {
            return (byte) (c - 'a' + 26);
        } else if (Character.isDigit(c)) {
            return (byte) (c - '0' + 52);
        } else if (c == '+') {
            return (byte) (62);
        } else if (c == '/') {
            return (byte) (63);
        } else if (c == '=') {
            return (byte) (0xFE);
        } else {
            return (byte) 0xFF;
        }
    }

    /** Tells whether {@code c} is a Base64 alphabet character or the padding character '='. */
    public static boolean is_alpha(byte c) {
        return Character.isUpperCase(c)
                || Character.isLowerCase(c)
                || Character.isDigit(c)
                || c == '+'
                || c == '/'
                || c == '=';
    }

    /**
     * Returns the longest contiguous run of Base64 characters (padding included)
     * in {@code input}; the earliest run wins on ties. Empty input is returned
     * as is; input without any Base64 character yields an empty array.
     */
    public static byte[] longest_sub_base64(byte[] input) {
        if (input.length == 0) {
            return input;
        }
        int max = 0;
        // result[i] = length of the Base64 run that ends at index i
        int[] result = new int[input.length];
        result[0] = is_alpha(input[0]) ? 1 : 0;
        for (int i = 1; i < input.length; ++i) {
            result[i] = is_alpha(input[i]) ? result[i - 1] + 1 : 0;
            if (result[i] > result[max]) {
                max = i;
            }
        }

        return Arrays.copyOfRange(input, max - result[max] + 1, max + 1);
    }

    /**
     * Decodes Base64 leniently: decoding stops at the first '=' or non-alphabet
     * byte, and a trailing incomplete group yields only the bytes it fully
     * encodes (two characters give one byte, three characters give two bytes, a
     * single character gives nothing). Leftover bits are discarded, so padded
     * input such as {@code "YQ=="} decodes to exactly {@code "a"} without an
     * extra trailing byte.
     *
     * @param input Base64 text as bytes, padded or not
     * @return the decoded bytes, possibly empty
     */
    public static byte[] decode_base64(byte[] input) {
        ByteArrayOutputStream output = new ByteArrayOutputStream();
        int bits = 0;   // sextets collected for the current group
        int count = 0;  // number of sextets in 'bits'
        for (byte b : input) {
            int sextet = alpha(b) & 0xFF;
            if (sextet > 63) {
                // '=' (0xFE) or a foreign byte (0xFF) ends the data wherever it appears.
                break;
            }
            bits = (bits << 6) | sextet;
            count++;
            if (count == 4) {
                output.write(bits >> 16);
                output.write(bits >> 8);
                output.write(bits);
                bits = 0;
                count = 0;
            }
        }
        if (count == 2) {
            // 12 bits collected: one full byte, 4 spare bits dropped.
            output.write(bits >> 4);
        } else if (count == 3) {
            // 18 bits collected: two full bytes, 2 spare bits dropped.
            output.write(bits >> 10);
            output.write(bits >> 2);
        }
        return output.toByteArray();
    }


    public static void main(String[] args) {
        System.out.println(new String(decode_base64("YWJjZA0==".getBytes())));
        System.out.println(new String(longest_sub_base64("!@#$test---123456".getBytes())));
    }
}
--------------------------------------------------------------------------------
/src/main/java/chaitin/utils/ChaitinRecord.java:
--------------------------------------------------------------------------------
1 | package chaitin.utils;
2 |
3 | import java.util.Date;
4 |
5 | import com.aliyun.odps.Column;
6 | import com.aliyun.odps.data.Record;
7 |
8 | public class ChaitinRecord implements Record {
9 |
10 | Object[] obj;
11 |
12 | public ChaitinRecord(Object[] obj) {
13 | this.obj = obj;
14 | }
15 |
16 | @Override
17 | public Object get(int arg0) {
18 | return obj[arg0];
19 | }
20 |
21 | @Override
22 | public Object get(String arg0) {
23 | // TODO Auto-generated method stub
24 | return null;
25 | }
26 |
27 | @Override
28 | public Long getBigint(int arg0) {
29 | // TODO Auto-generated method stub
30 | return null;
31 | }
32 |
33 | @Override
34 | public Long getBigint(String arg0) {
35 | // TODO Auto-generated method stub
36 | return null;
37 | }
38 |
39 | @Override
40 | public Boolean getBoolean(int arg0) {
41 | // TODO Auto-generated method stub
42 | return null;
43 | }
44 |
45 | @Override
46 | public Boolean getBoolean(String arg0) {
47 | // TODO Auto-generated method stub
48 | return null;
49 | }
50 |
51 | @Override
52 | public byte[] getBytes(int arg0) {
53 | // TODO Auto-generated method stub
54 | return null;
55 | }
56 |
57 | @Override
58 | public byte[] getBytes(String arg0) {
59 | // TODO Auto-generated method stub
60 | return null;
61 | }
62 |
63 | @Override
64 | public int getColumnCount() {
65 | // TODO Auto-generated method stub
66 | return 0;
67 | }
68 |
69 | @Override
70 | public Column[] getColumns() {
71 | // TODO Auto-generated method stub
72 | return null;
73 | }
74 |
75 | @Override
76 | public Date getDatetime(int arg0) {
77 | // TODO Auto-generated method stub
78 | return null;
79 | }
80 |
81 | @Override
82 | public Date getDatetime(String arg0) {
83 | // TODO Auto-generated method stub
84 | return null;
85 | }
86 |
87 | @Override
88 | public Double getDouble(int arg0) {
89 | // TODO Auto-generated method stub
90 | return null;
91 | }
92 |
93 | @Override
94 | public Double getDouble(String arg0) {
95 | // TODO Auto-generated method stub
96 | return null;
97 | }
98 |
99 | @Override
100 | public String getString(int arg0) {
101 | // TODO Auto-generated method stub
102 | return null;
103 | }
104 |
105 | @Override
106 | public String getString(String arg0) {
107 | // TODO Auto-generated method stub
108 | return null;
109 | }
110 |
111 | @Override
112 | public void set(Object[] arg0) {
113 | obj = arg0;
114 |
115 | }
116 |
117 | @Override
118 | public void set(int arg0, Object arg1) {
119 | obj[arg0] = arg1;
120 |
121 | }
122 |
123 | @Override
124 | public void set(String arg0, Object arg1) {
125 | // TODO Auto-generated method stub
126 |
127 | }
128 |
129 | @Override
130 | public void setBigint(int arg0, Long arg1) {
131 | // TODO Auto-generated method stub
132 |
133 | }
134 |
135 | @Override
136 | public void setBigint(String arg0, Long arg1) {
137 | // TODO Auto-generated method stub
138 |
139 | }
140 |
141 | @Override
142 | public void setBoolean(int arg0, Boolean arg1) {
143 | obj[arg0] = arg1;
144 |
145 | }
146 |
147 | @Override
148 | public void setBoolean(String arg0, Boolean arg1) {
149 | // TODO Auto-generated method stub
150 |
151 | }
152 |
153 | @Override
154 | public void setDatetime(int arg0, Date arg1) {
155 | // TODO Auto-generated method stub
156 |
157 | }
158 |
159 | @Override
160 | public void setDatetime(String arg0, Date arg1) {
161 | // TODO Auto-generated method stub
162 |
163 | }
164 |
165 | @Override
166 | public void setDouble(int arg0, Double arg1) {
167 | // TODO Auto-generated method stub
168 |
169 | }
170 |
171 | @Override
172 | public void setDouble(String arg0, Double arg1) {
173 | // TODO Auto-generated method stub
174 |
175 | }
176 |
177 | @Override
178 | public void setString(int arg0, String arg1) {
179 | obj[arg0] = arg1;
180 | }
181 |
182 | @Override
183 | public void setString(String arg0, String arg1) {
184 | // TODO Auto-generated method stub
185 |
186 | }
187 |
188 | @Override
189 | public void setString(int arg0, byte[] arg1) {
190 | // TODO Auto-generated method stub
191 |
192 | }
193 |
194 | @Override
195 | public void setString(String arg0, byte[] arg1) {
196 | // TODO Auto-generated method stub
197 |
198 | }
199 |
200 | @Override
201 | public Object[] toArray() {
202 | // TODO Auto-generated method stub
203 | return obj;
204 | }
205 |
206 | }
207 |
--------------------------------------------------------------------------------
/src/main/java/chaitin/utils/Decoder.java:
--------------------------------------------------------------------------------
1 | package chaitin.utils;
2 |
3 | import java.util.LinkedList;
4 | import java.util.List;
5 |
/**
 * Base type of the payload decoders in this package. The default
 * implementation recognises nothing and hands back a fresh empty list.
 */
public class Decoder {
    /**
     * Decodes {@code input}; this base version ignores it.
     *
     * @return a new, empty {@link LinkedList}
     */
    List decode(byte[] input) {
        return new LinkedList();
    }
}
--------------------------------------------------------------------------------
/src/main/java/chaitin/utils/Gao.java:
--------------------------------------------------------------------------------
1 | package chaitin.utils;
2 |
3 | import java.io.BufferedReader;
4 | import java.io.FileInputStream;
5 | import java.io.FileWriter;
6 | import java.io.IOException;
7 | import java.io.InputStreamReader;
8 |
9 | import com.aliyun.odps.data.Record;
10 | import com.aliyun.odps.mapred.Mapper;
11 | import com.google.gson.Gson;
12 |
13 | import chaitin.webshell.WebshellDetector;
14 | import chaitin.webshell.WebshellMapper;
15 | import chaitin.webshell.parser.AspScore;
16 | import chaitin.webshell.parser.PhpScore;
17 |
18 | public class Gao {
19 |
20 | static String output_file_name = "/tmp/out.txt";
21 | static int count = 0;
22 |
23 | public static void map_file(String input_file_name, Mapper mapper) throws IOException {
24 | //*
25 | new FileWriter(output_file_name).close();
26 | BufferedReader br = new BufferedReader(new InputStreamReader(
27 | new FileInputStream(input_file_name)));
28 | String line;
29 | Gson gson = new Gson();
30 | while ((line = br.readLine()) != null) {
31 | String[] item = gson.fromJson(line, String[].class);
32 | ChaitinRecord record = new ChaitinRecord(item);
33 | mapper.map(0, record, null);
34 | //break;
35 | }
36 | br.close();
37 | //*/
38 | }
39 |
40 | public static void dump(Object[] item) throws IOException {
41 | //*
42 | Gson gson = new Gson();
43 | String result = gson.toJson(item);
44 | System.out.println(result);
45 | FileWriter fw = new FileWriter(output_file_name, true);
46 | fw.write(result + "\n");
47 | fw.close();
48 | //System.exit(0);
49 | count += 1;
50 | //*/
51 | }
52 |
53 | public static void main(String[] args) throws Exception {
54 | WebshellMapper mapper = new WebshellMapper();
55 |
56 | map_file("/Users/Monster/Documents/webshell.in.json", mapper);
57 | System.out.println("count: " + count);
58 | System.out.println("precision: " + (mapper._tp/(mapper._tp + mapper._fp)));
59 | System.out.println("recall: " + (mapper._tp/(mapper._tp + mapper._fn)));
60 |
61 | boolean b = WebshellDetector.isWebshell("", "1\u003d%40eval%2F%2A%CE%D2%C8%A5%C4%E3%C2%EE%C1%CB%B8%F4%B1%DA%2A%2F%01%28%24%7B%27%5FP%27.%27OST%27%7D%5Bz9%5D%2F%2A%CE%D2%C8%A5%C4%E3%C2%EE%C1%CB%B8%F4%B1%DA%2A%2F%01%28%24%7B%27%5FPOS%27.%27T%27%7D%5Bz0%5D%29%29%3B\u0026z0\u003dNTk1NTQ2O0Bpbmlfc2V0KCJkaXNwbGF5X2Vycm9ycyIsIjA");
62 | System.out.println(b);
63 | }
64 |
65 | }
66 |
--------------------------------------------------------------------------------
/src/main/java/chaitin/utils/Multipart.java:
--------------------------------------------------------------------------------
1 | package chaitin.utils;
2 |
3 | import java.util.LinkedList;
4 | import java.util.List;
5 |
6 | public class Multipart extends Decoder {
7 | @Override
8 | List decode(byte[] input) {
9 | List result = new LinkedList();
10 | return result;
11 | }
12 | /*
13 | static List> multipart(byte[] input) {
14 | List> result = new LinkedList>();
15 | int pos = Multipart.accept(input, 0, "--".getBytes());
16 | if (pos == 0) {
17 | return result;
18 | }
19 | int pos_boundary = pos;
20 | while (pos < input.length && input[pos] != '\r' && input[pos] != '\n') {
21 | pos += 1;
22 | }
23 | byte[] boundary = Arrays.copyOfRange(input, pos_boundary, pos);
24 | return result;
25 | }
26 |
27 | static int accept(byte[] input, int pos, byte[] sub) {
28 | int i = 0;
29 | for (i = 0; i < sub.length && pos + i < input.length && sub[i] == input[pos + i]; ++i) {
30 | ;
31 | }
32 | return i == sub.length ? pos + i : 0;
33 | }
34 |
35 | static int accept_line(byte[] input, int pos) {
36 |
37 | }*/
38 | }
--------------------------------------------------------------------------------
/src/main/java/chaitin/utils/Pair.java:
--------------------------------------------------------------------------------
1 | package chaitin.utils;
2 |
/**
 * Simple mutable holder for two values.
 *
 * @param <T1> type of {@link #first}
 * @param <T2> type of {@link #second}
 */
public class Pair<T1, T2> {

    public T1 first;
    public T2 second;


    /** Creates a pair whose two members are {@code null}. */
    public Pair() {
        super();
    }

    /** Creates a pair holding the given values. */
    public Pair(T1 first, T2 second) {
        super();
        this.first = first;
        this.second = second;
    }
}
19 |
--------------------------------------------------------------------------------
/src/main/java/chaitin/utils/ParseUrl.java:
--------------------------------------------------------------------------------
1 | package chaitin.utils;
2 |
3 | import java.net.MalformedURLException;
4 | import java.net.URL;
5 | import java.util.LinkedList;
6 | import java.util.List;
7 |
8 | public class ParseUrl extends Decoder {
9 | public static class Url {
10 | public byte [] protocol;
11 | public byte [] host;
12 | public int port;
13 | public byte [] path;
14 | public byte [] query;
15 | }
16 |
17 | @Override
18 | List decode(byte[] input) {
19 | List result = new LinkedList();
20 | return result;
21 | }
22 |
23 | public static Url parse_url(byte[] input) {
24 | Url url = new Url();
25 | try {
26 | URL _url = new URL(new String(input));
27 | url.protocol = _url.getProtocol().getBytes();
28 | url.host = _url.getHost().getBytes();
29 | url.port = _url.getPort();
30 | url.path = _url.getPath().getBytes();
31 | url.query = _url.getQuery().getBytes();
32 | } catch (MalformedURLException e) {
33 | e.printStackTrace();
34 | }
35 | return url;
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/src/main/java/chaitin/utils/QueryString.java:
--------------------------------------------------------------------------------
1 | package chaitin.utils;
2 |
3 | import java.util.Arrays;
4 | import java.util.LinkedList;
5 | import java.util.List;
6 |
7 | public class QueryString extends Decoder{
8 |
9 | @Override
10 | List decode(byte[] input) {
11 | List result = new LinkedList();
12 | return result;
13 | }
14 |
15 | public static List> query_string(byte[] input) {
16 | List> result = new LinkedList>();
17 | int pos = 0;
18 | while (pos < input.length) {
19 | int pos_key_start = pos;
20 |
21 | while (pos < input.length
22 | && input[pos] != '='
23 | && input[pos] != '&') {
24 | pos += 1;
25 | }
26 | int pos_key_end = pos;
27 |
28 | int pos_value_start = 0;
29 | int pos_value_end = 0;
30 |
31 | if (pos < input.length
32 | && input[pos] == '=') {
33 | pos += 1;
34 | pos_value_start = pos;
35 | while (pos < input.length
36 | && input[pos] != '&') {
37 | pos += 1;
38 | }
39 | pos_value_end = pos;
40 | }
41 |
42 | if (pos < input.length && input[pos] == '&') {
43 | pos += 1;
44 | }
45 |
46 | byte[] key = Unquote.unquote(Arrays.copyOfRange(input, pos_key_start, pos_key_end));
47 | byte[] value = Unquote.unquote(Arrays.copyOfRange(input, pos_value_start, pos_value_end));
48 | if (key.length != 0 || value.length != 0) {
49 | Pair kv_pair = new Pair(key, value);
50 | result.add(kv_pair);
51 | }
52 | }
53 | return result;
54 | }
55 | }
--------------------------------------------------------------------------------
/src/main/java/chaitin/utils/Unquote.java:
--------------------------------------------------------------------------------
1 | package chaitin.utils;
2 |
3 | import java.io.ByteArrayOutputStream;
4 | import java.util.LinkedList;
5 | import java.util.List;
6 |
7 | public class Unquote extends Decoder {
8 | @Override
9 | List decode(byte[] input) {
10 | List result = new LinkedList();
11 | return result;
12 | }
13 |
14 | public static byte[] unquote(byte[] input) {
15 | ByteArrayOutputStream output = new ByteArrayOutputStream();
16 | int pos = 0;
17 | while (pos < input.length) {
18 | if (pos + 2 < input.length
19 | && input[pos] == '%'
20 | && isxdigit(input[pos + 1])
21 | && isxdigit(input[pos + 2])) {
22 | output.write(Unquote.ord(input[pos + 1], input[pos + 2]));
23 | pos += 3;
24 | } else if (pos + 5 < input.length
25 | && input[pos] == '%'
26 | && (input[pos + 1] == 'u' || input[pos + 1] == 'U' )
27 | && isxdigit(input[pos + 2])
28 | && isxdigit(input[pos + 3])
29 | && isxdigit(input[pos + 4])
30 | && isxdigit(input[pos + 5])) {
31 | if (input[pos + 2] != '0' || input[pos + 3] != '0') {
32 | output.write(Unquote.ord(input[pos + 2], input[pos + 3]));
33 | }
34 | output.write(Unquote.ord(input[pos + 4], input[pos + 5]));
35 | pos += 6;
36 | } else if (pos < input.length && input[pos] == '+') {
37 | output.write(' ');
38 | pos += 1;
39 | } else {
40 | output.write(input[pos]);
41 | pos += 1;
42 | }
43 | }
44 | return output.toByteArray();
45 | }
46 |
47 | static boolean isxdigit(byte b) {
48 | return (b >= '0' && b <= '9')
49 | || (b >= 'a' && b <= 'f')
50 | || (b >= 'A' && b <= 'F');
51 | }
52 |
53 | static byte ord(byte a, byte b) {
54 | byte r = 0;
55 | if (a >= '0' && a <= '9') {
56 | a = (byte)(a & 0x0F);
57 | } else {
58 | a = (byte)((a & 0x0F) + 9);
59 | }
60 | if (b >= '0' && b <= '9') {
61 | b = (byte)(b & 0x0F);
62 | } else {
63 | b = (byte)((b & 0x0F) + 9);
64 | }
65 | return (byte)((a << 4) | b);
66 | }
67 | }
--------------------------------------------------------------------------------
/src/main/java/chaitin/webshell/WebshellDetector.java:
--------------------------------------------------------------------------------
1 | package chaitin.webshell;
2 |
3 | import java.util.HashMap;
4 | import java.util.List;
5 | import java.util.Map;
6 |
7 | import chaitin.utils.Base64;
8 | import chaitin.utils.Pair;
9 | import chaitin.utils.QueryString;
10 | import chaitin.utils.Unquote;
11 | import chaitin.webshell.parser.AspScore;
12 | import chaitin.webshell.parser.PhpScore;
13 |
14 | public class WebshellDetector {
15 |
16 | static double thres_hold = 2.0;
17 |
18 | static Map sensitive_key = new HashMap() {
19 | private static final long serialVersionUID = 97314362015453784L;
20 |
21 | {
22 | put("z", 1.0);
23 | put("z0", 1.5);
24 | put("z1", 1.0);
25 | put("z2", 1.0);
26 | put("caidao", 1.8);
27 | put("mb", 0.8);
28 | put("hk715", 1.2);
29 | put("xise", 1.5);
30 | put("diaosi", 1.0);
31 | }};
32 |
33 |
34 | public static double scorePhp_one(byte[] payload) {
35 | double score = new PhpScore(payload).score();
36 | byte[] payload_base64 = Base64.decode_base64(payload);
37 | double score_base64_decoded = new PhpScore(payload_base64).score();
38 | if (score_base64_decoded > 0.8) {
39 | score_base64_decoded += 0.3;
40 | }
41 | return score > score_base64_decoded ? score : score_base64_decoded;
42 | }
43 |
44 | public static double scorePhp(byte[] payload) {
45 | String s = new String(payload);
46 | double score = scorePhp_one(payload);
47 | /*if (s.indexOf('"') + 1 < payload.length) {
48 | double tscore = scorePhp_one(s.substring(s.indexOf('"') + 1).getBytes());
49 | score = score > tscore ? score : tscore;
50 | }
51 | if (s.indexOf('\'') + 1 < payload.length) {
52 | double tscore = scorePhp_one(s.substring(s.indexOf('\'') + 1).getBytes());
53 | score = score > tscore ? score : tscore;
54 | }*/
55 | return score;
56 | }
57 |
58 | public static double scoreAsp(byte[] payload) {
59 | double score = new AspScore(payload).score();
60 | byte[] payload_base64 = Base64.decode_base64(payload);
61 | byte[] payload_without_op = AspScore.filter_strop(payload);
62 | double score_without_op = new AspScore(payload_without_op).score();
63 | double score_base64_decoded = new AspScore(payload_base64).score();
64 | if (score_base64_decoded > 0.8) {
65 | score_base64_decoded += 0.3;
66 | }
67 | score = score > score_base64_decoded ? score : score_base64_decoded;
68 | return score;
69 | }
70 |
71 | public static double score_key(byte[] payload) {
72 | String s = new String(payload).toLowerCase();
73 | if (sensitive_key.containsKey(s)) {
74 | return sensitive_key.get(s);
75 | }
76 | double score = 0.0;
77 | for (char c: s.toCharArray()) {
78 | if (!Character.isLetter(c)
79 | && !Character.isDigit(c)
80 | && c != '_'
81 | && c != '$') {
82 | score -= 0.5;
83 | }
84 | }
85 | return score;
86 | }
87 |
88 | public static double score(byte[] payload) {
89 | //return WebshellTokenizer.scoreTokens(new String(payload));
90 | double score_php = scorePhp(payload);
91 | double score_asp = scoreAsp(payload);
92 | return score_php > score_asp ? score_php : score_asp;
93 | }
94 |
95 | public static boolean isWebshell(String uri, String data) {
96 |
97 | List> plist = QueryString.query_string(data.getBytes());
98 |
99 | for (Pair p : plist) {
100 | byte[] key = Unquote.unquote(p.first);
101 | byte[] value = Unquote.unquote(p.second);
102 |
103 | if (score(key) >= thres_hold) {
104 | return true;
105 | }
106 | if (score_key(key) + score(value) >= thres_hold) {
107 | return true;
108 | }
109 | }
110 | /*
111 | uri = uri.substring(uri.indexOf('?') + 1);
112 |
113 | if (uri.length() > 1) {
114 | plist = QueryString.query_string(uri.getBytes());
115 | for (Pair p : plist) {
116 |
117 | byte[] key = Unquote.unquote(p.first);
118 | byte[] value = Unquote.unquote(p.second);
119 |
120 | if (score(key) >= thres_hold) {
121 | return true;
122 | }
123 | if (score(value) >= thres_hold) {
124 | return true;
125 | }
126 | }
127 | }*/
128 | return false;
129 | }
130 |
131 | public static void main(String[] args) throws Exception {
132 | // List> plist = QueryString.query_string("/44b676ed1a4a6ea7ba0918cf05093f1d/f9a1e3cd54ace2b54024e1b21a7637ab?_timestamp_=rc_time_grit_hour_one".getBytes());
133 |
134 | //String s = "ysh=execute(\"response.clear:response.write(\"\"jinlaile\"\"):response.end\")";
135 | //System.out.println(isWebshell("", s) == true);
136 |
137 | //s = "xiaoliang=Execute(\"Execute(\"\"On+Error+Resume+Next:Function+bd%28byVal+s%29%3AFor+i%3D1+To+Len%28s%29+Step+2%3Ac%3DMid%28s%2Ci%2C2%29%3AIf+IsNumeric%28Mid%28s%2Ci%2C1%29%29+Then%3AExecute%28%22%22%22%22bd%3Dbd%26chr%28%26H%22%22%22%22%26c%26%22%22%22%22%29%22%22%22%22%29%3AElse%3AExecute%28%22%22%22%22bd%3Dbd%26chr%28%26H%22%22%22%22%26c%26Mid%28s%2Ci%2B2%2C2%29%26%22%22%22%22%29%22%22%22%22%29%3Ai%3Di%2B2%3AEnd+If%22%22%26chr%2810%29%26%22%22Next%3AEnd+Function:Response.Write(\"\"\"\"#onewordbackdoor->|\"\"\"\"):Execute(\"\"\"\"On+Error+Resume+Next:\"\"\"\"%26bd(\"\"\"\"44696D20533A533D5365727665722E4D61707061746828222E2229266368722839293A53455420433D4372656174654F626A6563742822536372697074696E672E46696C6553797374656D4F626A656374";
138 | //System.out.println(isWebshell("", s) == true);
139 |
140 | //s = "sd=Execute++++++++++++++++++++++++++++++(\"++++++++++++++++++++++++++++++Execute++++++++++++++++++++++++++++++(\"\"++++++++++:Function+bd%28byVal+s%29%3AFor+i%3D1+To+Len%28s%29+Step+2%3Ac%3DMid%28s%2Ci%2C2%29%3AIf+IsNumeric%28Mid%28s%2Ci%2C1%29%29+Then%3AExecute%28%22%22%22%22bd%3Dbd%26chr%28%26H%22%22%22%22%26c%26%22%22%22%22%29%22%22%22%22%29%3AElse%3AExecute%28%22%22%22%22bd%3Dbd%26chr%28%26H%22%22%22%22%26c%26Mid%28s%2Ci%2B2%2C2%29%26%22%22%22%22%29%22%22%22%22%29%3Ai%3Di%2B2%3AEnd+If%22%22%26chr%2810%29%26%22%22Next%3AEnd+Function:Response.Write(\"\"\"\"->|\"\"\"\"):++++++++";
141 | //System.out.println(isWebshell("", s) == true);
142 |
143 | //s = "cmd=%40eval%2F%2A%CE%D2%C8%A5%C4%E3%C2%EE%C1%CB%B8%F4%B1%DA%2A%2F%01%28%24%7B%27%5FP%27.%27OST%27%7D%5Bz9%5D%2F%2A%CE%D2%C8%A5%C4%E3%C2%EE%C1%CB%B8%F4%B1%DA%2A%2F%01%28%24%7B%27%5FPOS%27.%27T%27%7D%5Bz0%5D%29%29%3B&z0=Nzc0MTEwO0Bpbmlfc2V0KCJkaXNwbGF5X2Vycm9ycyIsIjAiKTtAc2V0X3RpbWVfbGltaXQoMCk7QHNldF9tYWdpY19xdW90ZXNfcnVudGltZSgwKTtlY2hvKCItPnwiKTs7ZnVuY3Rpb24gc2V0X3dyaXRlYWJsZSgkZmlsZV9uYW1lKXtpZihAY2htb2QoJGZpbGVfbmFtZSxiYXNlX2NvbnZlcnQoYmFzZTY0X2RlY29kZSgkX1BPU1RbIngyIl0pLDgsMTApKSl7ZWNobyAiMSI7fWVsc2V7ZWNobyAiLTEiO319c2V0X3dyaXRlYWJsZShiYXNlNjRfZGVjb2RlKCRfUE9TVFsieDEiXSkpO2VjaG8oInw8LSIpOztkaWUoKTs%3D&x1=RDovd2Vic2l0ZXMveGluc2p6LmNvbS9wdWJsaWNfaHRtbC9uZXdzLzA0MDM0MjY0Lmh0bWw%3D&x2=MDY2Ng%3D%3D&z9=BaSE64%5FdEcOdE";
144 | //System.out.println(isWebshell("", s) == true);
145 |
146 | //String s = "z0=NTYwNjQ4O0Bpbmlfc2V0KCJkaXNwbGF5X2Vycm9ycyIsIjAiKTtAc2V0X3RpbWVfbGltaXQoMCk7QHNldF9tYWdpY19xdW90ZXNfcnVudGltZSgwKTtlY2hvKCItPnwiKTs7ZWNobyBAZndyaXRlKGZvcGVuKGJhc2U2NF9kZWNvZGUoJF9QT1NUWyJ6MSJdKSwidyIpLGJhc2U2NF9kZWNvZGUoJF9QT1NUWyJ6MiJdKSk%";
147 | //System.out.println(isWebshell("", s) == true);
148 | /*
149 | BufferedReader br = new BufferedReader(new InputStreamReader(
150 | new FileInputStream("/tmp/z0.post_data")));
151 | int c = 0;
152 | for (String line = br.readLine(); line != null; line = br.readLine()) {
153 | if (!isWebshell("", line)) {
154 | System.out.println(line);
155 | c += 1;
156 | if (c > 20) {
157 | break;
158 | }
159 | }
160 | }
161 | br.close();*/
162 | }
163 |
164 | }
165 |
--------------------------------------------------------------------------------
/src/main/java/chaitin/webshell/WebshellMapper.java:
--------------------------------------------------------------------------------
1 | package chaitin.webshell;
2 | import java.io.IOException;
3 | import com.aliyun.odps.data.Record;
4 | import com.aliyun.odps.mapred.Mapper;
5 |
6 | import chaitin.utils.Gao;
7 |
8 | public class WebshellMapper implements Mapper {
9 |
10 | public double _tp, _fp, _fn;
11 |
12 | public void setup(TaskContext context) throws IOException {
13 |
14 | }
15 |
16 |
17 | public void map(long recordNum, Record record, TaskContext context) throws IOException {
18 | String id = (String) record.get(0);
19 | String uri = (String) record.get(1);
20 | String data = (String) record.get(2);
21 | Boolean result = WebshellDetector.isWebshell(uri, data);
22 | if (context != null) {
23 | if (result) {
24 | Record result_record = context.createOutputRecord();
25 | result_record.set("id", id);
26 | context.write(result_record);
27 | }
28 | } else {
29 | if (!record.get(3).toString().equals("true") && !record.get(3).toString().equals("false")) {
30 | System.out.println("err! ");
31 | System.exit(0);
32 | }
33 | if (result.toString().equals("true") && record.get(3).toString().equals("true")) {
34 | _tp += 1;
35 | }
36 | if (result.toString().equals("true") && record.get(3).toString().equals("false")) {
37 | _fp += 1;
38 | }
39 | if (result.toString().equals("false") && record.get(3).toString().equals("true")) {
40 | _fn += 1;
41 | }
42 |
43 | if (!result.toString().equals(record.get(3))) {
44 | Gao.dump(record.toArray());
45 | }
46 | }
47 | }
48 |
49 | public void cleanup(TaskContext context) throws IOException {
50 |
51 | }
52 |
53 |
54 | public static void main(String[] args) throws Exception {
55 |
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/src/main/java/chaitin/webshell/WebshellTokenizer.java:
--------------------------------------------------------------------------------
1 | package chaitin.webshell;
2 |
3 | import java.util.HashMap;
4 | import java.util.HashSet;
5 | import java.util.Iterator;
6 | import java.util.Map;
7 | import java.util.Map.Entry;
8 |
9 | import chaitin.utils.Base64;
10 |
11 | public class WebshellTokenizer {
12 |
13 | // no _
14 | private static final String separators = "[`~!@#$%^&*()+=\\-{}\\[\\]|\\:;\"<>',.?/\\\\ \n\r\t]";
15 |
16 | private static final HashMap tokenScore = new HashMap() {
17 | private static final long serialVersionUID = 5401942617951940220L;
18 | {
19 | //put("assert", 1);
20 | //put("allowstaticmethodaccess", 1);
21 | //put("array", 1);
22 | put("array_map", 2);
23 | put("base64_decode", 2);
24 | put("catch", 1);
25 | put("chr", 1);
26 | put("create_function", 1);
27 | put("display_errors", 2);
28 | put("echo", 1);
29 | put("encoding", 2);
30 | put("eval", 2);
31 | put("exception", 2);
32 | put("execute", 1);
33 | put("exit", 1);
34 | put("frombase64string", 2);
35 | put("getencoding", 2);
36 | put("getinputstream", 2);
37 | put("getrequest", 2);
38 | put("getwriter", 2);
39 | put("gzinflate", 2);
40 | put("ini_set", 2);
41 | put("isnumeri", 2);
42 | put("md5", 1);
43 | put("phpinfo", 2);
44 | put("preg_replace", 2);
45 | put("println", 2);
46 | put("response", 1);
47 | put("servletactioncontext", 1);
48 | put("streamconnector", 1);
49 | put("system", 1);
50 | put("write", 1);
51 | put("methodaccessor", 1);
52 | put("_cookie", 1);
53 | put("_get", 1);
54 | put("_post", 1);
55 | put("_request", 1);
56 | put("_server", 1);
57 | }};
58 |
59 | private static final HashMap dangerousTokenScore = new HashMap() {
60 | private static final long serialVersionUID = 4464369449273331205L;
61 | {
62 | put("assert", 1);
63 | put("allowstaticmethodaccess", 1);
64 | put("base64_decode", 2);
65 | put("create_function", 1);
66 | put("display_errors", 2);
67 | put("eval", 2);
68 | put("frombase64string", 2);
69 | put("getencoding", 2);
70 | put("getinputstream", 2);
71 | put("getrequest", 2);
72 | put("getwriter", 2);
73 | put("gzinflate", 2);
74 | put("ini_set", 2);
75 | put("isnumeri", 2);
76 | put("preg_replace", 2);
77 | put("response", 1);
78 | put("servletactioncontext", 1);
79 | put("streamconnector", 1);
80 | //put("try", 1);
81 | put("methodaccessor", 1);
82 | put("_cookie", 2);
83 | put("_get", 2);
84 | put("_post", 2);
85 | put("_request", 2);
86 | put("_server", 2);
87 | }};
88 | public static int scoreTokens(String input) {
89 |
90 | HashSet used = new HashSet();
91 | used.clear();
92 |
93 | int score = 0;
94 |
95 | String[] tokens = input.split(separators);
96 |
97 | for (int i = 0; i < tokens.length; i++) {
98 | tokens[i] = tokens[i].toLowerCase();
99 | if (!used.contains(tokens[i]) && tokenScore.containsKey(tokens[i])) {
100 | score += (Integer)tokenScore.get(tokens[i]);
101 | used.add(tokens[i]);
102 | }
103 | }
104 |
105 | // Next is merge score.
106 | int mergeScore = 0;
107 | String mergeString = String.join("", tokens);
108 |
109 | Iterator> iter = dangerousTokenScore.entrySet().iterator();
110 | while (iter.hasNext()) {
111 | Map.Entry entry = (Map.Entry) iter.next();
112 | String key = (String)entry.getKey();
113 | int val = (Integer)entry.getValue();
114 | if (mergeString.indexOf(key) >= 0) {
115 | mergeScore += val;
116 | }
117 | }
118 | return Math.max(score, mergeScore);
119 | }
120 |
121 | public static int webShellScore(String input) {
122 | String decodedInput = new String(Base64.decode_base64(input.getBytes()));
123 | return Math.max(scoreTokens(input), scoreTokens(decodedInput));
124 | }
125 |
126 | public static void main(String[] args) {
127 |
128 | String s = "2`3~4$1!5@6#7$8%9^8&7*6(5)4+2=1-2{3}4[5]6|7:8;9\"8<7>6'5,4.3?2/1\\0 1\n2\r3\t4";
129 | String s1 = "mb=Response.Write(\"------>|\");var err:Exception;try{+++++++++++++++++++++++++eval++++++++++++++++++++(System.Text.Encoding.GetEncoding(936).GetString(System.Convert.FromBase64String(\"UmVzcG9uc2UuV3JpdGUoImhhb3JlbmdlLmNvbVFRMzE3Mjc1NzM4Iik7\")),\"unsafe\");}catch(err){Response.Write(\"ERROR:// \"+err.essage);}Response.Write(\"|<----\");Response.End();";
130 | String s2 = "wysiwyg=1&subject=2014-3-12-783070+new+balance+sneakers+e.mq.txf.jsp&message=Now+that+the+wind+fairy+emperor+say%2C+equal+opportunity+for+everyone+to.+As+long+as+you+have+fairy+crystal%2C%5Burl%3Dhttp%3A%2F%2Fnewbalancesneakers1.snack.ws%2F%5Dnew+balance+sneakers%5B%2Furl%5D%2C+you+can+switch+to+the+best+training+resources%2C+or+top+fairy.+For+a+time+the+square+the+atmosphere+again+warm+up%2C%5Burl%3Dhttp%3A%2F%2Flouisvuittoniphone5case.snack.ws%2F%5DLouis+vuitton+iPhone+5+Case%5B%2Furl%5D%2C+many+have+a+large+number+of+fairy+crystal+of+a+person+already+in+the+itch+for+a+try%2C%5Burl%3Dhttp%3A%2F%2Fgucciipadminicase.snack.ws%2F%5DGucci+iPad+Mini+Case%5B%2Furl%5D%2C+some+of+them+even+start+together%2C+ready+to+be+fairy+crystal+together+to+buy+top+class+fairy.+That+the+wind+fairy+emperor+in+response+to+the+following+is+nodded+with+satisfaction%2C+and+continued%3A+%22please+one+thousand+one+hundred+to+participate+in+the+small+area+of+the+elite+stage......%22+With+that+wind+fairy+emperor+voice+down%2C+one+thousand+one+hundred+have+the+jade+big+Luo+Xian+went+on+stage.+Although+it+is+one+thousand+one+hundred+people%2C+but+in+the+huge+platform%2C+still+appear+some+empty.+The+thistle+Hun+stood+on+the+platform+of+God+consciousness+and+sweep%2C+she+soon+went+up%2C+she+did+not+see+brother+leaves.+Visible+brother+not+jade%2C+should+leave+Hong+Yuxian+city.+Before+she+was+to+protect+his%2C+just+know+ye+silent+leave+Hong+Yuxian+city+is+already+a+few+days.+%22Now+please+Dragon+River+emperor+said+into+the+small+area+rule%2C+at+the+same+time+about+what+people+can+not+enter+the+small+area.%22+That+the+wind+fairy+emperor+finished%2C+immediately+stand+aside.+As+one+of+the+Great+Dragon+River+four%2C%5Burl%3Dhttp%3A%2F%2Fsnk.to%2FmcB8%5Dnew+balance+tilbud%5B%2Furl%5D%2C+looks+very+ordinary%2C+or+even+a+lost+in+the+crowd+which+are+not+to+be+found+in+middle-aged+men.+He+walked+out%2C+smiled%2C+looked+very+nice.+But+here+all+know%2C+the+dragon+river+is+one+of+the+four+empe
ror%2C+he+be+a+slap+in+the+face+can+crush+dozens+of+sin+city%2C+and+even+a+domain+could+not+help+his+toss.+The+great+dragon+river+walk+out+just+smiled%2C+square+it+seems+everyone+felt+the+gentle+temperament+of+the+great+dragon+river.+%22Into+the+small+area+is+not+easy%2C+we+need+ten+fairy+emperor+also+open+void+gap%2C+it+can+barely+go%2C%5Burl%3Dhttp%3A%2F%2Fsnk.to%2FmGgY%5DGucci+iPad+Mini+Case%5B%2Furl%5D%2C+so+limited.+To+be+fair%2C+there+are+people+carrying+space+world%2C+please+stand+up%2C+otherwise+it+will+confiscate+directly+lose+access+to+qualified+jade.%22+Dragon+River%2C+words+like+hammers+generally+play+in+some+carry+small+world%2C+or+carry+the+rest+space+to+cheat+people.+Although+the+Dragon+River+emperor+said+very+tactful%2C%5Burl%3Dhttp%3A%2F%2Fnewbalancetilbud.snack.ws%2F%5Dnew+balance+tilbud%5B%2Furl%5D%2C+but+everyone+knows+the+meaning+of+dragon+river.+%28second+also+sent%2C+thank+gold+teeth+2013%2C+I+1230%2C+skdavid%2C+dust+tear+a+few+friends+continuous+million+dollars+ticket%21%29+%28to+be+continued...+The+first+seven+eight+four+chapters%29+into+small+areas+of+full+text+updates%2C+TXT+download%2C+as+in+the+novel+the+great+Knight+http%3A%2F%2Fwww.xs74.com%2F+dragon+river+in+table+one+thousand+one+hundred+big+Luo+Xian+swept+past%2C+then+calmly+said%3A+%22now+please+carry+space+world+people+hand+over+world+space+of+their+own%2C%5Burl%3Dhttp%3A%2F%2Fnewbalance576sko.snack.ws%2F%5Dnew+balance+576+Sko%5B%2Furl%5D%2C+also+line+up+through+the+artifact+under+the+door......%22+Unequal+Dragon+River+emperor+will+finish%2C+seven+or+eight+big+Luo+Xian+has+active+flying+down%2C%5Burl%3Dhttp%3A%2F%2Fsnk.to%2FmE0p%5DBurberry+iPhone+5s+Case%5B%2Furl%5D%2C+will+own+small+world+to+his+martial+art+in+people+or+acquaintance.+Love+Xian+Chun+Qu%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Fwww.988good.com%2Fbbs%2Fforum.php%3Fmod%3Dviewthread%26tid%3D288017%26fromuid%3D52970%5Dhttp%3A%2F%2Fwww.988good.com%2Fbbs%2Fforum.php%3Fmod%3Dviewthread%26tid%3D288017%26fromuid%3D52970%5B%2Fur
l%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Fwww.fadianzhan.com%2Fbbs%2Fhome.php%3Fmod%3Dspace%26uid%3D22628%26do%3Dblog%26quickforward%3D1%26id%3D245050%5Dhttp%3A%2F%2Fwww.fadianzhan.com%2Fbbs%2Fhome.php%3Fmod%3Dspace%26uid%3D22628%26do%3Dblog%26quickforward%3D1%26id%3D245050%5B%2Furl%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Fwww.hongniangxiehui.com%2Fforum.php%3Fmod%3Dviewthread%26tid%3D180733%5Dhttp%3A%2F%2Fwww.hongniangxiehui.com%2Fforum.php%3Fmod%3Dviewthread%26tid%3D180733%5B%2Furl%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Fwww.cuplf.com%2Fthread-14818-1-1.html%5Dhttp%3A%2F%2Fwww.cuplf.com%2Fthread-14818-1-1.html%5B%2Furl%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Fwww.xiaoxuexiao.com%2Fforum.php%3Fmod%3Dviewthread%26tid%3D1749007%26fromuid%3D127389%5Dhttp%3A%2F%2Fwww.xiaoxuexiao.com%2Fforum.php%3Fmod%3Dviewthread%26tid%3D1749007%26fromuid%3D127389%5B%2Furl%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Figirl.chinatimes.com%2Fforum.php%3Fmod%3Dviewthread%26tid%3D61649%26extra%3D%5Dhttp%3A%2F%2Figirl.chinatimes.com%2Fforum.php%3Fmod%3Dviewthread%26tid%3D61649%26extra%3D%5B%2Furl%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Fbbxyky.tk%2Fthread-154997-1-1.html%5Dhttp%3A%2F%2Fbbxyky.tk%2Fthread-154997-1-1.html%5B%2Furl%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Fmoshanghua.66rt.com%2Fviewthread.php%3Ftid%3D10070%26extra%3D%5Dhttp%3A%2F%2Fmoshanghua.66rt.com%2Fviewthread.php%3Ftid%3D10070%26extra%3D%5B%2Furl%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Fn.800tuan.com%2Fbbs%2Fforum.php%3Fmod%3Dviewthread%26tid%3D76418%5Dhttp%3A%2F%2Fn.800tuan.com%2Fbbs%2Fforum.php%3Fmod%3Dviewthread%26tid%3D76418%5B%2Furl%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Fbbs1.imotor.com%2Fviewthread.php%3Ftid%3D276%26extra%3D%5Dhttp%3A%2F%2Fbbs1.imotor.com%2Fviewthread.php%3Ftid%3D276%26extra%3D%5B%2Furl%5D%0D%0A++%0D%0A+++%5Burl%3Dhttp%3A%2F%2Fforums.webcrow.jp%2Fforum.php%3Fmod%3Dviewthread%26tid%3D2120301%5Dhttp%3A%2F%2Fforums.webcrow.jp%2Fforum.php%3Fmod%3Dviewthread%26tid%3D2120301%5B%2Furl%5D%0D%0A++%0D%0A+++%5B
url%3Dhttp%3A%2F%2Fsuofeiya520.com%2Fbbs%2Fforum.php%3Fmod%3Dviewthread%26tid%3D1010284%5Dhttp%3A%2F%2Fsuofeiya520.com%2Fbbs%2Fforum.php%3Fmod%3Dviewthread%26tid%3D1010284%5B%2Furl%5D&save=&formhash=f50c6624&sortid=1&uploadalbum=-2&allownoticeauthor=1&addfeed=1&usesig=1&newalbum=%C7%EB%CA%E4%C8%EB%CF%E0%B2%E1%C3%FB%B3%C6&posttime=1394591880";
131 | String[] r = s2.split(separators);
132 | for (int i = 0; i< r.length; i++) {
133 | if (tokenScore.containsKey(r[i]) && (Integer)tokenScore.get(r[i]) > 0) {
134 | System.out.println("[" + r[i] + "]");
135 | }
136 | }
137 |
138 | System.out.println(1 + (Integer)tokenScore.get("eval"));
139 | try {
140 | //System.out.println(new String(Base64.getDecoder().decode("NzU3MjIyO0Bpbmlfc2V0KCJkaXNwbGF5X2Vycm9ycyIsIjAiKTtAc2V0X3RpbWVfbGltaXQoMCk7QHNldF9tYWdpY19xdW90ZXNfcnVudGltZSgwKTtlY2hvKCItPnwiKTs7ZWNobyBAZndyaXRlKGZvcGVuKGJhc2U2NF9kZWNvZGUoJF9QT1NUWyJ6MSJdKSwidyIpLGJhc2fjkdsalfjas8*&(*2NF9kZWNvZGUoJF9QT1NUWyJ6MiJdKSk/IjEiOiIwIjtlY2hvKCJ8PC0iKTs7ZGllKCk7")));
141 | } catch (Exception e) {
142 | System.out.println("error");
143 | }
144 |
145 | System.out.println("AbcDEF".toLowerCase());
146 | String ts = "";
147 | System.out.println(webShellScore(ts));
148 |
149 | String ss[] = new String[]{"abc", "123", "*(*()"};
150 | System.out.println(String.join("", ss));
151 |
152 | Iterator iter = tokenScore.entrySet().iterator();
153 | while (iter.hasNext()) {
154 | Map.Entry entry = (Map.Entry) iter.next();
155 | String key = (String)entry.getKey();
156 | int val = (Integer)entry.getValue();
157 | System.out.println(key + " " + val);
158 | }
159 |
160 | ts = "array_map(\"ass\".\"ert\",array(\"ev\".\"Al(\\\"\\\\\\$xx%3D\\\\\\\"Ba\".\"SE6\".\"4_dEc\"";
161 | System.out.println(scoreTokens(ts));
162 | }
163 | }
164 |
--------------------------------------------------------------------------------
/src/main/java/chaitin/webshell/parser/AspScore.java:
--------------------------------------------------------------------------------
1 | package chaitin.webshell.parser;
2 |
3 | import java.util.*;
4 |
5 | import chaitin.utils.Pair;
6 |
7 |
8 | public class AspScore {
9 |
10 | byte[] data;
11 | double score;
12 | boolean black;
13 | boolean white;
14 | int count_token;
15 | int count_par;
16 | int count_bracket;
17 | int count_brace;
18 |
19 | int ps, p;
20 |
21 | Map existed_key = new HashMap();
22 |
23 | static Map> keyword = new HashMap>() {
24 | private static final long serialVersionUID = 8287097453483845897L;
25 |
26 | {
27 | put("eval", new Pair(1.2, 1));
28 | put("execute", new Pair(1.2, 1));
29 | put("request", new Pair(0.9, 1));
30 | put("response", new Pair(0.9, 1));
31 | put("exception", new Pair(0.8, 1));
32 | put("chr", new Pair(0.4, 4));
33 | put("write", new Pair(0.4, 1));
34 | put("function", new Pair(0.3, 1));
35 | put("server", new Pair(0.2, 1));
36 | put("settimeout", new Pair(0.5, 1));
37 | put("replace", new Pair(0.4, 1));
38 | put("len", new Pair(0.5, 1));
39 | //put("if", new Pair(0.2, 1));
40 | //put("else", new Pair(0.2, 1));
41 | put("on", new Pair(0.2, 1));
42 | put("error", new Pair(0.3, 1));
43 | put("resume", new Pair(0.6, 1));
44 | put("next", new Pair(0.3, 1));
45 | put("isnumeric", new Pair(0.8, 1));
46 | put("_memberaccess", new Pair(-1.0, 1));
47 | put("setaccessible", new Pair(-1.0, 1));
48 | put("getdeclaredfield", new Pair(-1.0, 1));
49 | put("allowstaticmethodaccess", new Pair(-1.0, 1));
50 | }};
51 |
52 |
53 | public AspScore(byte[] data) {
54 | this.data = data;
55 | ps = -1;
56 | p = 0;
57 | score = 0.0;
58 | }
59 |
60 | double keyword_score() {
61 | String s = new String(Arrays.copyOfRange(data, ps, p)).toLowerCase();
62 | //System.out.println(s);
63 |
64 | if (keyword.containsKey(s)) {
65 | if (existed_key.getOrDefault(s, 0) >= keyword.get(s).second) {
66 | return 0.0;
67 | }
68 | existed_key.put(s, existed_key.getOrDefault(s, 0) + 1);
69 | return keyword.get(s).first;
70 | }
71 | return 0.0;
72 | }
73 |
74 | boolean alphabet(byte b) {
75 | return Character.isLetter(b) || Character.isDigit(b) || b == '_';
76 | }
77 |
78 | public static byte[] filter_strop (byte[] input) {
79 | String s = new String(input);
80 | s = s.replace("\"", "");
81 | s = s.replace("&", "");
82 | return s.getBytes();
83 | }
84 |
85 | public double score() {
86 | if (data.length > 0 && (data[0] == '[' || data[0] == '{')) {
87 | score -= 3;
88 | }
89 | while (true) {
90 | if (p >= data.length) {
91 | break;
92 | }
93 | if (alphabet(data[p]) && ps == -1) {
94 | ps = p;
95 | } else if (ps != -1 && !alphabet(data[p])) {
96 | score += keyword_score();
97 | ps = -1;
98 | }
99 | if (data[p] < 0) {
100 | //score -= 0.1;
101 | }
102 | p += 1;
103 | }
104 | if (ps != -1) {
105 | score += keyword_score();
106 | }
107 | return score;
108 | }
109 |
110 | public static void main(String[] args) {
111 | System.out.println(new AspScore("a(b(c".getBytes()).score());
112 | System.out.println(new AspScore("eval($_GET['a']);".getBytes()).score());
113 | System.out.println(new AspScore("560648;@ini_set(\"display_errors\",\"0\");@set_time_limit(0);@set_magic_quotes_runtime(0);echo(\"->|\");;echo @fwrite(fopen(base64_decode($_POST[\"z1\"]),\"w\"),base64_decode($_POST[\"z2\"]))".getBytes()).score());
114 |
115 | }
116 | }
117 |
--------------------------------------------------------------------------------
/src/main/java/chaitin/webshell/parser/AspTokenizer.rl:
--------------------------------------------------------------------------------
1 | // -*- coding: utf-8 -*-
2 | %%{
3 | machine php_lexer;
4 |
5 | ws = '\r' | '\n' | ' ';
6 |
7 | integer = ('+'|'-')?[0-9]+;
8 |
9 | main :=
10 | |*
11 | integer => { emit(PhpTokenType.Integer, 0.1); };
12 | ws;
13 | *|
14 | ;
15 |
16 | }%%
17 |
18 | package chaitin.webshell.parser;
19 |
20 | import java.util.*;
21 |
22 |
// Host class for the Ragel scanner `php_lexer` declared at the top of this
// file. That scanner emits an Integer token (weight 0.1) for an optionally
// signed run of digits and skips '\r', '\n' and ' '; no other input is matched.
// NOTE(review): this file is named AspTokenizer.rl while the machine and the
// class are named php_lexer / PhpTokenizer - confirm which is intended.
23 | public class PhpTokenizer {
// Kinds of token the scanner can emit.
24 | public enum PhpTokenType {
25 | Integer
26 | }
27 |
// One scanned token: the matched bytes, its kind and its weight
// (the field is spelled `wight` in this class).
28 | public class PhpToken {
29 | public byte[] data;
30 | public PhpTokenType type;
31 | public double wight;
32 |
33 | public PhpToken(byte[] data, PhpTokenType type, double wight) {
34 | this.data = data;
35 | this.type = type;
36 | this.wight = wight;
37 | }
38 |
// Renders the token as its bytes (platform charset) followed by the weight.
39 | @Override
40 | public String toString() {
41 | return new String(data) + wight;
42 | }
43 | }
44 |
// Tokens emitted so far, and the input being scanned.
45 | List tokens;
46 | byte[] data;
47 |
// Scanner state variables referenced by the Ragel-generated code.
48 | int p, pe, eof, te, ts, cs, act;
49 |
// Ragel directive: the generated state-machine tables are written here.
50 | %% write data;
51 |
// Prepares a scan over the whole of `data`: p starts at 0, pe and eof are
// both set to data.length.
52 | public PhpTokenizer(byte[] data) {
53 | tokens = new LinkedList();
54 | this.data = data;
55 |
56 | p = 0;
57 | pe = data.length;
58 | eof = pe;
59 |
// Ragel directive: generated initialisation of the machine state.
60 | %% write init;
61 | }
62 |
// Called from scanner actions: records a copy of data[ts, te) as a token of
// the given type and weight.
63 | public void emit(PhpTokenType type, double weight) {
64 | tokens.add(new PhpToken(Arrays.copyOfRange(data, ts, te), type, weight));
65 | }
66 |
67 |
// Runs the scanner, prints the collected tokens to stdout, and returns
// whether the scan position reached the end of the input (p == pe).
68 | public Boolean tokenize() {
// Ragel directive: the generated scanner loop is written here.
69 | %% write exec;
70 |
71 | System.out.println(tokens);
72 | return p == pe;
73 | }
74 |
// Manual smoke test on a sample containing non-integer text.
75 | public static void main(String[] args) {
76 | new PhpTokenizer("123 tést = -100".getBytes()).tokenize();
77 | }
78 | }
79 |
--------------------------------------------------------------------------------
/src/main/java/chaitin/webshell/parser/PhpScore.java:
--------------------------------------------------------------------------------
1 |
2 | // line 1 "PhpScore.rl"
3 | // -*- coding: utf-8 -*-
4 |
5 | // line 54 "PhpScore.rl"
6 |
7 |
8 | package chaitin.webshell.parser;
9 |
10 |
11 | import chaitin.utils.Base64;
12 | import java.util.Arrays;
13 | import java.util.Map;
14 | import java.util.HashMap;
15 | import java.util.Set;
16 | import java.util.HashSet;
17 |
18 |
19 |
20 | public class PhpScore {
21 |
22 | byte[] data;
23 | boolean black;
24 | boolean white;
25 | int count_token;
26 | int count_var;
27 | int count_svar;
28 | int count_cmt;
29 | int count_par;
30 | int count_bracket;
31 | int count_brace;
32 |
33 | Set existed_key = new HashSet();
34 |
35 | static Map keyword = new HashMap() {
36 | private static final long serialVersionUID = 6899997024892413801L;
37 | {
38 |
39 | put("PHP_SELF", 1.6);
40 | put("_GET", 1.2);
41 | put("_POST", 1.6);
42 | put("_COOKIE", 1.5);
43 | put("_FILE", 1.2);
44 | put("_ENV", 1.2);
45 | put("_SESSION", 1.5);
46 | put("_REQUEST", 1.5);
47 | put("_SERVER", 1.6);
48 | put("array_map", 1.4);
49 | put("assert", 1.3);
50 | put("array_slice", 1.5);
51 | put("base64_decode", 2.0);
52 | put("base_convert", 1.7);
53 | put("edoced_46esab", 2.0);
54 | put("call_user_func", 1.0);
55 | put("call_user_func_array", 1.0);
56 | put("chr", 0.4);
57 | put("create_function", 0.8);
58 | put("curl_exec", 1.4);
59 | put("curl_multi_exec", 1.4);
60 | put("dirname", 1.0);
61 | put("echo", 1.0);
62 | put("error_reporting", 1.2);
63 | put("eval", 1.0);
64 | put("exec", 1.0);
65 | put("exit", 0.5);
66 | put("file_put_contents", 1.2);
67 | put("gzdecode", 1.2);
68 | put("implode", 1.5);
69 | //put("include", 0.9);
70 | put("include_once", 1.6);
71 | put("ini_set", 1.1);
72 | put("isset", 1.1);
73 | put("movefile", 1.2);
74 | put("ob_start", 1.5);
75 | put("parse_str", 1.5);
76 | put("passthru", 1.8);
77 | put("pcntl_exec", 1.7);
78 | put("phpinfo", 1.9);
79 | put("prege_replace", 1.5);
80 | put("proc_open", 1.7);
81 | //put("require", 0.9);
82 | put("require_once", 1.6);
83 | put("session_start", 1.5);
84 | put("set_magic_quotes_runtime", 1.0);
85 | put("set_time_limit", 1.4);
86 | put("shell_exec", 1.4);
87 | put("str_rot13", 1.2);
88 | put("strrev", 1.1);
89 | put("system", 0.5);
90 |
91 |
92 | put("_memberaccess", -10.0);
93 | put("allowstaticmethodaccess", -10.0);
94 | put("alert", -1.8);
95 | put("document", -1.8);
96 | put("fromcharcode", -10.0);
97 | put("getdeclaredfield", -10.0);
98 | put("parseint", -5.0);
99 | put("println", -4.0);
100 | put("prototype", -5.0);
101 | put("setaccessible", -10.0);
102 | put("string", -1.0);
103 | put("tostring", -4.0);
104 | }};
105 |
106 |
107 | // line 108 "PhpScore.java"
// Builds the array assigned to _php_lexer_actions.
// The surrounding "// line N" markers indicate this file is generated from
// PhpScore.rl; presumably the table should be regenerated rather than edited
// by hand - confirm against the build.
108 | private static byte[] init__php_lexer_actions_0()
109 | {
110 | return new byte [] {
111 | 0, 1, 1, 1, 2, 1, 3, 1, 6, 1, 7, 1,
112 | 8, 1, 9, 1, 10, 1, 11, 1, 12, 1, 13, 1,
113 | 14, 1, 15, 1, 16, 1, 17, 1, 18, 1, 19, 1,
114 | 20, 1, 21, 1, 22, 1, 24, 1, 25, 1, 26, 1,
115 | 27, 1, 28, 1, 29, 1, 30, 1, 31, 1, 32, 1,
116 | 33, 1, 34, 2, 0, 23, 2, 3, 4, 2, 3, 5
117 | };
118 | }
119 |
120 | private static final byte _php_lexer_actions[] = init__php_lexer_actions_0();
121 |
122 |
123 | private static short[] init__php_lexer_key_offsets_0() // Ragel-generated: per state, index of its first key in _php_lexer_trans_keys
124 | {
125 | return new short [] {
126 | 0, 2, 2, 4, 6, 6, 17, 18, 19, 20, 21, 24,
127 | 26, 27, 28, 29, 30, 31, 35, 36, 37, 38, 39, 40,
128 | 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
129 | 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
130 | 66, 68, 74, 75, 76, 77, 114, 116, 117, 119, 121, 123,
131 | 134, 137, 137, 144, 148, 150, 155, 161, 163, 164, 171, 178,
132 | 185
133 | };
134 | }
135 |
136 | private static final short _php_lexer_key_offsets[] = init__php_lexer_key_offsets_0(); // read as _php_lexer_key_offsets[cs]
137 |
138 |
139 | private static char[] init__php_lexer_trans_keys_0() // Ragel-generated: per state, its single-character keys followed by its (low, high) range pairs
140 | {
141 | return new char [] {
142 | 34, 92, 10, 13, 39, 92, 97, 98, 100, 102, 105, 111,
143 | 114, 115, 117, 0, 32, 114, 114, 97, 121, 41, 0, 32,
144 | 105, 111, 110, 97, 114, 111, 108, 41, 101, 0, 32, 97,
145 | 110, 111, 117, 98, 108, 101, 108, 111, 97, 116, 110, 98,
146 | 106, 101, 99, 101, 97, 108, 116, 114, 105, 110, 103, 110,
147 | 115, 101, 42, 47, 43, 45, 48, 57, 48, 57, 65, 70,
148 | 97, 102, 104, 112, 96, 33, 34, 35, 36, 39, 40, 41,
149 | 44, 47, 48, 60, 66, 79, 91, 93, 94, 96, 98, 111,
150 | 123, 125, 0, 32, 37, 38, 42, 46, 49, 57, 58, 64,
151 | 65, 90, 95, 122, 124, 126, 0, 32, 61, 34, 92, 10,
152 | 13, 39, 92, 97, 98, 100, 102, 105, 111, 114, 115, 117,
153 | 0, 32, 42, 47, 61, 46, 69, 88, 101, 120, 48, 57,
154 | 69, 101, 48, 57, 48, 57, 46, 69, 101, 48, 57, 48,
155 | 57, 65, 70, 97, 102, 37, 63, 112, 95, 48, 57, 65,
156 | 90, 97, 122, 95, 48, 57, 65, 90, 97, 122, 95, 48,
157 | 57, 65, 90, 97, 122, 96, 0
158 | };
159 | }
160 |
161 | private static final char _php_lexer_trans_keys[] = init__php_lexer_trans_keys_0(); // binary-searched against data[p] in tokenize()
162 |
163 |
164 | private static byte[] init__php_lexer_single_lengths_0() // Ragel-generated: per state, how many single-character keys it has
165 | {
166 | return new byte [] {
167 | 2, 0, 2, 2, 0, 9, 1, 1, 1, 1, 1, 2,
168 | 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,
169 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
170 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
171 | 0, 0, 1, 1, 1, 21, 0, 1, 2, 2, 2, 9,
172 | 3, 0, 5, 2, 0, 3, 0, 2, 1, 1, 1, 1,
173 | 1
174 | };
175 | }
176 |
177 | private static final byte _php_lexer_single_lengths[] = init__php_lexer_single_lengths_0(); // read as _php_lexer_single_lengths[cs]
178 |
179 |
180 | private static byte[] init__php_lexer_range_lengths_0() // Ragel-generated: per state, how many (low, high) key ranges it has
181 | {
182 | return new byte [] {
183 | 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
184 | 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
185 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
186 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
187 | 1, 3, 0, 0, 0, 8, 1, 0, 0, 0, 0, 1,
188 | 0, 0, 1, 1, 1, 1, 3, 0, 0, 3, 3, 3,
189 | 0
190 | };
191 | }
192 |
193 | private static final byte _php_lexer_range_lengths[] = init__php_lexer_range_lengths_0(); // read as _php_lexer_range_lengths[cs]
194 |
195 |
196 | private static short[] init__php_lexer_index_offsets_0() // Ragel-generated: per state, index of its first slot in _php_lexer_indicies
197 | {
198 | return new short [] {
199 | 0, 3, 4, 7, 10, 11, 22, 24, 26, 28, 30, 33,
200 | 36, 38, 40, 42, 44, 46, 50, 52, 54, 56, 58, 60,
201 | 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84,
202 | 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
203 | 111, 113, 117, 119, 121, 123, 153, 155, 157, 160, 163, 166,
204 | 177, 181, 182, 189, 193, 195, 200, 204, 207, 209, 214, 219,
205 | 224
206 | };
207 | }
208 |
209 | private static final short _php_lexer_index_offsets[] = init__php_lexer_index_offsets_0(); // read as _php_lexer_index_offsets[cs]
210 |
211 |
212 | private static byte[] init__php_lexer_indicies_0() // Ragel-generated: maps a matched key slot (or the state's default slot) to a transition number
213 | {
214 | return new byte [] {
215 | 2, 3, 1, 1, 6, 6, 5, 2, 8, 7, 7, 11,
216 | 12, 13, 14, 15, 16, 17, 18, 19, 10, 9, 20, 9,
217 | 21, 9, 22, 9, 23, 9, 24, 23, 9, 25, 26, 9,
218 | 27, 9, 28, 9, 22, 9, 29, 9, 30, 9, 24, 31,
219 | 23, 9, 32, 9, 23, 9, 33, 9, 34, 9, 35, 9,
220 | 36, 9, 23, 9, 37, 9, 38, 9, 39, 9, 23, 9,
221 | 39, 9, 40, 9, 41, 9, 42, 9, 39, 9, 43, 9,
222 | 44, 9, 23, 9, 45, 9, 46, 9, 47, 9, 48, 9,
223 | 23, 9, 49, 9, 50, 9, 39, 9, 53, 52, 54, 52,
224 | 56, 56, 55, 57, 55, 58, 58, 58, 55, 60, 59, 61,
225 | 59, 62, 0, 64, 65, 66, 67, 69, 70, 71, 68, 72,
226 | 73, 75, 77, 78, 79, 81, 68, 82, 77, 78, 83, 84,
227 | 63, 68, 64, 74, 68, 76, 76, 68, 80, 63, 85, 87,
228 | 86, 2, 3, 1, 6, 6, 5, 2, 8, 7, 11, 12,
229 | 13, 14, 15, 16, 17, 18, 19, 10, 89, 52, 5, 87,
230 | 86, 90, 92, 93, 94, 93, 94, 74, 91, 93, 93, 92,
231 | 91, 57, 91, 92, 93, 93, 74, 91, 58, 58, 58, 91,
232 | 95, 96, 86, 98, 97, 76, 76, 76, 76, 99, 76, 76,
233 | 76, 76, 99, 76, 76, 76, 76, 99, 88, 100, 0
234 | };
235 | }
236 |
237 | private static final byte _php_lexer_indicies[] = init__php_lexer_indicies_0(); // result indexes _php_lexer_trans_targs and _php_lexer_trans_actions
238 |
239 |
240 | private static byte[] init__php_lexer_trans_targs_0() // Ragel-generated: per transition, the state to enter
241 | {
242 | return new byte [] {
243 | 53, 0, 53, 1, 53, 2, 53, 3, 4, 53, 5, 6,
244 | 11, 20, 25, 29, 30, 34, 37, 42, 7, 8, 9, 10,
245 | 53, 12, 15, 13, 14, 16, 17, 18, 19, 21, 22, 23,
246 | 24, 26, 27, 28, 31, 32, 33, 35, 36, 38, 39, 40,
247 | 41, 43, 44, 53, 45, 46, 61, 53, 48, 64, 66, 53,
248 | 51, 53, 53, 54, 55, 56, 57, 53, 53, 58, 59, 53,
249 | 60, 62, 65, 67, 69, 70, 71, 53, 53, 53, 72, 53,
250 | 53, 53, 53, 53, 53, 53, 53, 53, 63, 47, 49, 53,
251 | 68, 53, 50, 53, 52
252 | };
253 | }
254 |
255 | private static final byte _php_lexer_trans_targs[] = init__php_lexer_trans_targs_0(); // read as cs = _php_lexer_trans_targs[_trans]
256 |
257 |
258 | private static byte[] init__php_lexer_trans_actions_0() // Ragel-generated: per transition, offset of its action list in _php_lexer_actions (0 = no actions)
259 | {
260 | return new byte [] {
261 | 59, 0, 7, 0, 61, 0, 11, 0, 0, 53, 0, 0,
262 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
263 | 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
264 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
265 | 0, 0, 0, 57, 0, 0, 0, 51, 0, 0, 0, 55,
266 | 0, 27, 9, 0, 0, 5, 69, 33, 31, 5, 5, 13,
267 | 66, 5, 5, 0, 0, 0, 0, 15, 35, 17, 5, 19,
268 | 21, 47, 45, 23, 49, 41, 63, 39, 5, 0, 0, 29,
269 | 5, 43, 0, 37, 0
270 | };
271 | }
272 |
273 | private static final byte _php_lexer_trans_actions[] = init__php_lexer_trans_actions_0(); // read as _php_lexer_trans_actions[_trans]
274 |
275 |
276 | private static byte[] init__php_lexer_to_state_actions_0() // Ragel-generated: per state, offset of the action list run after entering it (only state 53 has one)
277 | {
278 | return new byte [] {
279 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
280 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
281 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
282 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
283 | 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
284 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
285 | 0
286 | };
287 | }
288 |
289 | private static final byte _php_lexer_to_state_actions[] = init__php_lexer_to_state_actions_0(); // read as _php_lexer_to_state_actions[cs]
290 |
291 |
292 | private static byte[] init__php_lexer_from_state_actions_0() // Ragel-generated: per state, offset of the action list run before a byte is matched in it (only state 53 has one)
293 | {
294 | return new byte [] {
295 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
296 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
297 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
298 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299 | 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0,
300 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
301 | 0
302 | };
303 | }
304 |
305 | private static final byte _php_lexer_from_state_actions[] = init__php_lexer_from_state_actions_0(); // read as _php_lexer_from_state_actions[cs]
306 |
307 |
308 | private static short[] init__php_lexer_eof_trans_0() // Ragel-generated: per state, 1-based transition taken at end of input (0 = none)
309 | {
310 | return new short [] {
311 | 1, 1, 5, 1, 1, 10, 10, 10, 10, 10, 10, 10,
312 | 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
313 | 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
314 | 10, 10, 10, 10, 10, 10, 10, 10, 10, 52, 52, 56,
315 | 56, 56, 60, 60, 1, 0, 86, 87, 89, 89, 89, 90,
316 | 87, 91, 92, 92, 92, 92, 92, 87, 98, 100, 100, 100,
317 | 89
318 | };
319 | }
320 |
321 | private static final short _php_lexer_eof_trans[] = init__php_lexer_eof_trans_0(); // tokenize() subtracts 1 before using the value as _trans
322 |
323 |
324 | static final int php_lexer_start = 53; // state tokenize() starts in (cs is initialised to it)
325 | static final int php_lexer_first_final = 53; // emitted by Ragel; not used by tokenize()
326 | static final int php_lexer_error = -1; // emitted by Ragel; not used by tokenize()
327 |
328 | static final int php_lexer_en_main = 53; // emitted by Ragel; not used by tokenize()
329 |
330 |
331 | // line 155 "PhpScore.rl"
332 |
333 | int p, pe, eof; // index of the current byte in data, end of the buffer, end of input (tokenize() sets pe = eof = data.length)
334 | int te, ts, cs, act; // end and start of the current token, current lexer state, id recorded for the latest candidate rule
335 |
336 |
337 | public PhpScore(byte[] data) { // keeps the bytes to score; nothing is scanned until score() runs
338 |     this.data = data;
339 |
340 |     // both flags start cleared
341 |     black = white = false;
342 |
343 |     // counters read or updated while scoring
344 |     count_token = count_cmt = 0;
345 |     count_var = count_svar = 0;
346 |
347 |     // numbers of currently open '(' / '[' / '{'
348 |     count_par = count_bracket = count_brace = 0;
349 |
350 | }
351 |
352 | double string_score() { // score of the quoted literal that spans data[ts, te)
353 |     byte[] sb = Arrays.copyOfRange(data, ts + 1, te - 1); // drop the opening and closing quote bytes
354 |     String s = new String(sb);
355 |     if (keyword.containsKey(s)) {
356 |         // Each keyword counts once per instance. A repeat must stop here: falling through
357 |         // would rescan it in a nested scorer whose existed_key set is empty and count it again.
358 |         if (!existed_key.add(s)) {
359 |             return 0.0;
360 |         }
361 |         return keyword.get(s);
362 |     }
363 |     if (s.length() < 10) {
364 |         return 0.0; // short literals are not rescanned
365 |     }
366 |     double decoded = new PhpScore(Base64.decode_base64(Base64.longest_sub_base64(sb))).score();
367 |     double raw = new PhpScore(sb).score();
368 |     return Math.max(0.0, Math.max(decoded, raw)); // better of the two readings, never negative
369 | }
370 |
371 | double keyword_score() { // score of the identifier token that spans data[ts, te)
372 |     byte[] sb = Arrays.copyOfRange(data, ts, te);
373 |     String s = new String(sb);
374 |     if (keyword.containsKey(s)) {
375 |         // Each keyword counts once per instance; a repeat scores nothing instead of being Base64-decoded.
376 |         if (!existed_key.add(s)) { return 0.0; }
377 |         return keyword.get(s);
378 |     }
379 |     // Unknown identifier: score the bytes obtained by Base64-decoding its longest Base64-looking run.
380 |     double decoded = new PhpScore(Base64.decode_base64(Base64.longest_sub_base64(sb))).score();
381 |     return decoded > 0.0 ? decoded : 0.0;
382 | }
383 |
384 |
385 | double tokenize() { // runs the Ragel-generated php_lexer over data and returns the score accumulated by its actions
386 | p = 0;
387 | pe = data.length;
388 | eof = pe; // the whole input is in the buffer, so end of buffer is end of input
389 | double score = 0.0;
390 |
391 | // line 392 "PhpScore.java"
392 | {
393 | cs = php_lexer_start;
394 | ts = -1;
395 | te = -1;
396 | act = 0;
397 | }
398 |
399 | // line 214 "PhpScore.rl"
400 |
401 | // line 402 "PhpScore.java"
402 | {
403 | int _klen;
404 | int _trans = 0;
405 | int _acts;
406 | int _nacts;
407 | int _keys;
408 | int _goto_targ = 0;
409 |
410 | _goto: while (true) { // the labelled loop plus _goto_targ emulate goto: every case below is a jump target
411 | switch ( _goto_targ ) {
412 | case 0:
413 | if ( p == pe ) { // empty input: go straight to the end-of-input handling
414 | _goto_targ = 4;
415 | continue _goto;
416 | }
417 | case 1:
418 | _acts = _php_lexer_from_state_actions[cs]; // actions run before a byte is matched in state cs
419 | _nacts = (int) _php_lexer_actions[_acts++];
420 | while ( _nacts-- > 0 ) {
421 | switch ( _php_lexer_actions[_acts++] ) {
422 | case 2:
423 | // line 1 "NONE"
424 | {ts = p;}
425 | break;
426 | // line 427 "PhpScore.java"
427 | }
428 | }
429 |
430 | _match: do { // find the transition for data[p]: single keys first, then key ranges, else the state's default slot
431 | _keys = _php_lexer_key_offsets[cs];
432 | _trans = _php_lexer_index_offsets[cs];
433 | _klen = _php_lexer_single_lengths[cs];
434 | if ( _klen > 0 ) {
435 | int _lower = _keys;
436 | int _mid;
437 | int _upper = _keys + _klen - 1;
438 | while (true) {
439 | if ( _upper < _lower )
440 | break;
441 |
442 | _mid = _lower + ((_upper-_lower) >> 1);
443 | if ( data[p] < _php_lexer_trans_keys[_mid] ) // data[p] is a signed byte: values >= 0x80 compare below every key
444 | _upper = _mid - 1;
445 | else if ( data[p] > _php_lexer_trans_keys[_mid] )
446 | _lower = _mid + 1;
447 | else {
448 | _trans += (_mid - _keys);
449 | break _match;
450 | }
451 | }
452 | _keys += _klen;
453 | _trans += _klen;
454 | }
455 |
456 | _klen = _php_lexer_range_lengths[cs];
457 | if ( _klen > 0 ) {
458 | int _lower = _keys;
459 | int _mid;
460 | int _upper = _keys + (_klen<<1) - 2; // ranges are stored as (low, high) pairs, hence the steps of 2
461 | while (true) {
462 | if ( _upper < _lower )
463 | break;
464 |
465 | _mid = _lower + (((_upper-_lower) >> 1) & ~1);
466 | if ( data[p] < _php_lexer_trans_keys[_mid] )
467 | _upper = _mid - 2;
468 | else if ( data[p] > _php_lexer_trans_keys[_mid+1] )
469 | _lower = _mid + 2;
470 | else {
471 | _trans += ((_mid - _keys)>>1);
472 | break _match;
473 | }
474 | }
475 | _trans += _klen;
476 | }
477 | } while (false);
478 |
479 | _trans = _php_lexer_indicies[_trans]; // slot -> transition number
480 | case 3:
481 | cs = _php_lexer_trans_targs[_trans];
482 |
483 | if ( _php_lexer_trans_actions[_trans] != 0 ) {
484 | _acts = _php_lexer_trans_actions[_trans];
485 | _nacts = (int) _php_lexer_actions[_acts++];
486 | while ( _nacts-- > 0 )
487 | {
488 | switch ( _php_lexer_actions[_acts++] )
489 | {
490 | case 0:
491 | // line 23 "PhpScore.rl"
492 | { count_cmt += 1; }
493 | break;
494 | case 3:
495 | // line 1 "NONE"
496 | {te = p+1;}
497 | break;
498 | case 4:
499 | // line 48 "PhpScore.rl"
500 | {act = 17;}
501 | break;
502 | case 5:
503 | // line 51 "PhpScore.rl"
504 | {act = 20;}
505 | break;
506 | case 6:
507 | // line 32 "PhpScore.rl"
508 | {te = p+1;{ score += string_score(); }}
509 | break;
510 | case 7:
511 | // line 33 "PhpScore.rl"
512 | {te = p+1;{ score += 0.5 + string_score(); }}
513 | break;
514 | case 8:
515 | // line 34 "PhpScore.rl"
516 | {te = p+1;}
517 | break;
518 | case 9:
519 | // line 36 "PhpScore.rl"
520 | {te = p+1;{ if (count_par > 0) { count_par -= 1; } else { black = true; } }}
521 | break;
522 | case 10:
523 | // line 37 "PhpScore.rl"
524 | {te = p+1;{ count_bracket += 1; }}
525 | break;
526 | case 11:
527 | // line 38 "PhpScore.rl"
528 | {te = p+1;{ if (count_bracket > 0) { count_bracket -= 1; } else { black = true; } }}
529 | break;
530 | case 12:
531 | // line 39 "PhpScore.rl"
532 | {te = p+1;{ count_brace += 1; }}
533 | break;
534 | case 13:
535 | // line 40 "PhpScore.rl"
536 | {te = p+1;{ if (count_brace > 0) { count_brace -= 1; } else { black = true; } }}
537 | break;
538 | case 14:
539 | // line 41 "PhpScore.rl"
540 | {te = p+1;{ /*score += 0.1;*/ }}
541 | break;
542 | case 15:
543 | // line 44 "PhpScore.rl"
544 | {te = p+1;{ score += 1.5; }}
545 | break;
546 | case 16:
547 | // line 45 "PhpScore.rl"
548 | {te = p+1;{ score -= 999.0; }}
549 | break;
550 | case 17:
551 | // line 47 "PhpScore.rl"
552 | {te = p+1;{ score -= 999.0; }}
553 | break;
554 | case 18:
555 | // line 48 "PhpScore.rl"
556 | {te = p+1;}
557 | break;
558 | case 19:
559 | // line 49 "PhpScore.rl"
560 | {te = p+1;{ /*score += 0.1;*/ }}
561 | break;
562 | case 20:
563 | // line 51 "PhpScore.rl"
564 | {te = p+1;{ score -= 1; }}
565 | break;
566 | case 21:
567 | // line 30 "PhpScore.rl"
568 | {te = p;p--;{ score += keyword_score(); }}
569 | break;
570 | case 22:
571 | // line 31 "PhpScore.rl"
572 | {te = p;p--;}
573 | break;
574 | case 23:
575 | // line 34 "PhpScore.rl"
576 | {te = p;p--;}
577 | break;
578 | case 24:
579 | // line 35 "PhpScore.rl"
580 | {te = p;p--;{ count_par += 1; }}
581 | break;
582 | case 25:
583 | // line 46 "PhpScore.rl"
584 | {te = p;p--;{ score -= 999.0; }}
585 | break;
586 | case 26:
587 | // line 48 "PhpScore.rl"
588 | {te = p;p--;}
589 | break;
590 | case 27:
591 | // line 50 "PhpScore.rl"
592 | {te = p;p--;}
593 | break;
594 | case 28:
595 | // line 51 "PhpScore.rl"
596 | {te = p;p--;{ score -= 1; }}
597 | break;
598 | case 29:
599 | // line 31 "PhpScore.rl"
600 | {{p = ((te))-1;}}
601 | break;
602 | case 30:
603 | // line 35 "PhpScore.rl"
604 | {{p = ((te))-1;}{ count_par += 1; }}
605 | break;
606 | case 31:
607 | // line 46 "PhpScore.rl"
608 | {{p = ((te))-1;}{ score -= 999.0; }}
609 | break;
610 | case 32:
611 | // line 48 "PhpScore.rl"
612 | {{p = ((te))-1;}}
613 | break;
614 | case 33:
615 | // line 51 "PhpScore.rl"
616 | {{p = ((te))-1;}{ score -= 1; }}
617 | break;
618 | case 34:
619 | // line 1 "NONE"
620 | { switch( act ) {
621 | case 20:
622 | {{p = ((te))-1;} score -= 1; }
623 | break;
624 | default:
625 | {{p = ((te))-1;}}
626 | break;
627 | }
628 | }
629 | break;
630 | // line 631 "PhpScore.java"
631 | }
632 | }
633 | }
634 |
635 | case 2:
636 | _acts = _php_lexer_to_state_actions[cs]; // actions run after entering state cs
637 | _nacts = (int) _php_lexer_actions[_acts++];
638 | while ( _nacts-- > 0 ) {
639 | switch ( _php_lexer_actions[_acts++] ) {
640 | case 1:
641 | // line 1 "NONE"
642 | {ts = -1;}
643 | break;
644 | // line 645 "PhpScore.java"
645 | }
646 | }
647 |
648 | if ( ++p != pe ) { // more input left: process the next byte
649 | _goto_targ = 1;
650 | continue _goto;
651 | }
652 | case 4:
653 | if ( p == eof ) // at end of input, take the state's EOF transition if it has one
654 | {
655 | if ( _php_lexer_eof_trans[cs] > 0 ) {
656 | _trans = _php_lexer_eof_trans[cs] - 1; // table entries are 1-based; 0 means none
657 | _goto_targ = 3;
658 | continue _goto;
659 | }
660 | }
661 |
662 | case 5:
663 | }
664 | break; }
665 | }
666 |
667 | // line 215 "PhpScore.rl"
668 | return score;
669 | }
670 |
671 | public double score() { // total score for data: start penalty + token score + counter bonuses
672 |     // input whose first byte is '[' or '{' starts 3 points down
673 |     boolean opensWithBracket = data.length > 0 && (data[0] == '[' || data[0] == '{');
674 |     double total = opensWithBracket ? -3.0 : 0.0;
675 |
676 |     total += tokenize();
677 |
678 |     if (count_cmt > 0) {
679 |         total += 1.0;
680 |     }
681 |     total += count_svar * 0.5;
682 |     if (count_var > 0) {
683 |         total += 0.6;
684 |     }
685 |     black = black || p != pe; // also flag a scan that stopped before the end of the input
686 |     return total;
687 | }
688 |
689 | public static void main(String[] args) {
690 | System.out.println(new PhpScore("a(b(c".getBytes()).score());
691 | System.out.println(new PhpScore("eval($_GET['a']);".getBytes()).score());
692 | System.out.println(new PhpScore("560648;@ini_set(\"display_errors\",\"0\");@set_time_limit(0);@set_magic_quotes_runtime(0);echo(\"->|\");;echo @fwrite(fopen(base64_decode($_POST[\"z1\"]),\"w\"),base64_decode($_POST[\"z2\"]))".getBytes()).score());
693 |
694 | }
695 | }
696 |
--------------------------------------------------------------------------------
/src/main/java/chaitin/webshell/parser/PhpScore.rl:
--------------------------------------------------------------------------------
1 | // -*- coding: utf-8 -*-
2 | %%{
3 |     machine php_lexer;
4 |     # NOTE(review): in 'number', concatenation binds tighter than '|', so the 'b'/'o' forms match "b1"/"o7" without the leading '0', and the exponent requires a sign. Any grammar change needs PhpScore.java regenerated.
5 |     ws = '\r' | '\n' | ' ' | '\t' | (0x00 .. 0x1F);
6 |     label = ('_' | alpha) ('_' | alnum)*;
7 |     number = ('0'
8 |                 ('x'i xdigit+)
9 |               | ('b'i ('0' .. '1'))
10 |               | ('o'i ('0' .. '7')))
11 |            | (digit+ ('.' digit*)? ('e'i ('+' | '-') digit+)?
12 |              );
13 |     string = (
14 |         ( "'" ( ( any - '\\' - "'" ) | ( '\\' any ) )* "'" ) |
15 |         ( '"' ( ( any - '\\' - '"' ) | ( '\\' any ) )* '"' )
16 |     );
17 |     backtick_string = '`' (any - '`') '`';   # NOTE(review): matches exactly one character between the backticks; a '*' looks missing
18 |     comment = (
19 |         (
20 |             ( "//" | '#' ) ( any - '\r' - '\n')* ( '\r' | '\n' )
21 |         ) |
22 |         (
23 |             ("/*" ( ( any - '*' ) | ( '*' ( any - '/' ) ) )* "*/") % { count_cmt += 1; }
24 |         )
25 |     );
26 |     value_cast = '(' ws* ('int' | 'real' | 'double' | 'float' | 'string' | 'binary' | 'array' | 'object' | 'boolean' | 'bool' | 'unset') ws* ')';
27 |
28 |     # Scanner: each action below updates the 'score' local or the counters of PhpScore.tokenize().
29 |     main := |*
30 |         label => { score += keyword_score(); };
31 |         number;
32 |         string => { score += string_score(); };
33 |         backtick_string => { score += 0.5 + string_score(); };
34 |         comment;
35 |         '(' => { count_par += 1; };
36 |         ')' => { if (count_par > 0) { count_par -= 1; } else { black = true; } };
37 |         '[' => { count_bracket += 1; };
38 |         ']' => { if (count_bracket > 0) { count_bracket -= 1; } else { black = true; } };
39 |         '{' => { count_brace += 1; };
40 |         '}' => { if (count_brace > 0) { count_brace -= 1; } else { black = true; } };
41 |         ('.=' | '+=' | '-=' | '*=' | '/=' | '!=') => { /*score += 0.1;*/ };
42 |         #'$' label => { count_var += 1; };
43 |         #'${' label '}' => { count_svar += 1; };
44 |         value_cast => { score += 1.5; };
45 |         '<?php' => { score -= 999.0; };
46 |         '<?' => { score -= 999.0; };
47 |         '<%' => { score -= 999.0; };
48 |         ('=' | ',' | '%' | '+' | '-' | '*' | '/' | ';' | '?' | ':' | '!' | '.' | '&' | '|' | '^' | '~' | '<' | '>' | '@');
49 |         '$' => { /*score += 0.1;*/ };
50 |         ws+;
51 |         any => { score -= 1; };
52 |     *|;
53 |
54 | }%%
55 |
56 | package chaitin.webshell.parser;
57 |
58 |
59 | import chaitin.utils.Base64;
60 | import java.util.Arrays;
61 | import java.util.Map;
62 | import java.util.HashMap;
63 | import java.util.Set;
64 | import java.util.HashSet;
65 |
66 |
67 |
68 | public class PhpScore { // heuristic scorer for a byte buffer; the lexer tables and scan loop come from the php_lexer Ragel machine
69 |
70 |     byte[] data;        // bytes being scored
71 |     boolean black;      // set on an unmatched ')' / ']' / '}' or when the scan stops before the end of data
72 |     boolean white;      // only ever cleared in this class
73 |     int count_token;    // only ever cleared in this class
74 |     int count_var;      // never incremented: the '$' label rule is commented out
75 |     int count_svar;     // never incremented: the '${' label '}' rule is commented out
76 |     int count_cmt;      // number of /* ... */ comments seen
77 |     int count_par;      // currently open '('
78 |     int count_bracket;  // currently open '['
79 |     int count_brace;    // currently open '{'
80 |
81 |     Set<String> existed_key = new HashSet<String>(); // keywords this instance has already counted
82 |
83 |     static Map<String, Double> keyword = new HashMap<String, Double>() { // token text -> score weight
84 |         private static final long serialVersionUID = 6899997024892413801L;
85 |         {
86 |
87 |             put("PHP_SELF", 1.6);
88 |             put("_GET", 1.2);
89 |             put("_POST", 1.6);
90 |             put("_COOKIE", 1.5);
91 |             put("_FILES", 1.2); // was "_FILE", which is not a PHP superglobal and so never matched $_FILES
92 |             put("_ENV", 1.2);
93 |             put("_SESSION", 1.5);
94 |             put("_REQUEST", 1.5);
95 |             put("_SERVER", 1.6);
96 |             put("array_map", 1.4);
97 |             put("assert", 1.3);
98 |             put("array_slice", 1.5);
99 |             put("base64_decode", 2.0);
100 |             put("base_convert", 1.7);
101 |             put("edoced_46esab", 2.0);
102 |             put("call_user_func", 1.0);
103 |             put("call_user_func_array", 1.0);
104 |             put("chr", 0.4);
105 |             put("create_function", 0.8);
106 |             put("curl_exec", 1.4);
107 |             put("curl_multi_exec", 1.4);
108 |             put("dirname", 1.0);
109 |             put("echo", 1.0);
110 |             put("error_reporting", 1.2);
111 |             put("eval", 1.0);
112 |             put("exec", 1.0);
113 |             put("exit", 0.5);
114 |             put("file_put_contents", 1.2);
115 |             put("gzdecode", 1.2);
116 |             put("implode", 1.5);
117 |             //put("include", 0.9);
118 |             put("include_once", 1.6);
119 |             put("ini_set", 1.1);
120 |             put("isset", 1.1);
121 |             put("movefile", 1.2);
122 |             put("ob_start", 1.5);
123 |             put("parse_str", 1.5);
124 |             put("passthru", 1.8);
125 |             put("pcntl_exec", 1.7);
126 |             put("phpinfo", 1.9);
127 |             put("preg_replace", 1.5); // was misspelled "prege_replace", so the real function never matched
128 |             put("proc_open", 1.7);
129 |             //put("require", 0.9);
130 |             put("require_once", 1.6);
131 |             put("session_start", 1.5);
132 |             put("set_magic_quotes_runtime", 1.0);
133 |             put("set_time_limit", 1.4);
134 |             put("shell_exec", 1.4);
135 |             put("str_rot13", 1.2);
136 |             put("strrev", 1.1);
137 |             put("system", 0.5);
138 |
139 |             // Negative weights. NOTE(review): lookups are case-sensitive, so these lower-case keys only match lower-cased input - confirm callers normalise case.
140 |             put("_memberaccess", -10.0);
141 |             put("allowstaticmethodaccess", -10.0);
142 |             put("alert", -1.8);
143 |             put("document", -1.8);
144 |             put("fromcharcode", -10.0);
145 |             put("getdeclaredfield", -10.0);
146 |             put("parseint", -5.0);
147 |             put("println", -4.0);
148 |             put("prototype", -5.0);
149 |             put("setaccessible", -10.0);
150 |             put("string", -1.0);
151 |             put("tostring", -4.0);
152 |     }};
153 |
154 |     %% write data;
155 |
156 |     int p, pe, eof;      // index of the current byte in data, end of the buffer, end of input
157 |     int te, ts, cs, act; // end and start of the current token, current lexer state, id of the latest candidate rule
158 |
159 |
160 |     public PhpScore(byte[] data) { // keeps the bytes to score; nothing is scanned until score() runs
161 |         this.data = data;
162 |
163 |         black = false;
164 |         white = false;
165 |         count_cmt = 0;
166 |         count_var = 0;
167 |         count_svar = 0;
168 |         count_par = 0;
169 |         count_token = 0;
170 |         count_bracket = 0;
171 |         count_brace = 0;
172 |
173 |     }
174 |
175 |     double string_score() { // score of the quoted literal that spans data[ts, te)
176 |         byte[] sb = Arrays.copyOfRange(data, ts + 1, te - 1); // drop the opening and closing quote bytes
177 |         String s = new String(sb);
178 |         if (keyword.containsKey(s)) {
179 |             // Each keyword counts once per instance. A repeat must stop here: falling through
180 |             // would rescan it in a nested scorer whose existed_key set is empty and count it again.
181 |             if (!existed_key.add(s)) {
182 |                 return 0.0;
183 |             }
184 |             return keyword.get(s);
185 |         }
186 |         if (s.length() < 10) {
187 |             return 0.0; // short literals are not rescanned
188 |         }
189 |         double decoded = new PhpScore(Base64.decode_base64(Base64.longest_sub_base64(sb))).score();
190 |         double raw = new PhpScore(sb).score();
191 |         return Math.max(0.0, Math.max(decoded, raw)); // better of the two readings, never negative
192 |     }
193 |
194 |     double keyword_score() { // score of the identifier token that spans data[ts, te)
195 |         byte[] sb = Arrays.copyOfRange(data, ts, te);
196 |         String s = new String(sb);
197 |         if (keyword.containsKey(s)) {
198 |             // Each keyword counts once per instance; a repeat scores nothing instead of being Base64-decoded.
199 |             if (!existed_key.add(s)) { return 0.0; }
200 |             return keyword.get(s);
201 |         }
202 |         // Unknown identifier: score the bytes obtained by Base64-decoding its longest Base64-looking run.
203 |         double decoded = new PhpScore(Base64.decode_base64(Base64.longest_sub_base64(sb))).score();
204 |         return decoded > 0.0 ? decoded : 0.0;
205 |     }
206 |
207 |
208 |     double tokenize() { // runs the php_lexer scanner over data; the Ragel actions add to the local 'score'
209 |         p = 0;
210 |         pe = data.length;
211 |         eof = pe; // the whole input is in the buffer, so end of buffer is end of input
212 |         double score = 0.0;
213 |         %% write init;
214 |         %% write exec;
215 |         return score;
216 |     }
217 |
218 |     public double score() { // total score for data: start penalty + token score + counter bonuses
219 |         double score = 0.0;
220 |         if (data.length > 0 && (data[0] == '[' || data[0] == '{')) {
221 |             score -= 3; // input whose first byte is '[' or '{' starts 3 points down
222 |         }
223 |
224 |         score += tokenize();
225 |
226 |         score += count_cmt > 0 ? 1.0 : 0;
227 |         score += count_svar * 0.5;
228 |         score += count_var > 0 ? 0.6 : 0;
229 |         black |= p != pe; // also flag a scan that stopped before the end of the input
230 |         //if (black) {
231 |         // score = 0.0;
232 |         //}
233 |         return score;
234 |     }
235 |
236 |     public static void main(String[] args) { // manual smoke run: prints the score of three sample inputs
237 |         System.out.println(new PhpScore("a(b(c".getBytes()).score());
238 |         System.out.println(new PhpScore("eval($_GET['a']);".getBytes()).score());
239 |         System.out.println(new PhpScore("560648;@ini_set(\"display_errors\",\"0\");@set_time_limit(0);@set_magic_quotes_runtime(0);echo(\"->|\");;echo @fwrite(fopen(base64_decode($_POST[\"z1\"]),\"w\"),base64_decode($_POST[\"z2\"]))".getBytes()).score());
240 |
241 |     }
242 | }
243 |
--------------------------------------------------------------------------------
/src/main/resources/META-INF/base.mapred.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | 1
5 | 1
6 | webshell
7 | workflow/myfolder
8 |
9 |
10 | chaitin.webshell.JobLauncher
11 | chaitin.webshell.MyMapper
12 | chaitin.webshell.MyReducer
13 | chaitin.webshell.MyCombiner
14 |
15 |
16 | word:string
17 | cnt:bigint
18 |
27 |
28 |
29 |
30 |
31 | wordcount_in
32 |
33 | dt={yyyymmdd}
34 | dt={yyyymmdd-1}
35 |
36 |
37 |
38 |
39 | wordcount_out
40 | dt={yyyymmdd}
41 |
42 |
43 |
--------------------------------------------------------------------------------
/src/main/resources/credential.properties:
--------------------------------------------------------------------------------
1 | base_id=1
2 | project_id=1
3 | token=xxx
--------------------------------------------------------------------------------
/src/test/java/chaitin/test/phishing/ScoreDomainTest.java:
--------------------------------------------------------------------------------
1 | package chaitin.test.phishing;
2 |
3 | public class ScoreDomainTest { // placeholder: no test cases are defined yet
4 |
5 | }
6 |
--------------------------------------------------------------------------------