22 |
--------------------------------------------------------------------------------
/ip-locator.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | org.lic.iplocator
8 | ip-locator
9 | 1.0-SNAPSHOT
10 | jar
11 |
12 |
13 |
14 |
15 | src/main/resources
16 |
17 |
18 | *.*
19 |
20 |
21 |
22 |
23 |
24 |
25 | maven-compiler-plugin
26 | 2.3.2
27 |
28 | 1.6
29 | 1.6
30 | UTF-8
31 |
32 |
33 |
34 |
35 | org.apache.maven.plugins
36 | maven-assembly-plugin
37 | 2.2
38 |
39 |
40 | assemble
41 | package
42 |
43 | single
44 |
45 | false
46 |
47 |
48 | src/main/resources/dist.xml
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 | com.alibaba
60 | fastjson
61 | 1.2.3
62 |
63 |
64 |
65 | org.apache.httpcomponents
66 | httpclient
67 | 4.3.4
68 |
69 |
70 |
71 | junit
72 | junit
73 | 4.11
74 |
75 |
76 |
77 | org.slf4j
78 | slf4j-api
79 | 1.7.5
80 |
81 |
82 |
83 | org.slf4j
84 | slf4j-log4j12
85 | 1.7.5
86 |
87 |
88 |
89 | commons-lang
90 | commons-lang
91 | 2.6
92 |
93 |
94 |
95 | log4j
96 | log4j
97 | 1.2.17
98 |
99 |
100 |
101 | com.googlecode.concurrentlinkedhashmap
102 | concurrentlinkedhashmap-lru
103 | 1.4
104 |
105 |
106 |
107 |
108 |
--------------------------------------------------------------------------------
/src/main/java/org/lic/ip/crawler/Crawler.java:
--------------------------------------------------------------------------------
1 | package org.lic.ip.crawler;
2 |
3 | import com.alibaba.fastjson.JSON;
4 | import com.alibaba.fastjson.JSONObject;
5 | import org.lic.ip.util.HttpClientPool;
6 | import org.lic.ip.util.IPUtil;
7 | import org.slf4j.Logger;
8 | import org.slf4j.LoggerFactory;
9 |
10 | import java.io.BufferedReader;
11 | import java.io.File;
12 | import java.io.FileReader;
13 | import java.io.IOException;
14 | import java.util.HashMap;
15 | import java.util.LinkedList;
16 | import java.util.Map;
17 |
18 | /**
19 | * Created by lc on 15/1/8.
20 | */
21 | public class Crawler {
22 | private static final Logger logger = LoggerFactory.getLogger(Crawler.class);
23 |
24 | private static final String TAOBAO_URL = "http://ip.taobao.com/service/getIpInfo.php";
25 |
26 | private LimitRate limitRate = new LimitRate(1000L, 10);
27 |
28 | private static String cn_delegated = "delegated-apnic-test";
29 |
30 | private static String CN_OUT_ORIGINAL = "delegated-cn-original";
31 |
32 | private static String FN_OUT_ORIGINAL = "delegated-fn-original";
33 |
34 | private static String CN_OUT_MERGED = "delegated-cn-merged";
35 |
36 | private static String FN_OUT_MERGED = "delegated-fn-merged";
37 |
38 | private static String IN_PATH = "input/";
39 |
40 | private static String OUT_PATH = "output/";
41 |
42 | private static String[] all_delegated = {"delegated-afrinic-latest","delegated-apnic-latest",
43 | "delegated-arin-latest", "delegated-lacnic-latest", "delegated-ripencc-latest"};
44 |
45 | private static Map countryCode = new HashMap();
46 |
47 | private static Map> dict = new HashMap>();
48 |
49 | private static LinkedList availableIPs = new LinkedList();
50 |
51 | public IPv4RadixTree scanFNIP() {
52 | IPv4RadixTree fnTree = new IPv4RadixTree();
53 | BufferedReader reader = null;
54 | try {
55 | // load country code
56 | reader = new BufferedReader(
57 | new FileReader(new File(IN_PATH + "country_code")));
58 | String line;
59 | while ((line = reader.readLine()) != null) {
60 | String[] sp = line.split(" ");
61 | countryCode.put(sp[0].trim(), sp[1].trim());
62 | }
63 | reader.close();
64 |
65 | for (String file : all_delegated) {
66 | reader = new BufferedReader(
67 | new FileReader(new File(IN_PATH + file)));
68 | while ((line = reader.readLine()) != null) {
69 | String[] params = line.split("\\|");
70 |
71 | if (params.length >= 4
72 | && params[2].equals("ipv4")
73 | && !params[3].equals("*")
74 | && !params[1].equals("CN")) {
75 | long startIP = IPUtil.ipString2Long(params[3]);
76 | long endIP = startIP + Integer.parseInt(params[4]);
77 | logger.info(startIP + " " + endIP + " " + Integer
78 | .parseInt(params[4]));
79 | IPRange ipRange = new IPRange(startIP, endIP);
80 | ipRange.prefixlen = IPUtil.getSmallestMasklen(Integer.parseInt(params[4]));
81 | if (params[1].equals("")) {
82 | availableIPs.addAll(IPUtil.iprangeToCidrs(ipRange));
83 | } else {
84 | if (dict.containsKey(params[1])) {
85 | LinkedList lst = dict.get(params[1]);
86 | lst.addAll(IPUtil.iprangeToCidrs(ipRange));
87 | dict.put(params[1], lst);
88 | } else {
89 | dict.put(params[1], IPUtil.iprangeToCidrs(ipRange));
90 | }
91 | }
92 | }
93 | }
94 | }
95 | System.out.println("finish read");
96 |
97 | for (String key : dict.keySet()) {
98 | IPv4Network net = dict.get(key).getLast();
99 | String randomIP = IPUtil
100 | .getRandomIp(net.getCIDR().split("/")[0], net.getMasklen());
101 | for (IPv4Network network : dict.get(key)) {
102 | IpData ipData = new IpData();
103 | ipData.setNetwork(network.getCIDR());
104 | ipData.setCountry(countryCode.get(key));
105 | ipData.setProvince("");
106 | ipData.setCity("");
107 | ipData.setIsp("");
108 | ipData.setIp(randomIP);
109 | ipData.setIpAmount(IPUtil.getAmount(network.getCIDR()));
110 | fnTree.put(network.getCIDR(), ipData);
111 | }
112 | }
113 |
114 | for (IPv4Network network : availableIPs) {
115 | String randomIP = IPUtil.getRandomIp(network.getCIDR().split("/")[0],network.getMasklen());
116 | IpData ipData = null;
117 | System.out.println("query ip " + network.getCIDR());
118 | if (ipData == null) {
119 | while (ipData == null) {
120 | try {
121 | ipData = queryFromTaobao(randomIP);
122 | } catch (Exception e) {
123 | logger.error(
124 | "queryFromTaobao exception: " + e
125 | .getMessage(), e);
126 | }
127 | }
128 | }
129 | ipData.setIpAmount(IPUtil.getAmount(network.getCIDR()));
130 | ipData.setNetwork(network.getCIDR());
131 | fnTree.put(network.getCIDR(), ipData);
132 | }
133 |
134 | } catch (Exception e) {
135 | logger.error(e.getMessage(), e);
136 | e.printStackTrace();
137 | } finally {
138 | if (reader != null) {
139 | try {
140 | reader.close();
141 | } catch (IOException e) {
142 | e.printStackTrace();
143 | }
144 | }
145 | try {
146 | fnTree.writeRawToFile(OUT_PATH + FN_OUT_ORIGINAL);
147 | fnTree.merge();
148 | fnTree.writeRawToFile(OUT_PATH + FN_OUT_MERGED);
149 | } catch (IOException e) {
150 | logger.error(e.getMessage(), e);
151 | }
152 | }
153 | return fnTree;
154 | }
155 |
156 | public IPv4RadixTree scanCNIP() {
157 | IPv4RadixTree cnTree = new IPv4RadixTree();
158 | BufferedReader reader = null;
159 | try {
160 | reader = new BufferedReader(
161 | new FileReader(new File(IN_PATH + cn_delegated)));
162 | String line;
163 | while ((line = reader.readLine()) != null) {
164 | String[] params = line.split("\\|");
165 | if (params.length >= 4 && params[1].equals("CN")
166 | && params[2].equals("ipv4") && !params[3].equals("*")) {
167 |
168 | String baseIP = params[3];
169 | int masklen = 32 - (int) (log(Integer.parseInt(params[4]), 2));
170 | String prefix = baseIP + "/" + masklen;
171 | if (masklen > 24) masklen = 24;
172 | IPv4Network networks = new IPv4Network(prefix);
173 | for (String subnet : networks.getSubnet(24)) {
174 | String startIP = subnet.split("/")[0];
175 | String subMasklen = subnet.split("/")[1];
176 | String randomIP = IPUtil.getRandomIp(startIP, Integer.parseInt(subMasklen));
177 | IpData ipData = null;
178 | while (ipData == null) {
179 | try {
180 | ipData = queryFromTaobao(randomIP);
181 | } catch (Exception e) {
182 | logger.error(
183 | "queryFromTaobao exception: " + e
184 | .getMessage(), e);
185 | }
186 | }
187 | int amount = 1<<(32 - masklen);
188 | ipData.setIpAmount(amount);
189 | ipData.setNetwork(subnet);
190 | logger.info(ipData.toFileString());
191 | cnTree.put(subnet, ipData);
192 | }
193 | }
194 | }
195 | } catch (Exception e) {
196 | logger.error(e.getMessage(), e);
197 | } finally {
198 | if (reader != null) {
199 | try {
200 | reader.close();
201 | } catch (IOException e) {
202 | e.printStackTrace();
203 | }
204 | }
205 | try {
206 | cnTree.writeRawToFile(OUT_PATH + CN_OUT_ORIGINAL);
207 | cnTree.merge();
208 | cnTree.writeRawToFile(OUT_PATH + CN_OUT_MERGED);
209 | } catch (IOException e) {
210 | logger.error(e.getMessage(), e);
211 | }
212 | }
213 | return cnTree;
214 | }
215 |
216 | private IpData queryFromTaobao(String ip) throws Exception {
217 | limitRate.check();
218 | String ret = HttpClientPool.getInstance().getMethod(TAOBAO_URL + "?ip=" + ip, 5000);
219 | if (ret == null) {
220 | return null;
221 | } else {
222 | JSONObject json = JSON.parseObject(ret);
223 | if (json.getInteger("code") == 0) {
224 | JSONObject dataJson = json.getJSONObject("data");
225 | IpData ipData = new IpData();
226 | ipData.setCountry(dataJson.getString("country"));
227 | ipData.setProvince(dataJson.getString("region"));
228 | ipData.setCity(dataJson.getString("city"));
229 | ipData.setIsp(dataJson.getString("isp"));
230 | ipData.setIp(ip);
231 | return ipData;
232 | } else {
233 | return null;
234 | }
235 | }
236 | }
237 |
238 | public static double log(double value, double base) {
239 | return Math.log(value) / Math.log(base);
240 | }
241 |
242 | public static void main(String[] args) throws Exception {
243 | if (args.length > 1) {
244 | IN_PATH = args[0];
245 | OUT_PATH = args[1];
246 | Crawler crawler = new Crawler();
247 | crawler.scanCNIP();
248 | crawler.scanFNIP();
249 | IPv4RadixTree retTree = new IPv4RadixTree();
250 | retTree.loadFromLocalFile(OUT_PATH + FN_OUT_MERGED);
251 | retTree.loadFromLocalFile(OUT_PATH + CN_OUT_MERGED);
252 | retTree.merge();
253 | retTree.writeRawToFile(OUT_PATH + "ipdb.dat");
254 | logger.info("finish");
255 | } else {
256 | System.out.println("miss param, abandon !!!");
257 | System.exit(1);
258 | }
259 | }
260 | }
261 |
--------------------------------------------------------------------------------
/src/main/java/org/lic/ip/crawler/IPRange.java:
--------------------------------------------------------------------------------
1 | package org.lic.ip.crawler;
2 |
/**
 * A span of IPv4 addresses held as numeric values, optionally tagged with the CIDR
 * string and prefix length it was derived from.
 *
 * <p>Ranges order by ascending {@code start}; for equal starts the range with the
 * larger {@code end} comes first.
 *
 * Created by lc on 15/1/14.
 */
public class IPRange implements Comparable<IPRange> {

    public IPRange(long start, long end) {
        this.start = start;
        this.end = end;
    }

    public IPRange(long start, long end, String cidr) {
        this.start = start;
        this.end = end;
        this.cidr = cidr;
    }

    /** Numeric value of the first address. */
    public long start;

    /** Numeric end of the range, exactly as supplied by the caller. */
    public long end;

    /** CIDR notation of the range; {@code null} unless supplied. */
    public String cidr;

    /** Prefix length associated with the range; 0 unless assigned. */
    public int prefixlen;

    /**
     * Compares by {@code start} ascending, then by {@code end} descending.
     *
     * <p>The fields are compared directly rather than by casting their difference to
     * {@code int}: IPv4 values span 32 bits, so a truncated difference can report the
     * wrong sign or even 0 for distinct ranges.
     */
    @Override
    public int compareTo(IPRange ipRange) {
        if (start != ipRange.start) {
            return start < ipRange.start ? -1 : 1;
        }
        if (end != ipRange.end) {
            return end > ipRange.end ? -1 : 1;
        }
        return 0;
    }
}
35 |
--------------------------------------------------------------------------------
/src/main/java/org/lic/ip/crawler/IPv4Network.java:
--------------------------------------------------------------------------------
1 | package org.lic.ip.crawler;
2 |
3 | import org.lic.ip.util.IPUtil;
4 |
5 | import java.util.ArrayList;
6 | import java.util.List;
7 |
8 | /**
9 | * Created by lc on 15/1/9.
10 | */
11 | public class IPv4Network {
12 | long baseIPnumeric; // 起始ip
13 |
14 | int netmaskNumeric; // 掩码 netmask
15 |
16 | int numericCIDR; // cidr
17 |
/**
 * Parses a network given in CIDR notation, i.e. IPv4Network("1.1.1.0/24").
 *
 * @param IPinCIDRFormat dotted-quad address and prefix length separated by '/'
 * @throws NumberFormatException if the text is not of the form a.b.c.d/n, an octet is
 *         outside 0..255, or the prefix length is outside 8..32
 */
public IPv4Network(String IPinCIDRFormat) throws NumberFormatException {
    String[] parts = IPinCIDRFormat.split("/");
    if (parts.length != 2) {
        throw new NumberFormatException("Invalid CIDR format '"
                + IPinCIDRFormat + "', should be: xx.xx.xx.xx/xx");
    }

    int prefixLength = Integer.parseInt(parts[1]);
    if (prefixLength > 32) {
        throw new NumberFormatException("CIDR can not be greater than 32: " + IPinCIDRFormat);
    }

    String[] octets = parts[0].split("\\.");
    if (octets.length != 4) {
        throw new NumberFormatException("Invalid IP address: " + IPinCIDRFormat);
    }

    baseIPnumeric = 0;
    for (int n = 0; n < octets.length; n++) {
        int octet = Integer.parseInt(octets[n]);
        if ((octet & 0xff) != octet) {
            throw new NumberFormatException("Invalid IP address: " + IPinCIDRFormat);
        }
        // NOTE(review): the shift is done in int arithmetic, so a first octet >= 128
        // contributes a negative value; the (long, int) constructor stores the value it
        // is given unchanged. Confirm which form callers rely on before changing this.
        baseIPnumeric += octet << (24 - 8 * n);
    }

    /* netmask from CIDR */
    if (prefixLength < 8) {
        throw new NumberFormatException("Netmask CIDR can not be less than 8: " + IPinCIDRFormat);
    }
    netmaskNumeric = 0xffffffff << (32 - prefixLength);
    this.numericCIDR = prefixLength;
}
62 |
/**
 * Builds a network from a numeric base address, i.e. IPv4Network(16843008, 24).
 *
 * @param ip        numeric base address, stored unchanged
 * @param prefixlen prefix length, 8..32
 * @throws NumberFormatException if {@code prefixlen} is outside 8..32
 */
public IPv4Network(long ip, int prefixlen) throws NumberFormatException {
    boolean inRange = prefixlen >= 8 && prefixlen <= 32;
    if (!inRange) {
        throw new NumberFormatException("CIDR can not be >32 or <8 " + prefixlen);
    }

    this.baseIPnumeric = ip;
    this.netmaskNumeric = 0xffffffff << (32 - prefixlen);
    this.numericCIDR = prefixlen;
}
81 |
82 | /**
83 | * 起始ip i.e. xxx.xxx.xxx.xxx
84 | *
85 | * @return
86 | */
87 | public String getStartIP() {
88 | return IPUtil.ipLong2String(baseIPnumeric);
89 | }
90 |
91 | /**
92 | * int型ip转为string型
93 | *
94 | * @param ip
95 | * @return
96 | */
97 | private String convertNumericIpToSymbolic(Integer ip) {
98 | StringBuffer sb = new StringBuffer(15);
99 | for (int shift = 24; shift > 0; shift -= 8) {
100 | sb.append(Integer.toString((ip >>> shift) & 0xff));
101 | sb.append('.');
102 | }
103 | sb.append(Integer.toString(ip & 0xff));
104 | return sb.toString();
105 | }
106 |
107 | /**
108 | * 获取子网掩码 i.e. 255.255.255.0
109 | *
110 | * @return
111 | */
112 | public String getNetmask() {
113 | StringBuffer sb = new StringBuffer(15);
114 | for (int shift = 24; shift > 0; shift -= 8) {
115 | sb.append(Long.toString((netmaskNumeric >>> shift) & 0xff));
116 | sb.append('.');
117 | }
118 | sb.append(Long.toString(netmaskNumeric & 0xff));
119 | return sb.toString();
120 | }
121 |
122 | /**
123 | * 包含CIDR的IP i.e. 1.1.1.0/24
124 | *
125 | * @return
126 | */
127 | public String getCIDR() {
128 | int i;
129 | for (i = 0; i < 32; i++) {
130 | if ((netmaskNumeric << i) == 0)
131 | break;
132 | }
133 | return IPUtil.ipLong2String(baseIPnumeric & netmaskNumeric) + "/"
134 | + i;
135 | }
136 |
137 | // CIDR数值
138 | public int getMasklen() {
139 | return this.numericCIDR;
140 | }
141 |
142 | /**
143 | * 有效IPs
144 | *
145 | * @return
146 | */
147 | public List getAvailableIPs(Integer numberofIPs) {
148 | ArrayList result = new ArrayList();
149 | int numberOfBits;
150 | for (numberOfBits = 0; numberOfBits < 32; numberOfBits++) {
151 | if ((netmaskNumeric << numberOfBits) == 0)
152 | break;
153 | }
154 |
155 | Integer numberOfIPs = 0;
156 | for (int n = 0; n < (32 - numberOfBits); n++) {
157 | numberOfIPs = numberOfIPs << 1;
158 | numberOfIPs = numberOfIPs | 0x01;
159 | }
160 |
161 | Long baseIP = baseIPnumeric & netmaskNumeric;
162 | for (int i = 1; i < (numberOfIPs) && i < numberofIPs; i++) {
163 | Long ourIP = baseIP + i;
164 | String ip = IPUtil.ipLong2String(ourIP);
165 | result.add(ip);
166 | }
167 | return result;
168 | }
169 |
170 | /**
171 | * IP范围 i.e. 1.1.1.1 - 1.1.1.255
172 | *
173 | * @return
174 | */
175 | public String getHostAddressRange() {
176 |
177 | int numberOfBits;
178 | for (numberOfBits = 0; numberOfBits < 32; numberOfBits++) {
179 | if ((netmaskNumeric << numberOfBits) == 0)
180 | break;
181 | }
182 | Integer numberOfIPs = 0;
183 | for (int n = 0; n < (32 - numberOfBits); n++) {
184 | numberOfIPs = numberOfIPs << 1;
185 | numberOfIPs = numberOfIPs | 0x01;
186 | }
187 |
188 | Long baseIP = baseIPnumeric & netmaskNumeric;
189 | String firstIP = IPUtil.ipLong2String(baseIP + 1);
190 | String lastIP = IPUtil.ipLong2String(baseIP + numberOfIPs - 1);
191 | return firstIP + " - " + lastIP;
192 | }
193 |
194 | /**
195 | * ip范围
196 | *
197 | * @return
198 | */
199 | public IPRange getIPRange() {
200 | long endIP = baseIPnumeric + (1<<(32 - numericCIDR));
201 | return new IPRange(baseIPnumeric, endIP, getCIDR());
202 | }
203 |
204 | public List getSubnet(int masklen) {
205 | if (masklen > 32 || masklen < 8 || masklen < numericCIDR) {
206 | throw new NumberFormatException("masklen can not be greater than 32");
207 | }
208 | int numberOfIPs = 1 << (32 - masklen);
209 | Long startIP = baseIPnumeric & netmaskNumeric;
210 | List list = new ArrayList();
211 | for (int i=0; i 0; shift -= 8) {
253 |
254 | // process 3 bytes, from high order byte down.
255 | sb.append(Long.toString((wildcardMask >>> shift) & 0xff));
256 |
257 | sb.append('.');
258 | }
259 | sb.append(Long.toString(wildcardMask & 0xff));
260 |
261 | return sb.toString();
262 |
263 | }
264 |
265 | public String getBroadcastAddress() {
266 |
267 | if (netmaskNumeric == 0xffffffff)
268 | return "0.0.0.0";
269 |
270 | int numberOfBits;
271 | for (numberOfBits = 0; numberOfBits < 32; numberOfBits++) {
272 |
273 | if ((netmaskNumeric << numberOfBits) == 0)
274 | break;
275 |
276 | }
277 | Integer numberOfIPs = 0;
278 | for (int n = 0; n < (32 - numberOfBits); n++) {
279 |
280 | numberOfIPs = numberOfIPs << 1;
281 | numberOfIPs = numberOfIPs | 0x01;
282 | }
283 |
284 | Long baseIP = baseIPnumeric & netmaskNumeric;
285 | Long ourIP = baseIP + numberOfIPs;
286 |
287 | String ip = IPUtil.ipLong2String(ourIP);
288 |
289 | return ip;
290 | }
291 |
292 | private String getBinary(int number) {
293 | String result = "";
294 |
295 | Integer ourMaskBitPattern = 1;
296 | for (int i = 1; i <= 32; i++) {
297 |
298 | if ((number & ourMaskBitPattern) != 0) {
299 |
300 | result = "1" + result; // the bit is 1
301 | } else { // the bit is 0
302 |
303 | result = "0" + result;
304 | }
305 | if ((i % 8) == 0 && i != 0 && i != 32)
306 |
307 | result = "." + result;
308 | ourMaskBitPattern = ourMaskBitPattern << 1;
309 |
310 | }
311 | return result;
312 | }
313 |
314 | public String getNetmaskInBinary() {
315 |
316 | return getBinary(netmaskNumeric);
317 | }
318 |
319 | /**
320 | * Checks if the given IP address contains in subnet
321 | *
322 | * @param IPaddress
323 | * @return
324 | */
325 | public boolean contains(String IPaddress) {
326 |
327 | Integer checkingIP = 0;
328 | String[] st = IPaddress.split("\\.");
329 |
330 | if (st.length != 4)
331 | throw new NumberFormatException("Invalid IP address: " + IPaddress);
332 |
333 | int i = 24;
334 | for (int n = 0; n < st.length; n++) {
335 |
336 | int value = Integer.parseInt(st[n]);
337 |
338 | if (value != (value & 0xff)) {
339 |
340 | throw new NumberFormatException("Invalid IP address: "
341 | + IPaddress);
342 | }
343 |
344 | checkingIP += value << i;
345 | i -= 8;
346 | }
347 |
348 | if ((baseIPnumeric & netmaskNumeric) == (checkingIP & netmaskNumeric))
349 |
350 | return true;
351 | else
352 | return false;
353 | }
354 |
355 | public boolean contains(IPv4Network child) {
356 |
357 | Long subnetID = child.baseIPnumeric;
358 |
359 | int subnetMask = child.netmaskNumeric;
360 |
361 | if ((subnetID & this.netmaskNumeric) == (this.baseIPnumeric & this.netmaskNumeric)) {
362 |
363 | if ((this.netmaskNumeric < subnetMask) == true
364 | && this.baseIPnumeric <= subnetID) {
365 |
366 | return true;
367 | }
368 |
369 | }
370 | return false;
371 |
372 | }
373 |
374 | /**
375 | * @param args
376 | */
377 | public static void main(String[] args) {
378 | IPv4Network ipv4 = new IPv4Network("192.72.40.0/21");
379 | System.out.println(ipv4.getCIDR());
380 | System.out.println(ipv4.getNetmask());
381 | System.out.println(ipv4.getNumberOfHosts());
382 | System.out.println(ipv4.getWildcardMask());
383 | System.out.println(ipv4.getBroadcastAddress());
384 | System.out.println(ipv4.getHostAddressRange());
385 | System.out.println(ipv4.getSubnet(24));
386 | }
387 | }
388 |
--------------------------------------------------------------------------------
/src/main/java/org/lic/ip/crawler/IPv4RadixTree.java:
--------------------------------------------------------------------------------
1 | package org.lic.ip.crawler;
2 |
3 |
4 | import org.lic.ip.util.IPUtil;
5 |
6 | import java.io.*;
7 | import java.net.InetAddress;
8 | import java.net.UnknownHostException;
9 | import java.nio.ByteBuffer;
10 | import java.util.*;
11 |
12 | /**
13 | * Created by lc on 15/1/8.
14 | */
15 | public class IPv4RadixTree {
16 | /**
17 | * Special value that designates that there are no value stored in the key so far.
18 | * One can't use store value in a tree.
19 | */
20 | public static final IpData NO_VALUE = null;
21 |
22 | private static final int NULL_PTR = -1;
23 | private static final int ROOT_PTR = 0;
24 |
25 | private static final long MAX_IPV4_BIT = 0x80000000L;
26 |
27 | private int[] rights;
28 | private int[] lefts;
29 | private IpData[] values;
30 |
31 | private int allocatedSize;
32 | private int size;
33 |
34 | /**
35 | * Initializes IPv4 radix tree with default capacity of 1024 nodes. It should
36 | * be sufficient for small databases.
37 | */
38 | public IPv4RadixTree() {
39 | init(1024);
40 | }
41 |
42 | /**
43 | * Initializes IPv4 radix tree with a given capacity.
44 | * @param allocatedSize initial capacity to allocate
45 | */
46 | public IPv4RadixTree(int allocatedSize) {
47 | init(allocatedSize);
48 | }
49 |
50 | private void init(int allocatedSize) {
51 | this.allocatedSize = allocatedSize;
52 |
53 | rights = new int[this.allocatedSize];
54 | lefts = new int[this.allocatedSize];
55 | values = new IpData[this.allocatedSize];
56 |
57 | size = 1;
58 | lefts[0] = NULL_PTR;
59 | rights[0] = NULL_PTR;
60 | values[0] = NO_VALUE;
61 | }
62 |
63 | /**
64 | * Puts a key-value pair in a tree.
65 | * @param key IPv4 network prefix
66 | * @param mask IPv4 netmask in networked byte order format (for example,
67 | * 0xffffff00L = 4294967040L corresponds to 255.255.255.0 AKA /24 network
68 | * bitmask)
69 | * @param value an arbitrary value that would be stored under a given key
70 | */
71 | public void put(long key, long mask, IpData value) {
72 | long bit = 0x80000000L; // 128.0.0.0
73 | int node = ROOT_PTR;
74 | int next = ROOT_PTR;
75 |
76 | while ((bit & mask) != 0) {
77 | next = ((key & bit) != 0) ? rights[node] : lefts[node];
78 | if (next == NULL_PTR)
79 | break;
80 | bit >>= 1;
81 | node = next;
82 | }
83 |
84 | if (next != NULL_PTR) {
85 | values[node] = value;
86 | return;
87 | }
88 |
89 | while ((bit & mask) != 0) {
90 | if (size == allocatedSize)
91 | expandAllocatedSize();
92 | next = size;
93 | values[next] = NO_VALUE;
94 | rights[next] = NULL_PTR;
95 | lefts[next] = NULL_PTR;
96 | if ((key & bit) != 0) {
97 | rights[node] = next;
98 | } else {
99 | lefts[node] = next;
100 | }
101 | bit >>= 1;
102 | node = next;
103 | size++;
104 | }
105 | values[node] = value;
106 | }
107 |
108 | private void expandAllocatedSize() {
109 | int oldSize = allocatedSize;
110 | allocatedSize = allocatedSize * 2;
111 |
112 | int[] newLefts = new int[allocatedSize];
113 | System.arraycopy(lefts, 0, newLefts, 0, oldSize);
114 | lefts = newLefts;
115 |
116 | int[] newRights = new int[allocatedSize];
117 | System.arraycopy(rights, 0, newRights, 0, oldSize);
118 | rights = newRights;
119 |
120 | IpData[] newValues = new IpData[allocatedSize];
121 | System.arraycopy(values, 0, newValues, 0, oldSize);
122 | values = newValues;
123 | }
124 |
125 | /**
126 | * Selects a value for a given IPv4 address, traversing tree and choosing
127 | * most specific value available for a given address.
128 | * @param key IPv4 address to look up
129 | * @return value at most specific IPv4 network in a tree for a given IPv4
130 | * address
131 | */
132 | public IpData selectValue(long key) {
133 | long bit = MAX_IPV4_BIT;
134 | IpData value = NO_VALUE;
135 | int node = ROOT_PTR;
136 |
137 | while (node != NULL_PTR) {
138 | if (values[node] != NO_VALUE)
139 | value = values[node];
140 | node = ((key & bit) != 0) ? rights[node] : lefts[node];
141 | bit >>= 1;
142 | }
143 |
144 | return value;
145 | }
146 |
147 | /**
148 | * Puts a key-value pair in a tree, using a string representation of IPv4 prefix.
149 | * @param ipNet IPv4 network as a string in form of "a.b.c.d/e", where a, b, c, d
150 | * are IPv4 octets (in decimal) and "e" is a netmask in CIDR notation
151 | * @param value an arbitrary value that would be stored under a given key
152 | * @throws java.net.UnknownHostException
153 | */
154 | public void put(String ipNet, IpData value) throws UnknownHostException {
155 | int pos = ipNet.indexOf('/');
156 | String ipStr = ipNet.substring(0, pos);
157 | long ip = inet_aton(ipStr);
158 |
159 | String netmaskStr = ipNet.substring(pos + 1);
160 | int cidr = Integer.parseInt(netmaskStr);
161 | long netmask = ((1L << (32 - cidr)) - 1L) ^ 0xffffffffL;
162 |
163 | put(ip, netmask, value);
164 | }
165 |
166 | /**
167 | * Selects a value for a given IPv4 address, traversing tree and choosing
168 | * most specific value available for a given address.
169 | * @param ipStr IPv4 address to look up, in string form (i.e. "a.b.c.d")
170 | * @return value at most specific IPv4 network in a tree for a given IPv4
171 | * address
172 | * @throws java.net.UnknownHostException
173 | */
174 | public IpData selectValue(String ipStr) throws UnknownHostException {
175 | return selectValue(inet_aton(ipStr));
176 | }
177 |
178 | /**
179 | * Helper function that reads IPv4 radix tree from a local file in tab-separated format:
180 | * (IPv4 net => value)
181 | * @param filename name of a local file to read
182 | * @return a fully constructed IPv4 radix tree from that file
183 | * @throws java.io.IOException
184 | */
185 | public void loadFromLocalFile(String filename) throws IOException {
186 | IPv4RadixTree tr = new IPv4RadixTree(countLinesInLocalFile(filename));
187 | BufferedReader br = new BufferedReader(new FileReader(filename));
188 | String l;
189 | IpData value;
190 | //["country", "province", "city", "isp", "ip", "ip_amount"]
191 | while ((l = br.readLine()) != null) {
192 | String[] c = l.split(";");
193 | //1.0.0.0/24;澳大利亚;;;;223.255.255.111;256
194 | //1.0.1.0/24;中国;福建省;福州市;电信;1.0.1.53;256
195 | value = new IpData();
196 | value.setNetwork(c[0]);
197 | value.setCountry(c[1]);
198 | value.setProvince(c[2]);
199 | value.setCity(c[3]);
200 | value.setIsp(c[4]);
201 | value.setIp(c[5]);
202 | value.setIpAmount(Integer.parseInt(c[6]));
203 |
204 | put(c[0], value);
205 | }
206 | }
207 |
208 | private static long inet_aton(String ipStr) throws UnknownHostException {
209 | ByteBuffer bb = ByteBuffer.allocate(8);
210 | bb.putInt(0);
211 | bb.put(InetAddress.getByName(ipStr).getAddress());
212 | bb.rewind();
213 | return bb.getLong();
214 | }
215 |
216 | private static int countLinesInLocalFile(String filename) throws IOException {
217 | BufferedReader br = new BufferedReader(new FileReader(filename));
218 | int n = 0;
219 | String l;
220 | while ((l = br.readLine()) != null) {
221 | n++;
222 | }
223 | return n;
224 | }
225 |
226 | /**
227 | * Returns a size of tree in number of nodes (not number of prefixes stored).
228 | * @return a number of nodes in current tree
229 | */
230 | public int size() { return size; }
231 |
232 | public void writeRawToFile(String filename) throws IOException {
233 |
234 | OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(new File(filename)), "UTF-8");
235 | TreeSet valuesTree = new TreeSet();
236 | for (IpData ipData : values) {
237 | if (ipData != null) {
238 | valuesTree.add(ipData);
239 | }
240 | }
241 | for (IpData ipData : valuesTree) {
242 | if (ipData != null) {
243 | writer.write(ipData.toFileString() + "\n");
244 | System.out.println(ipData.toFileString());
245 | }
246 | }
247 | writer.close();
248 | }
249 |
250 | public void merge() throws UnknownHostException {
251 | Deque mergedDeque = new ArrayDeque();
252 | Deque tmpDeque = new ArrayDeque();
253 | TreeSet valuesTree = new TreeSet();
254 | for (IpData ipData : values) {
255 | if (ipData != null) {
256 | valuesTree.add(ipData);
257 | }
258 | }
259 | for (IpData ipData : valuesTree) {
260 | if (ipData == null) continue;
261 | ipData.setIpAmount(IPUtil.getAmount(ipData.getNetwork()));
262 | if (!tmpDeque.isEmpty()) {
263 | IpData pdata = tmpDeque.peekLast();
264 | if (ipData.equals(pdata) || (!ipData.getCountry().equals("中国") && ipData
265 | .getCountry().equals(pdata.getCountry()))) {
266 | // 相同
267 | tmpDeque.addLast(ipData);
268 | } else {
269 | // 不同,合并tmpDeque,写入mergedDeque
270 | IpData first = tmpDeque.peekFirst().copy();
271 | List tmpCidrs = new ArrayList();
272 | for (IpData d : tmpDeque) {
273 | tmpCidrs.add(d.getNetwork());
274 | }
275 | List mergedCidrs = IPUtil.mergeCidrs(tmpCidrs);
276 | for (String cidr : mergedCidrs) {
277 | IpData d = first.copy();
278 | d.setNetwork(cidr);
279 | d.setIpAmount(IPUtil.getAmount(cidr));
280 | mergedDeque.addLast(d);
281 | }
282 | tmpDeque.clear();
283 | tmpDeque.add(ipData);
284 | }
285 | } else {
286 | tmpDeque.addLast(ipData);
287 | }
288 | }
289 | if (tmpDeque.size() > 0) {
290 | IpData first = tmpDeque.peekFirst().copy();
291 | List tmpCidrs = new ArrayList();
292 | for (IpData d : tmpDeque) {
293 | tmpCidrs.add(d.getNetwork());
294 | }
295 | List mergedCidrs = IPUtil.mergeCidrs(tmpCidrs);
296 | for (String cidr : mergedCidrs) {
297 | IpData d = first.copy();
298 | d.setNetwork(cidr);
299 | d.setIpAmount(IPUtil.getAmount(cidr));
300 | mergedDeque.addLast(d);
301 | }
302 | tmpDeque.clear();
303 | }
304 |
305 | init(1024);
306 | for (IpData ipData : mergedDeque) {
307 | put(ipData.getNetwork(), ipData);
308 | }
309 | }
310 |
311 | public static void main(String[] args) throws IOException {
312 | IPv4RadixTree tree = new IPv4RadixTree();
313 | tree.loadFromLocalFile("/Users/lc/github/ipdb_creator/output/delegated-fn-original");
314 | tree.merge();
315 | tree.writeRawToFile("/Users/lc/github/ipdb_creator/output/delegated-fn-merged");
316 | }
317 | }
318 |
--------------------------------------------------------------------------------
/src/main/java/org/lic/ip/crawler/IpData.java:
--------------------------------------------------------------------------------
1 | package org.lic.ip.crawler;
2 |
3 | import org.lic.ip.util.IPUtil;
4 |
5 | /**
6 | * Created by lc on 15/1/9.
7 | */
8 | public class IpData implements Comparable {
9 | private String network;
10 |
11 | private String country;
12 |
13 | private String province;
14 |
15 | private String city;
16 |
17 | private String isp;
18 |
19 | private String ip;
20 |
21 | private int ipAmount;
22 |
23 | public String getCountry() {
24 | return country;
25 | }
26 |
27 | public void setCountry(String country) {
28 | this.country = country;
29 | }
30 |
31 | public String getProvince() {
32 | return province;
33 | }
34 |
35 | public void setProvince(String province) {
36 | this.province = province;
37 | }
38 |
39 | public String getCity() {
40 | return city;
41 | }
42 |
43 | public void setCity(String city) {
44 | this.city = city;
45 | }
46 |
47 | public String getIsp() {
48 | return isp;
49 | }
50 |
51 | public void setIsp(String isp) {
52 | this.isp = isp;
53 | }
54 |
55 | public String getIp() {
56 | return ip;
57 | }
58 |
59 | public void setIp(String ip) {
60 | this.ip = ip;
61 | }
62 |
63 | public int getIpAmount() {
64 | return ipAmount;
65 | }
66 |
67 | public void setIpAmount(int ipAmount) {
68 | this.ipAmount = ipAmount;
69 | }
70 |
71 | public String getNetwork() {
72 | return network;
73 | }
74 |
75 | public void setNetwork(String network) {
76 | this.network = network;
77 | }
78 |
79 | public String toFileString() {
80 | //["country", "province", "city", "isp", "ip", "ip_amount"]
81 | return new StringBuilder(network).append(";")
82 | .append(country).append(";")
83 | .append(province).append(";")
84 | .append(city).append(";")
85 | .append(isp).append(";")
86 | .append(ip).append(";")
87 | .append(ipAmount).toString();
88 | }
89 |
90 | public IpData copy() {
91 | IpData d = new IpData();
92 | d.setNetwork(network);
93 | d.setCountry(country);
94 | d.setCity(city);
95 | d.setProvince(province);
96 | d.setIsp(isp);
97 | d.setIp(ip);
98 | d.setIpAmount(ipAmount);
99 | return d;
100 | }
101 |
102 | @Override public String toString() {
103 | return "Data{" +
104 | "network='" + network + '\'' +
105 | ", country='" + country + '\'' +
106 | ", province='" + province + '\'' +
107 | ", city='" + city + '\'' +
108 | ", isp='" + isp + '\'' +
109 | ", ip='" + ip + '\'' +
110 | ", ipAmount=" + ipAmount +
111 | '}';
112 | }
113 |
114 | @Override
115 | public boolean equals(Object o) {
116 | if (this == o) {
117 | return true;
118 | }
119 | if (o == null || getClass() != o.getClass()) {
120 | return false;
121 | }
122 |
123 | IpData ipData = (IpData) o;
124 |
125 | if (city != null ? !city.equals(ipData.city) : ipData.city != null) {
126 | return false;
127 | }
128 | if (country != null ?
129 | !country.equals(ipData.country) :
130 | ipData.country != null) {
131 | return false;
132 | }
133 | if (isp != null ? !isp.equals(ipData.isp) : ipData.isp != null) {
134 | return false;
135 | }
136 | if (province != null ?
137 | !province.equals(ipData.province) :
138 | ipData.province != null) {
139 | return false;
140 | }
141 |
142 | return true;
143 | }
144 |
145 | @Override
146 | public int hashCode() {
147 | int result = country != null ? country.hashCode() : 0;
148 | result = 31 * result + (province != null ? province.hashCode() : 0);
149 | result = 31 * result + (city != null ? city.hashCode() : 0);
150 | result = 31 * result + (isp != null ? isp.hashCode() : 0);
151 | return result;
152 | }
153 |
154 | @Override public int compareTo(IpData ipData) {
155 | long startIP = IPUtil.ipString2Long(network.split("/")[0]);
156 | long cIP = IPUtil.ipString2Long(ipData.network.split("/")[0]);
157 | if (startIP - cIP > 0) return 1;
158 | else if (startIP - cIP < 0) return -1;
159 | else return 0;
160 | }
161 | }
162 |
--------------------------------------------------------------------------------
/src/main/java/org/lic/ip/crawler/LimitQueue.java:
--------------------------------------------------------------------------------
1 | package org.lic.ip.crawler;
2 |
3 | import java.util.Collection;
4 | import java.util.Iterator;
5 | import java.util.LinkedList;
6 | import java.util.Queue;
7 |
/**
 * A FIFO queue that keeps at most {@code limit} elements when it is filled
 * through {@link #offer(Object)}: once the limit is reached, the oldest
 * element is dropped before the new one is appended.
 *
 * <p>Only {@code offer} enforces the limit. {@link #add(Object)} and
 * {@link #addAll(Collection)} delegate straight to the backing queue and can
 * grow it beyond {@code limit}. All other operations delegate unchanged to the
 * backing {@link LinkedList}. The class performs no synchronization.
 *
 * <p>Created by lc on 15/1/9.
 *
 * @param <E> element type
 */
public class LimitQueue<E> implements Queue<E> {
    /** Maximum number of elements retained by {@link #offer(Object)}. */
    private final int limit;

    private final Queue<E> queue = new LinkedList<E>();

    /**
     * @param limit maximum number of elements retained by {@code offer}
     */
    public LimitQueue(int limit) {
        this.limit = limit;
    }

    /**
     * Appends {@code e}; if the queue already holds {@code limit} or more
     * elements, the head is removed first to make room.
     *
     * @return the result of the backing queue's {@code offer}
     */
    @Override
    public boolean offer(E e) {
        if (queue.size() >= limit) {
            // Over capacity: evict the oldest element before enqueueing.
            queue.poll();
        }
        return queue.offer(e);
    }

    @Override
    public E poll() {
        return queue.poll();
    }

    /**
     * Returns the configured size limit.
     *
     * @return the limit passed to the constructor
     */
    public int getLimit() {
        return limit;
    }

    /** Appends {@code e} without evicting anything; the limit is not applied. */
    @Override
    public boolean add(E e) {
        return queue.add(e);
    }

    @Override
    public E element() {
        return queue.element();
    }

    @Override
    public E peek() {
        return queue.peek();
    }

    @Override
    public boolean isEmpty() {
        return queue.isEmpty();
    }

    @Override
    public int size() {
        return queue.size();
    }

    @Override
    public E remove() {
        return queue.remove();
    }

    /** Appends all elements without evicting anything; the limit is not applied. */
    @Override
    public boolean addAll(Collection<? extends E> c) {
        return queue.addAll(c);
    }

    @Override
    public void clear() {
        queue.clear();
    }

    @Override
    public boolean contains(Object o) {
        return queue.contains(o);
    }

    @Override
    public boolean containsAll(Collection<?> c) {
        return queue.containsAll(c);
    }

    @Override
    public Iterator<E> iterator() {
        return queue.iterator();
    }

    @Override
    public boolean remove(Object o) {
        return queue.remove(o);
    }

    @Override
    public boolean removeAll(Collection<?> c) {
        return queue.removeAll(c);
    }

    @Override
    public boolean retainAll(Collection<?> c) {
        return queue.retainAll(c);
    }

    @Override
    public Object[] toArray() {
        return queue.toArray();
    }

    @Override
    public <T> T[] toArray(T[] a) {
        return queue.toArray(a);
    }

}
124 |
125 |
--------------------------------------------------------------------------------
/src/main/java/org/lic/ip/crawler/LimitRate.java:
--------------------------------------------------------------------------------
1 | package org.lic.ip.crawler;
2 |
3 | import org.slf4j.Logger;
4 | import org.slf4j.LoggerFactory;
5 |
6 | import java.util.Queue;
7 |
8 | /**
9 | * Created by lc on 15/1/9.
10 | */
11 | public class LimitRate {
12 | private static final Logger logger = LoggerFactory.getLogger(LimitRate.class);
13 |
14 | private LimitQueue queue;
15 |
16 | private long duration;
17 |
18 | private int limit;
19 |
20 | public LimitRate(long duration, int limit) {
21 | queue = new LimitQueue(limit);
22 | this.duration = duration;
23 | this.limit = limit;
24 | }
25 |
26 | public void check() throws InterruptedException {
27 | if (queue.size() < limit)
28 | return;
29 | Long first = queue.peek();
30 | if (first == null)
31 | return;
32 | long now = System.currentTimeMillis();
33 | if (now - first <= duration) {
34 | logger.info("limit rate checked, sleep a while");
35 | Thread.sleep(duration - now + first + 1);
36 | }
37 | queue.offer(now);
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/src/main/java/org/lic/ip/iplocator/IPLocation.java:
--------------------------------------------------------------------------------
1 | package org.lic.ip.iplocator;
2 |
3 | /**
4 | * 用来封装ip相关信息,目前只有两个字段,ip所在的国家和地区
5 | *
6 | * @author lc
7 | */
8 | public class IPLocation {
9 |
10 | public static final String UNKNOWN_COUNTRY = "unknown_country";
11 |
12 | public static final String UNKNOWN_AREA = "unknown_area";
13 |
14 | public String country;
15 |
16 | public String area;
17 |
18 | public static IPLocation getNullInstance() {
19 | IPLocation ipl = new IPLocation();
20 | ipl.country = UNKNOWN_COUNTRY;
21 | ipl.area = UNKNOWN_AREA;
22 | return ipl;
23 | }
24 |
25 | @Override
26 | public String toString() {
27 | return "IPLocation [country=" + country + ", area=" + area + "]";
28 | }
29 |
30 | }
31 |
--------------------------------------------------------------------------------
/src/main/java/org/lic/ip/iplocator/IPv4RadixIntTree.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2012 Openstat
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package org.lic.ip.iplocator;
18 |
19 | import org.apache.commons.lang.time.StopWatch;
20 | import org.slf4j.Logger;
21 | import org.slf4j.LoggerFactory;
22 |
23 | import java.io.*;
24 | import java.net.InetAddress;
25 | import java.net.UnknownHostException;
26 | import java.nio.ByteBuffer;
27 |
28 | /**
29 | * A minimalistic, memory size-savvy and fairly fast radix tree (AKA Patricia
30 | * trie) implementation that uses IPv4 addresses with netmasks as keys and
31 | * 32-bit signed integers as values. This tree is generally uses in read-only
32 | * manner: there are no key removal operation and the whole thing works best in
33 | * pre-allocated fashion.
34 | */
35 |
36 | /**
37 | * https://github.com/openstat/ip-radix-tree
38 | */
39 | public class IPv4RadixIntTree {
40 | private static final Logger logger = LoggerFactory
41 | .getLogger(IPv4RadixIntTree.class);
42 |
43 | /**
44 | * Special value that designates that there are no value stored in the key
45 | * so far. One can't use store value in a tree.
46 | */
47 | public static final IPLocation NO_VALUE = IPLocation.getNullInstance();
48 |
49 | private static final int NULL_PTR = -1;
50 |
51 | private static final int ROOT_PTR = 0;
52 |
53 | private static final long MAX_IPV4_BIT = 0x80000000L;
54 |
55 | private int[] rights;
56 |
57 | private int[] lefts;
58 |
59 | private IPLocation[] values;
60 |
61 | private int allocatedSize;
62 |
63 | private int size;
64 |
65 | private static class SingletonHolder {
66 | public static final IPv4RadixIntTree instance = new IPv4RadixIntTree();
67 | }
68 |
69 | public static IPv4RadixIntTree getInstance() {
70 | return SingletonHolder.instance;
71 | }
72 |
73 | private IPv4RadixIntTree() {
74 | StopWatch sw = new StopWatch();
75 | sw.start();
76 |
77 | try {
78 | String filepath = getClass().getClassLoader()
79 | .getResource("ipdb_all_2015-01-19").getPath();
80 |
81 | int lines = countLinesInLocalFile(filepath);
82 | logger.info("file lines: {}", lines);
83 |
84 | init(lines);
85 | loadFromLocalFile(filepath);
86 | } catch (Exception e) {
87 | logger.error(e.getMessage(), e);
88 | }
89 |
90 | sw.stop();
91 | logger.info("init cost: {}ms", sw.getTime());
92 | }
93 |
94 | private void init(int allocatedSize) {
95 | this.allocatedSize = allocatedSize;
96 |
97 | rights = new int[this.allocatedSize];
98 | lefts = new int[this.allocatedSize];
99 | values = new IPLocation[this.allocatedSize];
100 |
101 | size = 1;
102 | lefts[0] = NULL_PTR;
103 | rights[0] = NULL_PTR;
104 | values[0] = NO_VALUE;
105 | }
106 |
107 | private int countLinesInLocalFile(String filepath) throws IOException {
108 | BufferedReader br = new BufferedReader(new FileReader(filepath));
109 | int n = 0;
110 | while (br.readLine() != null) {
111 | n++;
112 | }
113 | br.close();
114 | return n;
115 | }
116 |
117 | /**
118 | * Helper function that reads IPv4 radix tree from a local file in
119 | * tab-separated format: (IPv4 net => value)
120 | *
121 | * @param filepath
122 | * name of a local file to read
123 | * @return a fully constructed IPv4 radix tree from that file
124 | * @throws java.io.IOException
125 | */
126 | private void loadFromLocalFile(String filepath) throws IOException {
127 | BufferedReader br = new BufferedReader(new InputStreamReader(
128 | new FileInputStream(filepath), "UTF-8"));
129 | String l;
130 | IPLocation value;
131 |
132 | // 1.0.1.0/24;中国;福建省;福州市;电信;1.0.1.208;256
133 | while ((l = br.readLine()) != null) {
134 | String[] c = l.split(";");
135 |
136 | // value = String.format("%s %s %s %s", c[1], c[2], c[3], c[4]);
137 | value = new IPLocation();
138 | value.country = c[1].equals("中国") ? c[2] : c[1]; // 如果是国内ip,country字段放省名
139 | value.area = c[4]; // 运营商名
140 |
141 | put(c[0], value);
142 | }
143 |
144 | br.close();
145 | logger.info("load ok, tree size: {}", size());
146 | }
147 |
148 | public void prefixMerge() {
149 |
150 | }
151 |
152 | /**
153 | * Puts a key-value pair in a tree, using a string representation of IPv4
154 | * prefix.
155 | *
156 | * @param ipNet
157 | * IPv4 network as a string in form of "a.b.c.d/e", where a, b,
158 | * c, d are IPv4 octets (in decimal) and "e" is a netmask in CIDR
159 | * notation
160 | * @param value
161 | * an arbitrary value that would be stored under a given key
162 | * @throws java.net.UnknownHostException
163 | */
164 | private void put(String ipNet, IPLocation value)
165 | throws UnknownHostException {
166 | int pos = ipNet.indexOf('/');
167 | String ipStr = ipNet.substring(0, pos);
168 | long ip = inet_aton(ipStr);
169 |
170 | String netmaskStr = ipNet.substring(pos + 1);
171 | int cidr = Integer.parseInt(netmaskStr);
172 | long netmask = ((1L << (32 - cidr)) - 1L) ^ 0xffffffffL;
173 |
174 | put(ip, netmask, value);
175 | }
176 |
177 | /**
178 | * Puts a key-value pair in a tree.
179 | *
180 | * @param key
181 | * IPv4 network prefix
182 | * @param mask
183 | * IPv4 netmask in networked byte order format (for example,
184 | * 0xffffff00L = 4294967040L corresponds to 255.255.255.0 AKA /24
185 | * network bitmask)
186 | * @param value
187 | * an arbitrary value that would be stored under a given key
188 | */
189 | private void put(long key, long mask, IPLocation value) {
190 | long bit = MAX_IPV4_BIT;
191 | int node = ROOT_PTR;
192 | int next = ROOT_PTR;
193 |
194 | while ((bit & mask) != 0) {
195 | next = ((key & bit) != 0) ? rights[node] : lefts[node];
196 | if (next == NULL_PTR)
197 | break;
198 | bit >>= 1;
199 | node = next;
200 | }
201 |
202 | if (next != NULL_PTR) {
203 | // if (node.value != NO_VALUE) {
204 | // throw new IllegalArgumentException();
205 | // }
206 |
207 | values[node] = value;
208 | return;
209 | }
210 |
211 | while ((bit & mask) != 0) {
212 | if (size == allocatedSize)
213 | expandAllocatedSize();
214 |
215 | next = size;
216 | values[next] = NO_VALUE;
217 | rights[next] = NULL_PTR;
218 | lefts[next] = NULL_PTR;
219 |
220 | if ((key & bit) != 0) {
221 | rights[node] = next;
222 | } else {
223 | lefts[node] = next;
224 | }
225 |
226 | bit >>= 1;
227 | node = next;
228 | size++;
229 | }
230 |
231 | values[node] = value;
232 | }
233 |
234 | private void expandAllocatedSize() {
235 | int oldSize = allocatedSize;
236 | allocatedSize = allocatedSize * 2;
237 | logger.info("expandAllocatedSize: {} -> {}", oldSize, allocatedSize);
238 |
239 | int[] newLefts = new int[allocatedSize];
240 | System.arraycopy(lefts, 0, newLefts, 0, oldSize);
241 | lefts = newLefts;
242 |
243 | int[] newRights = new int[allocatedSize];
244 | System.arraycopy(rights, 0, newRights, 0, oldSize);
245 | rights = newRights;
246 |
247 | IPLocation[] newValues = new IPLocation[allocatedSize];
248 | System.arraycopy(values, 0, newValues, 0, oldSize);
249 | values = newValues;
250 | }
251 |
252 | /**
253 | * Selects a value for a given IPv4 address, traversing tree and choosing
254 | * most specific value available for a given address.
255 | *
256 | * @param ipStr
257 | * IPv4 address to look up, in string form (i.e. "a.b.c.d")
258 | * @return value at most specific IPv4 network in a tree for a given IPv4
259 | * address
260 | * @throws java.net.UnknownHostException
261 | */
262 | public IPLocation get(String ipStr) {
263 | return get(inet_aton(ipStr));
264 | }
265 |
266 | /**
267 | * Selects a value for a given IPv4 address, traversing tree and choosing
268 | * most specific value available for a given address.
269 | *
270 | * @param key
271 | * IPv4 address to look up
272 | * @return value at most specific IPv4 network in a tree for a given IPv4
273 | * address
274 | */
275 | public IPLocation get(long key) {
276 | long bit = MAX_IPV4_BIT;
277 | IPLocation value = NO_VALUE;
278 | int node = ROOT_PTR;
279 |
280 | while (node != NULL_PTR) {
281 | if (values[node] != NO_VALUE)
282 | value = values[node];
283 | node = ((key & bit) != 0) ? rights[node] : lefts[node];
284 | bit >>= 1;
285 | }
286 |
287 | return value;
288 | }
289 |
290 | private static long inet_aton(String ipStr) {
291 | try {
292 | ByteBuffer bb = ByteBuffer.allocate(8);
293 | bb.putInt(0);
294 | bb.put(InetAddress.getByName(ipStr).getAddress());
295 | bb.rewind();
296 | return bb.getLong();
297 | } catch (UnknownHostException e) {
298 | logger.error(e.getMessage(), e);
299 | }
300 | return 0;
301 | }
302 |
303 | /**
304 | * Returns a size of tree in number of nodes (not number of prefixes
305 | * stored).
306 | *
307 | * @return a number of nodes in current tree
308 | */
309 | public int size() {
310 | return size;
311 | }
312 |
313 | public static void main(String[] args) throws Exception {
314 | final IPv4RadixIntTree ipTree = IPv4RadixIntTree.getInstance();
315 | //
316 | // final String ipArray[] = { "123.58.181.1", "115.236.97.158",
317 | // "182.140.134.24", "115.236.153.148", "114.113.197.131",
318 | // "115.236.153.148", "123.58.181.1", "115.236.153.148",
319 | // "123.58.181.58", "127.0.0.1" };
320 | //
321 | // for (int i = 0; i < 1; i++) {
322 | // new Thread() {
323 | // @Override
324 | // public void run() {
325 | // // while (true) {
326 | // try {
327 | // for (String ip: ipArray) {
328 | // IPLocation ipl = ipTree.get(ip);
329 | // System.out.println(String.format("%s [%s %s]", ip,
330 | // ipl.country, ipl.area));
331 | // }
332 | //
333 | // Thread.sleep(10000);
334 | // } catch (Exception e) {
335 | // e.printStackTrace();
336 | // }
337 | // // }
338 | // }
339 | // }.start();
340 | // }
341 | // int cidr = 16;
342 | // long netmask = ((1L << (32 - cidr)) - 1L) ^ 0xffffffffL;
343 | // System.out.println(Long.toHexString(netmask));
344 | }
345 |
346 | }
347 |
--------------------------------------------------------------------------------
/src/main/java/org/lic/ip/ipseeker/IPEntry.java:
--------------------------------------------------------------------------------
1 | /*
2 | * LumaQQ - Java QQ Client
3 | *
4 | * Copyright (C) 2004 luma
5 | *
6 | * This program is free software; you can redistribute it and/or modify
7 | * it under the terms of the GNU General Public License as published by
8 | * the Free Software Foundation; either version 2 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * This program is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU General Public License
17 | * along with this program; if not, write to the Free Software
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 | */
20 | package org.lic.ip.ipseeker;
21 |
/**
 * One IP range record. Besides the country and the area it also carries the
 * first and the last IP of the range.
 *
 * @author luma
 */
public class IPEntry {
    public String beginIp;

    public String endIp;

    public String country;

    public String area;

    /**
     * Creates a record whose four fields are all the empty string.
     */
    public IPEntry() {
        final String empty = "";
        this.area = empty;
        this.country = empty;
        this.endIp = empty;
        this.beginIp = empty;
    }
}
45 |
--------------------------------------------------------------------------------
/src/main/java/org/lic/ip/ipseeker/IPSeeker.java:
--------------------------------------------------------------------------------
1 | /*
2 | * LumaQQ - Java QQ Client
3 | *
4 | * Copyright (C) 2004 luma
5 | *
6 | * This program is free software; you can redistribute it and/or modify
7 | * it under the terms of the GNU General Public License as published by
8 | * the Free Software Foundation; either version 2 of the License, or
9 | * (at your option) any later version.
10 | *
11 | * This program is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU General Public License for more details.
15 | *
16 | * You should have received a copy of the GNU General Public License
17 | * along with this program; if not, write to the Free Software
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 | */
20 |
21 | /*
22 | * 去掉了每次都需要直接访问磁盘文件的查询代码,只允许访问内存映射文件
23 | * modified by liyalong 2012.07.31
24 | */
25 | package org.lic.ip.ipseeker;
26 |
27 | import java.io.IOException;
28 | import java.io.RandomAccessFile;
29 | import java.io.UnsupportedEncodingException;
30 | import java.nio.ByteOrder;
31 | import java.nio.MappedByteBuffer;
32 | import java.nio.channels.FileChannel;
33 | import java.util.ArrayList;
34 | import java.util.List;
35 | import java.util.Map;
36 | import java.util.Random;
37 | import java.util.StringTokenizer;
38 |
39 |
40 | import org.apache.commons.lang.time.StopWatch;
41 | import org.apache.log4j.Logger;
42 |
43 | import com.googlecode.concurrentlinkedhashmap.ConcurrentLinkedHashMap;
44 | import org.lic.ip.iplocator.IPLocation;
45 |
46 | /**
47 | *