├── .gitignore ├── .travis.yml ├── README.md ├── data └── whitelist.pac ├── gh-pages └── index.html ├── main.py ├── requirements.txt ├── test.html └── test.js /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | bin/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | .venv 25 | 26 | # Installer logs 27 | pip-log.txt 28 | pip-delete-this-directory.txt 29 | 30 | # Unit test / coverage reports 31 | htmlcov/ 32 | .tox/ 33 | .coverage 34 | .cache 35 | nosetests.xml 36 | coverage.xml 37 | 38 | # Translations 39 | *.mo 40 | 41 | # Mr Developer 42 | .mr.developer.cfg 43 | .project 44 | .pydevproject 45 | 46 | # Rope 47 | .ropeproject 48 | 49 | # Django stuff: 50 | *.log 51 | *.pot 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # Temporary file 57 | *.swp 58 | *.tmp 59 | *.bak 60 | /.idea 61 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "3.6" 5 | 6 | install: 7 | - pip install -r requirements.txt 8 | 9 | script: 10 | - python main.py --output gh-pages/whitelist.pac 11 | 12 | deploy: 13 | provider: pages 14 | skip_cleanup: true 15 | github_token: $GITHUB_TOKEN 16 | local_dir: gh-pages 17 | on: 18 | branch: master 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GFW White List 2 | [![Build Status](https://travis-ci.org/R0uter/gfw_domain_whitelist.svg?branch=master)](https://travis-ci.org/R0uter/gfw_domain_whitelist) 3 | 4 | This PAC file uses a white list, which contains websites that can be directly accessed. If some domains are not included, they will access through the proxy. 5 | 6 | If you use this PAC file, you may need a proxy which not bill with the flow. 7 | 8 | This white list comes from [felixonmars dnsmasq-china-list](https://github.com/felixonmars/dnsmasq-china-list) 9 | 10 | This project location : [https://github.com/R0uter/gfw_domain_whitelist](https://github.com/R0uter/gfw_domain_whitelist) 11 | 12 | More information 13 | ------- 14 | Please go to [WIKI](https://github.com/R0uter/gfw_whitelist/wiki) 15 | 16 | ## How to use 17 | 18 | **Switch to the `gh-pages` branch to [download the latest pac file](https://R0uter.github.io/gfw_domain_whitelist/)!** 19 | 20 | Download the [`whitelist.pac`](https://R0uter.github.io/gfw_domain_whitelist/), and edit the server IP and the proxy type. After that, change your browser's config, and point to `whitelist.pac`. 21 | 22 | var proxy = new Array( "SOCKS5 127.0.0.1:1080; SOCKS 127.0.0.1:1080;", 23 | Change the type of proxy, it also can be 'HTTPS' 24 | Make sure to change both SOCKS5 and SOCKS 25 | 26 | 27 | ### Use script to generate the PAC file 28 | 29 | Execute command `python3 main.py`, `whitelist.pac` will be updated. 30 | 31 | 32 | ### Load-Balance 33 | 34 | You can change the `okToLoadBalance` value to `true` to use the load balance feature. When you edit `whitelist.pac`, you will find three proxy configs in there. Only the first config will become effective if you leave `okToLoadBalance` maintain `false`, but if you want to use load balance, you need to edit all of the proxy row as well. 35 | 36 | "SOCKS5 127.0.0.1:1083; SOCKS 127.0.0.1:1083;", 37 | Different port or IP, and do not lose the comma! 38 | 39 | 40 | As you see, `proxy` is an array. You can add at most ten proxies to load balance! Though three is good enough. 41 | 42 | There is one more thing you should know that load-balancing is domain-based load balance, so it would not accelerate video or download something. Do not use this feature if your proxies are not speed the same. 43 | 44 | 45 | PAC performance (100,000 repeats) 46 | ---------------- 47 | Firefox 48 | whitelist.pac 50ms 49 | load balance: whitelist.pac 40ms 50 | 51 | Chrome 52 | whitelist.pac 70ms 53 | load balance: whitelist.pac 68ms 54 | 55 | Safari 56 | whitelist.pac 50ms 57 | load balance: whitelist.pac 44ms 58 | 59 | Based on 60 | ------------ 61 | [breakwa11 gfw_whitelist](https://github.com/breakwa11/gfw_whitelist) 62 | [clowwindy gfwlist2pac](https://github.com/clowwindy/gfwlist2pac) 63 | [felixonmars dnsmasq-china-list](https://github.com/felixonmars/dnsmasq-china-list) 64 | 65 | ## MIT License (MIT) 66 | 67 | The MIT License (MIT) 68 | 69 | Copyright (c) 2016 R0uter 70 | 71 | Permission is hereby granted, free of charge, to any person obtaining a copy 72 | of this software and associated documentation files (the "Software"), to deal 73 | in the Software without restriction, including without limitation the rights 74 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 75 | copies of the Software, and to permit persons to whom the Software is 76 | furnished to do so, subject to the following conditions: 77 | 78 | The above copyright notice and this permission notice shall be included in all 79 | copies or substantial portions of the Software. 80 | 81 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 82 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 83 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 84 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 85 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 86 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 87 | SOFTWARE. 88 | -------------------------------------------------------------------------------- /data/whitelist.pac: -------------------------------------------------------------------------------- 1 | // if false, use proxy[0] by default, 2 | // edit function loadBalance to change it! 3 | var okToLoadBalance = false; 4 | 5 | var proxy = [ 6 | // add more proxies to load-balance! 7 | __PROXY__, 8 | "SOCKS5 127.0.0.1:1081; SOCKS 127.0.0.1:1081", 9 | "SOCKS5 127.0.0.1:1082; SOCKS 127.0.0.1:1082", 10 | "SOCKS5 127.0.0.1:1083; SOCKS 127.0.0.1:1083" 11 | ]; 12 | 13 | var direct = "DIRECT"; 14 | 15 | /* 16 | * Copyright (C) 2015 - 2017 R0uter 17 | * https://github.com/R0uter/gfw_domain_whitelist 18 | */ 19 | 20 | var white_domains = __DOMAINS__; 21 | 22 | // ip list must in order for matching 23 | var subnetIp4RangeList = [ 24 | 0, 1, // 0.0.0.0/32 25 | 167772160, 184549376, // 10.0.0.0/8 26 | 2130706432, 2130706688, // 127.0.0.0/24 27 | 2886729728, 2887778304, // 172.16.0.0/12 28 | 3232235520, 3232301056 // 192.168.0.0/16 29 | ]; 30 | 31 | var subnetIp6RangeList = [ 32 | [0x0, 0x0, 0x0, 0x0], [0x0, 0x0, 0x0, 0x2], // ::/127 33 | [0xfe800000, 0x0, 0x0, 0x0], [0xfe800000, 0x1, 0x0, 0x0], // fe80::/64 34 | [0xfec00000, 0x0, 0x0, 0x0], [0xfec00000, 0x10000, 0x0, 0x0], // fec0::/48 35 | ]; 36 | 37 | var hasOwnProperty = Object.hasOwnProperty; 38 | 39 | function check_ipv4(host) { 40 | var re_ipv4 = /^\d+\.\d+\.\d+\.\d+$/; 41 | return re_ipv4.test(host); 42 | } 43 | 44 | function convertIp4Address(strIp) { 45 | var bytes = strIp.split('.'); 46 | var result = (bytes[0] << 24) | 47 | (bytes[1] << 16) | 48 | (bytes[2] << 8) | 49 | (bytes[3]); 50 | // javascript simulates the bit operation of 32-bit signed int 51 | // so "1 << 31" is a negative number, use ">>>" to fix it 52 | return result >>> 0; 53 | } 54 | 55 | function isInIp4RangeList(ipRange, intIp) { 56 | if (ipRange.length === 0) 57 | return false; 58 | var left = 0, right = ipRange.length - 1; 59 | do { 60 | var mid = Math.floor((left + right) / 2); 61 | if (mid & 0x1) { 62 | if (intIp >= ipRange[mid - 1]) { 63 | if (intIp < ipRange[mid]) { 64 | return true 65 | } else { 66 | left = mid + 1; 67 | } 68 | } else { 69 | right = mid - 2 70 | } 71 | } else { 72 | if (intIp >= ipRange[mid]) { 73 | if (intIp < ipRange[mid + 1]) { 74 | return true; 75 | } else { 76 | left = mid + 2; 77 | } 78 | } else { 79 | right = mid - 1; 80 | } 81 | } 82 | } while (left < right); 83 | return false; 84 | } 85 | 86 | function getProxyFromIp4(strIp) { 87 | var intIp = convertIp4Address(strIp); 88 | 89 | if (isInIp4RangeList(subnetIp4RangeList, intIp)) { 90 | return direct; 91 | } 92 | // in theory, we can add chnroutes test here. 93 | return loadBalance(); 94 | } 95 | 96 | // don't support ipv4-mapped ipv6 address 97 | function check_ipv6(host) { 98 | // http://home.deds.nl/~aeron/regex/ 99 | var re_ipv6 = /^((?=.*::)(?!.*::.+::)(::)?([\dA-F]{1,4}:(:|\b)|){5}|([\dA-F]{1,4}:){6})((([\dA-F]{1,4}((?!\3)::|:\b|$))|(?!\2\3)){2})$/i; 100 | return re_ipv6.test(host) 101 | } 102 | 103 | // ipv6 format as [0xffff1234, 0xffff1234, 0xffff1234, 0xffff1234] 104 | function convertIp6Address(strIp) { 105 | var words = strIp.split(':'); 106 | var pos = words.indexOf(''); 107 | if (pos === 0) 108 | pos = words.indexOf('', pos + 1); 109 | var result = [0, 0, 0, 0]; 110 | var len = words.length; 111 | var index = 0, // index of ipv6 112 | wordi = 0; // index of words 113 | do { 114 | if (pos === wordi) { 115 | index += 9 - len; 116 | } else { 117 | var word = words[wordi]; 118 | if (word) { 119 | if (index & 0x1) 120 | result[index >>> 1] += parseInt(word, 16); 121 | else 122 | result[index >>> 1] = (parseInt(word, 16) << 16) >>> 0; 123 | } 124 | index++; 125 | } 126 | wordi++; 127 | } while (wordi < len); 128 | return result; 129 | } 130 | 131 | function compareIp6(a, b) { 132 | if (a[0] > b[0]) return 1; 133 | if (a[0] < b[0]) return -1; 134 | if (a[1] > b[1]) return 1; 135 | if (a[1] < b[1]) return -1; 136 | if (a[2] > b[2]) return 1; 137 | if (a[2] < b[2]) return -1; 138 | if (a[3] > b[3]) return 1; 139 | if (a[3] < b[3]) return -1; 140 | return 0; 141 | } 142 | 143 | function isInIp6RangeList(ipRange, intIp) { 144 | if (ipRange.length === 0) 145 | return false; 146 | var left = 0, right = ipRange.length - 1; 147 | do { 148 | var mid = Math.floor((left + right) / 2); 149 | if (mid & 0x1) { 150 | if (compareIp6(intIp, ipRange[mid - 1]) >= 0) { 151 | if (compareIp6(intIp, ipRange[mid]) < 0) { 152 | return true 153 | } else { 154 | left = mid + 1; 155 | } 156 | } else { 157 | right = mid - 2 158 | } 159 | } else { 160 | if (compareIp6(intIp, ipRange[mid]) >= 0) { 161 | if (compareIp6(intIp, ipRange[mid + 1]) < 0) { 162 | return true; 163 | } else { 164 | left = mid + 2; 165 | } 166 | } else { 167 | right = mid - 1; 168 | } 169 | } 170 | } while (left < right); 171 | return false; 172 | } 173 | 174 | function getProxyFromIp6(strIp) { 175 | var intIp = convertIp6Address(strIp); 176 | 177 | if (isInIp6RangeList(subnetIp6RangeList, intIp)) { 178 | return direct; 179 | } 180 | 181 | return loadBalance(); 182 | } 183 | 184 | function isInDomains(domain_dict, host) { 185 | var pos = host.lastIndexOf('.'); 186 | var suffix = host.substring(pos + 1); 187 | 188 | if (suffix === "cn") { 189 | return true; 190 | } 191 | 192 | pos = host.lastIndexOf('.', pos - 1); 193 | 194 | while (true) { 195 | if (pos === -1) { 196 | return hasOwnProperty.call(domain_dict, host); 197 | } 198 | 199 | suffix = host.substring(pos + 1); 200 | if (hasOwnProperty.call(domain_dict, suffix)) { 201 | return true; 202 | } 203 | 204 | pos = host.lastIndexOf('.', pos - 1); 205 | } 206 | } 207 | 208 | function loadBalance() { 209 | if (okToLoadBalance) { 210 | var random = Math.floor(Math.random() * proxy.length); 211 | return proxy[random]; 212 | } 213 | return proxy[0]; 214 | } 215 | 216 | function FindProxyForURL(url, host) { 217 | if (isPlainHostName(host)) { 218 | return direct; 219 | } 220 | 221 | if (check_ipv4(host)) { 222 | return getProxyFromIp4(host); 223 | } 224 | 225 | if (check_ipv6(host)) { 226 | return getProxyFromIp6(host); 227 | } 228 | 229 | if (isInDomains(white_domains, host)) { 230 | return direct; 231 | } 232 | 233 | return loadBalance(); 234 | } 235 | -------------------------------------------------------------------------------- /gh-pages/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | PAC 6 | 7 | 8 | whitelist.pac 9 | 10 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | import os 4 | import re 5 | import urllib3 6 | import certifi 7 | import codecs 8 | 9 | from argparse import ArgumentParser 10 | 11 | def parse_args(): 12 | parser = ArgumentParser() 13 | parser.add_argument('-i', '--input', dest='input', default=os.path.join('data','whitelist.pac'), 14 | help='path to gfwlist') 15 | parser.add_argument('-o', '--output', dest='output', default='whitelist.pac', 16 | help='path to output pac', metavar='PAC') 17 | parser.add_argument('-p', '--proxy', dest='proxy', default='"SOCKS5 127.0.0.1:1080; SOCKS 127.0.0.1:1080;"', 18 | help='the proxy parameter in the pac file, for example,\ 19 | "127.0.0.1:1080;"', metavar='SOCKS5') 20 | return parser.parse_args() 21 | 22 | def writefile(input_file, proxy, output_file): 23 | 24 | domains_content = final_list() 25 | proxy_content = get_file_data(input_file) 26 | proxy_content = proxy_content.replace('__PROXY__', proxy) 27 | proxy_content = proxy_content.replace('__DOMAINS__', domains_content) 28 | 29 | with open(output_file, 'w') as file_obj: 30 | file_obj.write(proxy_content) 31 | 32 | 33 | def get_list(): 34 | print('Getting domain whitelist...') 35 | dnsmasq_china_list = 'https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/accelerated-domains.china.conf' 36 | whitelist = [] 37 | try: 38 | content = getList(dnsmasq_china_list) 39 | content = content.decode('utf-8') 40 | f = codecs.open('whitelistCache', 'w', 'utf-8') 41 | f.write(content) 42 | f.close() 43 | 44 | except: 45 | print('Get list update failed,use cache to update instead.') 46 | 47 | f = codecs.open('whitelistCache', 'r', 'utf-8') 48 | for line in f.readlines(): 49 | l = re.findall(r'(?<==/).+?(?=/)', line) 50 | whitelist.append('"'+l[0]+'":1,') 51 | f.close() 52 | 53 | return whitelist 54 | 55 | def getList(listUrl): 56 | http = urllib3.PoolManager( 57 | cert_reqs='CERT_REQUIRED', # Force certificate check. 58 | ca_certs=certifi.where(), # Path to the Certifi bundle. 59 | ) 60 | 61 | data = http.request('GET', listUrl, timeout=10).data 62 | return data 63 | 64 | 65 | def final_list(): 66 | 67 | list_result = get_list() 68 | content = '\n'.join(list_result) 69 | content = '{\n' + content + '\n"yourdomainhere.com":1\n}' 70 | print('All done!') 71 | return content 72 | 73 | 74 | def get_file_data(filename): 75 | content = '' 76 | with open(filename, 'r') as file_obj: 77 | content = file_obj.read() 78 | return content 79 | 80 | 81 | def main(): 82 | args = parse_args() 83 | writefile(args.input, '"' + args.proxy.strip('"') + '"', args.output) 84 | 85 | if __name__ == '__main__': 86 | main() 87 | 88 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | certifi 2 | urllib3==1.26.6 3 | -------------------------------------------------------------------------------- /test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | PAC Test 5 | 6 | 7 | 8 | 9 | 10 |
11 | 12 |
13 |
14 | 15 |
16 |
17 | 18 |
19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- 1 | var test_cases = [ 2 | // subnet ips 3 | 0, "127.0.0.1", 4 | 0, "::1", 5 | 0, "fec0::1", 6 | // plain hosts 7 | 0, "localhost", 8 | // white domains 9 | 0, "qq.com", 10 | 0, "im.qq.com", 11 | 0, "www.imqq.com", 12 | // gfw domains 13 | 1, "google.com", 14 | // unknown domains 15 | 1, "qwq.com", 16 | 1, "a.b.c.d.com" 17 | ]; 18 | 19 | function isPlainHostName(host) { 20 | if ( host.toLowerCase() == 'localhost' ) 21 | return true; 22 | return false; 23 | } 24 | 25 | function dnsResolve(host) { 26 | return "27.40.0.0"; 27 | return "27.50.96.0"; 28 | return "27.50.128.0"; 29 | } 30 | 31 | function isInNet(ip, ipstart, ipmask) { 32 | return false; 33 | } 34 | 35 | function shExpMatch(a, b) { 36 | return false; 37 | } 38 | 39 | function test(url, host) { 40 | ret = FindProxyForURL(url, host); 41 | if ( typeof(direct) == "undefined" ) { 42 | if ( ret.toLowerCase().indexOf("direct") >= 0 ) { 43 | return 0; 44 | } 45 | return 1; 46 | } else if ( ret === direct ) 47 | return 0; 48 | else 49 | return 1; 50 | } 51 | 52 | function output_result(out_obj) { 53 | output.value = ""; 54 | for (var j = 0; j < test_cases.length; j+=2) { 55 | var test_case = test_cases[j+1]; 56 | var test_result = test(test_case, test_case); 57 | var out_line = "" + test_result + " " + test_case + " "; 58 | if ( test_result === test_cases[j] ) { 59 | out_line = out_line + "Pass"; 60 | } else { 61 | out_line = out_line + "NOT Pass"; 62 | } 63 | out_obj.value = out_obj.value + out_line + "\n"; 64 | } 65 | var start = new Date(); 66 | if ( test_cases.length > 1 ) { 67 | for (var j = 0; j < 100000; ++j) { 68 | var test_case = test_cases[1]; 69 | test(test_case, test_case); 70 | } 71 | } 72 | var end = new Date(); 73 | alert(String(end - start) + "ms in 100,000 tests"); 74 | } 75 | 76 | function begin_test() { 77 | var output = document.getElementById("output"); 78 | output_result( output ); 79 | } 80 | 81 | function test_one() { 82 | var input = document.getElementById("input"); 83 | var result_obj = document.getElementById("result"); 84 | result = test(input.value, input.value); 85 | if ( result === 1 ) 86 | result_obj.value = "Proxy"; 87 | else 88 | result_obj.value = "Direct"; 89 | } 90 | --------------------------------------------------------------------------------