├── scripts ├── lib │ ├── __init__.py │ ├── config.py │ ├── github.py │ └── util.py ├── generateManifestFiles.js ├── cpplint.py ├── uploadDataFiles.js ├── generateDataFiles.js └── check.js ├── include.js ├── CHANGELOG.md ├── .gitmodules ├── .travis.yml ├── data_file_version.h ├── cosmetic_filter.cc ├── test ├── util.h ├── test_main.cc ├── util.cc ├── js │ ├── fingerprintTest.js │ ├── utilTest.js │ ├── filteringTest.js │ ├── serializationTest.js │ ├── filterListTest.js │ ├── parsingTest.js │ └── matchingTest.js ├── binding.gyp ├── cosmetic_filter_test.cc ├── data │ ├── brave-unbreak.txt │ └── ublock-unbreak.txt ├── rule_types_test.cc ├── protocol_test.cc └── options_test.cc ├── .npmignore ├── .gitignore ├── protocol.h ├── brave └── BUILD.gn ├── addon.cc ├── filter_list.cc ├── lists ├── malware.h └── default.h ├── filter_list.h ├── Makefile ├── lib ├── filtering.js └── util.js ├── base.h ├── muon └── BUILD.gn ├── no_fingerprint_domain.h ├── sample └── binding.gyp ├── binding.gyp ├── perf └── binding.gyp ├── package.json ├── ad_block_client_wrap.h ├── no_fingerprint_domain.cc ├── bad_fingerprint.h ├── cosmetic_filter.h ├── perf.cc ├── main.cc ├── ad_block_client.h ├── protocol.cc ├── README.md ├── filter.h ├── bad_fingerprints8.h ├── bad_fingerprints7.h ├── bad_fingerprints5.h └── LICENSE /scripts/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /include.js: -------------------------------------------------------------------------------- 1 | var path = require('path') 2 | console.log(path.relative('.', __dirname), '..') 3 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 3.0.0 4 | - Added support for parsing HTML filtering rules 5 | 
-------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "vendor/depot_tools"] 2 | path = vendor/depot_tools 3 | url = https://chromium.googlesource.com/chromium/tools/depot_tools.git 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "node" 4 | addons: 5 | apt: 6 | sources: 7 | - ubuntu-toolchain-r-test 8 | packages: 9 | - gcc-4.8 10 | - g++-4.8 11 | - ninja-build 12 | 13 | env: 14 | - TRAVIS=travis CXX=g++-4.8 15 | -------------------------------------------------------------------------------- /data_file_version.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 | 6 | #ifndef DATA_FILE_VERSION_H_ 7 | #define DATA_FILE_VERSION_H_ 8 | 9 | static constexpr int DATA_FILE_VERSION = 4; 10 | 11 | #endif // DATA_FILE_VERSION_H_ 12 | -------------------------------------------------------------------------------- /cosmetic_filter.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
// File-scope hasher shared by every CosmeticFilter::hash() call; it is
// constructed once with the argument 19.
static HashFn fn(19);

// Returns the value produced by `fn` for this filter's `data` string, passing
// strlen(data) as the length argument (so `data` must be NUL-terminated).
// NOTE(review): the template argument of static_cast is missing in this copy
// of the file (it looks stripped by extraction) — restore it from the
// repository before compiling.
uint64_t CosmeticFilter::hash() const {
  return fn(data, static_cast(strlen(data)));
}
2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 | 6 | #include 7 | #include "./CppUnitLite/TestHarness.h" 8 | #include "./util.h" 9 | 10 | SimpleString StringFrom(const std::string& value) { 11 | return SimpleString(value.c_str()); 12 | } 13 | 14 | int main() { 15 | TestResult tr; 16 | TestRegistry::runAllTests(tr); 17 | return 0; 18 | } 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | 30 | build 31 | 32 | node_modules 33 | 34 | run 35 | 36 | # ctags 37 | tags 38 | 39 | # output from sample which can be used for importing 40 | *.dat 41 | 42 | # Python compiled files 43 | *.pyc 44 | 45 | .DS_Store 46 | 47 | out 48 | 49 | # VIM temp files 50 | *.swp 51 | -------------------------------------------------------------------------------- /scripts/generateManifestFiles.js: -------------------------------------------------------------------------------- 1 | /* This Source Code Form is subject to the terms of the Mozilla Public 2 | * License, v. 2.0. If a copy of the MPL was not distributed with this file, 3 | * You can obtain one at http://mozilla.org/MPL/2.0/. 
const {AdBlockClient} = require('..')
const fs = require('fs')

// Directory (relative to the current working directory) that receives the
// generated manifest files.
const outputDir = 'out'

// Install the failure handlers BEFORE doing any work. Registered after the
// generation calls (as they used to be), they could never observe a
// synchronous exception thrown by that work, because the script had already
// died by the time the registration lines were reached.
process.on('uncaughtException', (err) => {
  console.error('Caught exception:', err)
  process.exit(1)
})

process.on('unhandledRejection', (err) => {
  console.error('Unhandled rejection:', err)
  process.exit(1)
})

const client = new AdBlockClient()
if (!fs.existsSync(outputDir)) {
  fs.mkdirSync(outputDir)
}

// Write the default manifest and then the regional manifests into outputDir.
client.generateDefaultManifestFile(outputDir)
client.generateRegionalManifestFiles(outputDir)
// Reads the whole file `filename` and returns its contents as a string.
// Throws the current value of errno (an int) when the file cannot be opened.
std::string getFileContents(const char *filename) {
  std::ifstream in(filename, std::ios::in);
  if (!in) {
    throw(errno);
  }
  std::ostringstream buffer;
  buffer << in.rdbuf();
  in.close();
  return buffer.str();
}

// Returns true when `actual` equals `expected`. On a mismatch both values are
// printed to standard output and false is returned.
bool compareNums(int actual, int expected) {
  if (actual == expected) {
    return true;
  }
  std::cout << "Actual: " << actual << std::endl
            << "Expected: " << expected << std::endl;
  return false;
}
// NOTE(review): the template arguments of `Local` / `v8::Local` in the
// signatures below are missing in this copy of the file (they look stripped
// by extraction) — restore them from the repository before compiling.
namespace {

using v8::Local;
using v8::Object;

// Registers the AdBlockClientWrap bindings on the module's `exports` object.
void InitAll(Local exports) {
  ad_block_client_wrap::AdBlockClientWrap::Init(exports);
}

// Overload taking two extra parameters (`unused`, `priv`); both are ignored
// and the call is forwarded to the single-argument InitAll.
void InitAll(v8::Local exports,
    v8::Local unused, void* priv) {
  InitAll(exports);
}

// Overload that additionally receives `context`; all extra parameters are
// ignored and the call is forwarded to the single-argument InitAll.
void InitAll(v8::Local exports, v8::Local unused,
    v8::Local context, void* priv) {
  InitAll(exports);
}

}  // namespace

// Register the module under the name `ad_block`, using the context-aware
// builtin macro when AD_BLOCK_INTERNAL_MODULE is defined and the regular
// NODE_MODULE macro otherwise.
#ifdef AD_BLOCK_INTERNAL_MODULE
NODE_MODULE_CONTEXT_AWARE_BUILTIN(ad_block, InitAll)
#else
NODE_MODULE(ad_block, InitAll)
#endif
// Builds a FilterList by copying every argument into the const member of the
// same name.
// NOTE(review): the element type of `std::vector` for `langs` is missing in
// this copy of the file (it looks stripped by extraction) — confirm against
// the repository.
FilterList::FilterList(const std::string& uuid,
                       const std::string& url,
                       const std::string& title,
                       const std::vector& langs,
                       const std::string& support_url,
                       const std::string& component_id,
                       const std::string& base64_public_key)
    : uuid(uuid),
      url(url),
      title(title),
      langs(langs),
      support_url(support_url),
      component_id(component_id),
      base64_public_key(base64_public_key) {}

// Member-wise copy; defined out of line but compiler-generated.
FilterList::FilterList(const FilterList& other) = default;

// Nothing to release explicitly; the members clean up after themselves.
FilterList::~FilterList() {
}
*/ 5 | 6 | #ifndef LISTS_MALWARE_H_ 7 | #define LISTS_MALWARE_H_ 8 | 9 | #include 10 | #include "../filter_list.h" 11 | 12 | const std::vector malware_lists = { 13 | FilterList({ 14 | "AE08317A-778F-4B95-BC12-7E78C1FB26A3", 15 | "https://raw.githubusercontent.com/Dawsey21/Lists/master/adblock-list.txt", 16 | "Spam404 Domain Blacklist", 17 | {}, 18 | "http://www.spam404.com/domain-blacklist.html", 19 | "", 20 | "" 21 | }), FilterList({ 22 | // This list has a filtering function defined 23 | "FBB430E8-3910-4761-9373-840FC3B43FF2", 24 | "https://s3.amazonaws.com/lists.disconnect.me/simple_malware.txt", 25 | "Disconnect Simple Malware", 26 | {}, 27 | "https://disconnect.me/", 28 | "", 29 | "" 30 | }) 31 | }; 32 | 33 | #endif // LISTS_MALWARE_H_ 34 | -------------------------------------------------------------------------------- /filter_list.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
// Immutable description of one filter list: every data member is const and
// is set once by the constructor.
// NOTE(review): the element type of `std::vector` for `langs` is missing in
// this copy of the file (it looks stripped by extraction) — confirm against
// the repository.
class FilterList {
 public:
  // Copies each argument into the member of the same name.
  FilterList(const std::string& uuid,
             const std::string& url,
             const std::string& title,
             const std::vector& langs,
             const std::string& support_url,
             const std::string& component_id,
             const std::string& base64_public_key);
  FilterList(const FilterList& other);
  ~FilterList();

  // Identifier of the list.
  const std::string uuid;
  // Location the list text is fetched from.
  const std::string url;
  // Human-readable name of the list.
  const std::string title;
  // Languages associated with the list (may be empty).
  const std::vector langs;
  // Web page where the list is documented or supported.
  const std::string support_url;
  // NOTE(review): presumably the browser component id used to ship the list;
  // may be the empty string — confirm against callers.
  const std::string component_id;
  // NOTE(review): presumably the base64 public key paired with component_id;
  // may be the empty string — confirm against callers.
  const std::string base64_public_key;
};
-f xcode perf/binding.gyp 25 | ninja -C build/out/Default -f build.ninja 26 | ./build/out/Default/perf 27 | 28 | clean: 29 | rm -Rf build 30 | -------------------------------------------------------------------------------- /lib/filtering.js: -------------------------------------------------------------------------------- 1 | /* This Source Code Form is subject to the terms of the Mozilla Public 2 | * License, v. 2.0. If a copy of the MPL was not distributed with this file, 3 | * You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 | 5 | const I = (x) => x 6 | 7 | /** 8 | * Same as filterPredicate but will log if there is a LOG_OUTPUT env variable 9 | */ 10 | const filterPredicateWithPossibleLogging = (rule, filterPredicate = I) => { 11 | const result = filterPredicate(rule) 12 | if (process.env['LOG_OUTPUT'] && !result) { 13 | console.log('Filtering out rule: ', rule) 14 | } 15 | return result 16 | } 17 | 18 | /** 19 | * Mapping rule which reformats rules 20 | */ 21 | const mapRule = (rule) => rule 22 | 23 | /** 24 | * Given a list of inputs returns a filtered list of rules that should be used. 25 | * 26 | * @param input {string} - ABP filter syntax to filter 27 | * @return A better filter list 28 | */ 29 | const sanitizeABPInput = (input, filterPredicate = I) => 30 | input.split('\n') 31 | .filter((rule) => 32 | filterPredicateWithPossibleLogging(rule, filterPredicate)) 33 | .map(mapRule) 34 | .join('\n') 35 | 36 | module.exports = { 37 | sanitizeABPInput 38 | } 39 | -------------------------------------------------------------------------------- /base.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
// On every compiler except MSVC, define `nullptr` as the literal 0 unless a
// macro of that name already exists.
// NOTE(review): `nullptr` is a keyword from C++11 on, so `defined(nullptr)`
// is false there and this macro replaces the keyword — confirm that is
// intended for the C++11 builds.
#if !defined(nullptr) && !defined(_MSC_VER)
#define nullptr 0
#endif

// NOTE(review): the header names of the #include directives in this block are
// missing in this copy of the file (they look stripped by extraction) —
// restore them from the repository.
#include

// MSVC older than version 1900 only: route snprintf / vsnprintf to the
// wrappers below.
#if defined(_MSC_VER) && _MSC_VER < 1900
#include
#include
#define snprintf c99_snprintf
#define vsnprintf c99_vsnprintf
// Formats into outBuf (at most `size` bytes) via _vsnprintf_s with _TRUNCATE.
// When size is 0, or when _vsnprintf_s reports -1, the value of
// _vscprintf(format, ap) is returned instead.
// NOTE(review): presumably this mimics C99 vsnprintf, which returns the
// length the full output would have had — confirm against the MSVC docs.
inline int c99_vsnprintf(char *outBuf, size_t size,
    const char *format, va_list ap) {
  int count = -1;
  if (size != 0) {
    count = _vsnprintf_s(outBuf, size, _TRUNCATE, format, ap);
  }
  if (count == -1) {
    count = _vscprintf(format, ap);
  }
  return count;
}

// Variadic front end: packs the arguments into a va_list and forwards to
// c99_vsnprintf, returning its result.
inline int c99_snprintf(char *outBuf, size_t size,
    const char *format, ...) {
  int count;
  va_list ap;
  va_start(ap, format);
  count = c99_vsnprintf(outBuf, size, format, ap);
  va_end(ap);
  return count;
}
#endif
] 5 | } 6 | 7 | config("internal_config") { 8 | include_dirs = [ 9 | rebase_path("node/src", dep_base), 10 | ] 11 | 12 | defines = [ "NODE_SHARED_MODE", "AD_BLOCK_INTERNAL_MODULE" ] 13 | 14 | cflags = [] 15 | 16 | if (is_clang) { 17 | cflags += [ 18 | "-Wno-error", 19 | "-Wno-deprecated-declarations", 20 | ] 21 | } 22 | } 23 | 24 | source_set("ad_block") { 25 | public_configs = [ ":external_config" ] 26 | 27 | configs += [ ":internal_config" ] 28 | 29 | sources = [ 30 | "../ad_block_client.cc", 31 | "../ad_block_client.h", 32 | "../ad_block_client_wrap.cc", 33 | "../ad_block_client_wrap.h", 34 | "../addon.cc", 35 | "../cosmetic_filter.cc", 36 | "../cosmetic_filter.h", 37 | "../filter.cc", 38 | "../filter.h", 39 | "../filter_list.cc", 40 | "../filter_list.h", 41 | "../no_fingerprint_domain.cc", 42 | "../no_fingerprint_domain.h", 43 | "../protocol.cc", 44 | "../protocol.h", 45 | ] 46 | 47 | deps = [ 48 | rebase_path("hashset-cpp", dep_base), 49 | rebase_path("bloom-filter-cpp", dep_base), 50 | "//v8", 51 | ] 52 | } 53 | -------------------------------------------------------------------------------- /no_fingerprint_domain.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
// A character buffer (`data`, `dataLen`) with hashing, equality and
// serialization operations.
// NOTE(review): judging by the name this is a domain for which no
// fingerprint is used — confirm against the implementation file.
class NoFingerprintDomain {
 public:
  NoFingerprintDomain();
  NoFingerprintDomain(const NoFingerprintDomain &other);
  // Constructs from a buffer of `dataLen` characters.
  NoFingerprintDomain(const char * data, int dataLen);
  ~NoFingerprintDomain();

  // Hash of this object; GetHash() is an alias that forwards to hash().
  uint64_t hash() const;
  uint64_t GetHash() const {
    return hash();
  }

  // Serialize / Deserialize write to and read from a caller-supplied buffer
  // and return a uint32_t.
  // NOTE(review): presumably the return value is the number of bytes written
  // or consumed — confirm against the implementation.
  uint32_t Serialize(char *buffer);
  uint32_t Deserialize(char *buffer, uint32_t bufferSize);
  // Nothing needs to be updated when being added multiple times
  void Update(const NoFingerprintDomain&) {}

  bool operator==(const NoFingerprintDomain &rhs) const;

 private:
  // Holds true if the data should not free memory because for example it
  // was loaded from a large buffer somewhere else via the serialize and
  // deserialize functions.
  bool borrowed_data;
  // The characters themselves and their count.
  char *data;
  int dataLen;
};
def main():
    """Run cpplint over the repository's own C/C++ sources and headers.

    Changes the working directory to SOURCE_ROOT, collects every ``*.cpp``,
    ``*.cc`` and ``*.h`` file below it, removes the entries of IGNORE_FILES
    and everything found under ``node_modules``, and hands the remainder to
    call_cpplint().
    """
    os.chdir(SOURCE_ROOT)
    patterns = ['*.cpp', '*.cc', '*.h']
    all_sources = list_files([''], patterns)
    third_party = list_files(['node_modules'], patterns)
    to_lint = set(all_sources) - set(IGNORE_FILES) - set(third_party)
    call_cpplint(list(to_lint))


def list_files(directories, filters):
    """Return the paths of all files matching any of the glob filters.

    Each entry of ``directories`` is joined onto ``'./'`` and walked
    recursively; in every visited directory the file names are matched
    against each pattern in ``filters`` with fnmatch. The returned paths are
    the walked directory joined with the file name.
    """
    found = []
    for directory in directories:
        start = os.path.join('./', directory)
        for root, _dirnames, filenames in os.walk(start):
            for pattern in filters:
                found.extend(os.path.join(root, name)
                             for name in fnmatch.filter(filenames, pattern))
    return found


def call_cpplint(files):
    """Run vendor/depot_tools/cpplint.py on ``files`` with this interpreter."""
    script = os.path.join(SOURCE_ROOT, 'vendor', 'depot_tools', 'cpplint.py')
    command = [sys.executable, script] + files
    execute(command)


if __name__ == '__main__':
    sys.exit(main())
-------------------------------------------------------------------------------- /binding.gyp: -------------------------------------------------------------------------------- 1 | { 2 | "targets": [{ 3 | "target_name": "ad-block", 4 | "sources": [ 5 | "addon.cc", 6 | "ad_block_client_wrap.cc", 7 | "ad_block_client_wrap.h", 8 | "ad_block_client.cc", 9 | "ad_block_client.h", 10 | "cosmetic_filter.cc", 11 | "cosmetic_filter.h", 12 | "filter.cc", 13 | "filter.h", 14 | "filter_list.cc", 15 | "filter_list.h", 16 | "no_fingerprint_domain.cc", 17 | "no_fingerprint_domain.h", 18 | "protocol.cc", 19 | "protocol.h", 20 | "./node_modules/bloom-filter-cpp/BloomFilter.cpp", 21 | "./node_modules/bloom-filter-cpp/BloomFilter.h", 22 | "./node_modules/bloom-filter-cpp/hashFn.cpp", 23 | "./node_modules/bloom-filter-cpp/hashFn.h", 24 | "./node_modules/hashset-cpp/hash_set.cc", 25 | "./node_modules/hashset-cpp/hash_set.h" 26 | ], 27 | "include_dirs": [ 28 | ".", 29 | './node_modules/bloom-filter-cpp', 30 | './node_modules/hashset-cpp' 31 | ], 32 | "dependencies": [ 33 | ], 34 | "conditions": [ 35 | ['OS=="win"', { 36 | }, { 37 | 'cflags_cc': [ '-fexceptions' ] 38 | } 39 | ] 40 | ], 41 | "xcode_settings": { 42 | "OTHER_CFLAGS": [ "-ObjC" ], 43 | "OTHER_CPLUSPLUSFLAGS" : ["-std=c++11","-stdlib=libc++", "-v"], 44 | "MACOSX_DEPLOYMENT_TARGET": "10.9", 45 | "GCC_ENABLE_CPP_EXCEPTIONS": "YES", 46 | }, 47 | }] 48 | } 49 | -------------------------------------------------------------------------------- /test/js/fingerprintTest.js: -------------------------------------------------------------------------------- 1 | /* This Source Code Form is subject to the terms of the Mozilla Public 2 | * License, v. 2.0. If a copy of the MPL was not distributed with this file, 3 | * You can obtain one at http://mozilla.org/MPL/2.0/. 
// Table of getFingerprint expectations: [test title, input, expected result].
// `undefined` means no fingerprint is extracted for that input.
const fingerprintCases = [
  ['Extracts simple fingerprint', 'fdasfdsafdas', 'fdasfd'],
  ['Does not use special characters for fingerprints', '*fdasfdsafdas', 'fdasfd'],
  ['Extracts host anchored filter fingerprint', '||brave.com', 'brave.'],
  ['Does not extract a fingerprint for strings that are too short', 'prime', undefined],
  ['Does not extract a fingerprint for blacklisted strings', 'https://', undefined],
  ['Extract a fingerprint for short host anchored filters', '||a.ca/brianbondy', 'a.ca/b']
]

describe('getFingerprint', function () {
  // One client is shared by all cases in this suite.
  before(function () {
    this.adBlockClient = new AdBlockClient()
  })
  fingerprintCases.forEach(([title, input, expected]) => {
    // A regular function (not an arrow) so that `this` is the mocha context
    // carrying adBlockClient.
    it(title, function () {
      assert.equal(this.adBlockClient.getFingerprint(input), expected)
    })
  })
})
const {makeAdBlockClientFromListUUID} = require('../../lib/util')

// Each test returns its promise to mocha instead of driving a `done`
// callback by hand. That way a rejection is reported with the real error
// (the previous version replaced it with a message-less Error created at
// module load), and the callback can no longer be invoked twice when the
// success handler itself throws.
describe('utilTest', function () {
  // Timeouts are disabled for this suite.
  this.timeout(0)
  describe('makeAdBlockClientFromListUUID', function () {
    it('throws an error for an invalid uuid which does not exist', function () {
      // Two-argument then(): the rejection handler only sees failures of the
      // call under test, not the error thrown by the success handler.
      return makeAdBlockClientFromListUUID().then(() => {
        throw new Error('Expected makeAdBlockClientFromListUUID() to reject without a uuid')
      }, () => {})
    })
    it('can obtain list from default lists by uuid', function () {
      return makeAdBlockClientFromListUUID('67F880F5-7602-4042-8A3D-01481FD7437A')
    })
    it('can obtain list from regions list by uuid', function () {
      return makeAdBlockClientFromListUUID('9FCEECEC-52B4-4487-8E57-8781E82C91D0')
    })
    it('can obtain list from malware list by uuid', function () {
      return makeAdBlockClientFromListUUID('AE08317A-778F-4B95-BC12-7E78C1FB26A3')
    })
  })
})
"../node_modules/hashset-cpp/hash_set.h" 23 | ], 24 | "include_dirs": [ 25 | "..", 26 | '../node_modules/bloom-filter-cpp', 27 | '../node_modules/hashset-cpp' 28 | ], 29 | "defines": ["PERF_STATS"], 30 | "conditions": [ 31 | ['OS=="win"', { 32 | }, { 33 | 'cflags_cc': [ '-fexceptions' ] 34 | } 35 | ] 36 | ], 37 | "xcode_settings": { 38 | "OTHER_CFLAGS": [ "-ObjC" ], 39 | "OTHER_CPLUSPLUSFLAGS" : ["-std=c++11","-stdlib=libc++", "-v"], 40 | "OTHER_LDFLAGS": ["-stdlib=libc++"], 41 | "MACOSX_DEPLOYMENT_TARGET": "10.9", 42 | "GCC_ENABLE_CPP_EXCEPTIONS": "YES", 43 | }, 44 | "cflags": [ 45 | "-std=c++11" 46 | ] 47 | }] 48 | } 49 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ad-block", 3 | "main": "./build/Release/ad-block", 4 | "version": "4.1.1", 5 | "description": "Ad block engine used in the Brave browser for ABP filter syntax based lists like EasyList.", 6 | "directories": { 7 | "test": "test" 8 | }, 9 | "dependencies": { 10 | "bloom-filter-cpp": "^1.2.0", 11 | "cppunitlite": "^1.0.0", 12 | "hashset-cpp": "^2.1.0", 13 | "nan": "^2.10.0" 14 | }, 15 | "devDependencies": { 16 | "commander": "^2.15.1", 17 | "mocha": "^5.2.0", 18 | "node-gyp": "^3.7.0", 19 | "nsp": "^3.2.1", 20 | "pre-commit": "^1.2.2", 21 | "s3": "^4.4.0", 22 | "standard": "^11.0.1" 23 | }, 24 | "scripts": { 25 | "test": "npm run test-cpp && npm run test-js", 26 | "test-cpp": "make test", 27 | "test-js": "mocha test/js", 28 | "build": "make", 29 | "sample": "make sample", 30 | "perf": "make perf", 31 | "preinstall": "npm install bloom-filter-cpp && npm install hashset-cpp", 32 | "install": "node-gyp rebuild", 33 | "lint": "npm run lint-cpp && npm run lint-js", 34 | "lint-cpp": "./scripts/cpplint.py", 35 | "lint-js": "standard", 36 | "checks": "npm run check-security", 37 | "check-security": "nsp check", 38 | "data-files": "node 
scripts/generateDataFiles.js", 39 | "manifest-files": "node scripts/generateManifestFiles.js", 40 | "upload": "node scripts/uploadDataFiles.js" 41 | }, 42 | "repository": { 43 | "type": "git", 44 | "url": "git+https://github.com/brave/ad-block.git" 45 | }, 46 | "author": "Brian R. Bondy (http://www.brianbondy.com)", 47 | "license": "MPL-2.0", 48 | "gypfile": true, 49 | "bugs": { 50 | "url": "https://github.com/brave/ad-block/issues" 51 | }, 52 | "homepage": "https://github.com/brave/ad-block#readme", 53 | "pre-commit": [ 54 | "lint-cpp", 55 | "lint-js" 56 | ] 57 | } 58 | -------------------------------------------------------------------------------- /scripts/lib/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import errno 4 | import os 5 | import platform 6 | import sys 7 | 8 | 9 | BASE_URL = os.getenv('LIBCHROMIUMCONTENT_MIRROR') or \ 10 | 'https://s3.amazonaws.com/github-janky-artifacts/libchromiumcontent' 11 | LIBCHROMIUMCONTENT_COMMIT = 'cfbe8ec7e14af4cabd1474386f54e197db1f7ac1' 12 | 13 | PLATFORM = { 14 | 'cygwin': 'win32', 15 | 'darwin': 'darwin', 16 | 'linux2': 'linux', 17 | 'win32': 'win32', 18 | }[sys.platform] 19 | 20 | verbose_mode = False 21 | 22 | 23 | def get_platform_key(): 24 | if os.environ.has_key('MAS_BUILD'): 25 | return 'mas' 26 | else: 27 | return PLATFORM 28 | 29 | 30 | def get_target_arch(): 31 | try: 32 | target_arch_path = os.path.join(__file__, '..', '..', '..', 'vendor', 33 | 'brightray', 'vendor', 'download', 34 | 'libchromiumcontent', '.target_arch') 35 | with open(os.path.normpath(target_arch_path)) as f: 36 | return f.read().strip() 37 | except IOError as e: 38 | if e.errno != errno.ENOENT: 39 | raise 40 | 41 | if PLATFORM == 'win32': 42 | return 'ia32' 43 | else: 44 | return 'x64' 45 | 46 | 47 | def get_chromedriver_version(): 48 | return 'v2.15' 49 | 50 | 51 | def s3_config(): 52 | config = (os.environ.get('ATOM_SHELL_S3_BUCKET', ''), 53 | 
os.environ.get('ATOM_SHELL_S3_ACCESS_KEY', ''), 54 | os.environ.get('ATOM_SHELL_S3_SECRET_KEY', '')) 55 | message = ('Error: Please set the $ATOM_SHELL_S3_BUCKET, ' 56 | '$ATOM_SHELL_S3_ACCESS_KEY, and ' 57 | '$ATOM_SHELL_S3_SECRET_KEY environment variables') 58 | assert all(len(c) for c in config), message 59 | return config 60 | 61 | 62 | def enable_verbose_mode(): 63 | print 'Running in verbose mode' 64 | global verbose_mode 65 | verbose_mode = True 66 | 67 | 68 | def is_verbose_mode(): 69 | return verbose_mode 70 | -------------------------------------------------------------------------------- /test/binding.gyp: -------------------------------------------------------------------------------- 1 | { 2 | "targets": [{ 3 | "target_name": "test", 4 | "type": "executable", 5 | "sources": [ 6 | "../test/test_main.cc", 7 | "../test/parser_test.cc", 8 | "../test/options_test.cc", 9 | "../test/rule_types_test.cc", 10 | "../test/cosmetic_filter_test.cc", 11 | "../test/protocol_test.cc", 12 | "../test/util.cc", 13 | "../protocol.cc", 14 | "../protocol.h", 15 | "../ad_block_client.cc", 16 | "../ad_block_client.h", 17 | "../cosmetic_filter.cc", 18 | "../cosmetic_filter.h", 19 | "../filter.cc", 20 | "../filter.h", 21 | "../no_fingerprint_domain.cc", 22 | "../no_fingerprint_domain.h", 23 | "../node_modules/bloom-filter-cpp/BloomFilter.cpp", 24 | "../node_modules/bloom-filter-cpp/BloomFilter.h", 25 | "../node_modules/bloom-filter-cpp/hashFn.cpp", 26 | "../node_modules/bloom-filter-cpp/hashFn.h", 27 | "../node_modules/hashset-cpp/hash_set.cc", 28 | "../node_modules/hashset-cpp/hash_set.h" 29 | ], 30 | "include_dirs": [ 31 | "..", 32 | '../node_modules/bloom-filter-cpp', 33 | '../node_modules/hashset-cpp', 34 | '../node_modules/cppunitlite', 35 | '../node_modules/nan' 36 | "..", 37 | ], 38 | "dependencies": [ 39 | "../node_modules/cppunitlite/binding.gyp:CppUnitLite", 40 | ], 41 | "conditions": [ 42 | ['OS=="win"', { 43 | }, { 44 | 'cflags_cc': [ '-fexceptions' ] 45 | } 46 | ], 
47 | ['OS=="win"', { 48 | "defines": ["ENABLE_REGEX"], 49 | }, { 50 | }] 51 | ], 52 | "xcode_settings": { 53 | "OTHER_CFLAGS": [ "-ObjC" ], 54 | "OTHER_CPLUSPLUSFLAGS" : ["-std=c++11", "-stdlib=libc++", "-v"], 55 | "OTHER_LDFLAGS": ["-stdlib=libc++"], 56 | "MACOSX_DEPLOYMENT_TARGET": "10.9", 57 | "GCC_ENABLE_CPP_EXCEPTIONS": "YES", 58 | }, 59 | "cflags": [ 60 | "-std=c++11" 61 | ] 62 | }] 63 | } 64 | -------------------------------------------------------------------------------- /test/js/filteringTest.js: -------------------------------------------------------------------------------- 1 | /* This Source Code Form is subject to the terms of the Mozilla Public 2 | * License, v. 2.0. If a copy of the MPL was not distributed with this file, 3 | * You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 | /* global describe, it */ 5 | 6 | const assert = require('assert') 7 | const {sanitizeABPInput} = require('../../lib/filtering') 8 | const filteredOutRule = '*/test' 9 | const predicate = (rule) => !rule.startsWith('*') 10 | 11 | describe('filtering', function () { 12 | describe('filterPredicate', function () { 13 | it('Filters out rules that start with a *, for now', function () { 14 | assert(!predicate('*test/ad')) 15 | }) 16 | it('Does not filter out rules with a *', function () { 17 | assert(predicate('test/*/ad')) 18 | }) 19 | }) 20 | describe('sanitizeABPInput', function () { 21 | it('Rebuilds lists which do not have filtered out rules', function () { 22 | const I = '&ad_channel=\n&ad_classid=\n&ad_height=\n&ad_keyword=' 23 | assert(sanitizeABPInput(I, predicate) === I) 24 | }) 25 | it('Rebuilds lists which have filtered out rules at the start', function () { 26 | const rules = '&ad_channel=\n&ad_classid=\n&ad_height=\n&ad_keyword=' 27 | assert(sanitizeABPInput(`${filteredOutRule}\n${rules}`, predicate) === rules) 28 | }) 29 | it('Rebuilds lists which have filtered out rules at the end', function () { 30 | const rules = 
'&ad_channel=\n&ad_classid=\n&ad_height=\n&ad_keyword=' 31 | assert(sanitizeABPInput(`${rules}\n${filteredOutRule}`, predicate) === rules) 32 | }) 33 | it('Rebuilds lists which have filtered out rules in the middle', function () { 34 | const rules = '&ad_channel=\n&ad_classid=\n&ad_height=\n&ad_keyword=' 35 | assert(sanitizeABPInput(`&ad_channel=\n${filteredOutRule}\n&ad_classid=\n&ad_height=\n&ad_keyword=`, predicate) === rules) 36 | }) 37 | it('Rebuilds lists which have multiple filtered out rules', function () { 38 | const rules = '&ad_channel=\n&ad_classid=\n&ad_height=\n&ad_keyword=' 39 | assert(sanitizeABPInput(`${filteredOutRule}\n&ad_channel=\n${filteredOutRule}\n&ad_classid=\n&ad_height=\n&ad_keyword=`, predicate) === rules) 40 | }) 41 | }) 42 | }) 43 | -------------------------------------------------------------------------------- /lists/default.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ 5 | 6 | #ifndef LISTS_DEFAULT_H_ 7 | #define LISTS_DEFAULT_H_ 8 | 9 | #include 10 | #include "../filter_list.h" 11 | 12 | const std::string kAdBlockDefaultComponentId("cffkpbalmllkdoenhmdmpbkajipdjfam"); 13 | const std::string kAdBlockDefaultBase64PublicKey = 14 | "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAs0qzJmHSgIiw7IGFCxij" 15 | "1NnB5hJ5ZQ1LKW9htL4EBOaMJvmqaDs/wfq0nw/goBHWsqqkMBynRTu2Hxxirvdb" 16 | "cugn1Goys5QKPgAvKwDHJp9jlnADWm5xQvPQ4GE1mK1/I3ka9cEOCzPW6GI+wGLi" 17 | "VPx9VZrxHHsSBIJRaEB5Tyi5bj0CZ+kcfMnRTsXIBw3C6xJgCVKISQUkd8mawVvG" 18 | "vqOhBOogCdb9qza5eJ1Cgx8RWKucFfaWWxKLOelCiBMT1Hm1znAoVBHG/blhJJOD" 19 | "5HcH/heRrB4MvrE1J76WF3fvZ03aHVcnlLtQeiNNOZ7VbBDXdie8Nomf/QswbBGa" 20 | "VwIDAQAB"; 21 | 22 | const std::vector default_lists = { 23 | { 24 | "67F880F5-7602-4042-8A3D-01481FD7437A", 25 | "https://easylist.to/easylist/easylist.txt", 26 | "EasyList", 27 | {}, 28 | "https://easylist.to/", 29 | "", 30 | "" 31 | }, { 32 | "48010209-AD34-4DF5-A80C-3D2A7C3920C0", 33 | "https://easylist.to/easylist/easyprivacy.txt", 34 | "EasyPrivacy", 35 | {}, 36 | "https://easylist.to/", 37 | "", 38 | "" 39 | }, { 40 | "200392E7-9A0F-40DF-86EB-6AF7E4071322", 41 | "https://raw.githubusercontent.com/brave/adblock-lists/master/ublock-unbreak.txt", // NOLINT 42 | "uBlock Unbreak", 43 | {}, 44 | "https://github.com/gorhill/uBlock", 45 | "", 46 | "" 47 | }, { 48 | "2FBEB0BC-E2E1-4170-BAA9-05E76AAB5BA5", 49 | "https://raw.githubusercontent.com/brave/adblock-lists/master/brave-unbreak.txt", // NOLINT 50 | "Brave Unblock", 51 | {}, 52 | "https://github.com/brave/adblock-lists", 53 | "", 54 | "" 55 | }, { 56 | "BCDF774A-7845-4121-B7EB-77EB66CEDF84", 57 | "https://raw.githubusercontent.com/brave/adblock-lists/master/coin-miners.txt", // NOLINT 58 | "NoCoin Filter List", 59 | {}, 60 | "https://github.com/brave/adblock-lists", 61 | "", 62 | "" 63 | } 64 | }; 65 | 66 | #endif // LISTS_DEFAULT_H_ 67 | -------------------------------------------------------------------------------- 
/test/cosmetic_filter_test.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "./CppUnitLite/TestHarness.h" 13 | #include "./CppUnitLite/Test.h" 14 | #include "./ad_block_client.h" 15 | #include "./util.h" 16 | 17 | using std::cout; 18 | using std::endl; 19 | 20 | bool testCosmeticFilter(const char *rawFilter, FilterType expectedFilterType, 21 | const char *expectedData, const char *domainList = nullptr) { 22 | Filter filter; 23 | parseFilter(rawFilter, &filter); 24 | 25 | if (filter.filterType != expectedFilterType) { 26 | cout << "Actual filter type: " << filter.filterType 27 | << endl << "Expected: " << expectedFilterType << endl; 28 | return false; 29 | } 30 | 31 | if (strcmp(filter.data, expectedData)) { 32 | cout << "Actual filter data: " << filter.data 33 | << endl << "Expected: " << expectedData << endl; 34 | return false; 35 | } 36 | 37 | if (domainList && !filter.domainList) { 38 | cout << "Expected domains but none were parsed" << endl; 39 | } else if (!domainList && filter.domainList) { 40 | cout << "Domains found but expected none" << endl; 41 | } else if (domainList && filter.domainList 42 | && strcmp(domainList, filter.domainList)) { 43 | cout << "Actual domains: " << filter.domainList 44 | << endl << "Expected: " << domainList << endl; 45 | } 46 | 47 | return true; 48 | } 49 | 50 | TEST(parser, parseCosmeticFilters) { 51 | CHECK(testCosmeticFilter("###A9AdsMiddleBoxTop", 52 | FTElementHiding, "#A9AdsMiddleBoxTop")); 53 | CHECK(testCosmeticFilter("#@#A9AdsMiddleBoxTop", 54 | FTElementHidingException, "#A9AdsMiddleBoxTop")); 55 | 
CHECK(testCosmeticFilter("domain1.com,domain2.com###A9AdsMiddleBoxTop", 56 | FTElementHiding, "#A9AdsMiddleBoxTop", "domain1.com,domain2.com")); 57 | CHECK(testCosmeticFilter("domain1.com,domain2.com#@#A9AdsMiddleBoxTop", 58 | FTElementHidingException, 59 | "#A9AdsMiddleBoxTop", "domain1.com,domain2.com")); 60 | } 61 | -------------------------------------------------------------------------------- /ad_block_client_wrap.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 | 6 | #ifndef AD_BLOCK_CLIENT_WRAP_H_ 7 | #define AD_BLOCK_CLIENT_WRAP_H_ 8 | 9 | #include 10 | #include 11 | 12 | #include "./ad_block_client.h" 13 | 14 | namespace ad_block_client_wrap { 15 | 16 | /** 17 | * Wraps AdBlockClient for use in Node 18 | */ 19 | class AdBlockClientWrap : public AdBlockClient, public node::ObjectWrap { 20 | public: 21 | static void Init(v8::Local exports); 22 | 23 | private: 24 | AdBlockClientWrap(); 25 | virtual ~AdBlockClientWrap(); 26 | 27 | static void New(const v8::FunctionCallbackInfo& args); 28 | 29 | static void Clear(const v8::FunctionCallbackInfo& args); 30 | static void Parse(const v8::FunctionCallbackInfo& args); 31 | static void Matches(const v8::FunctionCallbackInfo& args); 32 | static void Serialize(const v8::FunctionCallbackInfo& args); 33 | static void Deserialize(const v8::FunctionCallbackInfo& args); 34 | static void Cleanup(const v8::FunctionCallbackInfo& args); 35 | static void GetParsingStats(const v8::FunctionCallbackInfo& args); 36 | static void GetMatchingStats(const v8::FunctionCallbackInfo& args); 37 | static void GetFilters(const v8::FunctionCallbackInfo& args); 38 | static void GetFingerprint(const 
v8::FunctionCallbackInfo& args); 39 | static void EnableBadFingerprintDetection( 40 | const v8::FunctionCallbackInfo& args); 41 | static void GenerateBadFingerprintsHeader( 42 | const v8::FunctionCallbackInfo& args); 43 | static void GenerateDefaultManifestFile( 44 | const v8::FunctionCallbackInfo& args); 45 | static void GenerateRegionalManifestFiles( 46 | const v8::FunctionCallbackInfo& args); 47 | static void FindMatchingFilters( 48 | const v8::FunctionCallbackInfo& args); 49 | 50 | static v8::Persistent constructor; 51 | }; 52 | 53 | } // namespace ad_block_client_wrap 54 | 55 | #endif // AD_BLOCK_CLIENT_WRAP_H_ 56 | -------------------------------------------------------------------------------- /test/js/serializationTest.js: -------------------------------------------------------------------------------- 1 | /* This Source Code Form is subject to the terms of the Mozilla Public 2 | * License, v. 2.0. If a copy of the MPL was not distributed with this file, 3 | * You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 | /* global describe, it, before */ 5 | 6 | const assert = require('assert') 7 | const {makeAdBlockClientFromString} = require('../../lib/util') 8 | const {AdBlockClient, FilterOptions} = require('../..') 9 | 10 | describe('serialization', function () { 11 | before(function (cb) { 12 | const ruleData = ` 13 | [Adblock Plus 2.0] 14 | &video_ads_ 15 | &videoadid= 16 | &view=ad& 17 | +advertorial. 18 | +adverts/ 19 | -2/ads/ 20 | -2011ad_ 21 | -300x100ad2. 22 | -ad-001- 23 | -ad-180x150px. 24 | -ad-200x200- 25 | ! 
comment here 26 | ` 27 | makeAdBlockClientFromString(ruleData).then((client) => { 28 | this.client = client 29 | this.data = this.client.serialize() 30 | this.client2 = new AdBlockClient() 31 | this.client2.deserialize(this.data) 32 | cb() 33 | }) 34 | }) 35 | 36 | it('blocks things the same when created from serialized', function () { 37 | assert(this.client.matches('http://www.brianbondy.com?c=a&view=ad&b=2', FilterOptions.image, 'slashdot.org')) 38 | assert(!this.client.matches('http://www.brianbondy.com?c=a&view1=ad&b=2', FilterOptions.image, 'slashdot.org')) 39 | assert(this.client2.matches('http://www.brianbondy.com?c=a&view=ad&b=2', FilterOptions.image, 'slashdot.org')) 40 | assert(!this.client2.matches('http://www.brianbondy.com?c=a&view1=ad&b=2', FilterOptions.image, 'slashdot.org')) 41 | }) 42 | it('deserialized client serializes the same', function () { 43 | this.client2.deserialize(this.data) 44 | const data2 = this.client2.serialize() 45 | assert(this.data.equals(data2)) 46 | }) 47 | it('deserializes with the same number of filters', function () { 48 | const nonComentFilterCount = 11 49 | assert.equal(this.client.getParsingStats().numFilters, nonComentFilterCount) 50 | assert.equal(this.client2.getParsingStats().numFilters, nonComentFilterCount) 51 | }) 52 | it('serialized data does not include comment data', function () { 53 | assert(!this.data.toString().includes('comment')) 54 | assert(!this.data.toString().includes('Adblock Plus')) 55 | }) 56 | }) 57 | -------------------------------------------------------------------------------- /scripts/lib/github.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import json 4 | import os 5 | import re 6 | import sys 7 | 8 | REQUESTS_DIR = os.path.abspath(os.path.join(__file__, '..', '..', '..', 9 | 'vendor', 'requests')) 10 | sys.path.append(os.path.join(REQUESTS_DIR, 'build', 'lib')) 11 | sys.path.append(os.path.join(REQUESTS_DIR, 'build', 
'lib.linux-x86_64-2.7')) 12 | import requests 13 | 14 | GITHUB_URL = 'https://api.github.com' 15 | GITHUB_UPLOAD_ASSET_URL = 'https://uploads.github.com' 16 | 17 | class GitHub: 18 | def __init__(self, access_token): 19 | self._authorization = 'token %s' % access_token 20 | 21 | pattern = '^/repos/{0}/{0}/releases/{1}/assets$'.format('[^/]+', '[0-9]+') 22 | self._releases_upload_api_pattern = re.compile(pattern) 23 | 24 | def __getattr__(self, attr): 25 | return _Callable(self, '/%s' % attr) 26 | 27 | def send(self, method, path, **kw): 28 | if not 'headers' in kw: 29 | kw['headers'] = dict() 30 | headers = kw['headers'] 31 | headers['Authorization'] = self._authorization 32 | headers['Accept'] = 'application/vnd.github.manifold-preview' 33 | 34 | # Switch to a different domain for the releases uploading API. 35 | if self._releases_upload_api_pattern.match(path): 36 | url = '%s%s' % (GITHUB_UPLOAD_ASSET_URL, path) 37 | else: 38 | url = '%s%s' % (GITHUB_URL, path) 39 | # Data are sent in JSON format. 
40 | if 'data' in kw: 41 | kw['data'] = json.dumps(kw['data']) 42 | 43 | r = getattr(requests, method)(url, **kw).json() 44 | if 'message' in r: 45 | raise Exception(json.dumps(r, indent=2, separators=(',', ': '))) 46 | return r 47 | 48 | 49 | class _Executable: 50 | def __init__(self, gh, method, path): 51 | self._gh = gh 52 | self._method = method 53 | self._path = path 54 | 55 | def __call__(self, **kw): 56 | return self._gh.send(self._method, self._path, **kw) 57 | 58 | 59 | class _Callable(object): 60 | def __init__(self, gh, name): 61 | self._gh = gh 62 | self._name = name 63 | 64 | def __call__(self, *args): 65 | if len(args) == 0: 66 | return self 67 | 68 | name = '%s/%s' % (self._name, '/'.join([str(arg) for arg in args])) 69 | return _Callable(self._gh, name) 70 | 71 | def __getattr__(self, attr): 72 | if attr in ['get', 'put', 'post', 'patch', 'delete']: 73 | return _Executable(self._gh, attr, self._name) 74 | 75 | name = '%s/%s' % (self._name, attr) 76 | return _Callable(self._gh, name) 77 | -------------------------------------------------------------------------------- /test/data/brave-unbreak.txt: -------------------------------------------------------------------------------- 1 | ||ntv.io^$third-party 2 | @@||creative.sonobi.com 3 | @@||adm.fwmrm.net^*/AdManager.js$domain=msnbc.com|sky.com|cnbc.com 4 | ||novately.com^$third-party 5 | ||webspectator.com^$third-party 6 | ! Would be nice to find an alt fix, but unbreaks cnet video 7 | @@||tags.tiqcdn.com/utag/*/utag.js$domain=cnet.com 8 | ! Twitch main video 9 | ||cloudfront.net/esf.js$domain=twitch.tv 10 | ! LA Times forced-whitelisting modal fix 11 | ||tribdss.com/meter/assets$script,domain=www.latimes.com 12 | ! 
LA Times and Chicago Tribune native ads fixes 13 | ||aggrego.org^$script,image,domain=latimes.com|chicagotribune.com 14 | ||jadserve.postrelease.com^$script,image,domain=latimes.com|chicagotribune.com 15 | ||troncdata.com^$script,image,domain=latimes.com 16 | ||polarmobile.com^$script,image,domain=latimes.com|chicagotribune.com 17 | ||ntv.io^$script,image,domain=latimes.com|chicagotribune.com 18 | ! Expressen.se and aftonbladet.set ad blocking evasion fix 19 | ||biowebb-data.s3.amazonaws.com^$script,image,domain=expressen.se|aftonbladet.se 20 | ||richmetrics.com^$script,image,domain=expressen.se|aftonbladet.se 21 | ||adtomafusion.net^$script,image,domain=expressen.se|aftonbladet.se 22 | ||ld1.lpbeta.com^$script,image,domain=expressen.se|aftonbladet.se 23 | ||csp.screen9.com^$script,image,domain=expressen.se|aftonbladet.se 24 | ||glimr.io^$script,image,domain=expressen.se|aftonbladet.se 25 | ||aka-cdn-ns.adtech.de^$script,image,domain=aftonbladet.se|expressen.se 26 | ! Hearst anti-ad blocking fix 27 | ||aps.hearstnp.com^$script,image 28 | ! Sailthru native ad aggregator fix 29 | ||ak.sail-horizon.com^$script,image 30 | ! gRPC client ad tracking data fix boston.com sfgate.com 31 | ||g.3gl.net^$domain=sfgate.com|boston.com 32 | ! 123movies.is video player display banner overlay fix 33 | ||123clouds.ru/*/custombanner.js^$script,domain=123movies.is 34 | ! vendors serving video ads and tracking via proxied requests 35 | ||track.atom-data.io^$third-party 36 | ||vidazoo.com/aggregate^$third-party 37 | ||vidazoo.com/proxy^$third-party 38 | ||vidible.tv^$third-party 39 | ||mediabong.net^$third-party 40 | ||imprvdosrv.com^$third-party 41 | ! yt embed exceptions 42 | @@||youtube.com/yts/jsbin^$domain=thegatewaypundit.com|godlikeproductions.com|techcrunch.com 43 | ! fb widget audience, ad and marketing tracking 44 | ||connect.facebook.net/*/fbevents.js$third-party 45 | ||facebook.com/tr^$image,third-party 46 | ||graph.facebook.com^$third-party 47 | ! 
theatlantic.com anti-blocker filters 48 | ||theatlantic.blueconic.net$domain=theatlantic.com 49 | ||theatlantic.com/please-support-us^ 50 | -------------------------------------------------------------------------------- /test/rule_types_test.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "./CppUnitLite/TestHarness.h" 15 | #include "./ad_block_client.h" 16 | #include "./util.h" 17 | 18 | using std::set; 19 | using std::string; 20 | 21 | bool testComment(const char *rawFilter) { 22 | Filter filter; 23 | parseFilter(rawFilter, &filter); 24 | return filter.filterType == FTComment; 25 | } 26 | 27 | TEST(ruleTypes, commentRules) { 28 | set commentRules { 29 | "[Adblock Plus 2.0]", 30 | "! Checksum: nVIXktYXKU6M+cu+Txkhuw", 31 | "!/cb.php?sub$script,third-party", 32 | "!@@/cb.php?sub", 33 | "!###ADSLOT_SKYSCRAPER", 34 | "! 
*** easylist:easylist/easylist_whitelist_general_hide.txt ***", 35 | " !###ADSLOT_SKYSCRAPER", 36 | }; 37 | 38 | std::for_each(commentRules.begin(), commentRules.end(), 39 | [this, &result_](string const &s) { 40 | CHECK(testComment(s.c_str())); 41 | }); 42 | } 43 | 44 | bool testElementHidingRule(const char *rawFilter, bool exception) { 45 | Filter filter; 46 | parseFilter(rawFilter, &filter); 47 | if (exception) { 48 | return filter.filterType == FTElementHidingException; 49 | } 50 | return filter.filterType == FTElementHiding; 51 | } 52 | 53 | TEST(ruleTypes, elementHidingRules) { 54 | set elementHidingRules { 55 | "@@###ADSLOT_SKYSCRAPER", 56 | " ###ADSLOT_SKYSCRAPER", 57 | "###ADSLOT_SKYSCRAPER", 58 | "##.adsBox", 59 | "##a[href^=\"http://affiliate.sometracker.com/\"]", 60 | }; 61 | 62 | std::for_each(elementHidingRules.begin(), elementHidingRules.end(), 63 | [this, &result_](string const &s) { 64 | CHECK(testElementHidingRule(s.c_str(), false)); 65 | }); 66 | } 67 | 68 | TEST(ruleTypes, elementHidingExceptionRules) { 69 | set elementHidingExceptionRules { 70 | "eee.se#@##adspace_top", 71 | "domain1.com,domain2.com#@##adwrapper", 72 | "edgesuitedomain.net#@##ad-unit", 73 | "mydomain.com#@#.ad-unit", 74 | }; 75 | 76 | std::for_each(elementHidingExceptionRules.begin(), 77 | elementHidingExceptionRules.end(), [this, &result_](string const &s) { 78 | CHECK(testElementHidingRule(s.c_str(), true)); 79 | }); 80 | } 81 | -------------------------------------------------------------------------------- /test/protocol_test.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "./protocol.h" 15 | #include "./CppUnitLite/TestHarness.h" 16 | #include "./CppUnitLite/Test.h" 17 | #include "./util.h" 18 | 19 | // Testing isBlockableProtocol 20 | TEST(isBlockableProtocol, basic) { 21 | // Each of the below should be valid, since they're urls of protocols 22 | // we block against 23 | CHECK(isBlockableProtocol("http://example.com", 19)); 24 | CHECK(isBlockableProtocol("https://example.com", 20)); 25 | CHECK(isBlockableProtocol("ws://example.com/path", 22)); 26 | CHECK(isBlockableProtocol("wss://example.com/path", 23)); 27 | 28 | // Each of the following should also be valid, since we ignore "blob" 29 | // pre-prefixes on URLs. 30 | CHECK(isBlockableProtocol("blob:http://example.com", 24)); 31 | CHECK(isBlockableProtocol("blob:https://example.com", 25)); 32 | CHECK(isBlockableProtocol("blob:ws://example.com/path", 27)); 33 | CHECK(isBlockableProtocol("blob:wss://example.com/path", 28)); 34 | 35 | // We don't care about case either, so each of the following should also 36 | // pass. 37 | CHECK(isBlockableProtocol("hTtp://example.com", 19)); 38 | CHECK(isBlockableProtocol("htTPs://example.com", 20)); 39 | CHECK(isBlockableProtocol("WS://example.com/path", 22)); 40 | CHECK(isBlockableProtocol("WSS://example.com/path", 23)); 41 | 42 | // Each of the following should fail, since they are each non supported / 43 | // blockable protocols. 44 | 45 | // To short example 46 | CHECK(isBlockableProtocol("htt", 3) == false); 47 | 48 | // Bad protocol example. 
49 | CHECK(isBlockableProtocol("htttp://example.com", 25) == false); 50 | 51 | // No protocol example 52 | CHECK(isBlockableProtocol("example.com", 12) == false); 53 | 54 | // PNG data url example (not a valid image)[ 55 | const char *pngPixel = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEA=="; 56 | int pngPixelLen = strlen(pngPixel); 57 | CHECK(isBlockableProtocol(pngPixel, pngPixelLen) == false); 58 | 59 | // SVG data url example 60 | const char *svgUrl = "data:image/svg+xml;utf8,"; 62 | int svgUrlLen = strlen(svgUrl); 63 | CHECK(isBlockableProtocol(svgUrl, svgUrlLen) == false); 64 | } 65 | -------------------------------------------------------------------------------- /no_fingerprint_domain.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ 5 | 6 | #include "./no_fingerprint_domain.h" 7 | 8 | #include 9 | #include 10 | 11 | #include "hashFn.h" 12 | 13 | static HashFn h(19); 14 | 15 | NoFingerprintDomain::NoFingerprintDomain() : 16 | borrowed_data(false), 17 | data(nullptr), 18 | dataLen(-1) { 19 | } 20 | 21 | NoFingerprintDomain::NoFingerprintDomain(const NoFingerprintDomain &other) { 22 | borrowed_data = other.borrowed_data; 23 | dataLen = other.dataLen; 24 | if (other.dataLen == -1 && other.data) { 25 | dataLen = static_cast(strlen(other.data)); 26 | } 27 | 28 | if (other.borrowed_data) { 29 | data = other.data; 30 | } else { 31 | if (other.data) { 32 | data = new char[dataLen]; 33 | memcpy(data, other.data, dataLen); 34 | } else { 35 | data = nullptr; 36 | } 37 | } 38 | } 39 | 40 | NoFingerprintDomain::NoFingerprintDomain(const char * data, int dataLen) : 41 | borrowed_data(true), data(const_cast(data)), 42 | dataLen(dataLen) { 43 | } 44 | 45 | NoFingerprintDomain::~NoFingerprintDomain() { 46 | if (borrowed_data) { 47 | return; 48 | } 49 | if (data) { 50 | delete[] data; 51 | } 52 | } 53 | 54 | uint64_t NoFingerprintDomain::hash() const { 55 | if (!data) { 56 | return 0; 57 | } 58 | return h(data, dataLen); 59 | } 60 | 61 | uint32_t NoFingerprintDomain::Serialize(char *buffer) { 62 | uint32_t totalSize = 0; 63 | char sz[64]; 64 | uint32_t dataLenSize = 1 + snprintf(sz, sizeof(sz), 65 | "%xx", dataLen); 66 | if (buffer) { 67 | memcpy(buffer + totalSize, sz, dataLenSize); 68 | } 69 | totalSize += dataLenSize; 70 | if (buffer) { 71 | memcpy(buffer + totalSize, data, dataLen); 72 | } 73 | totalSize += dataLen; 74 | 75 | totalSize += 1; 76 | 77 | return totalSize; 78 | } 79 | 80 | uint32_t NoFingerprintDomain::Deserialize(char *buffer, uint32_t bufferSize) { 81 | dataLen = 0; 82 | sscanf(buffer, "%x", &dataLen); 83 | uint32_t consumed = static_cast(strlen(buffer)) + 1; 84 | if (consumed + dataLen >= bufferSize) { 85 | return 0; 86 | } 87 | data = buffer + consumed; 88 | consumed += dataLen; 89 | 
borrowed_data = true; 90 | return consumed; 91 | } 92 | 93 | bool NoFingerprintDomain::operator==(const NoFingerprintDomain &rhs) const { 94 | if (dataLen != rhs.dataLen) { 95 | return false; 96 | } 97 | if (dataLen == 0) { 98 | return true; 99 | } 100 | return !memcmp(data, rhs.data, dataLen); 101 | } 102 | -------------------------------------------------------------------------------- /bad_fingerprint.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 | 6 | #ifndef BAD_FINGERPRINT_H_ 7 | #define BAD_FINGERPRINT_H_ 8 | 9 | #include 10 | #include 11 | #include "./hash_set.h" 12 | 13 | #ifdef PERF_STATS 14 | #include 15 | #endif 16 | 17 | class BadFingerprint { 18 | public: 19 | uint64_t GetHash() const { 20 | return 0; 21 | } 22 | 23 | ~BadFingerprint() { 24 | if (data) { 25 | delete[] data; 26 | } 27 | } 28 | explicit BadFingerprint(const char *data) { 29 | size_t len = strlen(data) + 1; 30 | this->data = new char[len]; 31 | snprintf(this->data, len, "%s", data); 32 | } 33 | 34 | BadFingerprint(const BadFingerprint &rhs) { 35 | data = new char[strlen(rhs.data) + 1]; 36 | memcpy(data, rhs.data, strlen(rhs.data) + 1); 37 | } 38 | 39 | BadFingerprint() : data(nullptr) { 40 | } 41 | 42 | bool operator==(const BadFingerprint &rhs) const { 43 | return !strcmp(data, rhs.data); 44 | } 45 | 46 | bool operator!=(const BadFingerprint &rhs) const { 47 | return !(*this == rhs); 48 | } 49 | 50 | // Nothing needs to be updated for multiple adds 51 | void Update(const BadFingerprint &) {} 52 | 53 | uint32_t Serialize(char *buffer) { 54 | if (buffer) { 55 | memcpy(buffer, data, strlen(data) + 1); 56 | } 57 | return static_cast(strlen(data)) + 1; 58 | } 59 | 
60 | uint32_t Deserialize(char *buffer, uint32_t bufferSize) { 61 | uint32_t len = static_cast(strlen(buffer)); 62 | data = new char[len + 1]; 63 | memcpy(data, buffer, len + 1); 64 | return len + 1; 65 | } 66 | 67 | char *data; 68 | }; 69 | 70 | class BadFingerprintsHashSet : public HashSet { 71 | public: 72 | BadFingerprintsHashSet() : HashSet(1, false) { 73 | } 74 | 75 | void generateHeader(const char *filename) { 76 | #ifdef PERF_STATS 77 | std::ofstream outFile; 78 | outFile.open(filename); 79 | 80 | outFile << "#pragma once\n"; 81 | outFile << "/**\n *\n * Auto generated bad filters\n */\n"; 82 | outFile << "const char *badFingerprints[] = {\n"; 83 | for (uint32_t bucket_index = 0; bucket_index < bucket_count_; 84 | bucket_index++) { 85 | HashItem *hashItem = buckets_[bucket_index]; 86 | while (hashItem) { 87 | BadFingerprint *badFingerprint = hashItem->hash_item_storage_; 88 | outFile << "\"" << badFingerprint->data << "\"," << std::endl; 89 | hashItem = hashItem->next_; 90 | } 91 | } 92 | outFile << "};\n" << std::endl; 93 | outFile << "const char *badSubstrings[] = {\"http\", \"www\" };" 94 | << std::endl; 95 | outFile.close(); 96 | #endif 97 | } 98 | }; 99 | 100 | #endif // BAD_FINGERPRINT_H_ 101 | -------------------------------------------------------------------------------- /scripts/uploadDataFiles.js: -------------------------------------------------------------------------------- 1 | /* This Source Code Form is subject to the terms of the Mozilla Public 2 | * License, v. 2.0. If a copy of the MPL was not distributed with this file, 3 | * You can obtain one at http://mozilla.org/MPL/2.0/. 
const fs = require('fs')
const s3 = require('s3')
const commander = require('commander')
const path = require('path')
const dataFileVersion = 4

const client = s3.createClient({
  maxAsyncS3: 20,
  s3RetryCount: 3,
  s3RetryDelay: 1000,
  multipartUploadThreshold: 20971520,
  multipartUploadSize: 15728640,
  // See: http://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/Config.html#constructor-property
  s3Options: {}
})

/**
 * Uploads one local file to the `adblock-data` bucket as `${key}/${filename}`
 * with a public-read ACL.
 *
 * @param {string|number} key - destination prefix inside the bucket
 * @param {string} filePath - path of the local file to upload
 * @param {string} filename - object name appended to the prefix
 * @returns {Promise<void>} resolves when the upload ends; rejects with the
 *   uploader's error so callers can report what actually went wrong
 */
const uploadFile = (key, filePath, filename) => {
  return new Promise((resolve, reject) => {
    const params = {
      localFile: filePath,
      // See: http://docs.aws.amazon.com/AWSJavaScriptSDK/latest/AWS/S3.html#putObject-property
      s3Params: {
        Bucket: 'adblock-data',
        Key: `${key}/${filename}`,
        ACL: 'public-read'
      }
    }
    const uploader = client.uploadFile(params)
    process.stdout.write(`Started uploading to: ${params.s3Params.Key}... `)
    uploader.on('error', (err) => {
      console.error('Unable to upload:', err.stack, 'Do you have ~/.aws/credentials filled out?')
      // Propagate the error; rejecting with nothing made the final handler
      // log `undefined`.
      reject(err)
    })
    uploader.on('end', () => {
      console.log('completed')
      resolve()
    })
  })
}

commander
  .option('-d, --dat [dat]', 'file path of the adblock .dat file to upload')
  .option('-p, --prod', 'whether the upload is for prod, if not specified uploads to the test location')
  .parse(process.argv)

// Queue up all the uploads one at a time to easily spot errors
let p = Promise.resolve()
const queueUpload = (key, filePath, filename) => {
  p = p.then(() => uploadFile(key, filePath, filename))
}
const date = new Date().toISOString().split('.')[0]

if (commander.dat) {
  // A single .dat file was named on the command line.
  const filename = path.basename(commander.dat)
  if (commander.prod) {
    queueUpload(dataFileVersion, commander.dat, filename)
    queueUpload(`backups/${date}`, commander.dat, filename)
  } else {
    // NOTE(review): this branch uploads to `test/`, while the directory
    // branch below uploads to `test/${dataFileVersion}/` -- kept as-is,
    // confirm which location is intended.
    queueUpload('test', commander.dat, filename)
  }
} else {
  // No file given: upload everything found in ./out.
  fs.readdirSync('out').forEach((filename) => {
    const filePath = `out/${filename}`
    if (commander.prod) {
      queueUpload(dataFileVersion, filePath, filename)
      queueUpload(`backups/${date}`, filePath, filename)
    } else {
      queueUpload(`test/${dataFileVersion}`, filePath, filename)
    }
  })
}
p = p.catch((e) => {
  console.error('A problem was encounterd', e)
  process.exit(1)
})

process.on('uncaughtException', (err) => {
  console.error('Caught exception:', err)
  process.exit(1)
})

process.on('unhandledRejection', (err) => {
  console.error('Unhandled rejection:', err)
  process.exit(1)
})
// Sanity checks on the list metadata exported by the package: every list
// group must expose the properties that consumers rely on.
describe('adBlockLists', function () {
  // Asserts that every list in `lists` has a truthy `property`.
  const assertEveryListHas = (lists, property) => {
    lists.forEach((list) => {
      assert(!!list[property])
    })
  }

  // Asserts that no list in `lists` has a truthy `property`.
  const assertNoListHas = (lists, property) => {
    lists.forEach((list) => {
      assert(!list[property])
    })
  }

  describe('default', function () {
    // Title now agrees with the asserted count (it previously said 4).
    it('contains 5 default lists', function () {
      assert.equal(adBlockLists.default.length, 5)
    })
    it('has uuid property', function () {
      assertEveryListHas(adBlockLists.default, 'uuid')
    })
    it('does not have langs property', function () {
      assertNoListHas(adBlockLists.default, 'langs')
    })
    it('has url property', function () {
      assertEveryListHas(adBlockLists.default, 'listURL')
    })
    it('has title property', function () {
      assertEveryListHas(adBlockLists.default, 'title')
    })
    it('has supportURL property', function () {
      assertEveryListHas(adBlockLists.default, 'supportURL')
    })
  })
  describe('malware', function () {
    it('contains 2 malware lists', function () {
      assert.equal(adBlockLists.malware.length, 2)
    })
    it('does not have langs property', function () {
      assertNoListHas(adBlockLists.malware, 'langs')
    })
    it('has uuid property', function () {
      assertEveryListHas(adBlockLists.malware, 'uuid')
    })
    it('has url property', function () {
      assertEveryListHas(adBlockLists.malware, 'listURL')
    })
    it('has title property', function () {
      assertEveryListHas(adBlockLists.malware, 'title')
    })
    it('has supportURL property', function () {
      assertEveryListHas(adBlockLists.malware, 'supportURL')
    })
  })
  describe('regions', function () {
    it('contains multiple region lists', function () {
      assert(adBlockLists.regions.length > 0)
    })
    it('has uuid property', function () {
      // Previously iterated adBlockLists.malware, so region uuids were
      // never actually checked.
      assertEveryListHas(adBlockLists.regions, 'uuid')
    })
    it('has langs array property', function () {
      // Only some region lists are expected to declare languages.
      assert(adBlockLists.regions.some((list) => !!list.langs))
    })
    it('has url property', function () {
      assertEveryListHas(adBlockLists.regions, 'listURL')
    })
    it('has title property', function () {
      assertEveryListHas(adBlockLists.regions, 'title')
    })
    it('has supportURL property', function () {
      assertEveryListHas(adBlockLists.regions, 'supportURL')
    })
  })
})
*/ 5 | 6 | #ifndef COSMETIC_FILTER_H_ 7 | #define COSMETIC_FILTER_H_ 8 | 9 | #include 10 | #include 11 | #include "./hash_set.h" 12 | 13 | class CosmeticFilter { 14 | public: 15 | uint64_t hash() const; 16 | uint64_t GetHash() const { 17 | return hash(); 18 | } 19 | 20 | ~CosmeticFilter() { 21 | if (data) { 22 | delete[] data; 23 | } 24 | } 25 | explicit CosmeticFilter(const char *data) { 26 | size_t len = strlen(data) + 1; 27 | this->data = new char[len]; 28 | snprintf(this->data, len, "%s", data); 29 | } 30 | 31 | CosmeticFilter(const CosmeticFilter &rhs) { 32 | data = new char[strlen(rhs.data) + 1]; 33 | memcpy(data, rhs.data, strlen(rhs.data) + 1); 34 | } 35 | 36 | CosmeticFilter() : data(nullptr) { 37 | } 38 | 39 | bool operator==(const CosmeticFilter &rhs) const { 40 | return !strcmp(data, rhs.data); 41 | } 42 | 43 | bool operator!=(const CosmeticFilter &rhs) const { 44 | return !(*this == rhs); 45 | } 46 | 47 | // Nothing needs to be updated for multiple adds 48 | void Update(const CosmeticFilter &) {} 49 | 50 | uint32_t Serialize(char *buffer) { 51 | if (buffer) { 52 | memcpy(buffer, data, strlen(data) + 1); 53 | } 54 | return static_cast(strlen(data)) + 1; 55 | } 56 | 57 | uint32_t Deserialize(char *buffer, uint32_t bufferSize) { 58 | int len = static_cast(strlen(buffer)); 59 | data = new char[len + 1]; 60 | memcpy(data, buffer, len + 1); 61 | return len + 1; 62 | } 63 | 64 | char *data; 65 | }; 66 | 67 | class CosmeticFilterHashSet : public HashSet { 68 | public: 69 | CosmeticFilterHashSet() : HashSet(1000, false) { 70 | } 71 | char * toStylesheet(uint32_t *len) { 72 | *len = fillStylesheetBuffer(nullptr); 73 | char *buffer = new char[*len]; 74 | memset(buffer, 0, *len); 75 | fillStylesheetBuffer(buffer); 76 | return buffer; 77 | } 78 | 79 | private: 80 | uint32_t fillStylesheetBuffer(char *buffer) { 81 | uint32_t len = 0; 82 | for (uint32_t bucketIndex = 0; bucketIndex < bucket_count_; bucketIndex++) { 83 | HashItem *hashItem = buckets_[bucketIndex]; 84 
| len = 0; 85 | while (hashItem) { 86 | CosmeticFilter *cosmeticFilter = hashItem->hash_item_storage_; 87 | // [cosmeticFilter],[space] 88 | int cosmeticFilterLen = 89 | static_cast(strlen(cosmeticFilter->data)); 90 | if (buffer) { 91 | memcpy(buffer + len, cosmeticFilter->data, cosmeticFilterLen); 92 | } 93 | len += cosmeticFilterLen; 94 | if (hashItem->next_) { 95 | if (buffer) { 96 | memcpy(buffer + len, ", ", 2); 97 | } 98 | len += 2; 99 | } 100 | hashItem = hashItem->next_; 101 | } 102 | } 103 | return len; 104 | } 105 | }; 106 | 107 | #endif // COSMETIC_FILTER_H_ 108 | -------------------------------------------------------------------------------- /test/js/parsingTest.js: -------------------------------------------------------------------------------- 1 | /* This Source Code Form is subject to the terms of the Mozilla Public 2 | * License, v. 2.0. If a copy of the MPL was not distributed with this file, 3 | * You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 | /* global describe, it, before */ 5 | 6 | const assert = require('assert') 7 | const fs = require('fs') 8 | const {makeAdBlockClientFromString} = require('../../lib/util') 9 | const {FilterOptions} = require('../..') 10 | 11 | describe('parsing', function () { 12 | describe('newlines', function () { 13 | before(function () { 14 | const data = fs.readFileSync('./test/data/easylist.txt', 'utf8') 15 | this.rawData = data.replace(/\r/g, '').split('\n').slice(0, 100).join('\n') 16 | this.matchArgs = ['http://www.brianbondy.com/public/ad/adbanner.gif&ad_box_=1&ad_type=3', FilterOptions.image, 'slashdot.org'] 17 | }) 18 | 19 | it('\\r newline is handled the same as \\n', function (cb) { 20 | Promise.all([ 21 | makeAdBlockClientFromString(this.rawData.replace(/\n/g, '\r')), 22 | makeAdBlockClientFromString(this.rawData) 23 | ]).then(([client1, client2]) => { 24 | const buffer1 = client1.serialize() 25 | const buffer2 = client2.serialize() 26 | assert.equal(buffer1.length, buffer2.length) 27 | 
// Parses the same rules once with "\r\n" line endings and once with "\n",
// then checks both clients serialize to the same size and match the same URL.
it('\\r\\n newline is handled the same as \\n', function (cb) {
  Promise.all([
    makeAdBlockClientFromString(this.rawData.replace(/\n/g, '\r\n')),
    makeAdBlockClientFromString(this.rawData)
  ]).then(([client1, client2]) => {
    const buffer1 = client1.serialize()
    const buffer2 = client2.serialize()
    assert.equal(buffer1.length, buffer2.length)
    assert(buffer2.toString() === buffer1.toString().replace(/\n/g, '\r'))
    assert(client1.matches(...this.matchArgs))
    assert(client2.matches(...this.matchArgs))
    cb()
  }).catch(cb) // hand the failure to mocha; asserting inside catch never called cb
})
makeAdBlockClientFromString('#'), 84 | makeAdBlockClientFromString('/'), 85 | makeAdBlockClientFromString('.'), 86 | makeAdBlockClientFromString('^') 87 | ]).then(() => { 88 | cb() 89 | }) 90 | }) 91 | }) 92 | }) 93 | -------------------------------------------------------------------------------- /perf.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "./ad_block_client.h" 16 | #include "./bad_fingerprint.h" 17 | 18 | using std::string; 19 | using std::cout; 20 | using std::endl; 21 | 22 | string getFileContents(const char *filename) { 23 | std::ifstream in(filename, std::ios::in); 24 | if (in) { 25 | std::ostringstream contents; 26 | contents << in.rdbuf(); 27 | in.close(); 28 | return(contents.str()); 29 | } 30 | throw(errno); 31 | } 32 | 33 | void doSiteList(AdBlockClient *pClient, bool outputPerf) { 34 | AdBlockClient &client = *pClient; 35 | std::string && siteList = getFileContents("./test/data/sitelist.txt"); 36 | std::stringstream ss(siteList); 37 | std::istream_iterator begin(ss); 38 | std::istream_iterator end; 39 | std::vector sites(begin, end); 40 | 41 | // This is the site who's URLs are being checked, not the domain of 42 | // the URL being checked. 
43 | const char *currentPageDomain = "brianbondy.com"; 44 | 45 | int numBlocks = 0; 46 | int numSkips = 0; 47 | const clock_t beginTime = clock(); 48 | std::for_each(sites.begin(), sites.end(), [&client, currentPageDomain, 49 | &numBlocks, &numSkips](std::string const &urlToCheck) { 50 | if (client.matches(urlToCheck.c_str(), FONoFilterOption, 51 | currentPageDomain)) { 52 | ++numBlocks; 53 | } else { 54 | ++numSkips; 55 | } 56 | }); 57 | if (outputPerf) { 58 | cout << "Time: " << float(clock() - beginTime) 59 | / CLOCKS_PER_SEC << "s" << endl; 60 | cout << "num blocks: " << numBlocks << ", num skips: " << numSkips << endl; 61 | cout << "False Positives: " << client.numFalsePositives 62 | << ", exception false positives: " 63 | << client.numExceptionFalsePositives << endl; 64 | cout << "Bloom filter saves: " << client.numBloomFilterSaves 65 | << ", exception bloom filter saves: " 66 | << client.numExceptionBloomFilterSaves << endl; 67 | } 68 | } 69 | 70 | int main(int argc, char**argv) { 71 | std::string && easyListTxt = 72 | getFileContents("./test/data/easylist.txt"); 73 | std::string && easyPrivacyTxt = 74 | getFileContents("./test/data/easyprivacy.txt"); 75 | std::string && braveUnblockTxt = 76 | getFileContents("./test/data/brave-unbreak.txt"); 77 | std::string && ublockUnblockTxt = 78 | getFileContents("./test/data/ublock-unbreak.txt"); 79 | std::string && spam404MainBlacklistTxt = 80 | getFileContents("./test/data/spam404-main-blacklist.txt"); 81 | std::string && disconnectSimpleMalwareTxt = 82 | getFileContents("./test/data/disconnect-simple-malware.txt"); 83 | 84 | cout << endl 85 | << "-------------\n" 86 | << " AD BLOCK \n" 87 | << "-------------\n" 88 | << endl; 89 | 90 | AdBlockClient adBlockClient; 91 | adBlockClient.parse(easyListTxt.c_str()); 92 | adBlockClient.parse(easyPrivacyTxt.c_str()); 93 | adBlockClient.parse(ublockUnblockTxt.c_str()); 94 | adBlockClient.parse(braveUnblockTxt.c_str()); 95 | doSiteList(&adBlockClient, true); 96 | 97 | cout << 
endl 98 | << "-------------\n" 99 | << "SAFE BROWSING\n" 100 | << "-------------\n" 101 | << endl; 102 | 103 | AdBlockClient safeBrowsingClient; 104 | safeBrowsingClient.parse(spam404MainBlacklistTxt.c_str()); 105 | safeBrowsingClient.parse(disconnectSimpleMalwareTxt.c_str()); 106 | doSiteList(&safeBrowsingClient, true); 107 | 108 | cout << endl 109 | << "-------------\n" 110 | << "generating bad fingerprints list" 111 | << endl; 112 | 113 | AdBlockClient allClient; 114 | allClient.enableBadFingerprintDetection(); 115 | allClient.parse(easyListTxt.c_str()); 116 | allClient.parse(easyPrivacyTxt.c_str()); 117 | allClient.parse(ublockUnblockTxt.c_str()); 118 | allClient.parse(braveUnblockTxt.c_str()); 119 | allClient.parse(spam404MainBlacklistTxt.c_str()); 120 | allClient.parse(disconnectSimpleMalwareTxt.c_str()); 121 | doSiteList(&allClient, false); 122 | allClient.badFingerprintsHashSet->generateHeader("bad_fingerprints.h"); 123 | 124 | return 0; 125 | } 126 | -------------------------------------------------------------------------------- /main.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "./ad_block_client.h" 14 | 15 | using std::cout; 16 | using std::endl; 17 | using std::string; 18 | 19 | string getFileContents(const char *filename) { 20 | std::ifstream in(filename, std::ios::in); 21 | if (in) { 22 | std::ostringstream contents; 23 | contents << in.rdbuf(); 24 | in.close(); 25 | return(contents.str()); 26 | } 27 | throw(errno); 28 | } 29 | 30 | void writeFile(const char *filename, const char *buffer, int length) { 31 | std::ofstream outFile(filename, std::ios::out | std::ios::binary); 32 | if (outFile) { 33 | outFile.write(buffer, length); 34 | outFile.close(); 35 | return; 36 | } 37 | throw(errno); 38 | } 39 | 40 | int checkForClient(AdBlockClient *pClient, const char *outputPath, 41 | const std::vector &urlsToCheck) { 42 | AdBlockClient &client = *pClient; 43 | 44 | // This is the site who's URLs are being checked, not the domain of the 45 | // URL being checked. 46 | const char *currentPageDomain = "slashdot.org"; 47 | 48 | 49 | // Do the checks 50 | std::for_each(urlsToCheck.begin(), urlsToCheck.end(), 51 | [&client, currentPageDomain](std::string const &urlToCheck) { 52 | if (client.matches(urlToCheck.c_str(), 53 | FONoFilterOption, currentPageDomain)) { 54 | cout << urlToCheck << ": You should block this URL!" << endl; 55 | } else { 56 | cout << urlToCheck << ": You should NOT block this URL!" << endl; 57 | } 58 | }); 59 | 60 | int size; 61 | // This buffer is allocate on the heap, you must call delete[] when 62 | // you're done using it. 63 | char *buffer = client.serialize(&size); 64 | writeFile(outputPath, buffer, size); 65 | 66 | AdBlockClient client2; 67 | // Deserialize uses the buffer directly for subsequent matches, do not free 68 | // until all matches are done. 
69 | if (!client2.deserialize(buffer)) { 70 | cout << "Could not deserialize"; 71 | delete[] buffer; 72 | return 0; 73 | } 74 | // Prints the same as client.matches would 75 | std::for_each(urlsToCheck.begin(), urlsToCheck.end(), 76 | [&client2, currentPageDomain](std::string const &urlToCheck) { 77 | if (client2.matches(urlToCheck.c_str(), 78 | FONoFilterOption, currentPageDomain)) { 79 | cout << urlToCheck << ": You should block this URL!" << endl; 80 | } else { 81 | cout << urlToCheck << ": You should NOT block this URL!" << endl; 82 | } 83 | }); 84 | delete[] buffer; 85 | return 0; 86 | } 87 | 88 | 89 | int main(int argc, char**argv) { 90 | std::string && easyListTxt = getFileContents("./test/data/easylist.txt"); 91 | std::string && easyPrivacyTxt = 92 | getFileContents("./test/data/easyprivacy.txt"); 93 | std::string && ublockUnblockTxt = 94 | getFileContents("./test/data/ublock-unbreak.txt"); 95 | std::string && braveUnblockTxt = 96 | getFileContents("./test/data/brave-unbreak.txt"); 97 | std::string && spam404MainBlacklistTxt = 98 | getFileContents("./test/data/spam404-main-blacklist.txt"); 99 | std::string && disconnectSimpleMalwareTxt = 100 | getFileContents("./test/data/disconnect-simple-malware.txt"); 101 | 102 | // Parse filter lists for adblock 103 | AdBlockClient adBlockClient; 104 | adBlockClient.parse(easyListTxt.c_str()); 105 | adBlockClient.parse(easyPrivacyTxt.c_str()); 106 | adBlockClient.parse(ublockUnblockTxt.c_str()); 107 | adBlockClient.parse(braveUnblockTxt.c_str()); 108 | std::vector checkVector; 109 | checkVector.push_back( 110 | "http://pagead2.googlesyndication.com/pagead/show_ads.js"); 111 | checkVector.push_back( 112 | "http://tpc.googlesyndication.com/safeframe/1-0-2/html/container.html"); 113 | checkVector.push_back( 114 | "http://www.googletagservices.com/tag/js/gpt_mobile.js"); 115 | checkVector.push_back("http://www.brianbondy.com"); 116 | checkForClient(&adBlockClient, "./ABPFilterClientData.dat", checkVector); 117 | 118 | // 
const {AdBlockClient, FilterOptions} = require('..')
const path = require('path')
const fs = require('fs')
const request = require('request')
const braveUnbreakPath = './test/data/brave-unbreak.txt'
const {getListBufferFromURL, getListFilterFunction} = require('../lib/util')
const {adBlockLists} = require('..')

// Totals accumulated over every checkSiteList() call.
let totalExceptionFalsePositives = 0
let totalNumFalsePositives = 0
let totalTime = 0

// URLs matched against every generated client.
const top500URLList20k = fs.readFileSync('./test/data/sitelist.txt', 'utf8').split('\n')
// const shortURLList = fs.readFileSync('./test/data/short-sitelist.txt', 'utf8').split('\n')

/**
 * Matches every URL in `siteList` against `client`, logs the client's
 * matching stats and adds them (and the elapsed time) to the running totals.
 */
const checkSiteList = (client, siteList) => {
  const start = new Date().getTime()
  siteList.forEach(site => {
    // console.log('matches: ', client.matches(site, FilterOptions.image, 'slashdot.org'))
    client.matches(site, FilterOptions.noFilterOption, 'slashdot.org')
  })
  const stats = client.getMatchingStats()
  console.log('Matching stats:', stats)
  totalNumFalsePositives += stats.numFalsePositives
  totalExceptionFalsePositives += stats.numExceptionFalsePositives
  const end = new Date().getTime()
  const time = end - start
  totalTime += time
  console.log('done, time: ', time, 'ms')
}

/**
 * Parses one rule list (or an array of rule lists), runs the site list
 * through the client, writes bad_fingerprints.h and stores the serialized
 * client as out/<outputDATFilename>.
 */
const generateDataFileFromString = (filterRuleData, outputDATFilename) => {
  const client = new AdBlockClient()
  if (Array.isArray(filterRuleData)) {
    filterRuleData.forEach(filterRuleDataItem => client.parse(filterRuleDataItem))
  } else {
    client.parse(filterRuleData)
  }

  console.log('Parsing stats:', client.getParsingStats())
  client.enableBadFingerprintDetection()
  checkSiteList(client, top500URLList20k)
  client.generateBadFingerprintsHeader('bad_fingerprints.h')
  const serializedData = client.serialize()
  if (!fs.existsSync('out')) {
    fs.mkdirSync('./out')
  }
  fs.writeFileSync(path.join('out', outputDATFilename), serializedData)
}

/**
 * Downloads `listURL`, optionally passes the body through `filter`, and
 * generates a data file from it combined with the brave-unbreak rules.
 * Rejects with an Error describing the failed request.
 */
const generateDataFileFromURL = (listURL, outputDATFilename, filter) => {
  return new Promise((resolve, reject) => {
    console.log(`${listURL}...`)
    request.get(listURL, function (error, response, body) {
      if (error) {
        console.error(`Request error: ${error}`)
        reject(error)
        return
      }
      if (response.statusCode !== 200) {
        const message = `Error status code ${response.statusCode} returned for URL: ${listURL}`
        console.error(message)
        reject(new Error(message))
        return
      }
      const braveUnbreakBody = fs.readFileSync(braveUnbreakPath, 'utf8')
      if (filter) {
        body = filter(body)
      }
      generateDataFileFromString([body, braveUnbreakBody], outputDATFilename)
      resolve()
    })
  })
}

// Generates one <uuid>.dat per region list, sequentially, then logs totals.
const generateDataFilesForAllRegions = () => {
  let p = Promise.resolve()
  adBlockLists.regions.forEach((region) => {
    p = p.then(() => generateDataFileFromURL(region.listURL, `${region.uuid}.dat`))
  })
  return p.then(() => {
    // Whole seconds plus the millisecond remainder (the quotient used to be
    // printed unrounded, e.g. "12.345s 345ms").
    console.log(`Total time: ${Math.floor(totalTime / 1000)}s ${totalTime % 1000}ms`)
    console.log(`Num false positives: ${totalNumFalsePositives}`)
    console.log(`Num exception false positives: ${totalExceptionFalsePositives}`)
  })
}

// Downloads every list in `lists` in parallel and combines them into a
// single data file named `filename`.
const generateDataFilesForList = (lists, filename) => {
  const promises = lists.map((l) => {
    console.log(`${l.listURL}...`)
    return getListBufferFromURL(l.listURL, getListFilterFunction(l.uuid))
  })
  return Promise.all(promises).then((listBuffers) => {
    generateDataFileFromString(listBuffers, filename)
  })
}

const generateDataFilesForMalware = generateDataFilesForList.bind(null, adBlockLists.malware, 'SafeBrowsingData.dat')
const generateDataFilesForDefaultAdblock = generateDataFilesForList.bind(null, adBlockLists.default, 'ABPFilterParserData.dat')

generateDataFilesForDefaultAdblock()
  .then(generateDataFilesForMalware)
  .then(generateDataFilesForAllRegions)
  .then(() => {
    console.log('Thank you for updating the data files, don\'t forget to upload them too!')
  })
  .catch((e) => {
    // Include the cause; it used to be discarded.
    console.error('Something went wrong, aborting!', e)
    process.exit(1)
  })

process.on('uncaughtException', (err) => {
  console.error('Caught exception:', err)
  process.exit(1)
})

process.on('unhandledRejection', (err) => {
  console.error('Unhandled rejection:', err)
  process.exit(1)
})
If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 | 6 | #ifndef AD_BLOCK_CLIENT_H_ 7 | #define AD_BLOCK_CLIENT_H_ 8 | 9 | #include 10 | #include 11 | #include "./filter.h" 12 | 13 | class CosmeticFilter; 14 | class BloomFilter; 15 | class BadFingerprintsHashSet; 16 | class NoFingerprintDomain; 17 | 18 | template 19 | class HashSet; 20 | 21 | class AdBlockClient { 22 | public: 23 | AdBlockClient(); 24 | ~AdBlockClient(); 25 | 26 | void clear(); 27 | bool parse(const char *input); 28 | bool matches(const char *input, 29 | FilterOption contextOption = FONoFilterOption, 30 | const char *contextDomain = nullptr); 31 | bool findMatchingFilters(const char *input, 32 | FilterOption contextOption, 33 | const char *contextDomain, 34 | Filter **matchingFilter, 35 | Filter **matchingExceptionFilter); 36 | // Serializes a the parsed data and bloom filter data into a single buffer. 37 | // The returned buffer should be deleted. 38 | char * serialize(int *size, 39 | bool ignoreCosmeticFilters = true, 40 | bool ignoreHtmlFilters = true); 41 | // Deserializes the buffer, a size is not needed since a serialized. 
42 | // buffer is self described 43 | bool deserialize(char *buffer); 44 | 45 | void enableBadFingerprintDetection(); 46 | const char * getDeserializedBuffer() { 47 | return deserializedBuffer; 48 | } 49 | 50 | static bool getFingerprint(char *buffer, const char *input); 51 | static bool getFingerprint(char *buffer, const Filter &f); 52 | 53 | Filter *filters; 54 | Filter *cosmeticFilters; 55 | Filter *htmlFilters; 56 | Filter *exceptionFilters; 57 | Filter *noFingerprintFilters; 58 | Filter *noFingerprintExceptionFilters; 59 | Filter *noFingerprintDomainOnlyFilters; 60 | Filter *noFingerprintAntiDomainOnlyFilters; 61 | Filter *noFingerprintDomainOnlyExceptionFilters; 62 | Filter *noFingerprintAntiDomainOnlyExceptionFilters; 63 | 64 | int numFilters; 65 | int numCosmeticFilters; 66 | int numHtmlFilters; 67 | int numExceptionFilters; 68 | int numNoFingerprintFilters; 69 | int numNoFingerprintExceptionFilters; 70 | int numNoFingerprintDomainOnlyFilters; 71 | int numNoFingerprintAntiDomainOnlyFilters; 72 | int numNoFingerprintDomainOnlyExceptionFilters; 73 | int numNoFingerprintAntiDomainOnlyExceptionFilters; 74 | int numHostAnchoredFilters; 75 | int numHostAnchoredExceptionFilters; 76 | 77 | BloomFilter *bloomFilter; 78 | BloomFilter *exceptionBloomFilter; 79 | HashSet *hostAnchoredHashSet; 80 | HashSet *hostAnchoredExceptionHashSet; 81 | HashSet *noFingerprintDomainHashSet; 82 | HashSet *noFingerprintAntiDomainHashSet; 83 | HashSet *noFingerprintDomainExceptionHashSet; 84 | HashSet *noFingerprintAntiDomainExceptionHashSet; 85 | 86 | // Used only in the perf program to create a list of bad fingerprints 87 | BadFingerprintsHashSet *badFingerprintsHashSet; 88 | 89 | // Stats kept for matching 90 | unsigned int numFalsePositives; 91 | unsigned int numExceptionFalsePositives; 92 | unsigned int numBloomFilterSaves; 93 | unsigned int numExceptionBloomFilterSaves; 94 | unsigned int numHashSetSaves; 95 | unsigned int numExceptionHashSetSaves; 96 | 97 | static const int 
kFingerprintSize; 98 | 99 | protected: 100 | // Determines if a passed in array of filter pointers matches for any of 101 | // the input 102 | bool hasMatchingFilters(Filter *filter, int numFilters, const char *input, 103 | int inputLen, FilterOption contextOption, const char *contextDomain, 104 | BloomFilter *inputBloomFilter, const char *inputHost, int inputHostLen, 105 | Filter **matchingFilter = nullptr); 106 | void initBloomFilter(BloomFilter**, const char *buffer, int len); 107 | template 108 | bool initHashSet(HashSet**, char *buffer, int len); 109 | char *deserializedBuffer; 110 | }; 111 | 112 | extern std::set unknownOptions; 113 | extern const char *separatorCharacters; 114 | void parseFilter(const char *input, const char *end, Filter *f, 115 | BloomFilter *bloomFilter = nullptr, 116 | BloomFilter *exceptionBloomFilter = nullptr, 117 | HashSet *hostAnchoredHashSet = nullptr, 118 | HashSet *hostAnchoredExceptionHashSet = nullptr, 119 | HashSet *simpleCosmeticFilters = nullptr); 120 | void parseFilter(const char *input, Filter *f, 121 | BloomFilter *bloomFilter = nullptr, 122 | BloomFilter *exceptionBloomFilter = nullptr, 123 | HashSet *hostAnchoredHashSet = nullptr, 124 | HashSet *hostAnchoredExceptionHashSet = nullptr, 125 | HashSet *simpleCosmeticFilters = nullptr); 126 | bool isSeparatorChar(char c); 127 | int findFirstSeparatorChar(const char *input, const char *end); 128 | 129 | #endif // AD_BLOCK_CLIENT_H_ 130 | -------------------------------------------------------------------------------- /protocol.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
// Lower-cases a single byte of the URL.
//
// The byte is converted to unsigned char first: passing a negative char
// (any byte >= 0x80 on platforms where char is signed) straight to
// tolower() is undefined behavior.
static inline char lowerUrlChar(char c) {
  return static_cast<char>(tolower(static_cast<unsigned char>(c)));
}

// Returns true if the |remaining| readable bytes starting at |cur| begin
// with |literal| (which must already be lower case), ignoring case.
// Never reads past |remaining| bytes.
static bool hasPrefixIgnoreCase(const char *cur, int remaining,
    const char *literal, int literalLen) {
  if (remaining < literalLen) {
    return false;
  }
  for (int i = 0; i < literalLen; i++) {
    if (lowerUrlChar(cur[i]) != literal[i]) {
      return false;
    }
  }
  return true;
}

/**
 * Checks to see if a URL is "blockable".
 *
 * Blockable URLs are ones that use one of the following protocols (any of
 * which can be prefixed by "blob:")
 *  - http
 *  - https
 *  - ws
 *  - wss
 *
 * The accepted shape is, case-insensitively:
 *   ["blob:"] ("ws" | "http") ["s"] "://"
 *
 * @param url     Start of the URL text.
 * @param urlLen  Number of readable bytes at |url|.  Only these bytes are
 *                examined, so the buffer does not need to be
 *                NUL-terminated.
 * @return true if the URL starts with one of the protocols above.
 */
bool isBlockableProtocol(const char *url, int urlLen) {
  // If the URL is very short, then trivially it isn't one of the above
  // protocols.
  if (url == nullptr || urlLen <= 5) {
    return false;
  }

  int pos = 0;

  // Optional "blob:" prefix.  A leading 'b' commits us to "blob:"; nothing
  // else that is blockable starts with that letter.
  if (lowerUrlChar(url[pos]) == 'b') {
    if (!hasPrefixIgnoreCase(url, urlLen, "blob:", 5)) {
      return false;
    }
    pos += 5;  // urlLen > 5, so at least one byte is still readable.
  }

  // Mandatory protocol name: "ws" or "http".
  const char first = lowerUrlChar(url[pos]);
  if (first == 'w') {
    if (!hasPrefixIgnoreCase(url + pos, urlLen - pos, "ws", 2)) {
      return false;
    }
    pos += 2;
  } else if (first == 'h') {
    if (!hasPrefixIgnoreCase(url + pos, urlLen - pos, "http", 4)) {
      return false;
    }
    pos += 4;
  } else {
    return false;
  }

  // If we've read the entire URL and we haven't been able to determine
  // the protocol, then it's trivially not a blockable protocol.
  if (pos >= urlLen) {
    return false;
  }

  // Optional secure suffix ("wss" / "https").
  if (lowerUrlChar(url[pos]) == 's') {
    pos += 1;
    if (pos >= urlLen) {
      return false;
    }
  }

  // Finally the "://" separator must follow, entirely inside the buffer.
  return hasPrefixIgnoreCase(url + pos, urlLen - pos, "://", 3);
}
Actual: ', b2) 27 | ``` 28 | 29 | ## C++ Sample 30 | 31 | ```c++ 32 | #include "ad_block_client.h" 33 | #include <algorithm> 34 | #include <iostream> 35 | #include <fstream> 36 | #include <sstream> 37 | #include <string> 38 | #include <cerrno> 39 | 40 | using namespace std; 41 | 42 | string getFileContents(const char *filename) 43 | { 44 | ifstream in(filename, ios::in); 45 | if (in) { 46 | ostringstream contents; 47 | contents << in.rdbuf(); 48 | in.close(); 49 | return(contents.str()); 50 | } 51 | throw(errno); 52 | } 53 | 54 | void writeFile(const char *filename, const char *buffer, int length) 55 | { 56 | ofstream outFile(filename, ios::out | ios::binary); 57 | if (outFile) { 58 | outFile.write(buffer, length); 59 | outFile.close(); 60 | return; 61 | } 62 | throw(errno); 63 | } 64 | 65 | 66 | int main(int argc, char**argv) { 67 | std::string &&easyListTxt = getFileContents("./test/data/easylist.txt"); 68 | const char *urlsToCheck[] = { 69 | // ||pagead2.googlesyndication.com^$~object-subrequest 70 | "http://pagead2.googlesyndication.com/pagead/show_ads.js", 71 | // Should be blocked by: ||googlesyndication.com/safeframe/$third-party 72 | "http://tpc.googlesyndication.com/safeframe/1-0-2/html/container.html", 73 | // Should be blocked by: ||googletagservices.com/tag/js/gpt_$third-party 74 | "http://www.googletagservices.com/tag/js/gpt_mobile.js", 75 | // Shouldn't be blocked 76 | "http://www.brianbondy.com" 77 | }; 78 | 79 | // This is the site whose URLs are being checked, not the domain of the URL being checked. 80 | const char *currentPageDomain = "slashdot.org"; 81 | 82 | // Parse easylist 83 | AdBlockClient client; 84 | client.parse(easyListTxt.c_str()); 85 | 86 | // Do the checks 87 | std::for_each(urlsToCheck, urlsToCheck + sizeof(urlsToCheck) / sizeof(urlsToCheck[0]), [&client, currentPageDomain](std::string const &urlToCheck) { 88 | if (client.matches(urlToCheck.c_str(), FONoFilterOption, currentPageDomain)) { 89 | cout << urlToCheck << ": You should block this URL!"
<< endl; 90 | } else { 91 | cout << urlToCheck << ": You should NOT block this URL!" << endl; 92 | } 93 | }); 94 | 95 | int size; 96 | // This buffer is allocated on the heap; you must call delete[] when you're done using it. 97 | char *buffer = client.serialize(size); 98 | writeFile("./ABPFilterParserData.dat", buffer, size); 99 | 100 | AdBlockClient client2; 101 | // Deserialize uses the buffer directly for subsequent matches; do not free it until all matches are done. 102 | client2.deserialize(buffer); 103 | // Prints the same as client.matches would 104 | std::for_each(urlsToCheck, urlsToCheck + sizeof(urlsToCheck) / sizeof(urlsToCheck[0]), [&client2, currentPageDomain](std::string const &urlToCheck) { 105 | if (client2.matches(urlToCheck.c_str(), FONoFilterOption, currentPageDomain)) { 106 | cout << urlToCheck << ": You should block this URL!" << endl; 107 | } else { 108 | cout << urlToCheck << ": You should NOT block this URL!" << endl; 109 | } 110 | }); 111 | delete[] buffer; 112 | return 0; 113 | } 114 | ``` 115 | 116 | 117 | ## Util for checking URLs 118 | 119 | - Basic checking a URL: 120 | `node scripts/check.js --host www.cnet.com --location https://s0.2mdn.net/instream/html5/ima3.js` 121 | - Checking a URL with discovery: 122 | `node scripts/check.js --host www.cnet.com --location "https://slashdot.org?t=1&ad_box_=2" --discover` 123 | - Checking a URL against a particular adblock list: 124 | `node scripts/check.js --uuid 03F91310-9244-40FA-BCF6-DA31B832F34D --host slashdot.org --location https://s.yimg.jp/images/ds/ult/toppage/rapidjp-1.0.0.js` 125 | - Checking a URL from a loaded DAT file: 126 | `node scripts/check.js --dat ./out/SafeBrowsingData.dat --host excellentmovies.net --location https://excellentmovies.net` 127 | - Checking a list of URLs: 128 | `node scripts/check.js --host www.cnet.com --list ./test/data/sitelist.txt` 129 | - Checking a list of URLs with discovery: 130 | `node scripts/check.js --host www.cnet.com --list ./test/data/sitelist.txt
--discover` 131 | 132 | 133 | ## Developing brave/ad-block 134 | 135 | 1. Clone the git repository from GitHub: 136 | 137 | `git clone --recursive https://github.com/brave/ad-block` 138 | 139 | 2. Open the working directory: 140 | 141 | `cd ad-block` 142 | 143 | 3. Install the Node (v5+) dependencies: 144 | 145 | `npm install` 146 | 147 | 148 | ## Make the node module 149 | 150 | ``` 151 | make 152 | ``` 153 | 154 | ## Running sample (which also generates a .dat file for deserializing) 155 | 156 | ``` 157 | make sample 158 | ``` 159 | 160 | ## Running tests 161 | 162 | ``` 163 | make test 164 | ``` 165 | 166 | ## Clearing build files 167 | ``` 168 | make clean 169 | ``` 170 | -------------------------------------------------------------------------------- /scripts/check.js: -------------------------------------------------------------------------------- 1 | /* This Source Code Form is subject to the terms of the Mozilla Public 2 | * License, v. 2.0. If a copy of the MPL was not distributed with this file, 3 | * You can obtain one at http://mozilla.org/MPL/2.0/. 
/**
 * Command-line utility that builds an ad-block client from one of several
 * rule sources and checks URLs against it.
 *
 * Example invocations:
 * Basic checking a URL:
 *   node scripts/check.js --host www.cnet.com --location https://s0.2mdn.net/instream/html5/ima3.js
 * Checking with a particular resource type:
 *   node scripts/check.js --host www.scrumpoker.online --location https://www.scrumpoker.online/js/angular-google-analytics.js -O script
 * Checking a URL with discovery:
 *   node scripts/check.js --host www.cnet.com --location "https://slashdot.org?t=1&ad_box_=2" --discover
 * Checking a URL against a particular adblock list:
 *   node scripts/check.js --uuid 03F91310-9244-40FA-BCF6-DA31B832F34D --host slashdot.org --location https://s.yimg.jp/images/ds/ult/toppage/rapidjp-1.0.0.js
 * Checking a URL from a loaded DAT file:
 *   node scripts/check.js --dat ./out/SafeBrowsingData.dat --host excellentmovies.net --location https://excellentmovies.net
 * Checking a URL from a list URL:
 *   node scripts/check.js --http https://easylist-downloads.adblockplus.org/easylist.txt --host excellentmovies.net --location http://simple-adblock.com/adblocktest/files/adbanner.gif
 * Checking a list of URLs:
 *   node scripts/check.js --host www.cnet.com --list ./test/data/sitelist.txt
 * Checking a list of URLs with discovery:
 *   node scripts/check.js --host www.cnet.com --list ./test/data/sitelist.txt --discover
 * Get stats for a particular adblock list:
 *   node scripts/check.js --uuid 67F880F5-7602-4042-8A3D-01481FD7437A --stats
 */
const commander = require('commander')
const {makeAdBlockClientFromListUUID, makeAdBlockClientFromDATFile, makeAdBlockClientFromListURL, makeAdBlockClientFromString, makeAdBlockClientFromFilePath, readSiteList} = require('../lib/util')
const {FilterOptions} = require('..')

// Maps an option name given on the command line (e.g. "script") to the
// corresponding FilterOptions value; unknown names yield undefined.
const filterStringToFilterOption = (val) => FilterOptions[val]

commander
  .option('-u, --uuid [uuid]', 'UUID of the list to use')
  .option('-d, --dat [dat]', 'file path of the adblock .dat file')
  .option('-f, --filter [filter]', 'Brave filter rules')
  .option('-F, --filter-path [filterPath]', 'Brave filter rules file path')
  .option('-w, --http [http]', 'Web filter to use')
  .option('-h, --host [host]', 'host of the page that is being loaded')
  .option('-l, --location [location]', 'URL to use for the check')
  .option('-o, --output [output]', 'Optionally saves a DAT file')
  .option('-L, --list [list]', 'Filename for list of sites to check')
  .option('-D, --discover', 'If specified does filter discovery for matched filter')
  .option('-s, --stats', 'If specified outputs parsing stats')
  .option('-C, --cache', 'Optionally cache results and use cached results')
  .option('-O, --filter-option [filterOption]', 'Filter option to use', filterStringToFilterOption, FilterOptions.noFilterOption)
  .parse(process.argv)

// Start out rejected with the usage text; it is replaced by a real client
// promise below only when enough arguments were supplied.
let p = Promise.reject('Usage: node check.js --location <location> --host <host> [--uuid <uuid>]')

if (commander.host && (commander.location || commander.list) || commander.stats) {
  // The usage rejection is about to be discarded; attach a no-op handler so
  // it is not reported as an unhandled rejection.
  p.catch(() => {})
  if (commander.uuid) {
    p = makeAdBlockClientFromListUUID(commander.uuid)
  } else if (commander.dat) {
    p = makeAdBlockClientFromDATFile(commander.dat)
  } else if (commander.http) {
    p = makeAdBlockClientFromListURL(commander.http)
  } else if (commander.filter) {
    p = makeAdBlockClientFromString(commander.filter)
  } else if (commander.filterPath) {
    p = makeAdBlockClientFromFilePath(commander.filterPath)
  } else {
    // No explicit source given: fall back to the default lists.
    const defaultLists = require('../').adBlockLists.default
      .map((listObj) => listObj.listURL)
    p = makeAdBlockClientFromListURL(defaultLists)
  }
}

p.then((adBlockClient) => {
  if (commander.stats) {
    console.log('Parsing stats:', adBlockClient.getParsingStats())
    return
  }
  if (commander.location) {
    // Single URL check.
    console.log('params:', commander.location, commander.filterOption, commander.host)
    if (commander.discover) {
      console.log(adBlockClient.findMatchingFilters(commander.location, commander.filterOption, commander.host))
    } else {
      console.log('Matches: ', adBlockClient.matches(commander.location, commander.filterOption, commander.host))
    }
    console.log(adBlockClient.getMatchingStats())
  } else {
    // Bulk check of every entry in the site list file.
    const siteList = readSiteList(commander.list)
    let matchCount = 0
    let skipCount = 0
    console.time('check')
    if (commander.discover) {
      // Remembers the outcome per site so --cache can reuse it for repeats.
      const m = new Map()
      siteList.forEach((site) => {
        if (commander.cache && m.has(site)) {
          if (m.get(site)) {
            matchCount++
          } else {
            skipCount++
          }
          return
        }
        if (adBlockClient.findMatchingFilters(site, commander.filterOption, commander.host)) {
          matchCount++
          m.set(site, true)
        } else {
          skipCount++
          m.set(site, false)
        }
      })
    } else {
      siteList.forEach((site) => {
        if (adBlockClient.matches(site, commander.filterOption, commander.host)) {
          matchCount++
        } else {
          skipCount++
        }
      })
    }
    console.timeEnd('check')
    console.log(adBlockClient.getMatchingStats())
    console.log('Matching:', matchCount)
    console.log('Skipped:', skipCount)
  }
  if (commander.output) {
    require('fs').writeFileSync(commander.output, adBlockClient.serialize())
  }
}).catch((e) => {
  console.log('Error:', e)
})
/**
 * Downloads a single filter list and resolves with its sanitized text.
 * @param listURL - The URL to download the list from.
 * @param filter - A filtering function that can be optionally specified.
 *   It will be called with the URL's body and it can filter and return a new string.
 * @return A promise that resolves with the body after `filter` (when given)
 *   and sanitizeABPInput have been applied. It rejects with an Error when
 *   the request fails, when the status code is not 200, or when `filter`
 *   or the sanitizer throws.
 */
const getSingleListDataFromSingleURL = (listURL, filter) => {
  return new Promise((resolve, reject) => {
    request.get(listURL, function (error, response, body) {
      if (error) {
        console.error(`Request error: ${error}`)
        reject(error)
        return
      }
      if (response.statusCode !== 200) {
        const message = `Error status code ${response.statusCode} returned for URL: ${listURL}`
        console.error(message)
        reject(new Error(message))
        return
      }
      // This callback runs outside the promise executor, so an exception
      // thrown here would not reject the promise on its own.
      try {
        if (filter) {
          body = filter(body)
        }
        resolve(sanitizeABPInput(body))
      } catch (e) {
        reject(e)
      }
    })
  })
}
/**
 * Creates an AdBlock client from one list URL or an array of list URLs.
 * @param listURL - The URL (or array of URLs) to download the list(s) from.
 *   When several URLs are given their contents are joined with newlines
 *   before parsing.
 * @param filter - A filtering function that can be optionally specified.
 *   It will be called with each URL's body and it can filter and return a new string.
 * @return A promise that resolves with the client. On failure the error is
 *   logged and the promise rejects with the underlying reason.
 */
const makeAdBlockClientFromListURL = (listURL, filter) => {
  // Treat a single URL as a one-element list so both cases share one path.
  const urls = Array.isArray(listURL) ? listURL : [listURL]
  const requestPromises = urls.map((currentURL) => {
    console.log(`${currentURL}...`)
    return getSingleListDataFromSingleURL(currentURL, filter)
  })
  return Promise.all(requestPromises)
    .then((results) => {
      const body = sanitizeABPInput(results.join('\n'))
      return makeAdBlockClientFromString(body)
    })
    .catch((error) => {
      console.error(`getSingleListDataFromSingleURL error: ${error}`)
      // Re-throw so callers still see a rejection, now with its reason.
      throw error
    })
}
/**
 * Downloads a filter list and resolves with its sanitized text.
 *
 * This was a line-for-line copy of getSingleListDataFromSingleURL; it now
 * delegates to that helper so the two cannot drift apart.
 * @param listURL - The URL to download the list from.
 * @param filter - A filtering function that can be optionally specified.
 *   It will be called with the URL's body and it can filter and return a new string.
 * @return A promise for the filtered, sanitized list body.
 */
const getListBufferFromURL = (listURL, filter) =>
  getSingleListDataFromSingleURL(listURL, filter)
/**
 * Reads a list of sites in the format of one site per line
 * from a file path and returns an array with the sites.
 *
 * Both LF and CRLF line endings are accepted, and empty lines (including
 * the one produced by a trailing newline) are dropped so they are not
 * treated as sites.
 * @param path - Path of the UTF-8 text file to read.
 * @return Array of non-empty lines, in file order.
 */
const readSiteList = (path) =>
  fs.readFileSync(path, 'utf8')
    .split(/\r?\n/)
    .filter((site) => site.length > 0)
@contextlib.contextmanager
def scoped_env(key, value):
    """Temporarily set the environment variable *key* to *value*.

    On exit, including when the body raises, the previous state is
    restored: the old value is put back, or the variable is removed again
    if it was not set beforehand.  The previous implementation left a
    previously unset variable set to the empty string.
    """
    missing = object()  # sentinel: distinguishes "unset" from "set to ''"
    origin = os.environ.get(key, missing)
    os.environ[key] = value
    try:
        yield
    finally:
        if origin is missing:
            os.environ.pop(key, None)
        else:
            os.environ[key] = origin
def make_zip(zip_file_path, files, dirs):
    """Create a zip archive at *zip_file_path* from *files* and *dirs*.

    Any existing file at *zip_file_path* is removed first.  On macOS the
    ``zip`` command is run with ``-r -y`` over all files and directories.
    On other platforms the archive is written with ``zipfile`` using
    ``ZIP_DEFLATED``: each entry of *files* is added under its own name
    and each directory in *dirs* is walked recursively.

    Neither *files* nor *dirs* is modified.
    """
    safe_unlink(zip_file_path)
    if sys.platform == 'darwin':
        # Build a fresh argument list: the old `files += dirs` extended the
        # caller's list in place.
        execute(['zip', '-r', '-y', zip_file_path] + list(files) + list(dirs))
    else:
        # The context manager closes the archive even if a write fails.
        with zipfile.ZipFile(zip_file_path, "w",
                             zipfile.ZIP_DEFLATED) as zip_file:
            for filename in files:
                zip_file.write(filename, filename)
            for dirname in dirs:
                for root, _, filenames in os.walk(dirname):
                    for f in filenames:
                        zip_file.write(os.path.join(root, f))
def parse_version(version):
    """Split a dotted version string into exactly four string components.

    A leading ``'v'`` is dropped.  Versions with fewer than four components
    are padded on the right with ``'0'``; components beyond the fourth are
    discarded.
    """
    stripped = version[1:] if version[0] == 'v' else version
    parts = stripped.split('.')
    padding = ['0'] * max(0, 4 - len(parts))
    return parts[:4] + padding
example.org$$script[data-src="banner"] to delete 28 | // script element with matching attr 29 | FTHTMLFiltering = 02000, 30 | FTListTypesMask = FTException|FTElementHiding| 31 | FTElementHidingException|FTEmpty|FTComment| 32 | FTHTMLFiltering, 33 | }; 34 | 35 | enum FilterOption { 36 | FONoFilterOption = 0, 37 | FOScript = 01, 38 | FOImage = 02, 39 | FOStylesheet = 04, 40 | FOObject = 010, 41 | FOXmlHttpRequest = 020, 42 | FOObjectSubrequest = 040, 43 | FOSubdocument = 0100, 44 | FODocument = 0200, 45 | FOOther = 0400, 46 | FOXBL = 01000, 47 | FOCollapse = 02000, 48 | FODoNotTrack = 04000, 49 | FOElemHide = 010000, 50 | // Used internally only, do not use 51 | FOThirdParty = 020000, 52 | // Used internally only, do not use 53 | FONotThirdParty = 040000, 54 | // Not supported, but we will ignore these rules 55 | FOPing = 0100000, 56 | // Not supported, but we will ignore these rules 57 | FOPopup = 0200000, 58 | // This is only used by uBlock and currently all instances are 1x1 transparent gif which we already do for images 59 | FORedirect = 0400000, 60 | // Parse CSPs but consider them unsupported 61 | FOCSP = 01000000, 62 | FOFont = 02000000, 63 | FOMedia = 04000000, 64 | FOWebRTC = 010000000, 65 | FOGenericHide = 020000000, 66 | FOGenericBlock = 040000000, 67 | // Used by Adguard, purpose unknown, ignore 68 | FOEmpty = 0100000000, 69 | FOWebsocket = 0200000000, 70 | // important means to ignore all exception filters (those prefixed with @@). 
71 | FOImportant = 0400000000, 72 | 73 | FOUnknown = 04000000000, 74 | FOResourcesOnly = FOScript|FOImage|FOStylesheet|FOObject|FOXmlHttpRequest| 75 | FOObjectSubrequest|FOSubdocument|FODocument|FOOther|FOXBL|FOFont|FOMedia| 76 | FOWebRTC|FOWebsocket|FOPing, 77 | FOUnsupportedSoSkipCheck = FOPopup|FOCSP|FOElemHide|FOGenericHide|FOGenericBlock|FOEmpty|FOUnknown, 78 | FOUnsupportedButIgnore = FORedirect|FOImportant 79 | }; 80 | 81 | class Filter { 82 | friend class AdBlockClient; 83 | public: 84 | Filter(); 85 | Filter(const Filter &other); 86 | Filter(const char * data, int dataLen, char *domainList = nullptr, 87 | const char * host = nullptr, int hostLen = -1); 88 | 89 | Filter(FilterType filterType, FilterOption filterOption, 90 | FilterOption antiFilterOption, 91 | const char * data, int dataLen, 92 | char *domainList = nullptr, const char * host = nullptr, 93 | int hostLen = -1); 94 | 95 | ~Filter(); 96 | 97 | // Swaps the data members for 'this' and the passed in filter 98 | void swapData(Filter *f); 99 | 100 | // Checks to see if any filter matches the input but does not match 101 | // any exception rule. You may want to call the first overload to be 102 | // slightly more efficient 103 | bool matches(const char *input, int inputLen, 104 | FilterOption contextOption = FONoFilterOption, 105 | const char *contextDomain = nullptr, 106 | BloomFilter *inputBloomFilter = nullptr, 107 | const char *inputHost = nullptr, int inputHostLen = 0); 108 | bool matches(const char *input, FilterOption contextOption = FONoFilterOption, 109 | const char *contextDomain = nullptr, 110 | BloomFilter *inputBloomFilter = nullptr, 111 | const char *inputHost = nullptr, int inputHostLen = 0); 112 | 113 | // Nothing needs to be updated when a filter is added multiple times 114 | void Update(const Filter &) {} 115 | bool hasUnsupportedOptions() const; 116 | 117 | // Checks to see if the filter options match for the passed in data 118 | bool matchesOptions(const char *input, FilterOption 
contextOption, 119 | const char *contextDomain = nullptr); 120 | 121 | void parseOptions(const char *input); 122 | bool containsDomain(const char *domain, bool anti = false) const; 123 | // Returns true if the filter is composed of only domains and no anti domains 124 | // Note that the set of all domain and anti-domain rules are not mutually 125 | // exclusive. One example is: 126 | // domain=example.com|~foo.example.com restricts the filter to the example.com 127 | // domain with the exception of "foo.example.com" subdomain. 128 | bool isDomainOnlyFilter(); 129 | // Returns true if the filter is composed of only anti-domains and no domains 130 | bool isAntiDomainOnlyFilter(); 131 | uint32_t getDomainCount(bool anti = false); 132 | // One pass, will calculate internal member for domainCount and antiDomainCount 133 | void calculateDomainCounts(); 134 | 135 | uint64_t hash() const; 136 | uint64_t GetHash() const { 137 | return hash(); 138 | } 139 | // Two filters compare equal when their hosts are byte-for-byte identical; type and option flags are currently not compared (that check is commented out below) 140 | bool operator==(const Filter &rhs) const { 141 | /* 142 | if (filterType != rhs.filterType || filterOption != rhs.filterOption || 143 | antiFilterOption != rhs.antiFilterOption) { 144 | return false; 145 | } 146 | */ 147 | 148 | int hostLen = 0; 149 | if (host) { 150 | hostLen = this->hostLen == -1 ? 151 | static_cast<int>(strlen(host)) : this->hostLen; 152 | } 153 | int rhsHostLen = 0; 154 | if (rhs.host) { 155 | rhsHostLen = rhs.hostLen == -1 ? 156 | static_cast<int>(strlen(rhs.host)) : rhs.hostLen; 157 | } 158 | 159 | if (hostLen != rhsHostLen) { 160 | return false; 161 | } 162 | // Lengths are equal here; skip memcmp when both are empty because either host pointer may be null 163 | return hostLen == 0 || !memcmp(host, rhs.host, hostLen); 164 | } 165 | 166 | bool operator!=(const Filter &rhs) const { 167 | return !(*this == rhs); 168 | } 169 | 170 | uint32_t Serialize(char *buffer); 171 | uint32_t Deserialize(char *buffer, uint32_t bufferSize); 172 | 173 | // Holds true if the filter should not free memory because for example it 174 | // was loaded from a large buffer somewhere else via the serialize and 175 | // deserialize functions. 
176 | bool borrowed_data; 177 | 178 | FilterType filterType; 179 | FilterOption filterOption; 180 | FilterOption antiFilterOption; 181 | char *data; 182 | int dataLen; 183 | char *domainList; 184 | char *host; 185 | uint32_t domainCount; 186 | uint32_t antiDomainCount; 187 | int hostLen; 188 | 189 | protected: 190 | // Filters the domain list down to what's applicable for the context domain 191 | void filterDomainList(const char *domainList, char *destBuffer, 192 | const char *contextDomain, bool anti); 193 | // Checks for what is not excluded by the opposite list 194 | int getLeftoverDomainCount(const char *shouldBlockDomains, 195 | const char *shouldSkipDomains); 196 | 197 | // Parses a single option 198 | void parseOption(const char *input, int len); 199 | }; 200 | 201 | bool isThirdPartyHost(const char *baseContextHost, 202 | int baseContextHostLen, 203 | const char *testHost, 204 | int testHostLen); 205 | 206 | static inline bool isEndOfLine(char c) { 207 | return c == '\r' || c == '\n'; 208 | } 209 | 210 | #endif // FILTER_H_ 211 | -------------------------------------------------------------------------------- /bad_fingerprints8.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ 5 | 6 | #pragma once 7 | /** 8 | * 9 | * Auto generated bad filters 10 | */ 11 | const char *badFingerprints[] = { 12 | "/walmart", 13 | "redirect", 14 | "/microso", 15 | "/jquery.", 16 | "/library", 17 | "/account", 18 | "/common/", 19 | "/generat", 20 | "homepage", 21 | "social/j", 22 | "googlead", 23 | "tag/js/g", 24 | "analytic", 25 | "oublecli", 26 | "provider", 27 | "gpt/puba", 28 | "js?callb", 29 | "recommen", 30 | "&callbac", 31 | "ubads.g.", 32 | "gampad/a", 33 | "w.google", 34 | "google.c", 35 | "pagead/e", 36 | "pagead/j", 37 | "pagead/g", 38 | "ahoo.com", 39 | "zz/combo", 40 | "content/", 41 | "desktop-", 42 | "content-", 43 | ".yimg.co", 44 | "img.com/", 45 | "content_", 46 | "/overlay", 47 | "assets/s", 48 | "/themes/", 49 | "/header-", 50 | "rq/darla", 51 | "default/", 52 | "build/js", 53 | "/public/", 54 | "controll", 55 | "interest", 56 | "plugin/a", 57 | "dserver.", 58 | "gallery-", 59 | "platform", 60 | "resource", 61 | "default_", 62 | "template", 63 | "streams/", 64 | "assets/p", 65 | "styleshe", 66 | "reative/", 67 | "delivera", 68 | "300x250.", 69 | "js/beaco", 70 | "/footer-", 71 | "facebook", 72 | "timg.com", 73 | "d.double", 74 | "pagead/i", 75 | "external", 76 | "iframe_a", 77 | "instream", 78 | "com/js/a", 79 | "oogleuse", 80 | "gadgets/", 81 | "gallery/", 82 | "yfpadobj", 83 | "com/lib/", 84 | "/global-", 85 | "/global/", 86 | "componen", 87 | "/process", 88 | "frontpag", 89 | "amazon.c", 90 | "/images/", 91 | "/images-", 92 | "adsystem", 93 | "microsof", 94 | "/jquery-", 95 | ".com/lib", 96 | "library/", 97 | "common/r", 98 | "generate", 99 | "/Common/", 100 | "/product", 101 | "/static/", 102 | ".com/js/", 103 | "/homepag", 104 | "/social/", 105 | ".googlea", 106 | "/pagead/", 107 | "/tag/js/", 108 | "/googlea", 109 | "g.double", 110 | ".doublec", 111 | "doublecl", 112 | "search.c", 113 | "/provide", 114 | "/gpt/pub", 115 | ".js?call", 116 | "callback", 117 | "pubads.g", 118 | "/gampad/", 119 | "ww.googl", 120 | "oogle.co", 121 | 
"_300x250", 122 | "300x250_", 123 | "-300x250", 124 | "yahoo.co", 125 | "ttp://l.", 126 | "/zz/comb", 127 | "/content", 128 | "/ads/ads", 129 | "/ads-min", 130 | "l.yimg.c", 131 | "yimg.com", 132 | "-content", 133 | "/generic", 134 | "overlay/", 135 | "/assets/", 136 | "overlay.", 137 | "/media/t", 138 | "/media/p", 139 | "/css/ski", 140 | "common/a", 141 | "/toolbar", 142 | "/rq/darl", 143 | "/default", 144 | "/common_", 145 | "/desktop", 146 | "/build/j", 147 | "/plugin/", 148 | "-iframe-", 149 | "overlay-", 150 | ".adserve", 151 | "adserver", 152 | "/gallery", 153 | "_platfor", 154 | "/resourc", 155 | "/storage", 156 | "-source/", 157 | "/templat", 158 | "-templat", 159 | "/streams", 160 | "/video-a", 161 | "/stylesh", 162 | "/secure/", 163 | "/creativ", 164 | "creative", 165 | "/deliver", 166 | "/beacon/", 167 | "/js/beac", 168 | "/search/", 169 | "/search-", 170 | "/search_", 171 | "common/i", 172 | "/preview", 173 | "/google.", 174 | "/faceboo", 175 | "/static.", 176 | "ytimg.co", 177 | "/pubads.", 178 | "/iframe_", 179 | "/doublec", 180 | "ad.doubl", 181 | "/ad_data", 182 | "/externa", 183 | "accounts", 184 | "/instrea", 185 | "googleus", 186 | ".com/gad", 187 | "/gadgets", 188 | "-gallery", 189 | "/yfpadob", 190 | "/compone", 191 | "/control", 192 | "/recomme", 193 | "/frontpa", 194 | "/analyti", 195 | "/amazon.", 196 | "mazon.co", 197 | "images-a", 198 | "images/G", 199 | "images/I", 200 | "//images", 201 | "/redirec", 202 | "-adsyste", 203 | "edirect.", 204 | "icrosoft", 205 | "ommon/re", 206 | "omepage/", 207 | "oogleads", 208 | "ag/js/gp", 209 | "nalytics", 210 | "ubleclic", 211 | "pt/pubad", 212 | "s?callba", 213 | "allback=", 214 | "omepage_", 215 | "ecommend", 216 | "bads.g.d", 217 | "ampad/ad", 218 | ".google.", 219 | "ogle.com", 220 | "agead/ex", 221 | "agead/js", 222 | "agead/ga", 223 | "hoo.com/", 224 | "z/combo?", 225 | "ontent/s", 226 | "ontent_i", 227 | "ontent-a", 228 | "q/darla/", 229 | "ontent/b", 230 | "ontent/a", 231 | "ontrolle", 232 | 
"ontent/i", 233 | "server.y", 234 | "latform_", 235 | "emplate-", 236 | "emplates", 237 | "tyleshee", 238 | "mg.com/a", 239 | "s/beacon", 240 | "xternal_", 241 | "ogleuser", 242 | "ccounts/", 243 | "fpadobje", 244 | "omponent", 245 | "emplate/", 246 | "rontpage", 247 | "azon.com", 248 | "mmon/res", 249 | "ogleadse", 250 | "g/js/gpt", 251 | "ads.g.do", 252 | "bleclick", 253 | "/beacon.", 254 | "t/pubads", 255 | "?callbac", 256 | "commenda", 257 | "mpad/ads", 258 | "gle.com/", 259 | "gead/exp", 260 | "gead/js/", 261 | "gead/gad", 262 | "mg.com/z", 263 | "mg.com/r", 264 | "ntent/ad", 265 | "ntroller", 266 | "erver.ya", 267 | "ylesheet", 268 | "gleuserc", 269 | "padobjec", 270 | "mponent/", 271 | "g.com/a/", 272 | "zon.com/", 273 | "gleadser", 274 | "ds.g.dou", 275 | "leclick.", 276 | "/pubads_", 277 | "ommendat", 278 | "pad/ads?", 279 | "le.com/a", 280 | "ead/expa", 281 | "ead/gadg", 282 | "g.com/zz", 283 | "g.com/rq", 284 | "rver.yah", 285 | "ead/js/l", 286 | "leclick/", 287 | "leuserco", 288 | "adobject", 289 | ".com/a/1", 290 | "leadserv", 291 | "s.g.doub", 292 | "eclick.n", 293 | "pubads_i", 294 | "mmendati", 295 | "ad/ads?g", 296 | "e.com/ad", 297 | "ad/expan", 298 | "ad/gadge", 299 | ".com/zz/", 300 | ".com/rq/", 301 | "ver.yaho", 302 | "ad/js/li", 303 | "ad/ads?a", 304 | "eusercon", 305 | "dobject.", 306 | "eadservi", 307 | ".g.doubl", 308 | "click.ne", 309 | "ubads_im", 310 | "mendatio", 311 | "d/ads?gd", 312 | ".com/ads", 313 | "d/expans", 314 | "d/gadget", 315 | "com/zz/c", 316 | "com/rq/d", 317 | "er.yahoo", 318 | "d/js/lid", 319 | "d/ads?ad", 320 | "usercont", 321 | "object.j", 322 | "adservic", 323 | "lick.net", 324 | "bads_imp", 325 | "endation", 326 | "/ads?gdf", 327 | "com/ads/", 328 | "/expansi", 329 | "om/zz/co", 330 | "om/rq/da", 331 | "r.yahoo.", 332 | "/js/lida", 333 | "/ads?ad_", 334 | "serconte", 335 | "bject.js", 336 | "dservice", 337 | "ick.net/", 338 | "ads_impl", 339 | "ndations", 340 | "ads?gdfp", 341 | "expansio", 342 | "m/zz/com", 343 | 
"m/rq/dar", 344 | ".yahoo.c", 345 | "js/lidar", 346 | "ads?ad_r", 347 | "erconten", 348 | "services", 349 | "ck.net/p", 350 | "ds_impl_", 351 | "ck.net/g", 352 | "ds?gdfp_", 353 | "xpansion", 354 | "oo.com/a", 355 | "s/lidar.", 356 | "ds?ad_ru", 357 | "rcontent", 358 | "ervices.", 359 | "partner.", 360 | "k.net/ga", 361 | "s?gdfp_r", 362 | "pansion_", 363 | "o.com/a?", 364 | "/lidar.j", 365 | "s?ad_rul", 366 | "content.", 367 | "rvices.c", 368 | "artner.g", 369 | ".net/gam", 370 | "?gdfp_re", 371 | "ansion_e", 372 | "lidar.js", 373 | "?ad_rule", 374 | "ontent.c", 375 | "vices.co", 376 | "rtner.go", 377 | "net/gamp", 378 | "gdfp_req", 379 | "pagead2.", 380 | "nsion_em", 381 | "ad_rule=", 382 | "ntent.co", 383 | "ices.com", 384 | "tner.goo", 385 | "et/gampa", 386 | "dfp_req=", 387 | "agead2.g", 388 | "sion_emb", 389 | "tent.com", 390 | "ces.com/", 391 | "ner.goog", 392 | "t/gampad", 393 | "fp_req=1", 394 | "gead2.go", 395 | "ion_embe", 396 | "er.googl", 397 | "p_req=1&", 398 | "ead2.goo", 399 | "on_embed", 400 | "r.google", 401 | "ad2.goog", 402 | "n_embed.", 403 | "es.com/g", 404 | "d2.googl", 405 | "_embed.j", 406 | "s.com/gp", 407 | "2.google", 408 | "embed.js", 409 | ".com/gpt", 410 | ".googles", 411 | "com/gpt/", 412 | "googlesy", 413 | "om/gpt/p", 414 | "ooglesyn", 415 | "m/gpt/pu", 416 | "oglesynd", 417 | "glesyndi", 418 | "lesyndic", 419 | "esyndica", 420 | "syndicat", 421 | "yndicati", 422 | "ndicatio", 423 | "dication", 424 | "ication.", 425 | "cation.c", 426 | "ation.co", 427 | "tion.com", 428 | "ion.com/", 429 | "on.com/p", 430 | "n.com/pa", 431 | ".com/pag", 432 | "com/page", 433 | "om/pagea", 434 | "m/pagead", 435 | }; 436 | 437 | const char *badSubstrings[] = {"http", "www" }; 438 | -------------------------------------------------------------------------------- /test/options_test.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 
2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "./CppUnitLite/TestHarness.h" 15 | #include "./ad_block_client.h" 16 | #include "./util.h" 17 | 18 | using std::set; 19 | using std::string; 20 | using std::cout; 21 | using std::endl; 22 | 23 | void printSet(const set &domainSet) { 24 | std::for_each(domainSet.begin(), domainSet.end(), [](string const &s) { 25 | cout << s.c_str() << " "; 26 | }); 27 | } 28 | 29 | bool testOptionsWithFilter(Filter *f, const char *input, 30 | FilterOption expectedOption, FilterOption expectedAntiOption, 31 | const set &expectedDomains, 32 | const set &expectedAntiDomains) { 33 | if (f->filterOption != expectedOption) { 34 | cout << input << endl << "Actual options: " << f->filterOption 35 | << endl << "Expected: " << expectedOption << endl; 36 | return false; 37 | } 38 | if (f->antiFilterOption != expectedAntiOption) { 39 | cout << input << endl << "Actual anti options: " << f->antiFilterOption 40 | << endl << "Expected: " << expectedAntiOption << endl; 41 | return false; 42 | } 43 | if (expectedDomains.size() > 0 && expectedAntiDomains.size() == 0 && 44 | !f->isDomainOnlyFilter()) { 45 | cout << input << endl << "Domain only filter should be set." 46 | << endl << "Actual domain count: " << f->getDomainCount() 47 | << endl << "Actual anti domain count: " << f->getDomainCount(true) 48 | << endl << "Expected: " << expectedDomains.size() 49 | << endl << "Expected anti: " << expectedAntiDomains.size(); 50 | return false; 51 | } else if ((expectedDomains.size() == 0 || 52 | expectedAntiDomains.size() != 0) && 53 | f->isDomainOnlyFilter()) { 54 | cout << input << endl << "Domain only filter should NOT be set." 
55 | << endl << "Actual domain count: " << f->getDomainCount() 56 | << endl << "Actual anti domain count: " << f->getDomainCount(true) 57 | << endl << "Expected: " << expectedDomains.size() 58 | << endl << "Expected anti: " << expectedAntiDomains.size(); 59 | return false; 60 | } 61 | if (expectedDomains.size() != f->getDomainCount()) { 62 | cout << input << endl << "Actual domain count: " << f->getDomainCount() 63 | << endl << "Expected: " << expectedDomains.size() << endl; 64 | return false; 65 | } 66 | if (expectedAntiDomains.size() != f->getDomainCount(true)) { 67 | cout << input << endl << "Actual anti domain count: " 68 | << f->getDomainCount(false) << endl << "Expected: " 69 | << expectedAntiDomains.size() << endl; 70 | return false; 71 | } 72 | 73 | bool ret = true; 74 | std::for_each(expectedDomains.begin(), expectedDomains.end(), 75 | [&f, &expectedDomains, &ret, input](string const &s) { 76 | if (!f->containsDomain(s.c_str())) { 77 | cout << input << endl << "Actual domains: " 78 | << (f->domainList ? f->domainList : "") << endl << "Expected: "; 79 | printSet(expectedDomains); 80 | cout << endl; 81 | cout << "Not found: " << s.c_str() << endl; 82 | ret = false; 83 | } 84 | }); 85 | if (!ret) { 86 | return false; 87 | } 88 | 89 | std::for_each(expectedAntiDomains.begin(), expectedAntiDomains.end(), 90 | [&f, &expectedAntiDomains, &ret, input](string const &s) { 91 | if (!f->containsDomain(s.c_str(), true)) { 92 | cout << input << endl << "Actual anti domains: " 93 | << (f->domainList ? 
f->domainList : "") << endl << "Expected: "; 94 | printSet(expectedAntiDomains); 95 | cout << endl; 96 | ret = false; 97 | } 98 | }); 99 | if (!ret) { 100 | return false; 101 | } 102 | 103 | return true; 104 | } 105 | 106 | bool testOptions(const char *rawOptions, FilterOption expectedOption, 107 | FilterOption expectedAntiOption, 108 | set &&expectedDomains, // NOLINT 109 | set &&expectedAntiDomains) { // NOLINT 110 | Filter f; 111 | f.parseOptions(rawOptions); 112 | return testOptionsWithFilter(&f, rawOptions, expectedOption, 113 | expectedAntiOption, expectedDomains, expectedAntiDomains); 114 | } 115 | 116 | bool testFilterOptions(const char *input, FilterOption expectedOption, 117 | FilterOption expectedAntiOption, 118 | set &&expectedDomains, // NOLINT 119 | set &&expectedAntiDomains) { // NOLINT 120 | Filter f; 121 | parseFilter(input, &f); 122 | return testOptionsWithFilter(&f, input, expectedOption, 123 | expectedAntiOption, expectedDomains, expectedAntiDomains); 124 | } 125 | 126 | // Option parsing should split options properly 127 | TEST(options, splitOptions) { 128 | CHECK(testOptions("subdocument,third-party", 129 | static_cast(FOThirdParty | FOSubdocument), 130 | FONoFilterOption, 131 | {}, 132 | {})); 133 | 134 | CHECK(testOptions( 135 | "object-subrequest,script,domain=~msnbc.msn.com|~www.nbcnews.com", 136 | static_cast(FOObjectSubrequest | FOScript), 137 | FONoFilterOption, 138 | {}, 139 | { 140 | "msnbc.msn.com", 141 | "www.nbcnews.com" 142 | })); 143 | 144 | CHECK(testOptions("~document,xbl,domain=~foo|bar|baz|foo.xbl|gar,~collapse", 145 | FOXBL, 146 | static_cast(FODocument | FOCollapse), 147 | { 148 | "bar", 149 | "baz", 150 | "foo.xbl", 151 | "gar" 152 | }, 153 | { 154 | "foo" 155 | })); 156 | 157 | 158 | CHECK(testOptions("domain=~example.com|foo.example.com,script", 159 | FOScript, 160 | FONoFilterOption, 161 | { 162 | "foo.example.com" 163 | }, 164 | { 165 | "example.com" 166 | })); 167 | } 168 | 169 | // domain rule types should be 
properly parsed 170 | TEST(options, domainOptionStrings) { 171 | CHECK(testOptions("domain=example.com", 172 | FONoFilterOption, 173 | FONoFilterOption, 174 | { 175 | "example.com" 176 | }, 177 | {})); 178 | 179 | CHECK(testOptions("domain=example.com|example.net", 180 | FONoFilterOption, 181 | FONoFilterOption, 182 | { 183 | "example.com", 184 | "example.net" 185 | }, 186 | {})); 187 | 188 | CHECK(testOptions("domain=~example.com", 189 | FONoFilterOption, 190 | FONoFilterOption, 191 | { }, 192 | { 193 | "example.com" 194 | })); 195 | 196 | CHECK(testOptions("domain=example.com|~foo.example.com", 197 | FONoFilterOption, 198 | FONoFilterOption, 199 | { 200 | "example.com", 201 | }, 202 | { 203 | "foo.example.com" 204 | })); 205 | 206 | CHECK(testOptions("domain=~foo.example.com|example.com", 207 | FONoFilterOption, 208 | FONoFilterOption, 209 | { 210 | "example.com", 211 | }, 212 | { 213 | "foo.example.com" 214 | })); 215 | 216 | CHECK(testOptions("domain=~msnbc.msn.com|~www.nbcnews.com", 217 | FONoFilterOption, 218 | FONoFilterOption, 219 | { }, 220 | { 221 | "msnbc.msn.com", 222 | "www.nbcnews.com" 223 | })) 224 | } 225 | 226 | // parseFilter for full rules properly extracts options 227 | TEST(options, optionsFromFilter) { 228 | CHECK(testFilterOptions("domain=foo.bar", 229 | FONoFilterOption, 230 | FONoFilterOption, 231 | {}, 232 | {})) 233 | 234 | CHECK(testFilterOptions("+Ads/$~stylesheet", 235 | FONoFilterOption, 236 | FOStylesheet, 237 | {}, 238 | {})) 239 | 240 | CHECK(testFilterOptions("-advertising-$domain=~advertise.bingads.domain.com", 241 | FONoFilterOption, 242 | FONoFilterOption, 243 | { }, 244 | { 245 | "advertise.bingads.domain.com" 246 | })) 247 | 248 | CHECK(testFilterOptions(".se/?placement=$script,third-party", 249 | static_cast<FilterOption>(FOScript| FOThirdParty), 250 | FONoFilterOption, 251 | {}, 252 | {})) 253 | 254 | CHECK(testFilterOptions("https:$ping", 255 | static_cast<FilterOption>(FOPing), 256 | FONoFilterOption, 257 | {}, 258 | {})) 259 | 260 | 
CHECK(testFilterOptions("https:$popup", 261 | static_cast<FilterOption>(FOPopup), 262 | FONoFilterOption, 263 | {}, 264 | {})) 265 | 266 | CHECK(testFilterOptions( 267 | "||tst.net^$object-subrequest,third-party,domain=domain1.com|domain5.com", 268 | static_cast<FilterOption>(FOObjectSubrequest | FOThirdParty), 269 | FONoFilterOption, 270 | { 271 | "domain1.com", 272 | "domain5.com" 273 | }, 274 | {})) 275 | } 276 | -------------------------------------------------------------------------------- /bad_fingerprints7.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 | 6 | #pragma once 7 | /** 8 | * 9 | * Auto generated bad filters 10 | */ 11 | const char *badFingerprints[] = { 12 | "/walmar", 13 | "/micros", 14 | "/jQuery", 15 | "/jquery", 16 | ".mobile", 17 | ".com/li", 18 | "/librar", 19 | "/broker", 20 | "ttp://c", 21 | "/accoun", 22 | "ttps://", 23 | "/assets", 24 | "/common", 25 | "/respon", 26 | "/genera", 27 | "/store/", 28 | "/script", 29 | "/Common", 30 | "/produc", 31 | "/static", 32 | ".com/js", 33 | "/homepa", 34 | "/classi", 35 | "classic", 36 | "/social", 37 | "ttp://p", 38 | "/player", 39 | "/serve/", 40 | ".google", 41 | "/pagead", 42 | "/conver", 43 | "/widget", 44 | "/tag/js", 45 | ".com/ga", 46 | "ttp://j", 47 | "_widget", 48 | "/google", 49 | "g.doubl", 50 | "doublec", 51 | "search.", 52 | "/beacon", 53 | "/provid", 54 | "/gpt/pu", 55 | "/pubads", 56 | ".js?cal", 57 | "&callba", 58 | "politic", 59 | "-source", 60 | "pubads.", 61 | "/gampad", 62 | "=728x90", 63 | "google.", 64 | "_300x25", 65 | "300x250", 66 | "-300x25", 67 | "/client", 68 | ".min.js", 69 | "ttp://l", 70 | "/zz/com", 71 | "/combo?", 72 | "/styles", 73 | "/media/", 74 | "/conten", 75 | 
"/ads/ad", 76 | "/ads-mi", 77 | "l.yimg.", 78 | "yimg.co", 79 | "-conten", 80 | "/generi", 81 | "/overla", 82 | "/skins/", 83 | "/themes", 84 | "-search", 85 | "_search", 86 | "/css/sk", 87 | "/select", 88 | "/plugin", 89 | "/imagel", 90 | "/header", 91 | "/toolba", 92 | "/rq/dar", 93 | "/darla/", 94 | "/theme/", 95 | "/defaul", 96 | "/deskto", 97 | "/stream", 98 | "/build/", 99 | "/public", 100 | "-scroll", 101 | "interes", 102 | "-iframe", 103 | ".adserv", 104 | "adserve", 105 | "/jsonp/", 106 | "/galler", 107 | "/module", 108 | "_platfo", 109 | "/resour", 110 | "/badge_", 111 | "/storag", 112 | "/templa", 113 | "-templa", 114 | "/video-", 115 | "/video/", 116 | "/banner", 117 | "/secure", 118 | "/creati", 119 | "creativ", 120 | "sticker", 121 | "/delive", 122 | "/logo_d", 123 | "/js/bea", 124 | "/top_ba", 125 | "/search", 126 | ".com/pr", 127 | "/previe", 128 | "/footer", 129 | "/share-", 130 | "/extern", 131 | "static.", 132 | "_iframe", 133 | "/facebo", 134 | "/iframe", 135 | "/double", 136 | "ad.doub", 137 | "/index.", 138 | ".com/ad", 139 | "/ad_dat", 140 | "/custom", 141 | "/embed/", 142 | "/instre", 143 | "googleu", 144 | "/gadget", 145 | "/router", 146 | "-galler", 147 | "-footer", 148 | "/yfpado", 149 | "/newfor", 150 | "/global", 151 | "/layout", 152 | "/skin/d", 153 | "/index-", 154 | "?module", 155 | "/compon", 156 | "/contro", 157 | "/tracke", 158 | "/proces", 159 | "/recomm", 160 | "_report", 161 | "/bundle", 162 | "/frontp", 163 | "/analyt", 164 | "/amazon", 165 | "amazon.", 166 | "images-", 167 | ".com/im", 168 | "/images", 169 | "_engine", 170 | "//image", 171 | "-images", 172 | "/redire", 173 | "/Client", 174 | "-adsyst", 175 | "walmart", 176 | "redirec", 177 | "microso", 178 | "jquery-", 179 | "jquery.", 180 | "com/lib", 181 | "library", 182 | "account", 183 | "common/", 184 | "generat", 185 | "Common/", 186 | "product", 187 | "static/", 188 | "com/js/", 189 | "homepag", 190 | "social/", 191 | "googlea", 192 | "pagead/", 193 | "tag/js/", 194 | 
"google-", 195 | "analyti", 196 | "widgets", 197 | ".double", 198 | "oublecl", 199 | "earch.c", 200 | "storage", 201 | "provide", 202 | "gpt/pub", 203 | "js?call", 204 | "callbac", 205 | "default", 206 | "recomme", 207 | "ubads.g", 208 | "gampad/", 209 | "oogle.c", 210 | "00x250_", 211 | "control", 212 | "yahoo.c", 213 | "tp://l.", 214 | "zz/comb", 215 | "content", 216 | "desktop", 217 | "ads/ads", 218 | "ads-min", 219 | ".yimg.c", 220 | "img.com", 221 | "generic", 222 | "overlay", 223 | "assets/", 224 | "media/t", 225 | "themes/", 226 | "media/p", 227 | "search/", 228 | "search-", 229 | "css/ski", 230 | "header-", 231 | "toolbar", 232 | "rq/darl", 233 | "common_", 234 | "plugins", 235 | "build/j", 236 | "public/", 237 | "plugin/", 238 | "nterest", 239 | "iframe/", 240 | "iframe-", 241 | "dserver", 242 | "gallery", 243 | "platfor", 244 | "resourc", 245 | "source/", 246 | "templat", 247 | "streams", 248 | "video-a", 249 | "stylesh", 250 | "secure/", 251 | "reative", 252 | "media/s", 253 | "deliver", 254 | "00x250.", 255 | "beacon/", 256 | "js/beac", 257 | "search_", 258 | "preview", 259 | "footer-", 260 | "iframes", 261 | "faceboo", 262 | "iframe_", 263 | "d.doubl", 264 | "client/", 265 | "ad_data", 266 | "externa", 267 | "widgeta", 268 | "instrea", 269 | "oogleus", 270 | "com/gad", 271 | "gadgets", 272 | "stream-", 273 | "yfpadob", 274 | "global/", 275 | "global-", 276 | "module/", 277 | "module-", 278 | "compone", 279 | "process", 280 | "frontpa", 281 | "mazon.c", 282 | "mages-a", 283 | "images/", 284 | "Clients", 285 | "amazon-", 286 | "adsyste", 287 | "edirect", 288 | "icrosof", 289 | "ibrary/", 290 | "ommon/r", 291 | "enerate", 292 | "omepage", 293 | "ocial/j", 294 | "query.j", 295 | "ooglead", 296 | "ag/js/g", 297 | "oogle-a", 298 | "nalytic", 299 | "ublecli", 300 | "rovider", 301 | "pt/puba", 302 | "s?callb", 303 | "allback", 304 | "ecommen", 305 | "bads.g.", 306 | "ampad/a", 307 | "ogle.co", 308 | "agead/e", 309 | "agead/j", 310 | "agead/g", 311 | "ahoo.co", 
312 | "z/combo", 313 | "ontent/", 314 | "esktop-", 315 | "ontent-", 316 | "mg.com/", 317 | "ontent_", 318 | "verlay/", 319 | "ssets/s", 320 | "verlay.", 321 | "ommon/a", 322 | "q/darla", 323 | "efault/", 324 | "lugins/", 325 | "uild/js", 326 | "ontroll", 327 | "/strip_", 328 | "frame/i", 329 | "frame-m", 330 | "verlay-", 331 | "lugin/a", 332 | "server.", 333 | "allery-", 334 | "latform", 335 | "esource", 336 | "efault_", 337 | "emplate", 338 | "treams/", 339 | "ssets/p", 340 | "tyleshe", 341 | "eative/", 342 | "elivera", 343 | "s/beaco", 344 | "ommon/i", 345 | "acebook", 346 | "agead/i", 347 | "frames/", 348 | "xternal", 349 | "frame_a", 350 | "idgetap", 351 | "ccounts", 352 | "frames_", 353 | "nstream", 354 | "om/js/a", 355 | "ogleuse", 356 | "adgets/", 357 | "allery/", 358 | "fpadobj", 359 | "om/lib/", 360 | "ds-min.", 361 | "omponen", 362 | "rontpag", 363 | "azon.co", 364 | "mages/G", 365 | "lients/", 366 | "mages/I", 367 | "dsystem", 368 | "direct.", 369 | "crosoft", 370 | "mmon/re", 371 | "mepage/", 372 | "ogleads", 373 | "g/js/gp", 374 | "alytics", 375 | "ads.g.d", 376 | "bleclic", 377 | "beacon.", 378 | "t/pubad", 379 | "?callba", 380 | "llback=", 381 | "mepage_", 382 | "commend", 383 | "mpad/ad", 384 | "gle.com", 385 | "gead/ex", 386 | "gead/js", 387 | "gead/ga", 388 | "hoo.com", 389 | "g.com/z", 390 | "ntent/s", 391 | "ntent_i", 392 | "ntent-a", 393 | "g.com/r", 394 | "ntent/b", 395 | "ntent/a", 396 | "ntrolle", 397 | "ntent/i", 398 | "erver.y", 399 | "atform_", 400 | "ugins/a", 401 | "mplate-", 402 | "mplates", 403 | "yleshee", 404 | "g.com/a", 405 | "ternal_", 406 | "dgetapi", 407 | "gleuser", 408 | "counts/", 409 | "padobje", 410 | "mponent", 411 | "mplate/", 412 | "ontpage", 413 | "zon.com", 414 | "mon/res", 415 | "gleadse", 416 | "/js/gpt", 417 | "ds.g.do", 418 | "leclick", 419 | "pubads_", 420 | "ommenda", 421 | "pad/ads", 422 | "le.com/", 423 | "ead/exp", 424 | "ead/js/", 425 | "ead/gad", 426 | "oo.com/", 427 | ".com/zz", 428 | ".com/rq", 429 | 
"tent/ad", 430 | "troller", 431 | "rver.ya", 432 | "lesheet", 433 | "leuserc", 434 | "adobjec", 435 | "ponent/", 436 | ".com/a/", 437 | "on.com/", 438 | "n.com/a", 439 | "leadser", 440 | "s.g.dou", 441 | "eclick.", 442 | "ubads_i", 443 | "mmendat", 444 | "ad/ads?", 445 | "e.com/a", 446 | "ad/expa", 447 | "ad/gadg", 448 | "com/zz/", 449 | "com/rq/", 450 | "ver.yah", 451 | "ad/js/l", 452 | "eclick/", 453 | "euserco", 454 | "dobject", 455 | "com/a/1", 456 | "eadserv", 457 | ".g.doub", 458 | "click.n", 459 | "bads_im", 460 | "mendati", 461 | "d/ads?g", 462 | "ww.goog", 463 | "com/ads", 464 | "d/expan", 465 | "d/gadge", 466 | "om/zz/c", 467 | "om/rq/d", 468 | "er.yaho", 469 | "d/js/li", 470 | "d/ads?a", 471 | "usercon", 472 | "object.", 473 | "adservi", 474 | "lick.ne", 475 | "ads_imp", 476 | "endatio", 477 | "/ads?gd", 478 | "w.googl", 479 | "om/ads/", 480 | "/expans", 481 | "m/zz/co", 482 | "m/rq/da", 483 | "r.yahoo", 484 | "/js/lid", 485 | "/ads?ad", 486 | "sercont", 487 | "bject.j", 488 | "dservic", 489 | "ick.net", 490 | "ds_impl", 491 | "ndation", 492 | "ads?gdf", 493 | "expansi", 494 | ".yahoo.", 495 | "js/lida", 496 | "ads?ad_", 497 | "e.com/d", 498 | "erconte", 499 | "ject.js", 500 | "service", 501 | "ck.net/", 502 | "s_impl_", 503 | "dations", 504 | "ds?gdfp", 505 | "xpansio", 506 | "o.com/a", 507 | "s/lidar", 508 | "ds?ad_r", 509 | ".com/do", 510 | "rconten", 511 | "ervices", 512 | "partner", 513 | "k.net/g", 514 | "s?gdfp_", 515 | "pansion", 516 | ".com/a?", 517 | "/lidar.", 518 | "s?ad_ru", 519 | "com/dou", 520 | "ontent.", 521 | "rvices.", 522 | "artner.", 523 | ".net/ga", 524 | "?gdfp_r", 525 | "ansion_", 526 | "lidar.j", 527 | "?ad_rul", 528 | "om/doub", 529 | "ntent.c", 530 | "vices.c", 531 | "rtner.g", 532 | "net/gam", 533 | "gdfp_re", 534 | "nsion_e", 535 | "idar.js", 536 | "ad_rule", 537 | "m/doubl", 538 | "tent.co", 539 | "ices.co", 540 | "tner.go", 541 | "et/gamp", 542 | "dfp_req", 543 | "pagead2", 544 | "sion_em", 545 | "d_rule=", 546 | "ent.com", 
547 | "ces.com", 548 | "ner.goo", 549 | "t/gampa", 550 | "fp_req=", 551 | "agead2.", 552 | "ion_emb", 553 | "es.com/", 554 | "er.goog", 555 | "p_req=1", 556 | "gead2.g", 557 | "on_embe", 558 | "r.googl", 559 | "_req=1&", 560 | "ead2.go", 561 | "n_embed", 562 | "s.com/g", 563 | "ad2.goo", 564 | "_embed.", 565 | ".com/gp", 566 | "d2.goog", 567 | "embed.j", 568 | "com/gpt", 569 | "2.googl", 570 | "mbed.js", 571 | "om/gpt/", 572 | "googles", 573 | "m/gpt/p", 574 | "ooglesy", 575 | "oglesyn", 576 | "glesynd", 577 | "lesyndi", 578 | "esyndic", 579 | "syndica", 580 | "yndicat", 581 | "ndicati", 582 | "dicatio", 583 | "ication", 584 | "cation.", 585 | "ation.c", 586 | "tion.co", 587 | "ion.com", 588 | "n.com/p", 589 | ".com/pa", 590 | "com/pag", 591 | "om/page", 592 | "m/pagea", 593 | }; 594 | 595 | const char *badSubstrings[] = {"http", "www" }; 596 | -------------------------------------------------------------------------------- /test/js/matchingTest.js: -------------------------------------------------------------------------------- 1 | /* This Source Code Form is subject to the terms of the Mozilla Public 2 | * License, v. 2.0. If a copy of the MPL was not distributed with this file, 3 | * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ 4 | /* global describe, before, it */ 5 | 6 | const assert = require('assert') 7 | const {AdBlockClient} = require('../..') 8 | const {FilterOptions} = require('../..') 9 | 10 | describe('matching', function () { 11 | describe('strange rules', function () { 12 | it('can start with a slash', function () { 13 | const client = new AdBlockClient() 14 | client.parse('/banner1.gif\n') 15 | assert(client.matches('http://www.brianbondy.com/public/banner1.gif', FilterOptions.image, 'slashdot.org')) 16 | assert(client.matches('http://www.brianbondy.com/public//banner1.gif', FilterOptions.image, 'slashdot.org')) 17 | assert(!client.matches('http://www.brianbondy.com/public/abanner1.gif', FilterOptions.image, 'slashdot.org')) 18 | }) 19 | 20 | it('can start with a star', function () { 21 | const client = new AdBlockClient() 22 | client.parse('*/b/ss/*&aqe=$image,redirect=1x1-transparent.gif\n') 23 | assert(!client.matches('https://i.ytimg.com/vi/hxUAntt1z2c/hqdefault.jpg?custom=true&w=320&h=180&stc=true&jpg444=true&jpgq=90&sp=68&sigh=YNApXAOpSSoeEmCpXSLQwjnOdwY', FilterOptions.image, 'slashdot.org')) 24 | assert(client.matches('https://i.ytimg.com/b/ss/hxUAntt1z2c/hqdefault.jpg?a&aqe=$image,redirect=1x1-transparent.gif&dd', FilterOptions.image, 'slashdot.org')) 25 | assert(client.matches('https://i.ytimg.com/a/b/ss/hxUAntt1z2c/hqdefault.jpg?a&aqe=$image,redirect=1x1-transparent.gif&dd', FilterOptions.image, 'slashdot.org')) 26 | assert(!client.matches('https://i.ytimg.com/b/a/ss/hxUAntt1z2c/hqdefault.jpg?a&aqe=$image,redirect=1x1-transparent.gif&dd', FilterOptions.image, 'slashdot.org')) 27 | }) 28 | it('can start with multiple stars', function () { 29 | const client = new AdBlockClient() 30 | client.parse('***/b/ss/*&aqe=$image,redirect=1x1-transparent.gif\n') 31 | assert(!client.matches('https://i.ytimg.com/vi/hxUAntt1z2c/hqdefault.jpg?custom=true&w=320&h=180&stc=true&jpg444=true&jpgq=90&sp=68&sigh=YNApXAOpSSoeEmCpXSLQwjnOdwY', FilterOptions.image, 'slashdot.org')) 32 | 
assert(client.matches('https://i.ytimg.com/b/ss/hxUAntt1z2c/hqdefault.jpg?a&aqe=$image,redirect=1x1-transparent.gif&dd', FilterOptions.image, 'slashdot.org')) 33 | assert(client.matches('https://i.ytimg.com/a/b/ss/hxUAntt1z2c/hqdefault.jpg?a&aqe=$image,redirect=1x1-transparent.gif&dd', FilterOptions.image, 'slashdot.org')) 34 | assert(!client.matches('https://i.ytimg.com/b/a/ss/hxUAntt1z2c/hqdefault.jpg?a&aqe=$image,redirect=1x1-transparent.gif&dd', FilterOptions.image, 'slashdot.org')) 35 | }) 36 | it('can have multiple stars in the middle', function () { 37 | const client = new AdBlockClient() 38 | client.parse('a/********b\n') 39 | assert(client.matches('https://i.ytimg.com/a/d/e/f/b', FilterOptions.noFilterOption, 'slashdot.org')) 40 | assert(client.matches('https://i.ytimg.com/a/d/e/fb', FilterOptions.noFilterOption, 'slashdot.org')) 41 | assert(!client.matches('https://i.ytimg.com/a/d/e/fd', FilterOptions.noFilterOption, 'slashdot.org')) 42 | }) 43 | it('can block everything with a single *', function () { 44 | const client = new AdBlockClient() 45 | client.parse('*') 46 | assert(client.matches('https://brianbondy.com/d', FilterOptions.noFilterOption, 'slashdot.org')) 47 | assert(client.matches('https://brianbondy.com', FilterOptions.noFilterOption, 'slashdot.org')) 48 | }) 49 | it('can have no rule data', function () { 50 | const client = new AdBlockClient() 51 | client.parse('') 52 | assert(!client.matches('https://brianbondy.com/d', FilterOptions.noFilterOption, 'slashdot.org')) 53 | assert(!client.matches('https://brianbondy.com', FilterOptions.noFilterOption, 'slashdot.org')) 54 | }) 55 | it('can have rule data with just a ^', function () { 56 | const client = new AdBlockClient() 57 | client.parse('^') 58 | assert(!client.matches('https://brianbondy.com', FilterOptions.noFilterOption, 'slashdot.org')) 59 | assert(!client.matches('https://brianbondy.com', FilterOptions.noFilterOption, 'slashdot.org')) 60 | }) 61 | }) 62 | describe('host anchored exception 
with matching first party exception', function () { 63 | before(function () { 64 | this.client = new AdBlockClient() 65 | this.client.parse('-google-analytics.\n@@||www.scrumpoker.online^$~third-party') 66 | }) 67 | it('does not match', function () { 68 | assert(!this.client.matches('https://www.scrumpoker.online/js/angular-google-analytics.js', FilterOptions.script, 'www.scrumpoker.online')) 69 | }) 70 | it('detects as a hash set save', function () { 71 | assert.equal(this.client.getMatchingStats().numExceptionHashSetSaves, 1) 72 | }) 73 | }) 74 | describe('host anchored exception with not matching first party exception', function () { 75 | before(function () { 76 | this.client = new AdBlockClient() 77 | this.client.parse('-google-analytics.\n@@||www.scrumpoker.online^$~third-party') 78 | }) 79 | it('does match', function () { 80 | assert(this.client.matches('https://www.scrumpoker.online/js/angular-google-analytics.js', FilterOptions.script, 'www.brianbondy.com')) 81 | }) 82 | it('detects as a hash set save', function () { 83 | assert.equal(this.client.getMatchingStats().numExceptionHashSetSaves, 1) 84 | }) 85 | }) 86 | describe('no-fingerprint rules', function () { 87 | it('can match against a no-fingerprint rule', function () { 88 | const client = new AdBlockClient() 89 | client.parse('adv') 90 | assert(client.matches('https://brianbondy.com/adv', FilterOptions.noFilterOption, 'slashdot.org')) 91 | assert(!client.matches('https://brianbondy.com/omg', FilterOptions.noFilterOption, 'slashdot.org')) 92 | }) 93 | it('can match against a no-fingerprint domain only rule', function () { 94 | const client = new AdBlockClient() 95 | client.parse('adv$domain=brianbondy.com') 96 | assert(client.matches('https://brianbondy.com/adv', FilterOptions.noFilterOption, 'brianbondy.com')) 97 | assert(client.matches('https://digg.com/adv', FilterOptions.noFilterOption, 'brianbondy.com')) 98 | assert(!client.matches('https://digg.com/adv', FilterOptions.noFilterOption, 
'slashdot.org')) 99 | assert(!client.matches('https://brianbondy.com/adv', FilterOptions.noFilterOption, 'digg.com')) 100 | }) 101 | it('can match against a no-fingerprint anti-domain only rule', function () { 102 | const client = new AdBlockClient() 103 | client.parse('adv$domain=~brianbondy.com') 104 | assert(!client.matches('https://brianbondy.com/adv', FilterOptions.noFilterOption, 'brianbondy.com')) 105 | assert(!client.matches('https://digg.com/adv', FilterOptions.noFilterOption, 'brianbondy.com')) 106 | assert(client.matches('https://digg.com/adv', FilterOptions.noFilterOption, 'slashdot.org')) 107 | assert(client.matches('https://brianbondy.com/adv', FilterOptions.noFilterOption, 'digg.com')) 108 | }) 109 | it('can match against a no-fingerprint domain only exception rule', function () { 110 | const client = new AdBlockClient() 111 | client.parse('adv\n@@adv$domain=brianbondy.com') 112 | assert(!client.matches('https://brianbondy.com/adv', FilterOptions.noFilterOption, 'brianbondy.com')) 113 | assert(!client.matches('https://digg.com/adv', FilterOptions.noFilterOption, 'brianbondy.com')) 114 | assert(client.matches('https://digg.com/adv', FilterOptions.noFilterOption, 'slashdot.org')) 115 | assert(client.matches('https://brianbondy.com/adv', FilterOptions.noFilterOption, 'digg.com')) 116 | }) 117 | }) 118 | describe('findMatchingFilters return values', function () { 119 | before(function () { 120 | this.client = new AdBlockClient() 121 | this.client.parse('/pubads_\n@@||fastly.net/ad2/$image,script,xmlhttprequest\n.net/ad2/\n@@||fastly.net/ad2/$image,script,xmlhttprequest') 122 | }) 123 | it('match', function () { 124 | const queryResult = this.client.findMatchingFilters('https://securepubads.g.doubleclick.net/gpt/pubads_impl_rendering_193.js?cb=194', FilterOptions.script, 'www.cnn.com') 125 | assert.equal(queryResult.matches, true) 126 | assert.equal(queryResult.matchingFilter, '/pubads_') 127 | }) 128 | it('miss', function () { 129 | const queryResult = 
this.client.findMatchingFilters('https://cdn.cnn.com/cnn/.e1mo/img/4.0/logos/menu_entertainment.png', FilterOptions.image, 'www.cnn.com') 130 | assert.equal(queryResult.matches, false) 131 | }) 132 | it('whitelisted', function () { 133 | const queryResult = this.client.findMatchingFilters('https://0914.global.ssl.fastly.net/ad2/script/x.js?cb=1523383475084', FilterOptions.script, 'www.cnn.com') 134 | assert.equal(queryResult.matches, false) 135 | assert.equal(queryResult.matchingFilter, '.net/ad2/') 136 | assert.equal(queryResult.matchingExceptionFilter, 'fastly.net/ad2/') 137 | }) 138 | }) 139 | describe('Filters with unknown options are ignored', function () { 140 | it('known unsupported options are not blocked', function () { 141 | const client = new AdBlockClient() 142 | client.parse('adv$ping') 143 | assert(!client.matches('https://brianbondy.com/adv', FilterOptions.noFilterOption, 'slashdot.org')) 144 | }) 145 | it('CSPs are ignored', function () { 146 | const client = new AdBlockClient() 147 | client.parse('adv$csp=script-src \'self\' \'unsafe-inline\' \'unsafe-eval\' data: *.google.com *.gstatic.com *.google-analytics.com') 148 | assert(!client.matches('!https://brianbondy.com/adv', FilterOptions.noFilterOption, 'slashdot.org')) 149 | }) 150 | it('unknown unsupported options are not blocked', function () { 151 | const client = new AdBlockClient() 152 | client.parse('adv$somethingnew=3') 153 | assert(!client.matches('https://brianbondy.com/adv', FilterOptions.noFilterOption, 'slashdot.org')) 154 | }) 155 | it('redirects are still blocked', function () { 156 | const client = new AdBlockClient() 157 | client.parse('adv$image,redirect=1x1-transparent.gif&dd') 158 | assert(client.matches('https://brianbondy.com/adv', FilterOptions.image, 'slashdot.org')) 159 | }) 160 | }) 161 | describe('Type option matching', function () { 162 | describe('font', function () { 163 | it('option matches for no resource type filters', function () { 164 | const client = new 
AdBlockClient() 165 | client.parse('adv') 166 | assert(client.matches('https://brianbondy.com/adv', FilterOptions.font, 'slashdot.org')) 167 | }) 168 | it('option matches for same resource type', function () { 169 | const client = new AdBlockClient() 170 | client.parse('adv$font') 171 | assert(client.matches('https://brianbondy.com/adv', FilterOptions.font, 'slashdot.org')) 172 | }) 173 | it('doesn\'t matche when resource type differs', function () { 174 | const client = new AdBlockClient() 175 | client.parse('adv$font') 176 | assert(!client.matches('https://brianbondy.com/adv', FilterOptions.image, 'slashdot.org')) 177 | }) 178 | it('option matches for same resource type for rule with multiple types', function () { 179 | const client = new AdBlockClient() 180 | client.parse('adv$font,image,script') 181 | assert(client.matches('https://brianbondy.com/adv', FilterOptions.font, 'slashdot.org')) 182 | }) 183 | it('option matches for rule without options', function () { 184 | const client = new AdBlockClient() 185 | client.parse('adv') 186 | assert(client.matches('https://brianbondy.com/adv', FilterOptions.font, 'slashdot.org')) 187 | }) 188 | }) 189 | describe('other', function () { 190 | it('option matches for rule without options', function () { 191 | const client = new AdBlockClient() 192 | client.parse('adv') 193 | assert(client.matches('https://brianbondy.com/adv', FilterOptions.other, 'slashdot.org')) 194 | }) 195 | }) 196 | describe('document', function () { 197 | it('should not match when filter rule has no type', function () { 198 | const client = new AdBlockClient() 199 | client.parse('adv') 200 | assert(!client.matches('https://brianbondy.com/adv', FilterOptions.document, 'slashdot.org')) 201 | }) 202 | it('should not match when filter rule is an image', function () { 203 | const client = new AdBlockClient() 204 | client.parse('adv$image') 205 | assert(!client.matches('https://brianbondy.com/adv', FilterOptions.document, 'slashdot.org')) 206 | }) 207 | 
it('should match when filter rule is a document', function () { 208 | const client = new AdBlockClient() 209 | client.parse('adv$document') 210 | assert(client.matches('https://brianbondy.com/adv', FilterOptions.document, 'slashdot.org')) 211 | }) 212 | it('filter rule that is for documents should not match other types', function () { 213 | const client = new AdBlockClient() 214 | client.parse('adv$document') 215 | assert(!client.matches('https://brianbondy.com/adv', FilterOptions.other, 'slashdot.org')) 216 | }) 217 | }) 218 | describe('noFilterOption', function () { 219 | it('should not match when filter rule has no resource type', function () { 220 | const client = new AdBlockClient() 221 | client.parse('adv') 222 | assert(client.matches('https://brianbondy.com/adv', FilterOptions.noFilterOption, 'slashdot.org')) 223 | }) 224 | it('should not match when filter rule has a resource type', function () { 225 | const client = new AdBlockClient() 226 | client.parse('adv$image') 227 | assert(!client.matches('https://brianbondy.com/adv', FilterOptions.noFilterOption, 'slashdot.org')) 228 | }) 229 | }) 230 | }) 231 | describe('left anchored exception filter', function () { 232 | describe('simple block match', function () { 233 | before(function () { 234 | this.client = new AdBlockClient() 235 | this.client.parse('|http://baddomain.example/') 236 | }) 237 | it('matches exactly from the left', function () { 238 | assert(this.client.matches('http://baddomain.example/banner.gif', FilterOptions.image, 'http://baddomain.example/')) 239 | }) 240 | it('does not match if scheme does not match', function () { 241 | assert(!this.client.matches('https://baddomain.example/banner.gif', FilterOptions.image, 'http://baddomain.example/')) 242 | }) 243 | it('does not match not at start', function () { 244 | assert(!this.client.matches('http://gooddomain.example/analyze?http://baddomain.example', FilterOptions.image, 'http://baddomain.example/')) 245 | }) 246 | }) 247 | }) 248 | }) 249 | 
-------------------------------------------------------------------------------- /bad_fingerprints5.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2015 Brian R. Bondy. Distributed under the MPL2 license. 2 | * This Source Code Form is subject to the terms of the Mozilla Public 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 | 6 | #pragma once 7 | /** 8 | * 9 | * Auto generated bad filters 10 | */ 11 | const char *badFingerprints[] = { 12 | "ttp:/", 13 | "/walm", 14 | ".com/", 15 | ".html", 16 | "/micr", 17 | "/ajax", 18 | "/jQue", 19 | "/jque", 20 | ".mobi", 21 | "/libr", 22 | "/brok", 23 | "/acs/", 24 | "/acco", 25 | "/js/w", 26 | "ttps:", 27 | "/asse", 28 | "/ones", 29 | "/comm", 30 | "/resp", 31 | "/styl", 32 | "/js/s", 33 | "/gene", 34 | "/stor", 35 | "/js/b", 36 | "/scri", 37 | "/Stat", 38 | "/Comm", 39 | "/js/l", 40 | "/lib/", 41 | "/prod", 42 | "/stat", 43 | "stati", 44 | "/tags", 45 | "/api/", 46 | "/ligh", 47 | "/icon", 48 | "/home", 49 | ".gif?", 50 | "/coll", 51 | "/site", 52 | ".link", 53 | ".org/", 54 | "/clas", 55 | "/css/", 56 | "/app.", 57 | "/cdn-", 58 | "socia", 59 | "/soci", 60 | "/play", 61 | "/serv", 62 | "/load", 63 | "/auto", 64 | "fresh", 65 | ".goog", 66 | "/page", 67 | "/conv", 68 | "/widg", 69 | "/cdn.", 70 | "/tag/", 71 | "/js/g", 72 | "_widg", 73 | "/goog", 74 | "googl", 75 | "g.dou", 76 | ".net/", 77 | "/view", 78 | "searc", 79 | "/beac", 80 | "/jobs", 81 | "/prov", 82 | "/gpt/", 83 | "/puba", 84 | "pubad", 85 | "/json", 86 | "_home", 87 | "/get_", 88 | "&call", 89 | "about", 90 | "games", 91 | "-book", 92 | "video", 93 | "-sour", 94 | "-show", 95 | "world", 96 | "china", 97 | "-your", 98 | "music", 99 | "/gamp", 100 | "/ads?", 101 | "=728x", 102 | "/safe", 103 | "/html", 104 | "/cont", 105 | "/ads/", 106 | "/user", 107 | "doubl", 108 | "_300x", 109 | "300x2", 110 | "-300x", 111 | 
"/js/r", 112 | "/clie", 113 | ".min.", 114 | "data.", 115 | "/news", 116 | "/zz/c", 117 | "/comb", 118 | "/medi", 119 | "media", 120 | "/p/co", 121 | "/over", 122 | "class", 123 | "/ads-", 124 | "yimg.", 125 | "-cont", 126 | "_inde", 127 | "/lead", 128 | "/skin", 129 | "/them", 130 | "/base", 131 | "-imag", 132 | "image", 133 | "/regi", 134 | "-load", 135 | "/meta", 136 | "-sear", 137 | "_sear", 138 | "/even", 139 | "/sele", 140 | "/node", 141 | "/plug", 142 | "-stri", 143 | "/os/m", 144 | "/trac", 145 | "/subs", 146 | "/imag", 147 | "/head", 148 | "/tool", 149 | "/chec", 150 | "/rq/d", 151 | "/darl", 152 | "/metr", 153 | "/defa", 154 | "/desk", 155 | "/stre", 156 | "strea", 157 | "_site", 158 | "_ads_", 159 | "/buil", 160 | "/vide", 161 | "/publ", 162 | "/perf", 163 | "-scro", 164 | "/ad-c", 165 | "/inte", 166 | "inter", 167 | "/inst", 168 | "/data", 169 | "-ifra", 170 | "-prom", 171 | "promo", 172 | "-skin", 173 | "skin-", 174 | "/colo", 175 | "/anim", 176 | ".adse", 177 | "/tran", 178 | "/gall", 179 | "/modu", 180 | "_plat", 181 | "/reso", 182 | "/edit", 183 | "/badg", 184 | "/temp", 185 | "-temp", 186 | "&l=VI", 187 | "-api.", 188 | "ads.y", 189 | "/bann", 190 | "banne", 191 | "://pr", 192 | "/secu", 193 | "/true", 194 | ".ads.", 195 | "/crea", 196 | "creat", 197 | "stick", 198 | "_yaho", 199 | "?pid=", 200 | "/deli", 201 | "/js/e", 202 | "/js/p", 203 | "/pers", 204 | "/js/m", 205 | "/menu", 206 | "/logo", 207 | "/loca", 208 | "/js/t", 209 | "/time", 210 | "/sign", 211 | "/js/c", 212 | "/js/d", 213 | "/disc", 214 | "/top-", 215 | "/top_", 216 | "/js/h", 217 | "/sear", 218 | "/code", 219 | "/movi", 220 | "movie", 221 | "/prev", 222 | "/foot", 223 | "/shar", 224 | "/exte", 225 | "_ifra", 226 | "/face", 227 | ".php/", 228 | "-page", 229 | "://ad", 230 | "/adi/", 231 | "/com.", 232 | ";ord=", 233 | "/ifra", 234 | "/doub", 235 | "ad.do", 236 | "/feed", 237 | "/inde", 238 | "/ad_d", 239 | "_host", 240 | "/cust", 241 | "/post", 242 | "/embe", 243 | "&show", 244 | 
"/ad_s", 245 | "_stat", 246 | "/js/a", 247 | "/api.", 248 | "://oa", 249 | "/gadg", 250 | "/core", 251 | "/yaho", 252 | "/sda/", 253 | "/td-a", 254 | "/app-", 255 | "/rout", 256 | "-gall", 257 | "-foot", 258 | "/eng/", 259 | "/yfpa", 260 | "feedb", 261 | "/ad_t", 262 | "/2015", 263 | "/alog", 264 | "/v5/a", 265 | "/main", 266 | "/res/", 267 | ".php?", 268 | "&adst", 269 | "_1000", 270 | "&prm=", 271 | "/newf", 272 | "/dp.h", 273 | "/worl", 274 | "/glob", 275 | "/layo", 276 | "-floo", 277 | "cover", 278 | "/geti", 279 | "/new_", 280 | "?modu", 281 | "/util", 282 | "/tpl/", 283 | "/comp", 284 | "://a.", 285 | "/proc", 286 | "/reco", 287 | "&type", 288 | "/mini", 289 | "_repo", 290 | "/init", 291 | "/bund", 292 | "/fron", 293 | "/boot", 294 | "/anal", 295 | "/live", 296 | ".aspx", 297 | "/amaz", 298 | "_engi", 299 | "rotat", 300 | "//ima", 301 | "/redi", 302 | "/Clie", 303 | "-adsy", 304 | "_adco", 305 | "tp://", 306 | "walma", 307 | "redir", 308 | "micro", 309 | "com/a", 310 | "ajax/", 311 | "jQuer", 312 | "jquer", 313 | "mobil", 314 | "com/l", 315 | "libra", 316 | "broke", 317 | "accou", 318 | "tps:/", 319 | "asset", 320 | "store", 321 | "css/s", 322 | "commo", 323 | "respo", 324 | "style", 325 | "js/sh", 326 | "gener", 327 | "servi", 328 | "scrip", 329 | "Stati", 330 | "tatic", 331 | "Commo", 332 | "js/li", 333 | "produ", 334 | "com/s", 335 | "com/j", 336 | "light", 337 | "homep", 338 | "page/", 339 | "site/", 340 | "lassi", 341 | "css/a", 342 | "minif", 343 | "ocial", 344 | "playe", 345 | "com/v", 346 | "serve", 347 | "oogle", 348 | "leads", 349 | "adser", 350 | "com/p", 351 | "pagea", 352 | "conve", 353 | "widge", 354 | "org/e", 355 | "tag/j", 356 | "analy", 357 | "com/g", 358 | "com/m", 359 | ".doub", 360 | "ouble", 361 | "click", 362 | "earch", 363 | "com/b", 364 | "beaco", 365 | "org/a", 366 | "stora", 367 | "provi", 368 | "gpt/p", 369 | "ubads", 370 | "callb", 371 | "jsonp", 372 | "defau", 373 | "recom", 374 | "comme", 375 | "share", 376 | "story", 377 | 
"sourc", 378 | "show-", 379 | "secur", 380 | "gampa", 381 | "728x9", 382 | "html/", 383 | "net/p", 384 | "clien", 385 | "00x25", 386 | "embed", 387 | "contr", 388 | "bundl", 389 | "min.j", 390 | "news.", 391 | "yahoo", 392 | "zz/co", 393 | "combo", 394 | "edia/", 395 | "conte", 396 | "deskt", 397 | "p/com", 398 | "ads/a", 399 | "ads-m", 400 | "img.c", 401 | "index", 402 | "news/", 403 | "overl", 404 | "skins", 405 | "theme", 406 | "css/t", 407 | "event", 408 | "custo", 409 | "core/", 410 | "core-", 411 | "selec", 412 | "plugi", 413 | "repor", 414 | "track", 415 | "magel", 416 | "boots", 417 | "heade", 418 | "toolb", 419 | "check", 420 | "nter/", 421 | "rq/da", 422 | "darla", 423 | "metro", 424 | "tream", 425 | "site_", 426 | "news_", 427 | "build", 428 | "apple", 429 | "ads-p", 430 | "ideo.", 431 | "flash", 432 | "publi", 433 | "Clien", 434 | "scrol", 435 | "strip", 436 | "instr", 437 | "menta", 438 | "load-", 439 | "ifram", 440 | "skin/", 441 | "color", 442 | "site-", 443 | "trans", 444 | "galle", 445 | "modul", 446 | "platf", 447 | "resou", 448 | "sport", 449 | "useri", 450 | "badge", 451 | "utils", 452 | "json/", 453 | "view/", 454 | "view-", 455 | "compo", 456 | "templ", 457 | "l=VID", 458 | "ideo-", 459 | "ideo/", 460 | "type=", 461 | "site=", 462 | "main/", 463 | "anner", 464 | "reati", 465 | "ticke", 466 | "deliv", 467 | "liver", 468 | "js/ga", 469 | "logo_", 470 | "local", 471 | "link/", 472 | "js/be", 473 | "js/to", 474 | "top_b", 475 | "home_", 476 | "js/se", 477 | "previ", 478 | "foote", 479 | "exter", 480 | "faceb", 481 | "com/?", 482 | "//ad.", 483 | "home/", 484 | "ads?a", 485 | "d.dou", 486 | "ad_da", 487 | "ads/c", 488 | "com/i", 489 | "menu/", 490 | "ad_st", 491 | "gadge", 492 | "route", 493 | "main-", 494 | "page-", 495 | "yfpad", 496 | "usera", 497 | "ad_ti", 498 | "php?c", 499 | "amese", 500 | "newfo", 501 | "dp.ht", 502 | "globa", 503 | "layou", 504 | "logo.", 505 | "floor", 506 | "disco", 507 | "conta", 508 | "exten", 509 | "proce", 510 | 
"mini-", 511 | "front", 512 | "live.", 513 | "com/d", 514 | "html?", 515 | "amazo", 516 | "mages", 517 | "engin", 518 | "adsys", 519 | "adcon", 520 | "almar", 521 | "edire", 522 | "icros", 523 | "Query", 524 | "query", 525 | "obile", 526 | "om/li", 527 | "ibrar", 528 | "roker", 529 | "p://c", 530 | "ccoun", 531 | "ps://", 532 | "ssets", 533 | "ommon", 534 | "espon", 535 | "tyles", 536 | "enera", 537 | "ervic", 538 | "tore/", 539 | "cript", 540 | "atic/", 541 | "roduc", 542 | "om/js", 543 | "ontro", 544 | "omepa", 545 | "cial/", 546 | "p://p", 547 | "layer", 548 | "erve/", 549 | "oglea", 550 | "dserv", 551 | "agead", 552 | "onver", 553 | "idget", 554 | "ag/js", 555 | "ogle-", 556 | "nalyt", 557 | "om/ga", 558 | "p://j", 559 | "erve.", 560 | "ublec", 561 | "arch.", 562 | "eacon", 563 | "torag", 564 | "rovid", 565 | "pt/pu", 566 | "allba", 567 | "efaul", 568 | "ecomm", 569 | "ommen", 570 | "ource", 571 | "ecure", 572 | "bads.", 573 | "ampad", 574 | "28x90", 575 | "0x250", 576 | "frame", 577 | "ogle.", 578 | "lient", 579 | "undle", 580 | "in.js", 581 | "ahoo.", 582 | "p://l", 583 | "z/com", 584 | "ombo?", 585 | "onten", 586 | "eskto", 587 | "ds/ad", 588 | "ds-mi", 589 | "mg.co", 590 | "ndex/", 591 | "eneri", 592 | "verla", 593 | "kins/", 594 | "hare-", 595 | "dia/t", 596 | "hemes", 597 | "dia/p", 598 | "arch/", 599 | "arch-", 600 | "ss/sk", 601 | "ustom", 602 | "elect", 603 | "tyle/", 604 | "lugin", 605 | "tatio", 606 | "eport", 607 | "ootst", 608 | "ore/a", 609 | "eader", 610 | "oolba", 611 | "q/dar", 612 | "arla/", 613 | "heme/", 614 | "uild/", 615 | "ublic", 616 | "croll", 617 | "vents", 618 | "const", 619 | "sonp/", 620 | "aller", 621 | "ite/g", 622 | "ontri", 623 | "odule", 624 | "latfo", 625 | "esour", 626 | "adge_", 627 | "iew/a", 628 | "empla", 629 | "age/a", 630 | "reams", 631 | "deo-a", 632 | "iew/t", 633 | "ublis", 634 | "eativ", 635 | "icker", 636 | "dia/s", 637 | "elive", 638 | "ivera", 639 | "ogo_d", 640 | "s/bea", 641 | "op_ba", 642 | "arch_", 643 | 
"om/pr", 644 | "revie", 645 | "ooter", 646 | "xtern", 647 | "atic.", 648 | "acebo", 649 | "ome/_", 650 | "ndex.", 651 | "om/ad", 652 | "d_dat", 653 | "tyle_", 654 | "mbed/", 655 | "nstre", 656 | "adget", 657 | "ream-", 658 | "outer", 659 | "fpado", 660 | "d_tim", 661 | "ewfor", 662 | "lobal", 663 | "ayout", 664 | "kin/d", 665 | "om/ba", 666 | "ndex-", 667 | "ompon", 668 | "racke", 669 | "roces", 670 | "rontp", 671 | "mazon", 672 | "ages-", 673 | "om/im", 674 | "ages/", 675 | "ngine", 676 | "dsyst", 677 | "lmart", 678 | "direc", 679 | "croso", 680 | "uery/", 681 | "uery-", 682 | "uery.", 683 | "m/lib", 684 | "brary", 685 | "count", 686 | "g/js/", 687 | "mmon/", 688 | "yles/", 689 | "nerat", 690 | "rvice", 691 | "ripts", 692 | "oduct", 693 | "m/js/", 694 | "ntrol", 695 | "mepag", 696 | "g.com", 697 | "ial/j", 698 | "glead", 699 | "gead/", 700 | "ader.", 701 | "gle-a", 702 | "alyti", 703 | "rve.p", 704 | "dgets", 705 | "ads.g", 706 | "blecl", 707 | "rch.c", 708 | "acon.", 709 | "orage", 710 | "ovide", 711 | "t/pub", 712 | "llbac", 713 | "fault", 714 | "mmend", 715 | "ation", 716 | "ivers", 717 | "exper", 718 | "youre", 719 | "mpad/", 720 | "rame/", 721 | "gle.c", 722 | "x250_", 723 | "hoo.c", 724 | "://l.", 725 | "ntent", 726 | "sktop", 727 | "s-min", 728 | "s/ads", 729 | "dex/l", 730 | "neric", 731 | "erlay", 732 | "sets/", 733 | "s/ski", 734 | "emes/", 735 | "ader-", 736 | "rapid", 737 | "ative", 738 | "otstr", 739 | "olbar", 740 | "mmon_", 741 | "ugins", 742 | "ild/j", 743 | "blic/", 744 | "ugin/", 745 | "/stri", 746 | "rame-", 747 | "onstr", 748 | "erver", 749 | "llery", 750 | "ntrib", 751 | "atfor", 752 | "ins/m", 753 | "ins/a", 754 | "eview", 755 | "urce/", 756 | "mplat", 757 | "eams/", 758 | "ylesh", 759 | "blish", 760 | "cure/", 761 | "x250.", 762 | "acon/", 763 | "oter-", 764 | "rames", 765 | "ceboo", 766 | "rame_", 767 | "rame.", 768 | "ramed", 769 | "ient/", 770 | "_data", 771 | "terna", 772 | "dgeta", 773 | "m/gad", 774 | "lecti", 775 | "padob", 776 | 
"_time", 777 | "obal/", 778 | "obal-", 779 | "dule/", 780 | "dule-", 781 | "mpone", 782 | "ocess", 783 | "ontpa", 784 | "azon.", 785 | "ges-a", 786 | "ges/G", 787 | "ients", 788 | "ges/I", 789 | "azon-", 790 | "syste", 791 | "irect", 792 | "rosof", 793 | "rary/", 794 | "mon/r", 795 | "erate", 796 | "ipts/", 797 | "epage", 798 | "ery.j", 799 | "eadse", 800 | "vices", 801 | "js/gp", 802 | "lytic", 803 | "ds.g.", 804 | "lecli", 805 | "vider", 806 | "bads_", 807 | "lback", 808 | "menda", 809 | "tions", 810 | "cessa", 811 | "pad/a", 812 | "le.co", 813 | "ead/e", 814 | "ead/j", 815 | "ead/g", 816 | "gets/", 817 | "ets/h", 818 | "oo.co", 819 | "com/z", 820 | "tent/", 821 | "ktop-", 822 | "tion/", 823 | "tent-", 824 | "ent/s", 825 | "tent_", 826 | "lead-", 827 | "rlay/", 828 | "ets/s", 829 | "rlay.", 830 | "-min.", 831 | "tive/", 832 | "tstra", 833 | "tribu", 834 | "mon/a", 835 | "com/r", 836 | "ault/", 837 | "gins/", 838 | "ld/js", 839 | "troll", 840 | "trip_", 841 | "nstru", 842 | "ame/i", 843 | "ame-m", 844 | "ents/", 845 | "rlay-", 846 | "gin/a", 847 | "rver.", 848 | "lery-", 849 | "tform", 850 | "ault_", 851 | "plate", 852 | "ets/a", 853 | "ets/p", 854 | "leshe", 855 | "lishe", 856 | "mon/i", 857 | "ebook", 858 | "ame.h", 859 | "ead/i", 860 | "ames/", 861 | "ernal", 862 | "ame_a", 863 | "getap", 864 | "ounts", 865 | "ames_", 866 | "ets/j", 867 | "lery/", 868 | "ets/t", 869 | "ectio", 870 | "adobj", 871 | "timer", 872 | "ponen", 873 | "ntpag", 874 | "ystem", 875 | "ducto", 876 | "rect.", 877 | "osoft", 878 | "sets.", 879 | "tore.", 880 | "on/re", 881 | "pts/s", 882 | "lates", 883 | "ices.", 884 | "s/gpt", 885 | "ytics", 886 | "e.com", 887 | "s.g.d", 888 | "eclic", 889 | "ads_i", 890 | "back=", 891 | "page_", 892 | "endat", 893 | "books", 894 | "ction", 895 | "ad/ad", 896 | "ad/ex", 897 | "ad/js", 898 | "d/js/", 899 | "ad/ga", 900 | "o.com", 901 | "om/zz", 902 | "ent_i", 903 | "ent-a", 904 | "strap", 905 | "ribut", 906 | "om/rq", 907 | "ent/b", 908 | "ent/a", 909 | 
"rolle", 910 | "ent/i", 911 | "form_", 912 | "late-", 913 | "eshee", 914 | "top-b", 915 | "ts/js", 916 | "me.ht", 917 | "rnal_", 918 | "etapi", 919 | "unts/", 920 | "lay.j", 921 | "ions/", 922 | "dobje", 923 | "onent", 924 | "late/", 925 | "updat", 926 | "tpage", 927 | "n/res", 928 | "ts/sh", 929 | "ces.c", 930 | "om/pa", 931 | ".g.do", 932 | "lick.", 933 | "e.htm", 934 | "ds_im", 935 | "ndati", 936 | "d/ads", 937 | "m/ads", 938 | "d/exp", 939 | "d/gad", 940 | "m/zz/", 941 | "ts/sk", 942 | "ibute", 943 | "lick/", 944 | "m/rq/", 945 | "nt/ad", 946 | "oller", 947 | "trap/", 948 | "sheet", 949 | "s/lid", 950 | "s/js/", 951 | "ay.js", 952 | "objec", 953 | "nt/ba", 954 | "nent/", 955 | "pdate", 956 | "es.co", 957 | "m/pag", 958 | "ick.n", 959 | "s_imp", 960 | "datio", 961 | "ads?g", 962 | "ww.go", 963 | "/expa", 964 | "/lida", 965 | "ds?ad", 966 | "bject", 967 | "s.com", 968 | "ck.ne", 969 | "_impl", 970 | "ds?gd", 971 | "w.goo", 972 | "expan", 973 | "lidar", 974 | "s?ad_", 975 | "ons/a", 976 | "ject.", 977 | "k.net", 978 | "impl_", 979 | "s?gdf", 980 | "xpans", 981 | "idar.", 982 | "?ad_r", 983 | "om/do", 984 | "xpand", 985 | "ect.j", 986 | "partn", 987 | "net/g", 988 | "?gdfp", 989 | "pansi", 990 | "dar.j", 991 | "ad_ru", 992 | "m/dou", 993 | "pandi", 994 | "ct.js", 995 | "artne", 996 | "et/ga", 997 | "gdfp_", 998 | "ansio", 999 | "ar.js", 1000 | "d_rul", 1001 | "andin", 1002 | "rtner", 1003 | "nding", 1004 | "t/gam", 1005 | "dfp_r", 1006 | "gead2", 1007 | "nsion", 1008 | "_rule", 1009 | "tner.", 1010 | "fp_re", 1011 | "ead2.", 1012 | "sion_", 1013 | "rule=", 1014 | "ner.g", 1015 | "p_req", 1016 | "ad2.g", 1017 | "ion_e", 1018 | "er.go", 1019 | "_req=", 1020 | "d2.go", 1021 | "on_em", 1022 | "r.goo", 1023 | "req=1", 1024 | "2.goo", 1025 | "n_emb", 1026 | "om/gp", 1027 | "eq=1&", 1028 | "ogles", 1029 | "_embe", 1030 | "m/gpt", 1031 | "glesy", 1032 | "mbed.", 1033 | "lesyn", 1034 | "bed.j", 1035 | "esynd", 1036 | "ed.js", 1037 | "syndi", 1038 | "yndic", 1039 | "ndica", 
1040 | "dicat", 1041 | "icati", 1042 | "catio", 1043 | "tion.", 1044 | "ion.c", 1045 | "on.co", 1046 | "n.com", 1047 | }; 1048 | 1049 | const char *badSubstrings[] = {"http", "www" }; 1050 | -------------------------------------------------------------------------------- /test/data/ublock-unbreak.txt: -------------------------------------------------------------------------------- 1 | # Needed to unbreak web sites, mostly because of hosts file-based filters: 2 | # Dan Pollock's, MVPS, Peter Lowe's 3 | 4 | # https://twitter.com/JarkkoRytkonen/status/540052376015888385 5 | # https://github.com/chrisaljoudi/uBlock/issues/119 6 | # https://github.com/chrisaljoudi/uBlock/issues/940 7 | # To counter `google-analytics.com` in Peter Lowe's list 8 | @@||google-analytics.com^$domain=avianca.com|jackbox.tv|newegg.com|bikstok.sonymusicshop.dk 9 | 10 | # From Chrome store: "Go to site http://www.vd.nl/ ... links from dropdown top menu do not work" 11 | # To counter Dan Pollock's, hpHosts, MVPS, Peter Lowe's 12 | @@||google-analytics.com/analytics.js$script,domain=burodecredito.com.mx|vd.nl 13 | 14 | # https://github.com/gorhill/uBlock/issues/1164 15 | # GA plugins are pulled if and only if `analytics.js` was already allowed in 16 | # the first place: it is thus safe to create a global exception filter for 17 | # all GA plugins. 
18 | @@||google-analytics.com/plugins/$script 19 | 20 | # https://github.com/gorhill/uBlock/issues/1020 21 | @@||www.google-analytics.com/ga.js$script,domain=santander.com.mx|tv2.no 22 | 23 | # https://github.com/gorhill/uBlock/issues/939 24 | @@||google-analytics.com/urchin.js$script,domain=live.euroleague.net 25 | 26 | # https://github.com/chrisaljoudi/uBlock/issues/63 27 | @@||adf.ly^$~third-party 28 | # https://github.com/gorhill/uBlock/issues/1131 29 | @@||adf.ly^$domain=j.gs 30 | 31 | # https://github.com/chrisaljoudi/uBlock/issues/69 32 | @@||widget-cdn.rpxnow.com^$domain=foxnews.com,script 33 | 34 | # https://github.com/chrisaljoudi/uBlock/issues/141 35 | # Videos broken on weather.com 36 | # To counter `tags.tiqcdn.com` in  MVPS 37 | @@||tags.tiqcdn.com/utag/*.js$script,domain=weather.com 38 | 39 | # http://forums.lanik.us/viewtopic.php?f=64&t=17842&sid=46bd7065193153fe46efe816b7cd32c9&start=30#p59663 40 | # To counter `statcounter.com` in Peter Lowe's, MVPS, Dan Pollock's and hpHosts 41 | @@||statcounter.com^$~third-party 42 | 43 | # https://github.com/chrisaljoudi/uBlock/issues/166 44 | # To counter `liverail.com` in Peter Lowe's, MVPS, and hpHosts 45 | @@||cdn-static.liverail.com^$object-subrequest 46 | 47 | # https://github.com/chrisaljoudi/uBlock/issues/580 48 | # To counter `liverail.com` in Dan Pollock's, hpHosts, MVPS, Peter Lowe's 49 | @@||cdn-static.liverail.com$domain=9to5mac.com 50 | 51 | # https://github.com/gorhill/uBlock/issues/294 52 | # To counter `liverail.com` in hpHosts, Peter Lowe 53 | # To counter `vox-static.liverail.com` in Dan Pollock, hpHosts, MVPS 54 | @@||vox-static.liverail.com/swf/$object-subrequest,domain=cdnapi.kaltura.com 55 | 56 | # cnet.com needs this for video players 57 | # https://github.com/chrisaljoudi/uBlock/issues/227 58 | # To counter `2mdn.net` in Peter Lowe's 59 | # To counter `s0.2mdn.net` in MVPS, hpHosts, Dan Pollock's 60 | @@||s0.2mdn.net/instream/*$domain=cnet.com|nfl.com|wistv.com 61 | 
@@||s0.2mdn.net/instream/flash/v3/adsapi_3.swf$object 62 | 63 | # https://github.com/chrisaljoudi/uBlock/issues/243 64 | # To counter `cdn-i.dmdentertainment.com` in hpHosts 65 | @@||cdn-i.dmdentertainment.com^$domain=cracked.com 66 | 67 | # https://twitter.com/ethanveres/status/527122036649639936 68 | # Fortunately, further connections to `mxpnl.com` are blocked 69 | @@||cdn.vurb.com/*/mixpanel. 70 | 71 | # https://adblockplus.org/forum/viewtopic.php?f=2&t=25772 72 | @@||target.122.2o7.net^$domain=target.com 73 | 74 | # To counter `adm.fwmrm.net` from hpHosts, MVPS, Dan Pollock's 75 | @@||adm.fwmrm.net^$domain=go.com 76 | 77 | # https://github.com/chrisaljoudi/uBlock/issues/274 78 | # To counter `mopub.com` in Peter Lowe's 79 | @@||mopub.com^$~third-party 80 | 81 | # https://github.com/chrisaljoudi/uBlock/issues/293 82 | # To counter `safelinking.net` in hpHosts 83 | @@||safelinking.net^$~third-party 84 | 85 | # https://github.com/chrisaljoudi/uBlock/issues/302 86 | # To counter `outbrain.com` in hpHosts 87 | @@||outbrain.com^$~third-party 88 | 89 | # https://github.com/chrisaljoudi/uBlock/issues/316 90 | # To counter `inc.com` in Peter Lowe's, hpHosts 91 | @@||inc.com^$~third-party 92 | 93 | # https://github.com/chrisaljoudi/uBlock/issues/336 94 | # To counter `flurry.com` in Peter Lowe's, hpHosts, MVPS 95 | @@||flurry.com^$~third-party 96 | 97 | # https://github.com/chrisaljoudi/uBlock/issues/374 98 | # To counter `2mdn.net` in Peter Lowe's, `s0.2mdn.net` in hpHosts 99 | @@||s0.2mdn.net/instream/*$object,script,domain=wsmv.com 100 | 101 | # https://github.com/chrisaljoudi/uBlock/issues/409 102 | @@||c.speedtest.net^$~third-party 103 | 104 | # To counter Peter Lowe's, hpHosts 105 | @@||boldchat.com^$~third-party 106 | 107 | # https://github.com/chrisaljoudi/uBlock/issues/463 108 | # To counter Peter Lowe's, hpHosts 109 | @@||mixpanel.com^$~third-party 110 | 111 | # https://github.com/chrisaljoudi/uBlock/issues/466 112 | # Until fixed in EasyPrivacy 113 | 
@@/google-analytics-dashboard 114 | 115 | # https://twitter.com/poroot/status/554300983472574465 116 | charlie.strim.io#@##tweets 117 | ovh.strim.io#@##tweets 118 | 119 | # https://adblockplus.org/forum/viewtopic.php?f=10&t=24896&start=15#p114675 120 | # Until fixed in EasyList 121 | @@||cdn.turner.com/ads/nba/adfuel.js$domain=www.nba.com 122 | 123 | # https://github.com/chrisaljoudi/uBlock/issues/570 124 | # To counter `yui.yahooapis.com` in hpHosts 125 | @@||yui.yahooapis.com^ 126 | 127 | # Chrome store feedback 128 | # To counter `top.mail.ru` in Peter Lowe's 129 | @@||top.mail.ru^$~third-party 130 | 131 | # https://github.com/chrisaljoudi/uBlock/issues/602 132 | # To counter `/ads.css` in EasyList 133 | @@||css.washingtonpost.com/*/ads.css$stylesheet,~third-party 134 | 135 | # https://github.com/chrisaljoudi/uBlock/issues/607 136 | # To counter `||pcekspert.com^` in Peter Lowe's 137 | @@||pcekspert.com^$~third-party 138 | 139 | # https://twitter.com/yo_0/status/559748330390323200 140 | # To counter `||clickbank.com^` in Dan Pollock's, Peter Lowe's 141 | @@||clickbank.com^$~third-party 142 | 143 | # https://github.com/chrisaljoudi/uBlock/issues/614 144 | # To counter `awaps.yandex.ru` in hpHosts 145 | # To counter `mc.yandex.ru` in hpHosts, MVPS 146 | @@||awaps.yandex.ru^$domain=market.yandex.ru 147 | @@||mc.yandex.ru^$domain=market.yandex.ru 148 | @@||yastatic.net/market-export/*/advert.js$domain=market.yandex.ru 149 | 150 | # https://github.com/chrisaljoudi/uBlock/issues/774 151 | # To counter `carbonads.net` in Peter Lowe's 152 | @@||carbonads.net^$~third-party 153 | 154 | # This enables twitch.tv to work in HTML5 155 | @@||imasdk.googleapis.com$domain=twitch.tv 156 | 157 | # Chrome store feedback: "totaljobs.com [...] 
with μBlock enabled, I can't click any links" 158 | # To counter `2o7.net` in Peter Lowe's 159 | @@||2o7.net^$domain=totaljobs.com 160 | 161 | # https://github.com/chrisaljoudi/uBlock/issues/841 162 | # To counter `quantcast.com` in hpHosts, Peter Lowe's 163 | @@||quantcast.com^$~third-party 164 | 165 | # Chrome store feedback: "on gaana.com it blocks the site" 166 | # To counter `_social_tracking.` in EasyPrivacy 167 | @@||css5.gaanacdn.com/minify-*/min/?$script,domain=gaana.com 168 | 169 | # https://twitter.com/Urre/status/572742363069714432 170 | # To counter `/keen.min.js` in EasyPrivacy 171 | @@||keen.github.io/*/keen.min.js$~third-party 172 | 173 | # https://github.com/gorhill/uBlock/issues/182 174 | # http://www.okgoals.com/match-highlights-1425491618---41 175 | # To counter `intergi.com` in Peter Lowe's. For example, breaks: 176 | # explosm.net 177 | # lastminutegoals.org 178 | # okgoals.com 179 | # tvgolo.com 180 | @@||cdn.phoenix.intergi.com^ 181 | 182 | # kissmetrics.com broken 183 | # To counter `kissmetrics.com` in Peter Lowe's 184 | @@||kissmetrics.com^$~third-party 185 | 186 | # Twitter: https://twitter.com/swashcap/status/578413787323056128 187 | # "Found Viewpoint’s site [...] Looks like it might be broken?" 
188 | # To counter hpHosts, Peter Lowe's `viewpoint.com` 189 | @@||viewpoint.com^$~third-party 190 | 191 | # http://www.reddit.com/r/ublock/comments/32k908/cnncom_videos_wont_load_w_%C2%B5block_enabled/ 192 | @@||i.cdn.turner.com/xslo/cvp/ads/freewheel/bundles/2/*$domain=cnn.com 193 | 194 | # https://github.com/chrisaljoudi/uBlock/issues/1421 195 | # To counter hpHosts, MVPS, Peter Lowe's: `woopra.com` 196 | @@||woopra.com^$~third-party 197 | 198 | # http://www.wilderssecurity.com/threads/ublock-origin-and-newegg.377014/ 199 | # Until the fix is in EasyPrivacy 200 | @@||monetate.net/*$script,domain=www.newegg.com 201 | 202 | # https://github.com/gorhill/uBlock/issues/372 203 | @@||googleads.g.doubleclick.net/ads/preferences/getcookie$popup,domain=myaccount.google.com 204 | 205 | # http://www.reddit.com/r/AsianBeauty/comments/3ak15v/til_if_youre_using_ublock_origin_some_of_the/ 206 | @@||img.echosting.cafe24.com/design$image,domain=jolse.com 207 | 208 | # https://forums.lanik.us/viewtopic.php?f=64&t=23859 209 | @@||imasdk.googleapis.com/js/core$subdocument,domain=globalnews.ca 210 | 211 | # https://github.com/gorhill/uBlock/issues/780 212 | @@||www.google-analytics.com/plugins/ua/linkid.js$script,domain=support.amd.com 213 | @@||www.google-analytics.com/analytics.js$script,domain=support.amd.com 214 | @@||www.googletagmanager.com/gtm.js$script,domain=support.amd.com 215 | 216 | # https://www.reddit.com/r/uBlockOrigin/comments/3oca9w/how_to_keep_ublock_from_interfering_with_button/ 217 | @@||partner.googleadservices.com/gpt/$script,domain=www.merriam-webster.com 218 | @@||www.atpworldtour.com/assets/js/util/googleAnalytics.js$script,first-party 219 | 220 | # https://twitter.com/bociusz/status/653693746626871296 221 | # To counter Peter Lowe's `pagefair.com`, but only when on `pagefair.com` 222 | @@||pagefair.com^$first-party 223 | 224 | # Chrome store: "Some of the useful content on website such as pictures and maps on redfin get blocked" 225 | 
@@||ssl.cdn-redfin.com/*/redfin/common/analytics/PixelTracking.js$script,domain=redfin.com 226 | 227 | # Chrome store: "uBlock Origin seems to think Southwest.com's background and header image are ads" 228 | @@||www.southwest.com/*/mbox.js$script,first-party 229 | 230 | # https://www.reddit.com/r/ublock/comments/3qo6yj/marketwatchcom_blocks_ublock/ 231 | marketwatch.com#@##ad_DisplayAd1 232 | marketwatch.com#@##ad_DisplayAd2 233 | marketwatch.com#@#.advertisement 234 | 235 | # https://twitter.com/Jadenfire/status/663837423160451072 236 | @@||d2kkl4buashh8c.cloudfront.net/ads/ads.js$script,domain=thesimsresource.com 237 | 238 | # As reported in AMO feedback section 239 | @@||nationalreview.com/sites/all/themes/*/assets/js/chartbeat.js$script 240 | 241 | # https://github.com/gorhill/uBlock/issues/1016#issuecomment-165153899 242 | @@||liveperson.net^$first-party 243 | 244 | # https://github.com/gorhill/uBlock/issues/1082 245 | @@||widgets.outbrain.com/outbrain.js$script,domain=www.themarker.com 246 | 247 | # https://github.com/gorhill/uBlock/issues/1081 248 | @@||google-analytics.com/ga.js$domain=one.co.il 249 | 250 | # https://github.com/gorhill/uBlock/issues/1297 251 | @@||taboola.com^$first-party 252 | 253 | # https://github.com/gorhill/uBlock/issues/1296 254 | @@||odb.outbrain.com/crossdomain.xml$object-subrequest 255 | 256 | # https://twitter.com/SeanHood/status/692387495993024512 257 | # To counter `2o7.net` in Peter Lowe's, Disconnect's Malvertising filter list 258 | @@||ticketmaster.122.2o7.net$image,domain=ticketweb.co.uk 259 | 260 | # https://forums.lanik.us/viewtopic.php?f=64&t=28590 261 | phoronix.com#@#div[style^="text-align: center;"] 262 | 263 | # https://forums.lanik.us/viewtopic.php?f=64&t=28701 264 | @@||files.explosm.net/files/$image,first-party 265 | 266 | # `amazon-adsystem.com` is blocked by Peter Lowe's. The neutered script should 267 | # help lower chances of breakage. 
Examples: 268 | # - http://www.food.com/recipe/oven-baked-sweet-plantains-80130 (slideshow controls broken) 269 | # - [add more] 270 | ||amazon-adsystem.com/aax2/amzn_ads.js$script,redirect=amazon-adsystem.com/aax2/amzn_ads.js 271 | 272 | # This potentially unbreaks sites broken by EasyPrivacy's `/b/ss/*&aqe=` 273 | # Confirmed for: 274 | # - http://www.surfline.com/video/ (links not working) 275 | # - https://github.com/gorhill/uBlock/issues/529 276 | # - http://www.scotrail.co.uk/check-your-journey (via https://twitter.com/andy_pee_tho/status/710508529405263872) 277 | # */b/ss/*&aqe=$image,redirect=1x1-transparent.gif 278 | 279 | # This unbreaks the video player: 280 | # - http://www.cnet.com/videos/big-mac-survives-molten-copper-and-heats-up-internet-ep-233/ 281 | # And possibly other sites -- `taboola.com` is blocked by Peter Lowe's. 282 | ||cdn.taboola.com/libtrc/*/loader.js$script,redirect=noopjs,important,domain=cnet.com 283 | 284 | # https://twitter.com/peikas/status/711911523837460480 285 | # To counter `/analytics.min.js` in EasyPrivacy 286 | @@||cdn.segment.com/analytics.js/*/analytics.min.js$script,domain=vyte.in 287 | 288 | # https://twitter.com/maknz/status/712731132484124672 289 | # To counter `-google-tag-manager/$script` in EasyPrivacy 290 | @@||sorted.org.nz/*/angulartics-google-tag-manager.js$script,first-party 291 | 292 | # https://twitter.com/mtarnovan/status/713088377994682368 293 | # This counters `analytics.google.com` in Peter Lowe's 294 | @@||analytics.google.com^$first-party 295 | 296 | # This unbreaks video playback on weather.com 297 | # To counter `||analytics.edgekey.net^` in EasyPrivacy. 298 | @@||analytics.edgekey.net/html5/akamaihtml5-min.js$script,domain=weather.com 299 | 300 | ! 
https://github.com/gorhill/uBlock/issues/1540 301 | @@||tfag.de^$script,domain=chip.de 302 | 303 | # https://github.com/uBlockOrigin/uAssets/issues/16 304 | @@||nav.files.bbci.co.uk/nav-analytics/*$script,domain=bbc.com|bbc.co.uk 305 | 306 | # https://adblockplus.org/forum/viewtopic.php?f=1&t=44930 307 | @@||data.cnn.com/jsonp/cfg/*/videoconfig/cnn/desktop/domesticsectionconfig.json$script,domain=cnn.com 308 | @@||cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/ad_policy.xml$xmlhttprequest,domain=cnn.com 309 | @@||z.cdn.turner.com/analytics/cnnexpan/jsmd.min.js$script,domain=cnn.com 310 | 311 | # https://github.com/uBlockOrigin/uAssets/issues/20 312 | @@||gamespot.com/*/trackingWithCT.js$script,first-party 313 | 314 | # http://www.slideshare.net/aidanajoyce/adblocking-blocking-more-than-ads 315 | ||ggwebcast.com/*/kaltura/ga/urchin.js$script,redirect=noopjs 316 | umbro.com#@##ad_main 317 | 318 | # https://twitter.com/komandorTarkin/status/723234211444973569 319 | @@||shop.puppetlabs.com/store/js/services/googleAnalytics.js$script,first-party 320 | 321 | ! https://github.com/uBlockOrigin/uAssets/issues/33 322 | flightradar24.com#@#.twitter-feed 323 | 324 | ! https://github.com/uBlockOrigin/uAssets/issues/34 325 | @@||premium.soundcloud.com/*/js/tracking.js$script,first-party 326 | 327 | ! https://github.com/gorhill/uBlock/issues/1608 328 | ||stats.bbc.co.uk/o.gif$object,domain=bbc.com|bbc.co.uk,redirect=1x1-transparent.gif 329 | 330 | ! https://github.com/gorhill/uBlock/issues/1626 331 | @@||boldchat.com^ 332 | 333 | ! https://github.com/uBlockOrigin/uAssets/issues/46 334 | @@||snoobi.com^$script,domain=kartta.hel.fi 335 | 336 | ! https://github.com/uBlockOrigin/uAssets/issues/47 337 | @@||esellerate.net^$first-party 338 | 339 | ! https://twitter.com/AMReese/status/733524545085181952 340 | @@||dw.cbsi.com/anonc.js$script,domain=giantbomb.com 341 | 342 | ! 
https://github.com/gorhill/uBlock/issues/1737 343 | @@||googletagmanager.com/gtm.js?$domain=willyweather.com.au 344 | 345 | ! https://www.reddit.com/r/portugal/comments/4qud2t/problemas_com_a_extens%C3%A3o_ublock_origin_em_sites/ 346 | @@||data.netscope.marktest.pt/netscope-gemius.js$script,domain=ojogo.pt 347 | @@||gapt.hit.gemius.pl/xlgemius.js$script,domain=ojogo.pt 348 | 349 | ! https://twitter.com/AmberLynneGirl/status/751099123411980289 350 | ! To counter `naiadsystems.com` in Peter Lowe's 351 | @@||naiadsystems.com$object,domain=streamate.com 352 | 353 | ! https://github.com/gorhill/uBlock/issues/1803 354 | @@||static.adf.ly^$image,script,stylesheet,domain=q.gs 355 | 356 | ! https://twitter.com/JoshMacFarlane/status/756573821931585536 357 | ! To counter Peter Lowe's `emarketer.com` 358 | @@||emarketer.com^$first-party 359 | 360 | ! https://github.com/uBlockOrigin/uAssets/issues/86 361 | @@||snoobi.com^$script,domain=sato.fi 362 | 363 | ! https://twitter.com/bdarfler/status/768540430988378112 364 | ! To counter Peter Lowe's `tapad.com` 365 | @@||tapad.com^$first-party 366 | 367 | ! Reported in Chrome store: "Computershopper.com [...] 'this page did not fully render due to a plugin you have installed.'" 368 | @@||sp.computershopper.com/xyz?$image,~third-party 369 | 370 | ! https://twitter.com/bdarfler/status/776058551777329152 371 | @@||localytics.com^$first-party 372 | 373 | ! https://github.com/uBlockOrigin/uAssets/issues/148 374 | @@||media2.intoday.in/aajtak/$script,domain=aajtak.intoday.in|indiatoday.intoday.in 375 | 376 | ! https://twitter.com/SathyaBhat/status/785727665508487168 377 | @@||styletagassets.com/vendor/angular-google-analytics.min.js$script,domain=styletag.com 378 | 379 | ! 
https://twitter.com/moritzlost/status/789155425405075456 380 | @@||app.focalmark.com/bower_components/angulartics-google-analytics/dist/angulartics-ga.min.js$script,first-party 381 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mozilla Public License, version 2.0 2 | 3 | 1. Definitions 4 | 5 | 1.1. "Contributor" 6 | 7 | means each individual or legal entity that creates, contributes to the 8 | creation of, or owns Covered Software. 9 | 10 | 1.2. "Contributor Version" 11 | 12 | means the combination of the Contributions of others (if any) used by a 13 | Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. "Contribution" 16 | 17 | means Covered Software of a particular Contributor. 18 | 19 | 1.4. "Covered Software" 20 | 21 | means Source Code Form to which the initial Contributor has attached the 22 | notice in Exhibit A, the Executable Form of such Source Code Form, and 23 | Modifications of such Source Code Form, in each case including portions 24 | thereof. 25 | 26 | 1.5. "Incompatible With Secondary Licenses" 27 | means 28 | 29 | a. that the initial Contributor has attached the notice described in 30 | Exhibit B to the Covered Software; or 31 | 32 | b. that the Covered Software was made available under the terms of 33 | version 1.1 or earlier of the License, but not also under the terms of 34 | a Secondary License. 35 | 36 | 1.6. "Executable Form" 37 | 38 | means any form of the work other than Source Code Form. 39 | 40 | 1.7. "Larger Work" 41 | 42 | means a work that combines Covered Software with other material, in a 43 | separate file or files, that is not Covered Software. 44 | 45 | 1.8. "License" 46 | 47 | means this document. 48 | 49 | 1.9. 
"Licensable" 50 | 51 | means having the right to grant, to the maximum extent possible, whether 52 | at the time of the initial grant or subsequently, any and all of the 53 | rights conveyed by this License. 54 | 55 | 1.10. "Modifications" 56 | 57 | means any of the following: 58 | 59 | a. any file in Source Code Form that results from an addition to, 60 | deletion from, or modification of the contents of Covered Software; or 61 | 62 | b. any new file in Source Code Form that contains any Covered Software. 63 | 64 | 1.11. "Patent Claims" of a Contributor 65 | 66 | means any patent claim(s), including without limitation, method, 67 | process, and apparatus claims, in any patent Licensable by such 68 | Contributor that would be infringed, but for the grant of the License, 69 | by the making, using, selling, offering for sale, having made, import, 70 | or transfer of either its Contributions or its Contributor Version. 71 | 72 | 1.12. "Secondary License" 73 | 74 | means either the GNU General Public License, Version 2.0, the GNU Lesser 75 | General Public License, Version 2.1, the GNU Affero General Public 76 | License, Version 3.0, or any later versions of those licenses. 77 | 78 | 1.13. "Source Code Form" 79 | 80 | means the form of the work preferred for making modifications. 81 | 82 | 1.14. "You" (or "Your") 83 | 84 | means an individual or a legal entity exercising rights under this 85 | License. For legal entities, "You" includes any entity that controls, is 86 | controlled by, or is under common control with You. For purposes of this 87 | definition, "control" means (a) the power, direct or indirect, to cause 88 | the direction or management of such entity, whether by contract or 89 | otherwise, or (b) ownership of more than fifty percent (50%) of the 90 | outstanding shares or beneficial ownership of such entity. 91 | 92 | 93 | 2. License Grants and Conditions 94 | 95 | 2.1. 
Grants 96 | 97 | Each Contributor hereby grants You a world-wide, royalty-free, 98 | non-exclusive license: 99 | 100 | a. under intellectual property rights (other than patent or trademark) 101 | Licensable by such Contributor to use, reproduce, make available, 102 | modify, display, perform, distribute, and otherwise exploit its 103 | Contributions, either on an unmodified basis, with Modifications, or 104 | as part of a Larger Work; and 105 | 106 | b. under Patent Claims of such Contributor to make, use, sell, offer for 107 | sale, have made, import, and otherwise transfer either its 108 | Contributions or its Contributor Version. 109 | 110 | 2.2. Effective Date 111 | 112 | The licenses granted in Section 2.1 with respect to any Contribution 113 | become effective for each Contribution on the date the Contributor first 114 | distributes such Contribution. 115 | 116 | 2.3. Limitations on Grant Scope 117 | 118 | The licenses granted in this Section 2 are the only rights granted under 119 | this License. No additional rights or licenses will be implied from the 120 | distribution or licensing of Covered Software under this License. 121 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 122 | Contributor: 123 | 124 | a. for any code that a Contributor has removed from Covered Software; or 125 | 126 | b. for infringements caused by: (i) Your and any other third party's 127 | modifications of Covered Software, or (ii) the combination of its 128 | Contributions with other software (except as part of its Contributor 129 | Version); or 130 | 131 | c. under Patent Claims infringed by Covered Software in the absence of 132 | its Contributions. 133 | 134 | This License does not grant any rights in the trademarks, service marks, 135 | or logos of any Contributor (except as may be necessary to comply with 136 | the notice requirements in Section 3.4). 137 | 138 | 2.4. 
Subsequent Licenses 139 | 140 | No Contributor makes additional grants as a result of Your choice to 141 | distribute the Covered Software under a subsequent version of this 142 | License (see Section 10.2) or under the terms of a Secondary License (if 143 | permitted under the terms of Section 3.3). 144 | 145 | 2.5. Representation 146 | 147 | Each Contributor represents that the Contributor believes its 148 | Contributions are its original creation(s) or it has sufficient rights to 149 | grant the rights to its Contributions conveyed by this License. 150 | 151 | 2.6. Fair Use 152 | 153 | This License is not intended to limit any rights You have under 154 | applicable copyright doctrines of fair use, fair dealing, or other 155 | equivalents. 156 | 157 | 2.7. Conditions 158 | 159 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in 160 | Section 2.1. 161 | 162 | 163 | 3. Responsibilities 164 | 165 | 3.1. Distribution of Source Form 166 | 167 | All distribution of Covered Software in Source Code Form, including any 168 | Modifications that You create or to which You contribute, must be under 169 | the terms of this License. You must inform recipients that the Source 170 | Code Form of the Covered Software is governed by the terms of this 171 | License, and how they can obtain a copy of this License. You may not 172 | attempt to alter or restrict the recipients' rights in the Source Code 173 | Form. 174 | 175 | 3.2. Distribution of Executable Form 176 | 177 | If You distribute Covered Software in Executable Form then: 178 | 179 | a. such Covered Software must also be made available in Source Code Form, 180 | as described in Section 3.1, and You must inform recipients of the 181 | Executable Form how they can obtain a copy of such Source Code Form by 182 | reasonable means in a timely manner, at a charge no more than the cost 183 | of distribution to the recipient; and 184 | 185 | b. 
You may distribute such Executable Form under the terms of this 186 | License, or sublicense it under different terms, provided that the 187 | license for the Executable Form does not attempt to limit or alter the 188 | recipients' rights in the Source Code Form under this License. 189 | 190 | 3.3. Distribution of a Larger Work 191 | 192 | You may create and distribute a Larger Work under terms of Your choice, 193 | provided that You also comply with the requirements of this License for 194 | the Covered Software. If the Larger Work is a combination of Covered 195 | Software with a work governed by one or more Secondary Licenses, and the 196 | Covered Software is not Incompatible With Secondary Licenses, this 197 | License permits You to additionally distribute such Covered Software 198 | under the terms of such Secondary License(s), so that the recipient of 199 | the Larger Work may, at their option, further distribute the Covered 200 | Software under the terms of either this License or such Secondary 201 | License(s). 202 | 203 | 3.4. Notices 204 | 205 | You may not remove or alter the substance of any license notices 206 | (including copyright notices, patent notices, disclaimers of warranty, or 207 | limitations of liability) contained within the Source Code Form of the 208 | Covered Software, except that You may alter any license notices to the 209 | extent required to remedy known factual inaccuracies. 210 | 211 | 3.5. Application of Additional Terms 212 | 213 | You may choose to offer, and to charge a fee for, warranty, support, 214 | indemnity or liability obligations to one or more recipients of Covered 215 | Software. However, You may do so only on Your own behalf, and not on 216 | behalf of any Contributor. 
You must make it absolutely clear that any 217 | such warranty, support, indemnity, or liability obligation is offered by 218 | You alone, and You hereby agree to indemnify every Contributor for any 219 | liability incurred by such Contributor as a result of warranty, support, 220 | indemnity or liability terms You offer. You may include additional 221 | disclaimers of warranty and limitations of liability specific to any 222 | jurisdiction. 223 | 224 | 4. Inability to Comply Due to Statute or Regulation 225 | 226 | If it is impossible for You to comply with any of the terms of this License 227 | with respect to some or all of the Covered Software due to statute, 228 | judicial order, or regulation then You must: (a) comply with the terms of 229 | this License to the maximum extent possible; and (b) describe the 230 | limitations and the code they affect. Such description must be placed in a 231 | text file included with all distributions of the Covered Software under 232 | this License. Except to the extent prohibited by statute or regulation, 233 | such description must be sufficiently detailed for a recipient of ordinary 234 | skill to be able to understand it. 235 | 236 | 5. Termination 237 | 238 | 5.1. The rights granted under this License will terminate automatically if You 239 | fail to comply with any of its terms. However, if You become compliant, 240 | then the rights granted under this License from a particular Contributor 241 | are reinstated (a) provisionally, unless and until such Contributor 242 | explicitly and finally terminates Your grants, and (b) on an ongoing 243 | basis, if such Contributor fails to notify You of the non-compliance by 244 | some reasonable means prior to 60 days after You have come back into 245 | compliance. 
Moreover, Your grants from a particular Contributor are 246 | reinstated on an ongoing basis if such Contributor notifies You of the 247 | non-compliance by some reasonable means, this is the first time You have 248 | received notice of non-compliance with this License from such 249 | Contributor, and You become compliant prior to 30 days after Your receipt 250 | of the notice. 251 | 252 | 5.2. If You initiate litigation against any entity by asserting a patent 253 | infringement claim (excluding declaratory judgment actions, 254 | counter-claims, and cross-claims) alleging that a Contributor Version 255 | directly or indirectly infringes any patent, then the rights granted to 256 | You by any and all Contributors for the Covered Software under Section 257 | 2.1 of this License shall terminate. 258 | 259 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user 260 | license agreements (excluding distributors and resellers) which have been 261 | validly granted by You or Your distributors under this License prior to 262 | termination shall survive termination. 263 | 264 | 6. Disclaimer of Warranty 265 | 266 | Covered Software is provided under this License on an "as is" basis, 267 | without warranty of any kind, either expressed, implied, or statutory, 268 | including, without limitation, warranties that the Covered Software is free 269 | of defects, merchantable, fit for a particular purpose or non-infringing. 270 | The entire risk as to the quality and performance of the Covered Software 271 | is with You. Should any Covered Software prove defective in any respect, 272 | You (not any Contributor) assume the cost of any necessary servicing, 273 | repair, or correction. This disclaimer of warranty constitutes an essential 274 | part of this License. No use of any Covered Software is authorized under 275 | this License except under this disclaimer. 276 | 277 | 7. 
Limitation of Liability 278 | 279 | Under no circumstances and under no legal theory, whether tort (including 280 | negligence), contract, or otherwise, shall any Contributor, or anyone who 281 | distributes Covered Software as permitted above, be liable to You for any 282 | direct, indirect, special, incidental, or consequential damages of any 283 | character including, without limitation, damages for lost profits, loss of 284 | goodwill, work stoppage, computer failure or malfunction, or any and all 285 | other commercial damages or losses, even if such party shall have been 286 | informed of the possibility of such damages. This limitation of liability 287 | shall not apply to liability for death or personal injury resulting from 288 | such party's negligence to the extent applicable law prohibits such 289 | limitation. Some jurisdictions do not allow the exclusion or limitation of 290 | incidental or consequential damages, so this exclusion and limitation may 291 | not apply to You. 292 | 293 | 8. Litigation 294 | 295 | Any litigation relating to this License may be brought only in the courts 296 | of a jurisdiction where the defendant maintains its principal place of 297 | business and such litigation shall be governed by laws of that 298 | jurisdiction, without reference to its conflict-of-law provisions. Nothing 299 | in this Section shall prevent a party's ability to bring cross-claims or 300 | counter-claims. 301 | 302 | 9. Miscellaneous 303 | 304 | This License represents the complete agreement concerning the subject 305 | matter hereof. If any provision of this License is held to be 306 | unenforceable, such provision shall be reformed only to the extent 307 | necessary to make it enforceable. Any law or regulation which provides that 308 | the language of a contract shall be construed against the drafter shall not 309 | be used to construe this License against a Contributor. 310 | 311 | 312 | 10. Versions of the License 313 | 314 | 10.1. 
New Versions 315 | 316 | Mozilla Foundation is the license steward. Except as provided in Section 317 | 10.3, no one other than the license steward has the right to modify or 318 | publish new versions of this License. Each version will be given a 319 | distinguishing version number. 320 | 321 | 10.2. Effect of New Versions 322 | 323 | You may distribute the Covered Software under the terms of the version 324 | of the License under which You originally received the Covered Software, 325 | or under the terms of any subsequent version published by the license 326 | steward. 327 | 328 | 10.3. Modified Versions 329 | 330 | If you create software not governed by this License, and you want to 331 | create a new license for such software, you may create and use a 332 | modified version of this License if you rename the license and remove 333 | any references to the name of the license steward (except to note that 334 | such modified license differs from this License). 335 | 336 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 337 | Licenses If You choose to distribute Source Code Form that is 338 | Incompatible With Secondary Licenses under the terms of this version of 339 | the License, the notice described in Exhibit B of this License must be 340 | attached. 341 | 342 | Exhibit A - Source Code Form License Notice 343 | 344 | This Source Code Form is subject to the 345 | terms of the Mozilla Public License, v. 346 | 2.0. If a copy of the MPL was not 347 | distributed with this file, You can 348 | obtain one at 349 | http://mozilla.org/MPL/2.0/. 350 | 351 | If it is not possible or desirable to put the notice in a particular file, 352 | then You may include the notice in a location (such as a LICENSE file in a 353 | relevant directory) where a recipient would be likely to look for such a 354 | notice. 355 | 356 | You may add additional accurate notices of copyright ownership. 
357 | 358 | Exhibit B - "Incompatible With Secondary Licenses" Notice 359 | 360 | This Source Code Form is "Incompatible 361 | With Secondary Licenses", as defined by 362 | the Mozilla Public License, v. 2.0. 363 | 364 | --------------------------------------------------------------------------------