├── .gitignore ├── .travis.yml ├── adblock ├── testdata │ └── too_many_wildcards.txt ├── rules_test.go └── rules.go ├── LICENSE ├── admatch └── admatch.go ├── abpcheck └── abpcheck.go ├── README.md └── adstop ├── rulecache.go └── adstop.go /.gitignore: -------------------------------------------------------------------------------- 1 | /adblock.sh 2 | /easylist/ 3 | adstop.* 4 | /*.txt 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | go: 3 | - 1.7 4 | - 1.8 5 | - tip 6 | 7 | -------------------------------------------------------------------------------- /adblock/testdata/too_many_wildcards.txt: -------------------------------------------------------------------------------- 1 | |http://*/*0*0*0*0*0*0*0*0$script,xmlhttprequest,domain=generation-nt.com 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Patrick Mezard 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | -------------------------------------------------------------------------------- /admatch/admatch.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | "time" 8 | 9 | "github.com/pmezard/adblock/adblock" 10 | ) 11 | 12 | func match() error { 13 | domain := flag.String("domain", "", "URL domain") 14 | contentType := flag.String("content-type", "", "response Content-Type") 15 | originDomain := flag.String("origin-domain", "", "parent page domain") 16 | flag.Parse() 17 | args := flag.Args() 18 | if len(args) < 2 { 19 | return fmt.Errorf("at least one rule file and an URL are expected") 20 | } 21 | files := args[:len(args)-1] 22 | url := args[len(args)-1] 23 | m, added, err := adblock.NewMatcherFromFiles(files...) 
24 | if err != nil { 25 | return err 26 | } 27 | fmt.Printf("%d rules loaded\n", added) 28 | rq := &adblock.Request{ 29 | URL: url, 30 | Domain: *domain, 31 | OriginDomain: *originDomain, 32 | ContentType: *contentType, 33 | Timeout: 5 * time.Second, 34 | } 35 | start := time.Now() 36 | matched, _, err := m.Match(rq) 37 | if err != nil { 38 | return err 39 | } 40 | end := time.Now() 41 | suffix := fmt.Sprintf("in %.2fs", float64(end.Sub(start))/float64(time.Second)) 42 | if matched { 43 | fmt.Println("matched " + suffix) 44 | } else { 45 | fmt.Println("not matched " + suffix) 46 | } 47 | return nil 48 | } 49 | 50 | func main() { 51 | err := match() 52 | if err != nil { 53 | fmt.Fprintf(os.Stderr, "error: %s\n", err) 54 | os.Exit(1) 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /abpcheck/abpcheck.go: -------------------------------------------------------------------------------- 1 | /* 2 | abpcheck is a command line utility to verify AdBlockPlus rules. Running: 3 | 4 | $ abpcheck easylist.txt 5 | 6 | prints any rules which cannot be parsed by adblock package. 
7 | */ 8 | package main 9 | 10 | import ( 11 | "bufio" 12 | "flag" 13 | "fmt" 14 | "os" 15 | 16 | "github.com/pmezard/adblock/adblock" 17 | ) 18 | 19 | func check() error { 20 | verbose := flag.Bool("v", false, "print rejected rules") 21 | dump := flag.Bool("dump", false, "print parsed rules") 22 | flag.Parse() 23 | args := flag.Args() 24 | if len(args) != 1 { 25 | return fmt.Errorf("one input rule file expected") 26 | } 27 | fp, err := os.Open(args[0]) 28 | if err != nil { 29 | return err 30 | } 31 | defer fp.Close() 32 | 33 | ok := true 34 | rules := adblock.NewMatcher() 35 | scanner := bufio.NewScanner(fp) 36 | for scanner.Scan() { 37 | rule, err := adblock.ParseRule(scanner.Text()) 38 | if err != nil { 39 | fmt.Fprintf(os.Stderr, "error: could not parse rule:\n %s\n %s\n", 40 | scanner.Text(), err) 41 | ok = false 42 | continue 43 | } 44 | if rule == nil { 45 | continue 46 | } 47 | err = rules.AddRule(rule, 0) 48 | if *verbose && err != nil { 49 | fmt.Fprintf(os.Stderr, "error: could not add rule:\n %s\n %s\n", 50 | scanner.Text(), err) 51 | ok = false 52 | } 53 | } 54 | if !ok { 55 | return fmt.Errorf("some rules could not be parsed") 56 | } 57 | if *dump { 58 | fmt.Printf("%s\n", rules) 59 | } 60 | return nil 61 | } 62 | 63 | func main() { 64 | err := check() 65 | if err != nil { 66 | fmt.Fprintf(os.Stderr, "error: %s\n", err) 67 | os.Exit(1) 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # adblock 2 | 3 | [![Build Status](https://travis-ci.org/pmezard/adblock.png?branch=master)](https://travis-ci.org/pmezard/adblock) 4 | [![GoDoc](https://godoc.org/github.com/pmezard/adblock?status.svg)](https://godoc.org/github.com/pmezard/adblock) 5 | 6 | AdBlockPlus parser, matcher and transparent HTTP/HTTPS proxy 7 | 8 | Package documentation can be found at: 9 | 10 | http://godoc.org/github.com/pmezard/adblock/adblock 11 | 
12 | ## adstop 13 | 14 | adstop is an ad-blocking transparent HTTP/HTTPS proxy. 15 | 16 | It was designed to run on low power, low memory ARM devices and serve a couple 17 | of clients, mostly old smartphones which cannot run adblockers themselves. 18 | 19 | Before using it, you have to configure your devices and network to make it 20 | accessible as a transparent proxy. One way to achieve this is to install 21 | a VPN on the server side and redirect all HTTP/HTTPS traffic to the proxy 22 | with routing rules. Then make the client browse through the VPN. 23 | 24 | HTTPS filtering requires the proxy to intercept the device traffic and decrypt 25 | it. To allow this, you have to generate a certificate and add it to your 26 | device. 27 | 28 | ``` 29 | $ adstop -http localhost:1080 \ 30 | -https localhost:1081 \ 31 | -cache .adstop \ 32 | -max-age 24h \ 33 | -ca-cert /path/to/ca.cert \ 34 | -ca-key /path/to/ca.key \ 35 | https://easylist-downloads.adblockplus.org/easylist.txt \ 36 | some_local_list.txt 37 | ``` 38 | starts the proxy and makes it listen on HTTP on port 1080, HTTPS on port 1081, 39 | fetch and load rules from easylist and a local file, cache easylist in an 40 | .adstop/ directory and refresh it every 24 hours. 41 | 42 | ### How does it work? 43 | 44 | adstop monitors HTTP/HTTPS requests and responses and if one of these matches a 45 | filter, it returns a 404 error to the client. It does not modify response 46 | bodies. Rules without options, or whose options are not based on returned data, 47 | are applied on requests, the others on responses. 48 | 49 | The difficult part is to apply Adblock rules. They were designed to operate in 50 | a web browser and were assumed to have access to a lot more information than 51 | a simple web proxy has. 
adstop supports only a subset of available rules: 52 | - Rules without options (`"$..."` suffix) are completely supported 53 | - The following options are supported: 54 | * `domain=foo.com|bar.com|~baz.com` 55 | * `font`, `image`, `objects`, `script`, `stylesheet` are roughly approximated 56 | using Content-Type. 57 | * `third-party` is approximated with the Referer header. 58 | - The following options are not supported, and related rules are discarded: 59 | * `document` 60 | * `media` 61 | * `popup` 62 | - Element hiding rules are ignored. 63 | - Other options are ignored and rules applied without them. 64 | 65 | -------------------------------------------------------------------------------- /adstop/rulecache.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "io" 6 | "log" 7 | "net/http" 8 | "os" 9 | "path/filepath" 10 | "regexp" 11 | "strings" 12 | "sync" 13 | "time" 14 | 15 | "github.com/pmezard/adblock/adblock" 16 | ) 17 | 18 | type RuleSet struct { 19 | Matcher *adblock.RuleMatcher 20 | Rules []string 21 | } 22 | 23 | var ( 24 | defaultDate = time.Time{} 25 | ) 26 | 27 | // RuleCache holds a RuleSet built from a list of file paths and URLs. 28 | // Non-file resources are cached in a directory and fetched at initialization 29 | // or on Rules() access if their age exceeds maxAge. The new RuleSet is built 30 | // asynchronously and will eventually be returned by another Rules() call. 31 | // 32 | // Note that all resources are updated whenever one goes stale. Rebuilding 33 | // rules can be expensive, so refreshing all rules at once is preferable to 34 | // using cached data and rebuilding more frequently. 35 | // 36 | // TODO: Asynchronous implicit updates make it hard to cleanly manage the 37 | // lifetime of the cache. A Close() method should be provided if necessary. 
38 | type RuleCache struct { 39 | dir string 40 | urls []string 41 | maxAge time.Duration 42 | deadline time.Time 43 | matcherLock sync.Mutex 44 | matcher *RuleSet 45 | updating bool 46 | cacheLock sync.Mutex 47 | } 48 | 49 | // Creates a new cache. 50 | // URLs must either be file paths or HTTP/HTTPS URLs. 51 | func NewRuleCache(dir string, urls []string, maxAge time.Duration) (*RuleCache, error) { 52 | path, err := filepath.Abs(dir) 53 | if err != nil { 54 | return nil, err 55 | } 56 | err = os.MkdirAll(path, 0755) 57 | if err != nil { 58 | return nil, err 59 | } 60 | c := &RuleCache{ 61 | dir: path, 62 | urls: append([]string{}, urls...), 63 | maxAge: maxAge, 64 | } 65 | matcher, oldest, err := c.buildAll(false) 66 | if err != nil { 67 | return nil, err 68 | } 69 | c.matcher = matcher 70 | if oldest == defaultDate { 71 | oldest = time.Now() 72 | } 73 | c.deadline = oldest.Add(c.maxAge) 74 | return c, nil 75 | } 76 | 77 | // Turns a string into something suitable for a filename 78 | func makeFilename(name string) string { 79 | re := regexp.MustCompile(`[^a-z0-9\-_]`) 80 | return re.ReplaceAllString(name, "-") 81 | } 82 | 83 | // Returns a cached resource and its modification date. 84 | func (c *RuleCache) getCached(url string) (io.ReadCloser, time.Time, error) { 85 | date := time.Time{} 86 | name := makeFilename(url) 87 | path := filepath.Join(c.dir, name) 88 | fp, err := os.Open(path) 89 | if err != nil { 90 | return nil, date, err 91 | } 92 | st, err := fp.Stat() 93 | if err != nil { 94 | fp.Close() 95 | return nil, date, err 96 | } 97 | return fp, st.ModTime(), nil 98 | } 99 | 100 | // Add a resource to the cache. 
101 | func (c *RuleCache) cache(url string, r io.Reader) error { 102 | name := makeFilename(url) 103 | path := filepath.Join(c.dir, name) 104 | fp, err := os.Create(path) 105 | if err != nil { 106 | return err 107 | } 108 | _, err = io.Copy(fp, r) 109 | errClose := fp.Close() 110 | if err != nil { 111 | return err 112 | } 113 | return errClose 114 | } 115 | 116 | // Fetch and HTTP resource and cache it. 117 | func (c *RuleCache) fetchAndCache(url string) error { 118 | log.Printf("fetching %s", url) 119 | rsp, err := http.Get(url) 120 | if err != nil { 121 | return err 122 | } 123 | c.cache(url, rsp.Body) 124 | rsp.Body.Close() 125 | return nil 126 | } 127 | 128 | // Returns specified resource and last modification date. The date is set 129 | // to time.Time{} for non-cached resources. If "refresh" is false, cached 130 | // resources can be returned. They will be fetched and updated otherwise. 131 | func (c *RuleCache) load(url string, refresh bool) (io.ReadCloser, time.Time, error) { 132 | if !strings.HasPrefix(url, "http://") && 133 | !strings.HasPrefix(url, "https://") { 134 | // Assume file path 135 | fp, err := os.Open(url) 136 | return fp, time.Time{}, err 137 | } 138 | 139 | fp, date, err := c.getCached(url) 140 | if err == nil { 141 | age := time.Now().Sub(date) 142 | if !refresh && (c.maxAge <= 0 || age < c.maxAge) { 143 | return fp, date, nil 144 | } 145 | fp.Close() 146 | } 147 | 148 | err = c.fetchAndCache(url) 149 | if err != nil { 150 | log.Printf("could not fetch %s: %s", url, err) 151 | } 152 | fp, date, err = c.getCached(url) 153 | return fp, date, err 154 | } 155 | 156 | // Add the rules in supplied reader to the matcher. Returns the list of added 157 | // rules (for debugging or tracing purposes) and the total number of read rules. 158 | // Some rules could not have been parsed. 
159 | func buildOne(id int, r io.Reader, matcher *adblock.RuleMatcher) ([]string, int, error) { 160 | read := 0 161 | rules := []string{} 162 | scanner := bufio.NewScanner(r) 163 | for scanner.Scan() { 164 | s := scanner.Text() 165 | rule, err := adblock.ParseRule(s) 166 | if err != nil { 167 | log.Printf("error: could not parse rule:\n %s\n %s\n", 168 | scanner.Text(), err) 169 | continue 170 | } 171 | if rule == nil { 172 | continue 173 | } 174 | err = matcher.AddRule(rule, id) 175 | id++ 176 | read++ 177 | if err == nil { 178 | rules = append(rules, s) 179 | } 180 | } 181 | return rules, read, scanner.Err() 182 | } 183 | 184 | func (c *RuleCache) buildAll(refresh bool) (*RuleSet, time.Time, error) { 185 | matcher := adblock.NewMatcher() 186 | rules := []string{} 187 | id := 0 188 | read := 0 189 | oldest := time.Time{} 190 | for _, url := range c.urls { 191 | r, date, err := c.load(url, refresh) 192 | if err != nil { 193 | return nil, oldest, err 194 | } 195 | if oldest.After(date) { 196 | oldest = date 197 | } 198 | log.Printf("building rules from %s", url) 199 | built, n, err := buildOne(id, r, matcher) 200 | r.Close() 201 | if err != nil { 202 | return nil, oldest, err 203 | } 204 | rules = append(rules, built...) 205 | read += n 206 | id += len(built) 207 | } 208 | log.Printf("blacklists built: %d / %d added\n", len(rules), read) 209 | return &RuleSet{ 210 | Rules: rules, 211 | Matcher: matcher, 212 | }, oldest, nil 213 | } 214 | 215 | func (c *RuleCache) update() error { 216 | c.cacheLock.Lock() 217 | matcher, _, err := c.buildAll(true) 218 | c.cacheLock.Unlock() 219 | 220 | c.matcherLock.Lock() 221 | if err != nil { 222 | c.matcher = matcher 223 | } 224 | c.deadline = time.Now().Add(c.maxAge) 225 | c.matcherLock.Unlock() 226 | return nil 227 | } 228 | 229 | // Returns the current RuleSet. If one resource appears to be stale, an 230 | // update is performed asynchronously. 
231 | func (c *RuleCache) Rules() *RuleSet { 232 | c.matcherLock.Lock() 233 | defer c.matcherLock.Unlock() 234 | if !c.updating { 235 | now := time.Now() 236 | if now.After(c.deadline) { 237 | c.updating = true 238 | go func() { 239 | log.Printf("updating") 240 | err := c.update() 241 | if err != nil { 242 | log.Printf("update error: %s\n", err) 243 | } else { 244 | log.Printf("update succeeded, next one: %s\n", 245 | c.deadline.Format(time.RFC822)) 246 | } 247 | c.matcherLock.Lock() 248 | c.updating = false 249 | c.matcherLock.Unlock() 250 | }() 251 | } 252 | } 253 | return c.matcher 254 | } 255 | -------------------------------------------------------------------------------- /adblock/rules_test.go: -------------------------------------------------------------------------------- 1 | package adblock 2 | 3 | import ( 4 | "bytes" 5 | "net/url" 6 | "testing" 7 | "time" 8 | ) 9 | 10 | type TestInput struct { 11 | URL string 12 | Matched bool 13 | ContentType string 14 | OriginDomain string 15 | } 16 | 17 | func testInputs(t *testing.T, rules string, tests []TestInput) { 18 | parsed, err := ParseRules(bytes.NewBufferString(rules)) 19 | if err != nil { 20 | t.Fatal(err) 21 | } 22 | m := NewMatcher() 23 | for _, rule := range parsed { 24 | err = m.AddRule(rule, 0) 25 | if err != nil { 26 | t.Fatal(err) 27 | } 28 | } 29 | for _, test := range tests { 30 | rq := Request{ 31 | URL: test.URL, 32 | ContentType: test.ContentType, 33 | OriginDomain: test.OriginDomain, 34 | } 35 | if u, err := url.Parse(test.URL); err == nil { 36 | rq.Domain = u.Host 37 | } 38 | matched, _, err := m.Match(&rq) 39 | if err != nil { 40 | t.Errorf("unexpected match error: %s", err) 41 | } else if matched && !test.Matched { 42 | t.Errorf("unexpected match: '%+v'", test) 43 | } else if !matched && test.Matched { 44 | t.Errorf("unexpected miss: '%+v'", test) 45 | } 46 | } 47 | } 48 | 49 | func TestEmptyMatcher(t *testing.T) { 50 | testInputs(t, ` 51 | 52 | `, 53 | []TestInput{ 54 | {URL: "", Matched: 
false}, 55 | {URL: "foo", Matched: false}, 56 | }) 57 | } 58 | 59 | func TestExactMatch(t *testing.T) { 60 | testInputs(t, ` 61 | _ads_text. 62 | `, 63 | []TestInput{ 64 | {URL: "", Matched: false}, 65 | {URL: "foo", Matched: false}, 66 | {URL: "stuff=1&_ads_text.", Matched: true}, 67 | {URL: "stuff=1&_ads_text.field=bar", Matched: true}, 68 | }) 69 | } 70 | 71 | func TestWildcard(t *testing.T) { 72 | testInputs(t, ` 73 | a*b 74 | ad 75 | `, 76 | []TestInput{ 77 | {URL: "", Matched: false}, 78 | {URL: "foo", Matched: false}, 79 | {URL: "a", Matched: false}, 80 | {URL: "ab", Matched: true}, 81 | {URL: "acb", Matched: true}, 82 | {URL: "cacb", Matched: true}, 83 | {URL: "cacbc", Matched: true}, 84 | {URL: "ad", Matched: true}, 85 | }) 86 | } 87 | 88 | func TestSeparator(t *testing.T) { 89 | testInputs(t, ` 90 | a^ 91 | ^d 92 | `, 93 | []TestInput{ 94 | {URL: "", Matched: false}, 95 | {URL: "a", Matched: true}, 96 | {URL: "ab", Matched: false}, 97 | {URL: "a:b", Matched: true}, 98 | {URL: "d", Matched: false}, 99 | {URL: "e:d", Matched: true}, 100 | }) 101 | } 102 | 103 | func TestStartAnchor(t *testing.T) { 104 | testInputs(t, ` 105 | |a 106 | b| 107 | |c| 108 | `, 109 | []TestInput{ 110 | {URL: "a", Matched: true}, 111 | {URL: "za", Matched: false}, 112 | {URL: "az", Matched: true}, 113 | {URL: "b", Matched: true}, 114 | {URL: "zb", Matched: true}, 115 | {URL: "bz", Matched: false}, 116 | {URL: "c", Matched: true}, 117 | {URL: "zc", Matched: false}, 118 | {URL: "cz", Matched: false}, 119 | }) 120 | } 121 | 122 | func TestDomainAnchor(t *testing.T) { 123 | testInputs(t, ` 124 | ||ads.example.com 125 | ||foo.com/baz.gif 126 | `, 127 | []TestInput{ 128 | {URL: "http://ads.example.com/foo.gif", Matched: true}, 129 | {URL: "http://server1.ads.example.com/foo.gif", Matched: true}, 130 | {URL: "https://ads.example.com:8000/foo.gif", Matched: true}, 131 | {URL: "http://ads.example.com.ua/foo.gif", Matched: false}, 132 | {URL: 
"http://example.com/redirect/http://ads.example.com/", Matched: false}, 133 | {URL: "https://ads.foo.com/baz.gif", Matched: true}, 134 | {URL: "https://ads.foo.com/baz.png", Matched: false}, 135 | }) 136 | } 137 | 138 | func TestOptsDomain(t *testing.T) { 139 | testInputs(t, ` 140 | /ads$domain=foo.com|~info.foo.com 141 | ||bar.com^$domain=bar.com 142 | ||bar.com^$domain=baz.com 143 | `, 144 | []TestInput{ 145 | {URL: "http://foo.com/ads", Matched: true, OriginDomain: "foo.com"}, 146 | {URL: "http://foo.com/ads", Matched: false}, 147 | {URL: "http://other.foo.com/ads", Matched: true, OriginDomain: "other.foo.com"}, 148 | {URL: "http://info.foo.com/ads", Matched: false, OriginDomain: "info.foor.com"}, 149 | {URL: "http://foo.com/img", Matched: false, OriginDomain: "foo.com"}, 150 | {URL: "http://other.com/ads", Matched: false}, 151 | {URL: "http://bar.com/script", Matched: true, OriginDomain: "bar.com"}, 152 | {URL: "http://bar.com/script", Matched: true, OriginDomain: "baz.com"}, 153 | {URL: "http://bar.com/script", Matched: false, OriginDomain: "foo.com"}, 154 | }) 155 | } 156 | 157 | func TestOptsContent(t *testing.T) { 158 | testInputs(t, ` 159 | /img$image 160 | /notimg$~image 161 | /webfont$font 162 | `, 163 | []TestInput{ 164 | {URL: "http://foo.com/img", Matched: false}, 165 | {URL: "http://foo.com/img", Matched: true, ContentType: "image/png"}, 166 | {URL: "http://foo.com/img", Matched: false, ContentType: "text/plain"}, 167 | {URL: "http://foo.com/notimg", Matched: false}, 168 | {URL: "http://foo.com/notimg", Matched: false, ContentType: "image/png"}, 169 | {URL: "http://foo.com/notimg", Matched: true, ContentType: "text/plain"}, 170 | {URL: "http://foo.com/webfont", Matched: true, ContentType: "font/opentype"}, 171 | {URL: "http://foo.com/webfont", Matched: false, ContentType: "image/png"}, 172 | }) 173 | } 174 | 175 | func TestOptsThirdParty(t *testing.T) { 176 | testInputs(t, ` 177 | /img$third-party 178 | `, 179 | []TestInput{ 180 | {URL: 
"http://foo.com/img", Matched: true}, 181 | {URL: "http://foo.com/img", Matched: true, OriginDomain: "bar.com"}, 182 | {URL: "http://foo.com/img", Matched: false, OriginDomain: "foo.com"}, 183 | {URL: "http://foo.com/img", Matched: true, OriginDomain: "sub.foo.com"}, 184 | {URL: "http://sub.foo.com/img", Matched: false, OriginDomain: "foo.com"}, 185 | }) 186 | } 187 | 188 | func TestGenericBlock(t *testing.T) { 189 | testInputs(t, ` 190 | ?match$domain=foo.biz 191 | /ads 192 | /ads1$domain=foo.com 193 | /ads2$domain=bar.com 194 | ||foo.org^ 195 | ||bar.org^ 196 | @@||foo.com^$genericblock 197 | @@||foo.org^$genericblock 198 | @@||foo.biz^$genericblock 199 | @@/reject 200 | `, 201 | []TestInput{ 202 | // Generic match 203 | {URL: "http://foo.com/ads", Matched: false, OriginDomain: "foo.com"}, 204 | {URL: "http://bar.com/ads", Matched: true, OriginDomain: "bar.com"}, 205 | // Domain specific match 206 | {URL: "http://foo.com/ads1", Matched: true, OriginDomain: "foo.com"}, 207 | {URL: "http://bar.com/ads2", Matched: true, OriginDomain: "bar.com"}, 208 | {URL: "http://foo.org/ads3", Matched: true, OriginDomain: "foo.org"}, 209 | {URL: "http://bar.org/ads3", Matched: true, OriginDomain: "bar.org"}, 210 | // Exclude rules ignore genericblock bit 211 | {URL: "http://foo.biz/reject?match", Matched: false, OriginDomain: "foo.biz"}, 212 | }) 213 | } 214 | 215 | func testInvalidRules(t *testing.T, rules string) { 216 | parsed, err := ParseRules(bytes.NewBufferString(rules)) 217 | if err != nil { 218 | t.Fatal(err) 219 | } 220 | m := NewMatcher() 221 | for _, rule := range parsed { 222 | err = m.AddRule(rule, 0) 223 | if err != nil { 224 | return 225 | } 226 | } 227 | t.Fatalf("unexpected valid rules: %s", rules) 228 | } 229 | 230 | func TestInvalidRules(t *testing.T) { 231 | // $genericblock applies only on exclude rules 232 | testInvalidRules(t, "||foo.biz^$genericblock") 233 | } 234 | 235 | func TestInterruptedMatching(t *testing.T) { 236 | m, added, err := 
NewMatcherFromFiles( 237 | "testdata/too_many_wildcards.txt", 238 | ) 239 | if err != nil { 240 | t.Fatal(err) 241 | } 242 | if added == 0 { 243 | t.Fatalf("not enough rules loaded: %d", added) 244 | } 245 | rq := Request{ 246 | URL: "http://www.ultimedia.com/api/widget/smart?j=new&t=1444644802198&otherplayer=0&exclude=&meta_description=Le%20Monde.fr%20version%20mobile%20-%20L%E2%80%99attentat%20de%20samedi%20dans%20la%20capitale%20turque%2C%20qui%20a%20fait%20au%20moins%2097%20morts%2C%20met%20au%20jour%20le%20jeu%20dangereux%20du%20pouvoir%2C%20%C3%A0%20trois%20semaines%20des%20l%C3%A9gislatives.&meta_ogtitle=Apr%C3%A8s%20l%E2%80%99attentat%20d%E2%80%99Ankara%2C%20la%20Turquie%20au%20bord%20du%20gouffre&meta_ogdescription=Le%20Monde.fr%20version%20mobile%20-%20L%E2%80%99attentat%20de%20samedi%20dans%20la%20capitale%20turque%2C%20qui%20a%20fait%20au%20moins%2097%20morts%2C%20met%20au%20jour%20le%20jeu%20dangereux%20du%20pouvoir%2C%20%C3%A0%20trois%20semaines%20des%20l%C3%A9gislatives.&meta_title=Apr%C3%A8s%20l%E2%80%99attentat%20d%E2%80%99Ankara%2C%20la%20Turquie%20au%20bord%20du%20gouffre&meta_h1=Apr%C3%A8s%20l%E2%80%99attentat%20d%E2%80%99Ankara%2C%20la%20Turquie%20au%20bord%20du%20gouffre&meta_h2=Depuis%20que%20les%20%C3%A9lecteurs%20turcs%20ont%20refus%C3%A9%20de%20%3Ca%20target%3D%22_blank%22%20onclick%3D%22return%20false%3B%22%20class%3D%22lien_interne%20conjug%22%20href%3D%22http%3A%2F%2Fconjugaison.lemonde.fr%2Fconjugaison%2Fpremier-groupe%2Fdonner%2F%22%20title%3D%22Conjugaison%20du%20verbe%20donner%22%3Edonner%3C%2Fa%3E%2C%20le%207%26nbsp%3Bjuin%2C%20la%20&meta_datepublished=2015-10-12T10%3A34%3A43%2B02%3A00&date=20151012&url=http%3A%2F%2Fmobile.lemonde.fr%2Feurope%2Farticle%2F2015%2F10%2F12%2Fapres-l-attentat-d-ankara-la-turquie-au-bord-du-gouffre_4787525_3214.html&mdtk=01194867&layout=&target=ultimedia_wrapper", 247 | Domain: "www.ultimedia.com", 248 | ContentType: "application/javascript", 249 | OriginDomain: "mobile.lemonde.fr", 250 | Timeout: 200 * 
time.Millisecond, 251 | } 252 | ok, _, err := m.Match(&rq) 253 | if ok || err == nil { 254 | t.Fatalf("matcher successfully applied horrible rule, please change the test") 255 | } 256 | } 257 | 258 | func BenchmarkSlowMatching(b *testing.B) { 259 | m, added, err := NewMatcherFromFiles("testdata/easylist-20141019.txt") 260 | if err != nil { 261 | b.Fatal(err) 262 | } 263 | if added < 14000 { 264 | b.Fatalf("not enough rules loaded: %d", added) 265 | } 266 | rq := Request{ 267 | URL: "http://www.facebook.com/plugins/like.php?action=recommend&app_id=172278489578477&channel=http%3A%2F%2Fstatic.ak.facebook.com%2Fconnect%2Fxd_arbiter%2Fw9JKbyW340G.js%3Fversion%3D41%23cb%3Df1980a49b4%26domain%3Dtheappendix.net%26origin%3Dhttp%253A%252F%252Ftheappendix.net%252Ff81d34bec%26relation%3Dparent.parent&font=verdana&href=http%3A%2F%2Ftheappendix.net%2Fblog%2F2013%2F7%2Fwhy-does-s-look-like-f-a-guide-to-reading-very-old-books&layout=button_count&locale=en_US&sdk=joey&send=false&show_faces=false&width=90", 268 | Domain: "www.facebook.com", 269 | } 270 | b.ResetTimer() 271 | 272 | for i := 0; i < b.N; i++ { 273 | m.Match(&rq) 274 | } 275 | } 276 | 277 | func TestParseRule(t *testing.T) { 278 | // Mostly test we can parse rules with unused features 279 | rules := []string{ 280 | "||bing.com/fd/ls/$~ping", 281 | "||bing.com/fd/ls/$websocket", 282 | } 283 | for _, rule := range rules { 284 | _, err := ParseRule(rule) 285 | if err != nil { 286 | t.Fatalf("failed to parse rule: %s: %s", rule, err) 287 | } 288 | } 289 | } 290 | -------------------------------------------------------------------------------- /adstop/adstop.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "crypto/tls" 7 | "flag" 8 | "fmt" 9 | "io/ioutil" 10 | "log" 11 | "mime" 12 | "net" 13 | "net/http" 14 | _ "net/http/pprof" 15 | "net/url" 16 | "strings" 17 | "sync" 18 | "sync/atomic" 19 | "time" 20 | 21 | 
"github.com/elazarl/goproxy" 22 | "github.com/inconshreveable/go-vhost" 23 | "github.com/pmezard/adblock/adblock" 24 | ) 25 | 26 | var ( 27 | timeoutStr = flag.String("timeout", "5m", "HTTP/TCP connections global timeout") 28 | matchTimeoutStr = flag.String("match-timeout", "1s", "request matching timeout") 29 | httpAddr = flag.String("http", "localhost:1080", "HTTP handler address") 30 | httpsAddr = flag.String("https", "localhost:1081", "HTTPS handler address") 31 | httpDebug = flag.String("debug-addr", "", "HTTP debug address") 32 | logRequests = flag.Uint64("log", 0, "enable logging") 33 | cacheDir = flag.String("cache", ".cache", "cache directory") 34 | maxAgeArg = flag.String("max-age", "24h", "cached entries max age") 35 | caCert = flag.String("ca-cert", "", "path to CA certificate") 36 | caKey = flag.String("ca-key", "", "path to CA key") 37 | ) 38 | 39 | func logRequest(r *http.Request) { 40 | buf := &bytes.Buffer{} 41 | fmt.Fprintf(buf, "REQ\n\n") 44 | log.Println(buf.String()) 45 | } 46 | 47 | func logResponse(r *http.Response) { 48 | buf := &bytes.Buffer{} 49 | fmt.Fprintf(buf, "RSP\n\n") 52 | log.Println(buf.String()) 53 | } 54 | 55 | func getReferrerDomain(r *http.Request) string { 56 | ref := r.Header.Get("Referer") 57 | if len(ref) > 0 { 58 | u, err := url.Parse(ref) 59 | if err == nil { 60 | return u.Host 61 | } 62 | } 63 | return "" 64 | } 65 | 66 | type ProxyState struct { 67 | Duration time.Duration 68 | URL string 69 | } 70 | 71 | type FilteringHandler struct { 72 | Cache *RuleCache 73 | MatchTimeout time.Duration 74 | } 75 | 76 | func (h *FilteringHandler) OnRequest(r *http.Request, ctx *goproxy.ProxyCtx) ( 77 | *http.Request, *http.Response) { 78 | 79 | host := r.URL.Host 80 | if host == "" { 81 | host = r.Host 82 | } 83 | rq := &adblock.Request{ 84 | URL: r.URL.String(), 85 | Domain: host, 86 | OriginDomain: getReferrerDomain(r), 87 | Timeout: h.MatchTimeout, 88 | } 89 | rules := h.Cache.Rules() 90 | start := time.Now() 91 | matched, id, err 
:= rules.Matcher.Match(rq) 92 | if err != nil { 93 | log.Printf("error: matching %s with domain=%s, origin=%, failed: %s", 94 | rq.URL, rq.Domain, rq.OriginDomain, err) 95 | } 96 | end := time.Now() 97 | duration := end.Sub(start) / time.Millisecond 98 | if matched { 99 | rule := rules.Rules[id] 100 | log.Printf("rejected in %dms: %s\n", duration, r.URL.String()) 101 | log.Printf(" by %s\n", rule) 102 | return r, goproxy.NewResponse(r, goproxy.ContentTypeText, 103 | http.StatusNotFound, "Not Found") 104 | } 105 | ctx.UserData = &ProxyState{ 106 | Duration: duration, 107 | URL: r.URL.String(), 108 | } 109 | return r, nil 110 | } 111 | 112 | func (h *FilteringHandler) OnResponse(r *http.Response, 113 | ctx *goproxy.ProxyCtx) *http.Response { 114 | 115 | if r == nil { 116 | // Happens if RoundTrip fails 117 | return r 118 | } 119 | 120 | state, ok := ctx.UserData.(*ProxyState) 121 | if !ok { 122 | // The request was rejected by the previous handler 123 | return r 124 | } 125 | 126 | duration2 := time.Duration(0) 127 | mediaType, _, err := mime.ParseMediaType(r.Header.Get("Content-Type")) 128 | if err == nil && len(mediaType) > 0 { 129 | host := ctx.Req.URL.Host 130 | if host == "" { 131 | host = ctx.Req.Host 132 | } 133 | rq := &adblock.Request{ 134 | URL: ctx.Req.URL.String(), 135 | Domain: host, 136 | OriginDomain: getReferrerDomain(ctx.Req), 137 | ContentType: mediaType, 138 | Timeout: h.MatchTimeout, 139 | } 140 | // Second level filtering, based on returned content 141 | rules := h.Cache.Rules() 142 | start := time.Now() 143 | matched, id, err := rules.Matcher.Match(rq) 144 | if err != nil { 145 | log.Printf("error: matching %s with domain=%s, origin=%, content-type: %s, "+ 146 | "failed: %s", rq.URL, rq.Domain, rq.OriginDomain, rq.ContentType, err) 147 | } 148 | end := time.Now() 149 | duration2 = end.Sub(start) / time.Millisecond 150 | if matched { 151 | r.Body.Close() 152 | rule := rules.Rules[id] 153 | log.Printf("rejected in %d/%dms: %s\n", state.Duration, 
duration2, 154 | state.URL) 155 | log.Printf(" by %s\n", rule) 156 | return goproxy.NewResponse(ctx.Req, goproxy.ContentTypeText, 157 | http.StatusNotFound, "Not Found") 158 | } 159 | } 160 | 161 | if atomic.LoadUint64(logRequests)%2 == 1 { 162 | logRequest(ctx.Req) 163 | logResponse(r) 164 | } 165 | log.Printf("accepted in %d/%dms: %s\n", state.Duration, duration2, state.URL) 166 | return r 167 | } 168 | 169 | // CachedConfig holds a TLS configuration. It can be in different states: 170 | // - The config is being generated, Config is nil and the Ready channel is set 171 | // - The config is ready, Config is not nil and Ready is closed. 172 | // This mechanism is used to pool concurrent generations of the same certificate. 173 | type CachedConfig struct { 174 | Config *tls.Config 175 | Ready chan struct{} 176 | } 177 | 178 | // TLSConfigCache is a goroutine-safe cache of TLS configurations mapped to hosts. 179 | type TLSConfigCache struct { 180 | cfgBuilder func(string, *goproxy.ProxyCtx) (*tls.Config, error) 181 | lock sync.Mutex 182 | cache map[string]CachedConfig 183 | hit int 184 | miss int 185 | } 186 | 187 | func NewTLSConfigCache(ca *tls.Certificate) *TLSConfigCache { 188 | return &TLSConfigCache{ 189 | cfgBuilder: goproxy.TLSConfigFromCA(ca), 190 | cache: map[string]CachedConfig{}, 191 | } 192 | } 193 | 194 | func getWildcardHost(host string) string { 195 | first := strings.Index(host, ".") 196 | if first <= 0 { 197 | return host 198 | } 199 | last := strings.LastIndex(host, ".") 200 | if last == first { 201 | // root domain, no wildcard 202 | return host 203 | } 204 | return "*" + host[first:] 205 | } 206 | 207 | func (c *TLSConfigCache) GetConfig(host string, ctx *goproxy.ProxyCtx) (*tls.Config, error) { 208 | host = getWildcardHost(host) 209 | c.lock.Lock() 210 | cached, ok := c.cache[host] 211 | if !ok { 212 | // Register a config generation event 213 | cached = CachedConfig{ 214 | Ready: make(chan struct{}), 215 | } 216 | c.cache[host] = cached 217 | } 
218 | if ok { 219 | c.hit += 1 220 | } else { 221 | c.miss += 1 222 | } 223 | hit := c.hit 224 | miss := c.miss 225 | c.lock.Unlock() 226 | 227 | ctx.Warnf("signing hit/miss: %d/%d (%.1f%%)", hit, miss, 228 | 100.0*float64(hit)/float64(hit+miss)) 229 | if ok { 230 | // config is being generated or is ready, grab it 231 | <-cached.Ready 232 | cfg := cached.Config 233 | if cfg == nil { 234 | return nil, fmt.Errorf("failed to generate TLS config for %s", host) 235 | } 236 | return cfg, nil 237 | } 238 | 239 | // Generate it 240 | start := time.Now() 241 | cfg, err := c.cfgBuilder(host, ctx) 242 | stop := time.Now() 243 | ctx.Warnf("signing %s in %.0fms", host, 244 | float64(stop.Sub(start))/float64(time.Millisecond)) 245 | 246 | c.lock.Lock() 247 | if err == nil { 248 | c.cache[host] = CachedConfig{ 249 | Config: cfg, 250 | Ready: cached.Ready, 251 | } 252 | } else { 253 | delete(c.cache, host) 254 | ctx.Warnf("failed to sign %s: %s", host, err) 255 | } 256 | close(cached.Ready) 257 | c.lock.Unlock() 258 | return cfg, err 259 | } 260 | 261 | // copied/converted from https.go 262 | type dumbResponseWriter struct { 263 | net.Conn 264 | } 265 | 266 | func (dumb dumbResponseWriter) Header() http.Header { 267 | panic("Header() should not be called on this ResponseWriter") 268 | } 269 | 270 | func (dumb dumbResponseWriter) Write(buf []byte) (int, error) { 271 | if bytes.Equal(buf, []byte("HTTP/1.0 200 OK\r\n\r\n")) { 272 | // throw away the HTTP OK response from the faux CONNECT request 273 | return len(buf), nil 274 | } 275 | return dumb.Conn.Write(buf) 276 | } 277 | 278 | func (dumb dumbResponseWriter) WriteHeader(code int) { 279 | panic("WriteHeader() should not be called on this ResponseWriter") 280 | } 281 | 282 | func (dumb dumbResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) { 283 | return dumb, bufio.NewReadWriter(bufio.NewReader(dumb), bufio.NewWriter(dumb)), nil 284 | } 285 | 286 | func makeCertificate(certPath, keyPath string) (*tls.Certificate, 
error) { 287 | cert, err := ioutil.ReadFile(certPath) 288 | if err != nil { 289 | return nil, fmt.Errorf("cannot load CA certificate: %s", err) 290 | } 291 | key, err := ioutil.ReadFile(keyPath) 292 | if err != nil { 293 | return nil, fmt.Errorf("cannot load CA key: %s", err) 294 | } 295 | ca, err := tls.X509KeyPair(cert, key) 296 | return &ca, err 297 | } 298 | 299 | func runDebugServer(addr string) error { 300 | http.HandleFunc("/trace", func(w http.ResponseWriter, r *http.Request) { 301 | res := atomic.AddUint64(logRequests, 1) 302 | action := "logging requests" 303 | if res%2 == 0 { 304 | action = "ignoring requests" 305 | } 306 | fmt.Fprintf(w, "%s\n", action) 307 | }) 308 | return http.ListenAndServe(addr, nil) 309 | } 310 | 311 | func listenTransparentTLS(proxy *goproxy.ProxyHttpServer, addr string, 312 | timeout time.Duration) error { 313 | 314 | // listen to the TLS ClientHello but make it a CONNECT request instead 315 | ln, err := net.Listen("tcp", addr) 316 | if err != nil { 317 | return err 318 | } 319 | for { 320 | c, err := ln.Accept() 321 | if err != nil { 322 | log.Printf("error accepting new connection - %v", err) 323 | continue 324 | } 325 | go func(c net.Conn) { 326 | c.SetDeadline(time.Now().Add(timeout)) 327 | tlsConn, err := vhost.TLS(c) 328 | if err != nil { 329 | log.Printf("error accepting new connection - %v", err) 330 | } 331 | if tlsConn.Host() == "" { 332 | log.Printf("cannot support non-SNI enabled clients") 333 | return 334 | } 335 | connectReq := &http.Request{ 336 | Method: "CONNECT", 337 | URL: &url.URL{ 338 | Opaque: tlsConn.Host(), 339 | Host: net.JoinHostPort(tlsConn.Host(), "443"), 340 | }, 341 | Host: tlsConn.Host(), 342 | Header: make(http.Header), 343 | } 344 | resp := dumbResponseWriter{tlsConn} 345 | proxy.ServeHTTP(resp, connectReq) 346 | }(c) 347 | } 348 | } 349 | 350 | func runProxy() error { 351 | flag.Parse() 352 | timeout, err := time.ParseDuration(*timeoutStr) 353 | if err != nil { 354 | return fmt.Errorf("could not 
parse timeout %s: %s", *timeoutStr, err) 355 | } 356 | matchTimeout, err := time.ParseDuration(*matchTimeoutStr) 357 | if err != nil { 358 | return fmt.Errorf("could not parse matching timeout %s: %s", *matchTimeoutStr, err) 359 | } 360 | if *caCert == "" || *caKey == "" { 361 | return fmt.Errorf("CA certificate and key must be specified") 362 | } 363 | ca, err := makeCertificate(*caCert, *caKey) 364 | if err != nil { 365 | return err 366 | } 367 | 368 | maxAge, err := time.ParseDuration(*maxAgeArg) 369 | if err != nil { 370 | return fmt.Errorf("invalid max-age: %s", err) 371 | } 372 | if maxAge < 0 { 373 | return fmt.Errorf("invalid negative max-age") 374 | } 375 | log.Printf("loading rules") 376 | cache, err := NewRuleCache(*cacheDir, flag.Args(), maxAge) 377 | if err != nil { 378 | return err 379 | } 380 | h := &FilteringHandler{ 381 | Cache: cache, 382 | MatchTimeout: matchTimeout, 383 | } 384 | 385 | if *httpDebug != "" { 386 | log.Printf("starting debug server on %s", *httpDebug) 387 | go func() { 388 | log.Println(runDebugServer(*httpDebug)) 389 | }() 390 | } 391 | 392 | log.Printf("starting servers") 393 | proxy := goproxy.NewProxyHttpServer() 394 | proxy.NonproxyHandler = http.HandlerFunc( 395 | func(w http.ResponseWriter, req *http.Request) { 396 | if req.Host == "" { 397 | log.Printf("Cannot handle requests without Host header, e.g., HTTP 1.0") 398 | return 399 | } 400 | req.URL.Scheme = "http" 401 | req.URL.Host = req.Host 402 | proxy.ServeHTTP(w, req) 403 | }) 404 | 405 | // Cache MITM certificates 406 | tlsCache := NewTLSConfigCache(ca) 407 | MitmConnect := &goproxy.ConnectAction{ 408 | Action: goproxy.ConnectMitm, 409 | TLSConfig: func(host string, ctx *goproxy.ProxyCtx) (*tls.Config, error) { 410 | return tlsCache.GetConfig(host, ctx) 411 | }, 412 | } 413 | var AlwaysMitm goproxy.FuncHttpsHandler = func(host string, ctx *goproxy.ProxyCtx) ( 414 | *goproxy.ConnectAction, string) { 415 | 416 | return MitmConnect, host 417 | } 418 | 
proxy.OnRequest().HandleConnect(AlwaysMitm) 419 | 420 | proxy.OnRequest().DoFunc(h.OnRequest) 421 | proxy.OnResponse().DoFunc(h.OnResponse) 422 | 423 | done := make(chan error) 424 | go func() { 425 | server := http.Server{ 426 | Addr: *httpAddr, 427 | Handler: proxy, 428 | ReadTimeout: timeout, 429 | WriteTimeout: timeout, 430 | } 431 | done <- server.ListenAndServe() 432 | }() 433 | 434 | go func() { 435 | done <- listenTransparentTLS(proxy, *httpsAddr, timeout) 436 | }() 437 | 438 | return <-done 439 | } 440 | 441 | func main() { 442 | err := runProxy() 443 | if err != nil { 444 | log.Fatalf("error: %s\n", err) 445 | } 446 | } 447 | -------------------------------------------------------------------------------- /adblock/rules.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package implements a parser and a matcher for AdBlockPlus rules. 3 | 4 | The syntax of AdBlockPlus rules is partially defined in 5 | https://adblockplus.org/en/filter-cheatsheet and 6 | https://adblockplus.org/en/filters. 7 | 8 | To parse rules and build a matcher: 9 | 10 | matcher := adblock.NewMatcher() 11 | fp, err := os.Open("easylist.txt") 12 | ... 13 | rules, err := adblock.ParseRules(fp) 14 | for _, rule := range rules { 15 | err = matcher.AddRule(rule, 0) 16 | ... 17 | } 18 | 19 | To match HTTP requests: 20 | 21 | host := r.URL.Host 22 | if host == "" { 23 | host = r.Host 24 | } 25 | rq := adblock.Request{ 26 | URL: r.URL.String(), 27 | Domain: host, 28 | // possibly fill OriginDomain from Referrer header 29 | // and ContentType from HTTP response Content-Type. 30 | Timeout: 200 * time.Millisecond, 31 | } 32 | matched, id, err := matcher.Match(rq) 33 | if err != nil { 34 | ... 
35 | } 36 | if matched { 37 | // Use the rule identifier to print which rules was matched 38 | } 39 | */ 40 | package adblock 41 | 42 | import ( 43 | "bufio" 44 | "bytes" 45 | "fmt" 46 | "io" 47 | "os" 48 | "regexp" 49 | "strings" 50 | "time" 51 | ) 52 | 53 | const ( 54 | Exact = iota // string to match 55 | Wildcard = iota // * 56 | Separator = iota // ^ 57 | StartAnchor = iota // | 58 | DomainAnchor = iota // || 59 | 60 | Root = iota 61 | Substring = iota // Wildcard + Exact 62 | ) 63 | 64 | func getPartName(ruleType int) string { 65 | switch ruleType { 66 | case Exact: 67 | return "exact" 68 | case Wildcard: 69 | return "wildcard" 70 | case Separator: 71 | return "separator" 72 | case StartAnchor: 73 | return "startanchor" 74 | case DomainAnchor: 75 | return "domainanchor" 76 | case Root: 77 | return "root" 78 | case Substring: 79 | return "substring" 80 | default: 81 | return "unknown" 82 | } 83 | } 84 | 85 | // RulePart is the base component of rules. It represents a single 86 | // matching element, like an exact match, a wildcard, a domain anchor... 87 | type RulePart struct { 88 | // Rule type, like Exact, Wildcard, etc. 89 | Type int 90 | // Rule part string representation 91 | Value string 92 | } 93 | 94 | // RuleOpts defines custom rules applied to content once the URL part 95 | // has been matched by the RuleParts. 96 | type RuleOpts struct { 97 | Raw string 98 | Collapse *bool 99 | Document bool 100 | Domains []string 101 | ElemHide bool 102 | Font *bool 103 | GenericBlock bool 104 | GenericHide bool 105 | Image *bool 106 | Media *bool 107 | Object *bool 108 | ObjectSubRequest *bool 109 | Other *bool 110 | Ping *bool 111 | Popup *bool 112 | Script *bool 113 | Stylesheet *bool 114 | SubDocument *bool 115 | ThirdParty *bool 116 | Websocket *bool 117 | WebRTC *bool 118 | XmlHttpRequest *bool 119 | } 120 | 121 | // NewRuleOpts parses the rule part following the '$' separator 122 | // and return content matching options. 
123 | func NewRuleOpts(s string) (RuleOpts, error) { 124 | opts := RuleOpts{Raw: s} 125 | for _, opt := range strings.Split(s, ",") { 126 | opt = strings.TrimSpace(opt) 127 | value := true 128 | if strings.HasPrefix(opt, "~") { 129 | value = false 130 | opt = opt[1:] 131 | } 132 | switch { 133 | case opt == "script": 134 | opts.Script = &value 135 | case opt == "image": 136 | opts.Image = &value 137 | case opt == "stylesheet": 138 | opts.Stylesheet = &value 139 | case opt == "object": 140 | opts.Object = &value 141 | case opt == "object-subrequest": 142 | opts.ObjectSubRequest = &value 143 | case opt == "other": 144 | opts.Other = &value 145 | case opt == "subdocument": 146 | opts.SubDocument = &value 147 | case opt == "document": 148 | opts.Document = true 149 | case opt == "elemhide": 150 | opts.ElemHide = true 151 | case opt == "genericblock": 152 | opts.GenericBlock = true 153 | case opt == "generichide": 154 | opts.GenericHide = true 155 | case opt == "third-party": 156 | opts.ThirdParty = &value 157 | case strings.HasPrefix(opt, "domain="): 158 | s = opt[len("domain="):] 159 | for _, d := range strings.Split(s, "|") { 160 | d = strings.TrimSpace(d) 161 | opts.Domains = append(opts.Domains, d) 162 | } 163 | case opt == "ping": 164 | opts.Ping = &value 165 | case opt == "websocket": 166 | opts.Websocket = &value 167 | case opt == "webrtc": 168 | opts.WebRTC = &value 169 | // Undocumented options 170 | case opt == "xmlhttprequest": 171 | opts.XmlHttpRequest = &value 172 | case opt == "media": 173 | opts.Media = &value 174 | case opt == "popup": 175 | opts.Popup = &value 176 | case opt == "collapse": 177 | opts.Collapse = &value 178 | case opt == "font": 179 | opts.Font = &value 180 | default: 181 | return opts, fmt.Errorf("unknown rule option: %s", opt) 182 | } 183 | } 184 | return opts, nil 185 | } 186 | 187 | // Rule represents a complete adblockplus rule. 
188 | type Rule struct { 189 | // The original string representation 190 | Raw string 191 | // Exception is true for exclusion rules (prefixed with "@@") 192 | Exception bool 193 | // Parts is the sequence of RulePart matching URLs 194 | Parts []RulePart 195 | // Opts are optional rules applied to content 196 | Opts RuleOpts 197 | } 198 | 199 | var ( 200 | NullOpts = RuleOpts{} 201 | ) 202 | 203 | func (r *Rule) HasUnsupportedOpts() bool { 204 | // Collapse is related to ElemHide, and irrelevant 205 | return r.Opts.Document || 206 | // len(r.Opts.Domains) > 0 // handled 207 | // r.Opts.ElemHide // irrelevant 208 | // r.Opts.GenericHide // irrelevant 209 | // r.Opts.Image != nil || // handled 210 | r.Opts.Media != nil || 211 | // r.Opts.Object != nil || // handled 212 | // r.Opts.ObjectSubRequest != nil || // cannot guess request source 213 | // r.Opts.Other != nil // not sure what to do with this one 214 | r.Opts.Popup != nil 215 | // r.Opts.Script != nil || // handled 216 | // r.Opts.Stylesheet != nil || // handled 217 | // r.Opts.SubDocument != nil || // cannot guess request source 218 | // r.Opts.ThirdParty != nil // handled 219 | // r.Opts.XmlHttpRequest != nil // cannot guess request source 220 | // r.Opts.Ping // used for javascript pingback 221 | } 222 | 223 | func (r *Rule) HasContentOpts() bool { 224 | return r.Opts.Image != nil || 225 | r.Opts.Object != nil || 226 | r.Opts.Script != nil || 227 | r.Opts.Stylesheet != nil || 228 | r.Opts.Font != nil 229 | } 230 | 231 | // ParseRule parses a single rule. 232 | func ParseRule(s string) (*Rule, error) { 233 | r := Rule{Raw: s} 234 | s = strings.TrimSpace(s) 235 | if len(s) == 0 || s[0] == '!' 
{ 236 | // Empty or comment 237 | return nil, nil 238 | } 239 | if strings.Contains(s, "##") || strings.Contains(s, "#?#") { 240 | // Element selectors are not supported 241 | return nil, nil 242 | } 243 | if strings.HasPrefix(s, "@@") { 244 | r.Exception = true 245 | s = s[2:] 246 | } 247 | if strings.HasPrefix(s, "||") { 248 | r.Parts = append(r.Parts, RulePart{Type: DomainAnchor, Value: "||"}) 249 | s = s[2:] 250 | } 251 | if pos := strings.LastIndex(s, "$"); pos >= 0 { 252 | optsStr := s[pos+1:] 253 | // Parse the options later 254 | opts, err := NewRuleOpts(optsStr) 255 | if err != nil { 256 | return nil, err 257 | } 258 | r.Opts = opts 259 | s = s[:pos] 260 | } 261 | 262 | var p RulePart 263 | for len(s) > 0 { 264 | pos := strings.IndexAny(s, "*^|") 265 | if pos < 0 { 266 | p := RulePart{Type: Exact, Value: s} 267 | r.Parts = append(r.Parts, p) 268 | break 269 | } 270 | if pos > 0 { 271 | p = RulePart{Type: Exact, Value: s[:pos]} 272 | r.Parts = append(r.Parts, p) 273 | } 274 | t := Wildcard 275 | switch s[pos] { 276 | case '*': 277 | t = Wildcard 278 | case '^': 279 | t = Separator 280 | case '|': 281 | t = StartAnchor 282 | } 283 | r.Parts = append(r.Parts, RulePart{Type: t, Value: s[pos : pos+1]}) 284 | s = s[pos+1:] 285 | } 286 | return &r, nil 287 | } 288 | 289 | // ParseRules returns the sequence of rules extracted from supplied reader 290 | // content. 291 | func ParseRules(r io.Reader) ([]*Rule, error) { 292 | rules := []*Rule{} 293 | scanner := bufio.NewScanner(r) 294 | for scanner.Scan() { 295 | r, err := ParseRule(scanner.Text()) 296 | if r == nil { 297 | continue 298 | } 299 | if err != nil { 300 | return nil, err 301 | } 302 | rules = append(rules, r) 303 | } 304 | return rules, scanner.Err() 305 | } 306 | 307 | // Request defines client request properties to be matched against a set 308 | // of rules. 309 | type Request struct { 310 | // URL is matched against rule parts. Mandatory. 
311 | URL string 312 | // Domain is matched against optional domain or third-party rules 313 | Domain string 314 | // ContentType is matched against optional content rules. This 315 | // information is often available only in client responses. Filters 316 | // may be applied twice, once at request time, once at response time. 317 | ContentType string 318 | // OriginDomain is matched against optional third-party rules. 319 | OriginDomain string 320 | 321 | // Timeout is the maximum amount of time a single matching can take. 322 | Timeout time.Duration 323 | CheckFreq int 324 | 325 | // GenericBlock is true if rules not matching a specific domain are to be 326 | // ignored. If nil, the matcher will determine it internally based on 327 | // $genericblock options. 328 | GenericBlock *bool 329 | } 330 | 331 | func (rq *Request) HasGenericBlock() bool { 332 | return rq.GenericBlock != nil && *rq.GenericBlock 333 | } 334 | 335 | // RuleNode is the node structure of rule trees. 336 | // Rule trees start with a Root node containing any number of non-Root 337 | // RuleNodes. 338 | type ruleNode struct { 339 | Type int 340 | Value []byte 341 | Opts []*RuleOpts // non-empty on terminating nodes 342 | Children []*ruleNode 343 | RuleId int 344 | } 345 | 346 | // GetValue returns the node representation. It may differ from Value field 347 | // for composite nodes like Sustring. 
348 | func (n *ruleNode) GetValue() string { 349 | v := n.Value 350 | if n.Type == Substring { 351 | v = make([]byte, 1+len(n.Value)) 352 | v[0] = '*' 353 | copy(v[1:], n.Value) 354 | } 355 | return string(v) 356 | } 357 | 358 | func (n *ruleNode) AddRule(parts []RulePart, opts *RuleOpts, id int) error { 359 | if len(parts) == 0 { 360 | n.Opts = append(n.Opts, opts) 361 | n.RuleId = id 362 | return nil 363 | } 364 | // Looks for existing matching rule parts 365 | part := parts[0] 366 | if part.Type != Exact && part.Type != Wildcard && part.Type != Separator && 367 | part.Type != DomainAnchor && part.Type != Substring { 368 | return fmt.Errorf("unknown rule part type: %+v", part) 369 | } 370 | var child *ruleNode 371 | value := []byte(part.Value) 372 | for _, c := range n.Children { 373 | // TODO: be smarter with ExactMatch 374 | if c.Type == part.Type && bytes.Equal(c.Value, value) { 375 | child = c 376 | break 377 | } 378 | } 379 | created := false 380 | if child == nil { 381 | child = &ruleNode{ 382 | Type: part.Type, 383 | Value: []byte(part.Value), 384 | } 385 | created = true 386 | } 387 | err := child.AddRule(parts[1:], opts, id) 388 | if err == nil && created { 389 | // Do not modify the tree when failing to insert a rule 390 | n.Children = append(n.Children, child) 391 | } 392 | return err 393 | } 394 | 395 | var ( 396 | reSeparator = regexp.MustCompile(`^(?:[^\w\d_\-\.%]|$)`) 397 | ) 398 | 399 | func matchOptsDomains(opts *RuleOpts, domain string) bool { 400 | if len(opts.Domains) == 0 { 401 | return true 402 | } 403 | accept := false 404 | for _, d := range opts.Domains { 405 | reject := strings.HasPrefix(d, "~") 406 | if reject { 407 | d = d[1:] 408 | } 409 | if domain == d || strings.HasSuffix(domain, "."+d) { 410 | if reject { 411 | return false 412 | } 413 | accept = true 414 | } 415 | } 416 | return accept 417 | } 418 | 419 | func matchOptsContent(opts *RuleOpts, contentType string) bool { 420 | if opts.Image != nil { 421 | isImage := 
strings.HasPrefix(contentType, "image/") 422 | if isImage != *opts.Image { 423 | return false 424 | } 425 | } 426 | if opts.Object != nil { 427 | isObject := strings.Contains(contentType, "shockwave") 428 | if isObject != *opts.Object { 429 | return false 430 | } 431 | } 432 | if opts.Script != nil { 433 | isScript := strings.Contains(contentType, "script") 434 | if isScript != *opts.Script { 435 | return false 436 | } 437 | } 438 | if opts.Stylesheet != nil { 439 | isStylesheet := strings.Contains(contentType, "css") 440 | if isStylesheet != *opts.Stylesheet { 441 | return false 442 | } 443 | } 444 | if opts.Font != nil { 445 | isFont := strings.Contains(contentType, "font") 446 | if isFont != *opts.Font { 447 | return false 448 | } 449 | } 450 | return true 451 | } 452 | 453 | func matchOptsThirdParty(opts *RuleOpts, origin, domain string) bool { 454 | if opts.ThirdParty == nil { 455 | return true 456 | } 457 | isSubdomain := origin == domain || 458 | strings.HasSuffix(domain, "."+origin) 459 | return isSubdomain != *opts.ThirdParty 460 | } 461 | 462 | // matchContext is forwarded to matching functions which call Continue(). The 463 | // current match duration is sampled and the call aborted if it exceeds a 464 | // timeout. 465 | // On failed calls, location is set to the node terminating the match and 466 | // duration is updated to the original duration plus the time exceeding the 467 | // deadline. 
468 | type matchContext struct { 469 | counter int 470 | freq int 471 | duration time.Duration 472 | deadline time.Time 473 | location *ruleNode 474 | genericBlock bool 475 | isDomainRule int 476 | } 477 | 478 | func (ctx *matchContext) Continue(n *ruleNode) bool { 479 | if ctx.freq <= 0 { 480 | return true 481 | } 482 | ctx.counter += 1 483 | if ctx.counter < ctx.freq { 484 | return true 485 | } 486 | ctx.counter = 0 487 | now := time.Now() 488 | stop := now.After(ctx.deadline) 489 | if stop { 490 | ctx.location = n 491 | ctx.duration += now.Sub(ctx.deadline) 492 | } 493 | return !stop 494 | } 495 | 496 | func matchOpts(opt *RuleOpts, ctx *matchContext, rq *Request) bool { 497 | if !matchOptsDomains(opt, rq.OriginDomain) { 498 | return false 499 | } 500 | if !matchOptsContent(opt, rq.ContentType) { 501 | return false 502 | } 503 | if !matchOptsThirdParty(opt, rq.OriginDomain, rq.Domain) { 504 | return false 505 | } 506 | if ctx.genericBlock && ctx.isDomainRule == 0 && len(opt.Domains) == 0 { 507 | // genericblock only applies rules with specific domains 508 | return false 509 | } 510 | return true 511 | } 512 | 513 | func (n *ruleNode) matchChildren(ctx *matchContext, url []byte, rq *Request) ( 514 | int, []*RuleOpts) { 515 | 516 | if !ctx.Continue(n) { 517 | return -1, nil 518 | } 519 | if len(url) == 0 && len(n.Children) == 0 { 520 | for _, opt := range n.Opts { 521 | if matchOpts(opt, ctx, rq) { 522 | return n.RuleId, n.Opts 523 | } 524 | } 525 | return 0, nil 526 | } 527 | // If there are children they have to match 528 | for _, c := range n.Children { 529 | ruleId, opts := c.dispatch(ctx, url, rq) 530 | if opts != nil || ruleId < 0 { 531 | return ruleId, opts 532 | } 533 | } 534 | return 0, nil 535 | } 536 | 537 | func matchDomainAnchor(url []byte, expectedDomain []byte) ([]byte, bool) { 538 | s := url 539 | // Match https?:// 540 | if !bytes.HasPrefix(s, []byte("http")) { 541 | return nil, false 542 | } 543 | s = s[4:] 544 | if len(s) > 0 && s[0] == 
byte('s') { 545 | s = s[1:] 546 | } 547 | if !bytes.HasPrefix(s, []byte("://")) { 548 | return nil, false 549 | } 550 | s = s[3:] 551 | 552 | // Extract host:port part 553 | domain := s 554 | slash := bytes.IndexByte(s, byte('/')) 555 | if slash < 0 { 556 | s = nil 557 | } else { 558 | domain = s[:slash] 559 | s = s[slash:] 560 | } 561 | 562 | // Strip port 563 | Port: 564 | for i := len(domain); i > 0; i-- { 565 | c := domain[i-1] 566 | switch c { 567 | case byte('0'), byte('1'), byte('2'), byte('3'), byte('4'), 568 | byte('5'), byte('6'), byte('7'), byte('8'), byte('9'): 569 | // OK, port numbers 570 | case byte(':'): 571 | domain = domain[:i-1] 572 | break Port 573 | default: 574 | break Port 575 | } 576 | } 577 | // Exact match 578 | if bytes.Equal(expectedDomain, domain) || 579 | // Or subdomain 580 | bytes.HasSuffix(domain, expectedDomain) && 581 | len(domain) > len(expectedDomain) && 582 | domain[len(domain)-len(expectedDomain)-1] == byte('.') { 583 | return s, true 584 | } 585 | return nil, false 586 | } 587 | 588 | func (n *ruleNode) dispatch(ctx *matchContext, url []byte, rq *Request) ( 589 | int, []*RuleOpts) { 590 | 591 | for { 592 | //fmt.Printf("matching '%s' with %s[%s][final:%v]\n", 593 | // string(url), getPartName(n.Type), string(n.Value), n.Opts != nil) 594 | switch n.Type { 595 | case Exact: 596 | if !bytes.HasPrefix(url, n.Value) { 597 | return 0, nil 598 | } 599 | url = url[len(n.Value):] 600 | return n.matchChildren(ctx, url, rq) 601 | case Separator: 602 | m := reSeparator.FindSubmatch(url) 603 | if m == nil { 604 | return 0, nil 605 | } 606 | url = url[len(m[0]):] 607 | return n.matchChildren(ctx, url, rq) 608 | case Wildcard: 609 | if len(n.Children) == 0 { 610 | // Fast-path trailing wildcards 611 | return n.matchChildren(ctx, nil, rq) 612 | } 613 | if len(url) == 0 { 614 | return n.matchChildren(ctx, url, rq) 615 | } 616 | for i := 0; i < len(url); i++ { 617 | ruleId, opts := n.matchChildren(ctx, url[i:], rq) 618 | if opts != nil || 
ruleId < 0 { 619 | return ruleId, opts 620 | } 621 | } 622 | case DomainAnchor: 623 | remaining, ok := matchDomainAnchor(url, n.Value) 624 | if ok { 625 | ctx.isDomainRule += 1 626 | ruleId, opts := n.matchChildren(ctx, remaining, rq) 627 | ctx.isDomainRule -= 1 628 | return ruleId, opts 629 | } 630 | case Root: 631 | return n.matchChildren(ctx, url, rq) 632 | case Substring: 633 | for { 634 | if len(url) == 0 { 635 | break 636 | } 637 | pos := bytes.Index(url, n.Value) 638 | if pos < 0 { 639 | break 640 | } 641 | url = url[pos+len(n.Value):] 642 | ruleId, opts := n.matchChildren(ctx, url, rq) 643 | if opts != nil || ruleId < 0 { 644 | return ruleId, opts 645 | } 646 | } 647 | } 648 | return 0, nil 649 | } 650 | } 651 | 652 | // findNodePath returns the partial string represention of target and its 653 | // ancestors in n subtree. 654 | func findNodePath(target *ruleNode, n *ruleNode) (string, bool) { 655 | if target == n { 656 | return n.GetValue(), true 657 | } 658 | for _, c := range n.Children { 659 | s, ok := findNodePath(target, c) 660 | if ok { 661 | return n.GetValue() + s, true 662 | } 663 | } 664 | return "", false 665 | } 666 | 667 | type InterruptedError struct { 668 | Duration time.Duration 669 | Rule string 670 | } 671 | 672 | func (e *InterruptedError) Error() string { 673 | return fmt.Sprintf("interrupted at %s after %.3s", e.Rule, e.Duration) 674 | } 675 | 676 | // Match evaluates a piece of a request URL against the node subtree. If it 677 | // matches an existing rule, returns the rule identifier and its options set. 678 | // Requests are evaluated by applying the nodes on its URL in DFS order. When 679 | // the URL is completely matched by a terminal node, a node with a non-empty 680 | // Opts set, the Opts are applied on the Request properties. Any option match 681 | // validates the URL as a whole and the matching rule identifier is returned. 682 | // If the request timeout is set and exceeded, InterruptedError is returned. 
683 | func (n *ruleNode) Match(url []byte, rq *Request) (int, []*RuleOpts, error) { 684 | ctx := &matchContext{ 685 | freq: rq.CheckFreq, 686 | duration: rq.Timeout, 687 | genericBlock: rq.HasGenericBlock(), 688 | } 689 | if rq.Timeout > 0 { 690 | ctx.deadline = time.Now().Add(rq.Timeout) 691 | if ctx.freq == 0 { 692 | ctx.freq = 1000 693 | } 694 | } 695 | id, ops := n.dispatch(ctx, url, rq) 696 | if ctx.location != nil { 697 | rule, ok := findNodePath(ctx.location, n) 698 | if !ok { 699 | panic("could not find node in rule tree") 700 | } 701 | return id, ops, &InterruptedError{ 702 | Duration: ctx.duration, 703 | Rule: rule, 704 | } 705 | } 706 | return id, ops, nil 707 | } 708 | 709 | // A RuleTree matches a set of adblockplus rules. 710 | type ruleTree struct { 711 | root *ruleNode 712 | } 713 | 714 | // NewRuleTree returns a new empty RuleTree. 715 | func newRuleTree() *ruleTree { 716 | return &ruleTree{ 717 | root: &ruleNode{ 718 | Type: Root, 719 | }, 720 | } 721 | } 722 | 723 | func rewriteDomainAnchors(parts []RulePart) ([]RulePart, error) { 724 | hasAnchor := false 725 | rewritten := []RulePart{} 726 | for i, part := range parts { 727 | if part.Type == DomainAnchor { 728 | // Check next part is an exact match 729 | if i != 0 { 730 | return nil, fmt.Errorf("invalid non-starting domain anchor") 731 | } 732 | if len(parts) < 2 || parts[1].Type != Exact { 733 | return nil, fmt.Errorf("domain anchor must be followed by exact match") 734 | } 735 | hasAnchor = true 736 | } else if part.Type == Exact && hasAnchor { 737 | // Extract the domain part of the following Exact part 738 | value := part.Value 739 | domain := "" 740 | slash := strings.Index(value, "/") 741 | if slash >= 0 { 742 | domain = value[:slash] 743 | value = value[slash:] 744 | } else { 745 | domain = value 746 | value = "" 747 | } 748 | // Set the domain to the preceding anchor 749 | rewritten[len(rewritten)-1] = RulePart{ 750 | Type: DomainAnchor, 751 | Value: domain, 752 | } 753 | if len(value) > 
0 { 754 | // Append remaining trailing Exact 755 | rewritten = append(rewritten, RulePart{ 756 | Type: Exact, 757 | Value: value, 758 | }) 759 | } 760 | hasAnchor = false 761 | continue 762 | } 763 | rewritten = append(rewritten, part) 764 | } 765 | return rewritten, nil 766 | } 767 | 768 | // Add explicit leading and trailing wildcards where they are implicitely 769 | // required. 770 | func addLeadingTrailingWildcards(parts []RulePart) []RulePart { 771 | rewritten := []RulePart{} 772 | for i, part := range parts { 773 | first := i == 0 774 | last := i == len(parts)-1 775 | if first { 776 | // Match every leading byte unless the rule starts with an anchor 777 | if part.Type != StartAnchor && part.Type != DomainAnchor { 778 | rewritten = append(rewritten, 779 | RulePart{ 780 | Type: Wildcard, 781 | }) 782 | } 783 | } 784 | 785 | if part.Type == StartAnchor { 786 | if !first && !last { 787 | // Anchors in the middle of the rules are not anchor but 788 | // literal "|" 789 | rewritten = append(rewritten, 790 | RulePart{ 791 | Type: Exact, 792 | Value: "|", 793 | }) 794 | } 795 | } else { 796 | rewritten = append(rewritten, part) 797 | } 798 | 799 | if last { 800 | // Match every trailing byte unless the rule ends with an anchor 801 | if part.Type != StartAnchor { 802 | rewritten = append(rewritten, 803 | RulePart{ 804 | Type: Wildcard, 805 | }) 806 | } 807 | } 808 | } 809 | return rewritten 810 | } 811 | 812 | // Rewrite Wildcard + Exact as a Substring 813 | func replaceWildcardWithSubstring(parts []RulePart) []RulePart { 814 | rewritten := []RulePart{} 815 | for i, part := range parts { 816 | if i == 0 || parts[i-1].Type != Wildcard { 817 | rewritten = append(rewritten, part) 818 | continue 819 | } 820 | if part.Type != Exact { 821 | rewritten = append(rewritten, part) 822 | continue 823 | } 824 | rewritten[len(rewritten)-1] = RulePart{ 825 | Type: Substring, 826 | Value: part.Value, 827 | } 828 | } 829 | return rewritten 830 | } 831 | 832 | // AddRule add a rule 
and its identifier to the rule tree. 833 | func (t *ruleTree) AddRule(rule *Rule, ruleId int) error { 834 | if rule.HasUnsupportedOpts() { 835 | return fmt.Errorf("rule options are not supported") 836 | } 837 | rewritten, err := rewriteDomainAnchors(rule.Parts) 838 | if err != nil { 839 | return err 840 | } 841 | rewritten = addLeadingTrailingWildcards(rewritten) 842 | rewritten = replaceWildcardWithSubstring(rewritten) 843 | 844 | if len(rewritten) == 0 { 845 | return nil 846 | } 847 | return t.root.AddRule(rewritten, &rule.Opts, ruleId) 848 | } 849 | 850 | // Match evaluates the request. If it matches any rule, it returns the 851 | // rule identifier and its options. 852 | func (t *ruleTree) Match(rq *Request) (int, []*RuleOpts, error) { 853 | return t.root.Match([]byte(rq.URL), rq) 854 | } 855 | 856 | func (t *ruleTree) String() string { 857 | w := &bytes.Buffer{} 858 | var printNode func(*ruleNode, int) 859 | printNode = func(n *ruleNode, level int) { 860 | w.WriteString(strings.Repeat(" ", level)) 861 | w.WriteString(getPartName(n.Type)) 862 | switch n.Type { 863 | case Exact, DomainAnchor: 864 | w.WriteString("[") 865 | w.WriteString(string(n.Value)) 866 | w.WriteString("]") 867 | } 868 | if len(n.Opts) > 0 { 869 | for _, opt := range n.Opts { 870 | fmt.Fprintf(w, "[%s]", opt.Raw) 871 | } 872 | } 873 | w.WriteString("\n") 874 | for _, c := range n.Children { 875 | printNode(c, level+1) 876 | } 877 | } 878 | printNode(t.root, 0) 879 | return w.String() 880 | } 881 | 882 | // RuleMatcher implements a complete set of include and exclude AdblockPlus 883 | // rules. 884 | type RuleMatcher struct { 885 | includes *ruleTree 886 | excludes *ruleTree 887 | // Rules requiring resource content type 888 | contentIncludes *ruleTree 889 | contentExcludes *ruleTree 890 | // Match domains not matching generic rules 891 | genericBlock *ruleTree 892 | } 893 | 894 | // NewMatcher returns a new empty matcher. 
895 | func NewMatcher() *RuleMatcher { 896 | return &RuleMatcher{ 897 | includes: newRuleTree(), 898 | excludes: newRuleTree(), 899 | contentIncludes: newRuleTree(), 900 | contentExcludes: newRuleTree(), 901 | genericBlock: newRuleTree(), 902 | } 903 | } 904 | 905 | // AddRule adds a rule to the matcher. Supplied rule identifier will be 906 | // returned by Match(). 907 | func (m *RuleMatcher) AddRule(rule *Rule, ruleId int) error { 908 | var tree *ruleTree 909 | if rule.Opts.GenericBlock { 910 | if !rule.Exception { 911 | return fmt.Errorf("$genericblock applies only on exclude rules: %s", rule.Raw) 912 | } 913 | return m.genericBlock.AddRule(rule, ruleId) 914 | } 915 | if rule.HasContentOpts() { 916 | if rule.Exception { 917 | tree = m.contentExcludes 918 | } else { 919 | tree = m.contentIncludes 920 | } 921 | } else { 922 | if rule.Exception { 923 | tree = m.excludes 924 | } else { 925 | tree = m.includes 926 | } 927 | } 928 | return tree.AddRule(rule, ruleId) 929 | } 930 | 931 | // Match applies include and exclude rules on supplied request. If the 932 | // request is accepted, it returns true and the matching rule identifier. 
933 | func (m *RuleMatcher) Match(rq *Request) (bool, int, error) { 934 | copied := false 935 | if rq.GenericBlock == nil { 936 | _, opts, err := m.genericBlock.Match(rq) 937 | if err != nil { 938 | return false, 0, err 939 | } 940 | if opts != nil { 941 | // Do not mutate caller structures 942 | copied = true 943 | genericBlock := true 944 | rq = &(*rq) 945 | rq.GenericBlock = &genericBlock 946 | } 947 | } 948 | inc := m.includes 949 | exc := m.excludes 950 | if len(rq.ContentType) > 0 { 951 | inc = m.contentIncludes 952 | exc = m.contentExcludes 953 | } 954 | id, opts, err := inc.Match(rq) 955 | if opts == nil || err != nil { 956 | return false, 0, err 957 | } 958 | if copied { 959 | // Exclude rules ignore the genericBlock bit, unless explicitely set by 960 | // the caller 961 | rq.GenericBlock = nil 962 | } 963 | _, opts, err = exc.Match(rq) 964 | return opts == nil, id, err 965 | } 966 | 967 | // String returns a textual representation of the include and exclude rules, 968 | // matching request with or without content. 
969 | func (m *RuleMatcher) String() string { 970 | return fmt.Sprintf("includes:\n%s\nexcludes:\n%s\n"+ 971 | "content-includes:\n%s\ncontent-excludes:\n%s\n", 972 | m.includes, m.excludes, m.contentIncludes, m.contentExcludes) 973 | } 974 | 975 | func loadRulesFromFile(m *RuleMatcher, path string) (int, error) { 976 | fp, err := os.Open(path) 977 | if err != nil { 978 | return 0, err 979 | } 980 | defer fp.Close() 981 | parsed, err := ParseRules(fp) 982 | if err != nil { 983 | return 0, err 984 | } 985 | added := 0 986 | for _, rule := range parsed { 987 | err := m.AddRule(rule, 0) 988 | if err == nil { 989 | added += 1 990 | } 991 | } 992 | return added, nil 993 | } 994 | 995 | func NewMatcherFromFiles(paths ...string) (*RuleMatcher, int, error) { 996 | added := 0 997 | m := NewMatcher() 998 | for _, path := range paths { 999 | n, err := loadRulesFromFile(m, path) 1000 | if err != nil { 1001 | return nil, 0, err 1002 | } 1003 | added += n 1004 | } 1005 | return m, added, nil 1006 | } 1007 | --------------------------------------------------------------------------------