├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── finder.go
├── finder_test.go
├── helper.go
├── helper_test.go
├── interfaces
├── doc.go
├── fetcher.go
├── parser.go
├── registrar.go
└── reporter.go
├── main.go
├── processed.go
└── report.go
/.gitignore:
--------------------------------------------------------------------------------
1 | /.idea
2 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: go
2 | go:
3 | - 1.5
4 | - 1.6
5 |
6 | install:
7 | - go get -t -v ./...
8 |
9 | script:
10 | - go test -v ./...
11 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Konstantin Komelin
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # insecRes - Insecure Resource Finder
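2 | [![Build Status](https://travis-ci.org/kkomelin/insecres.svg?branch=master)](https://travis-ci.org/kkomelin/insecres)
3 | [![Go Report Card](https://goreportcard.com/badge/github.com/kkomelin/insecres)](https://goreportcard.com/report/github.com/kkomelin/insecres)
4 | [![GoDoc](https://godoc.org/github.com/kkomelin/insecres?status.svg)](http://godoc.org/github.com/kkomelin/insecres)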
5 |
6 | A console tool that finds insecure resources on HTTPS sites.
7 | It is written in Go and uses the power of "multi-threading" (goroutines) to crawl and parse site pages.
8 |
9 | ## The motivation
10 |
11 | Some time ago, I switched my site to HTTPS. _And you should too!_
12 | All went well except the fact that my pages contained images, embedded videos and other resources,
13 | which pointed to HTTP content and made browsers display warnings about the insecure content on the pages.
14 | After researching the existing tools, none of which fit my needs, I decided to create my own.
15 |
16 | ## Features
17 |
18 | - Crawls all site pages in parallel
19 | - Finds the following resources with absolute HTTP (insecure) urls:
20 | - IMG
21 | - IFRAME
22 | - OBJECT
23 | - AUDIO, VIDEO, SOURCE, TRACK
24 | - Uses a random delay between requests to prevent blacklisting
25 | - Prints results to a CSV file
26 |
27 | ## Installation
28 |
29 | First of all, [install Go](https://golang.org/doc/install).
30 |
31 | After that, run the following command:
32 |
33 | ```
34 | go get github.com/kkomelin/insecres
35 | ```
36 |
37 | ## Usage
38 |
39 | - Find insecure resources on a site and print results to the console:
40 | ```
41 | $GOPATH/bin/insecres https://example.com
42 | ```
43 | - Find insecure resources on a site and print results to a CSV file:
44 | ```
45 | $GOPATH/bin/insecres -f="/home/user/report.csv" https://example.com
46 | ```
47 | - Display usage guide:
48 | ```
49 | $GOPATH/bin/insecres -h
50 | ```
51 |
52 | ## Roadmap
53 |
54 | - [ ] Display result counters
55 | - [ ] Compare performance of simple regex parsing and Tokenized parsing, which is currently used
56 | - [x] Implement an option for printing results to a CSV file
57 | - [x] Add random delay between requests to prevent blacklisting
58 | - [x] Ignore trailing slashes (https://example.com and https://example.com/ are considered equivalent)
59 | - [x] Handle domains w/ and w/o WWW
60 | - [x] Support IFRAME tags
61 | - [x] Support OBJECT tags
62 | - [x] Support AUDIO, VIDEO, SOURCE and TRACK tags
63 |
--------------------------------------------------------------------------------
/finder.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "crypto/tls"
5 | "fmt"
6 | "golang.org/x/net/html"
7 | "io"
8 | "net/http"
9 | "net/url"
10 | "strings"
11 | )
12 |
// ResourceAndLinkFinder encapsulates the logic used for finding page link urls
// and insecure resource urls.
type ResourceAndLinkFinder struct{}

// fetchClient is the single HTTP client shared by every Fetch call.
// A Transport keeps its own pool of idle connections, so building a new one
// per request prevents connection reuse and leaves the idle connections of
// each abandoned Transport open. Sharing one client avoids both problems.
var fetchClient = &http.Client{
	Transport: &http.Transport{
		TLSClientConfig: &tls.Config{
			// Certificate verification is disabled so that pages served with
			// self-signed or otherwise invalid certificates can still be
			// crawled.
			// NOTE(review): this means responses are not authenticated.
			InsecureSkipVerify: true,
		},
	},
}

// Fetch requests the page at url with HTTP GET and returns the response body.
// On success the caller owns the returned body and must close it.
// On failure the returned body is nil and err describes the problem.
// The HTTP status code is not inspected.
func (f ResourceAndLinkFinder) Fetch(url string) (responseBody io.ReadCloser, err error) {
	response, err := fetchClient.Get(url)
	if err != nil {
		return nil, err
	}

	return response.Body, nil
}
34 |
35 | // Parse takes a reader object and returns a slice of insecure resource urls
36 | // found in the HTML.
37 | // It does not close the reader. The reader should be closed from the outside.
38 | func (f ResourceAndLinkFinder) Parse(baseUrl string, httpBody io.Reader) (resourceUrls []string, linkUrls []string, err error) {
39 |
40 | resourceMap := make(map[string]bool)
41 | linkMap := make(map[string]bool)
42 |
43 | page := html.NewTokenizer(httpBody)
44 | for {
45 | tokenType := page.Next()
46 | if tokenType == html.ErrorToken {
47 | break
48 | }
49 | token := page.Token()
50 |
51 | switch {
52 | case f.isResourceToken(token):
53 | uris, err := f.processResourceToken(token)
54 | if err != nil {
55 | continue
56 | }
57 |
58 | for _, uri := range uris {
59 | resourceMap[uri] = true
60 | }
61 | case f.isLinkToken(token):
62 | uri, err := f.processLinkToken(token, baseUrl)
63 | if err != nil {
64 | continue
65 | }
66 |
67 | linkMap[uri] = true
68 | }
69 | }
70 |
71 | resourceUrls = make([]string, 0, len(resourceMap))
72 |
73 | for k := range resourceMap {
74 | resourceUrls = append(resourceUrls, k)
75 | }
76 |
77 | linkUrls = make([]string, 0, len(linkMap))
78 |
79 | for k := range linkMap {
80 | linkUrls = append(linkUrls, k)
81 | }
82 |
83 | return resourceUrls, linkUrls, nil
84 | }
85 |
86 | // Determine whether the token passed is a resource token.
87 | func (f ResourceAndLinkFinder) isResourceToken(token html.Token) bool {
88 |
89 | switch {
90 | case token.Type == html.SelfClosingTagToken && token.Data == "img":
91 | return true
92 | case token.Type == html.StartTagToken:
93 | switch token.Data {
94 | case
95 | "iframe",
96 | "object",
97 | "video",
98 | "audio",
99 | "source",
100 | "track":
101 | return true
102 | }
103 | }
104 | return false
105 | }
106 |
107 | // Determine whether the token passed is a resource token.
108 | func (f ResourceAndLinkFinder) isTargetedResourceTokenAttribute(token html.Token, attribute html.Attribute) bool {
109 |
110 | if token.Data == "object" && attribute.Key == "data" {
111 | return true
112 | }
113 |
114 | if attribute.Key == "src" || attribute.Key == "poster" {
115 | return true
116 | }
117 |
118 | return false
119 | }
120 |
121 | // Process resource token in order to get urls of the resources (a few if it is video, for example).
122 | func (f ResourceAndLinkFinder) processResourceToken(token html.Token) (map[string]string, error) {
123 |
124 | result := make(map[string]string)
125 |
126 | // Loop for tag attributes.
127 | for _, attr := range token.Attr {
128 |
129 | if !f.isTargetedResourceTokenAttribute(token, attr) {
130 | continue
131 | }
132 |
133 | uri, err := url.Parse(attr.Val)
134 | if err != nil {
135 | continue
136 | }
137 |
138 | // Ignore relative and secure urls.
139 | if !uri.IsAbs() || uri.Scheme == "https" || (uri.Host != "" && strings.HasPrefix(uri.String(), "//")) {
140 | continue
141 | }
142 |
143 | result[attr.Key] = uri.String()
144 | }
145 |
146 | if len(result) == 0 {
147 | return nil, fmt.Errorf("Targeted attributes have not been found. Skipped.")
148 | }
149 |
150 | return result, nil
151 | }
152 |
153 | // Determine whether the token passed is a link token.
154 | func (f ResourceAndLinkFinder) isLinkToken(token html.Token) bool {
155 | switch {
156 | case token.Type == html.StartTagToken && token.Data == "a":
157 | return true
158 | default:
159 | return false
160 | }
161 | }
162 |
163 | // Process token in order to get an absolute url of the link.
164 | func (f ResourceAndLinkFinder) processLinkToken(token html.Token, base string) (string, error) {
165 |
166 | // Loop for tag attributes.
167 | for _, attr := range token.Attr {
168 | if attr.Key != "href" {
169 | continue
170 | }
171 |
172 | // Ignore anchors.
173 | if strings.HasPrefix(attr.Val, "#") {
174 | return "", fmt.Errorf("Url is an anchor. Skipped.")
175 | }
176 |
177 | uri, err := url.Parse(attr.Val)
178 | if err != nil {
179 | return "", err
180 | }
181 |
182 | baseUrl, err := url.Parse(base)
183 | if err != nil {
184 | return "", err
185 | }
186 |
187 | // Return result if the uri is absolute.
188 | if uri.IsAbs() || (uri.Host != "" && strings.HasPrefix(uri.String(), "//")) {
189 |
190 | // Ignore external urls considering urls w/ WWW and w/o WWW as the same.
191 | if strings.TrimPrefix(uri.Host, "www.") != strings.TrimPrefix(baseUrl.Host, "www.") {
192 | return "", fmt.Errorf("Url is expernal. Skipped.")
193 | }
194 |
195 | return strings.TrimSuffix(uri.String(), "/"), nil
196 | }
197 |
198 | // Make it absolute if it's relative.
199 | absoluteUrl := f.convertToAbsolute(uri, baseUrl)
200 |
201 | return strings.TrimSuffix(absoluteUrl.String(), "/"), nil
202 | }
203 |
204 | return "", fmt.Errorf("Src has not been found. Skipped.")
205 | }
206 |
207 | // Convert a relative url to absolute.
208 | func (f ResourceAndLinkFinder) convertToAbsolute(source, base *url.URL) *url.URL {
209 | return base.ResolveReference(source)
210 | }
211 |
--------------------------------------------------------------------------------
/finder_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "strings"
6 | "testing"
7 | )
8 |
// TestParse tests fetcher.Parse method and its results.
// It feeds a fixed HTML document to Parse with "https://example.com/" as the
// base url and compares the returned resource and link urls with the
// expected sets below.
func TestParse(t *testing.T) {

	fmt.Println("TestParse")

	// The HTML fixture handed to the parser.
	reader := strings.NewReader(`






Anchor (ignored)
Relative link
Absolute HTTP link
Absolute HTTPS link
Absolute HTTPS link
External link
Reproduces bug in Go url.isAbs()
Ignoring trailing slash







`)

	// Expected insecure resource urls. The map is used as a set: only the
	// keys are looked up, the values are never read.
	expectedResources := map[string]int{
		// img[src]
		"http://example.com/images/test.png": 0,
		// iframe[src]
		"http://www.youtube.com/embed/0sRPY3WWSNc": 0,
		// object[data]
		"http://www.example.com/flash/insecure.swf": 0,
		// audio[src]
		"http://www.example.com/audio.ogg": 0,
		// audio track[src]
		"http://www.example.com/audio_track.vtt": 0,
		// audio source[src]
		"http://www.example.com/audio_in_source.ogg": 0,
		// video[src]
		"http://www.example.com/video.mp4": 0,
		// video[poster]
		"http://www.example.com/poster.jpg": 0,
		// video track[src]
		"http://www.example.com/video_track.vtt": 0,
		// video source[src]
		"http://www.example.com/video_in_source.mp4": 0,
	}

	// Expected page link urls, also used as a set.
	expectedLinks := map[string]int{
		"https://example.com/article/test1": 0,
		"http://example.com/test2":          0,
		"https://example.com/test3":         0,
		"http://www.example.com/test3":      0,
		"http://www.example.com/test4":      0,
	}

	resources, links, err := (ResourceAndLinkFinder{}).Parse("https://example.com/", reader)
	if err != nil {
		t.Fatalf("fetcher.Parse has returned error: %s\n", err)
	}

	// Check resources.
	fmt.Printf("Resources: %q\n", resources)

	// The counts must match first; only then is every returned url looked up
	// in the expected set.
	if len(resources) != len(expectedResources) {
		t.Errorf("Wrong number of resources. Found %d of %d", len(resources), len(expectedResources))
	} else {
		for i := 0; i < len(resources); i++ {
			if _, ok := expectedResources[resources[i]]; !ok {
				t.Errorf("Resource url is not found in the expected values: %s", resources[i])
			}
		}
	}

	// Check links.
	fmt.Printf("Links: %q\n", links)

	// Same two-step check as for the resources.
	if len(links) != len(expectedLinks) {
		t.Errorf("Wrong number of links. Found %d of %d", len(links), len(expectedLinks))

	} else {
		for i := 0; i < len(links); i++ {
			if _, ok := expectedLinks[links[i]]; !ok {
				t.Errorf("Link url is not found in the expected values: %s", links[i])
			}
		}
	}
}
115 |
--------------------------------------------------------------------------------
/helper.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "github.com/kkomelin/insecres/interfaces"
6 | "math/rand"
7 | "strings"
8 | "time"
9 | )
10 |
11 | // Goroutine callback, which fetches and parses the passed url
12 | // in order to find insecure resources and next urls to fetch from.
13 | func processPage(url string, queue chan string, registrar interfaces.Registrar, fetcher interfaces.Fetcher, parser interfaces.Parser, reporter interfaces.Reporter) {
14 |
15 | // Ignore processed urls.
16 | if !registrar.IsNew(url) {
17 | return
18 | }
19 | // Lock url so that no one other goroutine can process it.
20 | registrar.Register(url)
21 |
22 | fmt.Print(".")
23 |
24 | responseBody, err := fetcher.Fetch(url)
25 | if err != nil {
26 | fmt.Printf("Error occured: %s\n", err)
27 | return
28 | }
29 |
30 | defer responseBody.Close()
31 |
32 | insecureResourceUrls, pageUrls, err := parser.Parse(url, responseBody)
33 | if err != nil {
34 | fmt.Printf("Error occured: %s\n", err)
35 | return
36 | }
37 |
38 | reportPageResources(url, insecureResourceUrls, reporter)
39 |
40 | for _, url := range pageUrls {
41 | // Random pause before sending to the main thread.
42 | delayBetweenRequests()
43 | queue <- url
44 | }
45 | }
46 |
47 | // Reports page resources.
48 | func reportPageResources(url string, resources []string, reporter interfaces.Reporter) error {
49 | if len(resources) == 0 {
50 | return nil
51 | }
52 |
53 | if !reporter.IsEmpty() {
54 | for i, insecureResourceUrl := range resources {
55 | resources[i] = url + ", " + insecureResourceUrl
56 | }
57 |
58 | return reporter.WriteLines(resources)
59 | }
60 |
61 | fmt.Printf("\n%s:\n", url)
62 | for _, insecureResourceUrl := range resources {
63 | fmt.Printf("- %s\n", insecureResourceUrl)
64 | }
65 | return nil
66 | }
67 |
68 | // Implement random pause before sending the next request to
69 | // (no more than beforeEngTimeout/ and no less than beforeEngTimeout/4 constant).
70 | // It is one of the measures to prevent banning by the server.
71 | func delayBetweenRequests() {
72 | randNum := randomInRange(MinDelayBetweenRequests, MaxDelayBetweenRequests)
73 | time.Sleep(time.Duration(randNum) * time.Millisecond)
74 | }
75 |
// randomInRange returns a pseudo-random number n with min <= n < max.
// It returns 0 when either bound is 0 or when min is greater than max, and
// min itself when both bounds are equal.
// The global random source is re-seeded from the current time on every call.
func randomInRange(min, max int) int {
	rand.Seed(time.Now().UTC().UnixNano())

	switch {
	case min == 0, max == 0, min > max:
		return 0
	case min == max:
		return min
	default:
		return min + rand.Intn(max-min)
	}
}
92 |
93 | // Crawl pages starting from the passed url and find insecure resources.
94 | func Crawl(url, reportFile string) {
95 |
96 | url = strings.TrimSuffix(url, "/")
97 |
98 | report := &Report{}
99 |
100 | // Print results to the file.
101 | if reportFile != "" {
102 | err := report.Open(reportFile)
103 | if err != nil {
104 | fmt.Println(err)
105 | return
106 | }
107 | } else { // Print results to console.
108 | fmt.Println("-----")
109 | fmt.Println("Insecure resources (grouped by page):")
110 | fmt.Println("-----")
111 | }
112 |
113 | registry := &Processed{processed: make(map[string]int)}
114 | finder := &ResourceAndLinkFinder{}
115 |
116 | queue := make(chan string)
117 |
118 | go processPage(url, queue, registry, finder, finder, report)
119 |
120 | tick := time.Tick(time.Duration(BeforeEngDelay) * time.Millisecond)
121 | flag := false
122 | for {
123 | select {
124 | case url := <-queue:
125 | flag = false
126 |
127 | go processPage(url, queue, registry, finder, finder, report)
128 | case <-tick:
129 | if flag {
130 | // TODO: Implement a verbose mode when all crawled pages are also displayed.
131 | //if false {
132 | // fmt.Println("-----")
133 | // fmt.Println("Analized pages:")
134 | // fmt.Println("-----")
135 | // fmt.Println(registry)
136 | //}
137 | fmt.Println("")
138 |
139 | // Close report.
140 | report.Close()
141 |
142 | return
143 | }
144 | flag = true
145 | }
146 | }
147 | }
148 |
// displayHelp prints the usage guide of the tool to standard output: the
// url argument and the -h and -f options.
func displayHelp() {
	fmt.Printf(`usage: insecres [-h|-f="path/to/report.csv"]
ARGUMENTS
url
A url to start from, e.g. https://example.com"
OPTIONS
-h
Show this help message.
-f
Define the location of the CSV file with the results.
If it is not set, results are printed to the console.
`)
}
162 |
--------------------------------------------------------------------------------
/helper_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "testing"
6 | )
7 |
8 | // TestRandomInRange tests randomInRange function.
9 | func TestRandomInRange(t *testing.T) {
10 |
11 | fmt.Println("TestRandomInRange")
12 |
13 | tests := []struct {
14 | min int
15 | max int
16 | expectedResult int
17 | }{
18 | // Edge cases.
19 | {0, 0, 0},
20 | {1000, 1000, 1000},
21 | // Normal case.
22 | {500, 1000, -1},
23 | // Wrong parameters.
24 | {5000, 1000, 0},
25 | {1000, 500, 0},
26 | }
27 |
28 | for _, testData := range tests {
29 | givenResult := randomInRange(testData.min, testData.max)
30 |
31 | // Normal parameters, random results.
32 | if testData.expectedResult == -1 {
33 | if testData.min > givenResult || givenResult > testData.max {
34 | t.Errorf("randomInRange(%d, %d): Given: %d", testData.min, testData.max, givenResult)
35 | }
36 | continue
37 | }
38 |
39 | // Edge and wrong parameters, fixed result.
40 | if givenResult != testData.expectedResult {
41 | t.Errorf("randomInRange(%d, %d): Given: %d, Expected: %d", testData.min, testData.max, givenResult, testData.expectedResult)
42 | }
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/interfaces/doc.go:
--------------------------------------------------------------------------------
1 | // Package interfaces provides interfaces for the application, such as Fetcher, Parser, etc.
2 | package interfaces
3 |
--------------------------------------------------------------------------------
/interfaces/fetcher.go:
--------------------------------------------------------------------------------
1 | package interfaces
2 |
3 | import "io"
4 |
// Fetcher is the interface that wraps the Fetch method.
type Fetcher interface {
	// Fetch fetches the page at url and returns the response body.
	// The body is returned as an io.ReadCloser, so the caller is expected to
	// close it once it has been read.
	Fetch(url string) (responseBody io.ReadCloser, err error)
}
10 |
--------------------------------------------------------------------------------
/interfaces/parser.go:
--------------------------------------------------------------------------------
1 | package interfaces
2 |
3 | import "io"
4 |
// Parser is the interface that wraps the Parse method.
type Parser interface {
	// Parse reads a response body from httpBody and returns the urls of the
	// resources and of the pages (links) found in it.
	// baseUrl is the url of the page the body belongs to.
	Parse(baseUrl string, httpBody io.Reader) (resourceUrls []string, linkUrls []string, err error)
}
10 |
--------------------------------------------------------------------------------
/interfaces/registrar.go:
--------------------------------------------------------------------------------
1 | package interfaces
2 |
// Registrar is the interface that wraps the methods necessary for storing
// processed urls.
// All methods of this interface should be thread-safe.
type Registrar interface {
	// Register adds a processed url to the registry.
	Register(url string)
	// IsNew reports whether the passed url has not been registered yet.
	IsNew(url string) bool
}
11 |
--------------------------------------------------------------------------------
/interfaces/reporter.go:
--------------------------------------------------------------------------------
1 | package interfaces
2 |
// Reporter is the interface that wraps methods for reporting results.
type Reporter interface {
	// Open prepares the file at filePath to report to.
	Open(filePath string) error
	// WriteLines dumps a slice of strings to the report.
	WriteLines(lines []string) error
	// Close releases the file.
	Close() error
	// IsEmpty reports whether the report has no target to write to,
	// i.e. whether the target resource is nil.
	IsEmpty() bool
}
14 |
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "flag"
5 | )
6 |
const (
	// BeforeEngDelay defines time in milliseconds, which the program waits before exit
	// so that all goroutines can finish and return results.
	// Crawl uses it as the period of its idle check: the crawl ends when a
	// whole period passes without a new url being queued.
	BeforeEngDelay int = 2000
	// MinDelayBetweenRequests is minimum time in milliseconds,
	// which the program waits before processing any new url.
	// We wait for some random time (between MinDelayBetweenRequests and MaxDelayBetweenRequests)
	// to prevent blacklisting by the server.
	MinDelayBetweenRequests int = 500
	// MaxDelayBetweenRequests is maximum time in milliseconds,
	// which the program waits before processing any new url.
	MaxDelayBetweenRequests int = 1000
)
20 |
21 | func main() {
22 | var (
23 | helpFlag bool
24 | reportFlag string
25 | )
26 |
27 | // Find options.
28 | flag.BoolVar(&helpFlag, "h", false, "")
29 | flag.StringVar(&reportFlag, "f", "", "")
30 | flag.Parse()
31 |
32 | // Find argument.
33 | args := flag.Args()
34 | if len(args) < 1 {
35 | displayHelp()
36 | return
37 | }
38 |
39 | // Display help.
40 | if helpFlag {
41 | displayHelp()
42 | return
43 | }
44 |
45 | // Run the crawler.
46 | Crawl(args[0], reportFlag)
47 | }
48 |
--------------------------------------------------------------------------------
/processed.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "sync"
5 | )
6 |
// Processed is a thread-safe storage for processed urls.
// Every method takes the mutex, so a Processed must be shared by pointer.
// The zero value is ready to use: the map is created on the first Register.
type Processed struct {
	processed map[string]int
	mux       sync.Mutex
}

// Register adds a processed url to the registry.
func (r *Processed) Register(url string) {
	r.mux.Lock()
	defer r.mux.Unlock()

	// Writing to a nil map panics, so create the map on first use.
	if r.processed == nil {
		r.processed = make(map[string]int)
	}

	r.processed[url] = 1
}

// IsNew reports whether the url has not been registered yet.
func (r *Processed) IsNew(url string) bool {
	r.mux.Lock()
	defer r.mux.Unlock()

	_, seen := r.processed[url]
	return !seen
}

// String defines our own way to output the processed urls, one per line:
// [url1]\n
// [url2]\n
// The order of the urls is unspecified because they are stored in a map.
func (r *Processed) String() string {
	r.mux.Lock()
	defer r.mux.Unlock()

	output := ""
	for url := range r.processed {
		output += url + "\n"
	}
	return output
}
46 |
--------------------------------------------------------------------------------
/report.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "bufio"
5 | "os"
6 | "sync"
7 | )
8 |
// Report is a thread-safe data reporting tool that writes lines to a file
// through a buffered writer. WriteLines and Close are guarded by a mutex,
// so a Report must be shared by pointer.
type Report struct {
	file   *os.File
	writer *bufio.Writer
	mux    sync.Mutex
}

// Open creates the file at filePath (truncating an existing one) and
// initializes the buffered writer on top of it.
func (r *Report) Open(filePath string) error {
	var err error

	r.file, err = os.Create(filePath)
	if err != nil {
		return err
	}

	r.writer = bufio.NewWriter(r.file)

	return nil
}

// WriteLines dumps a slice of strings to the file, adding a trailing endline
// marker to each string, and flushes the buffer afterwards.
// It returns os.ErrInvalid when the report has not been opened.
func (r *Report) WriteLines(lines []string) error {
	r.mux.Lock()
	defer r.mux.Unlock()

	// Without this guard a report that was never opened would dereference
	// a nil writer and panic.
	if r.writer == nil {
		return os.ErrInvalid
	}

	for _, line := range lines {
		if _, err := r.writer.WriteString(line + "\n"); err != nil {
			return err
		}
	}

	return r.writer.Flush()
}

// Close closes the file handler in case it is not empty.
func (r *Report) Close() error {
	r.mux.Lock()
	defer r.mux.Unlock()

	if r.IsEmpty() {
		return nil
	}

	return r.file.Close()
}

// IsEmpty reports whether the file handler is still uninitialized,
// i.e. whether the report has no file to write to.
func (r *Report) IsEmpty() bool {
	return (r.file == nil)
}
63 |
--------------------------------------------------------------------------------