├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── assert_test.go ├── basetag_test.go ├── cmd └── example │ └── main.go ├── complex_test.go ├── crawler.go ├── errors.go ├── examples_test.go ├── ext.go ├── fileext_test.go ├── logger.go ├── options.go ├── popchannel.go ├── spyext_test.go ├── tbldef_test.go ├── tblrun_test.go ├── testdata ├── hosta │ ├── page1.html │ ├── page2.html │ ├── page3.html │ ├── page4.html │ ├── page5.html │ └── robots.txt ├── hostb │ ├── page1.html │ ├── page2.html │ ├── pageunlinked.html │ └── robots.txt ├── hostc │ ├── page1.html │ ├── page2.html │ └── page3.html ├── hostd │ ├── index.html │ ├── page3.html │ └── subdir │ │ ├── page1.html │ │ ├── page2.html │ │ ├── pagea.html │ │ └── pageb.html ├── robota │ ├── page1.html │ ├── page2.html │ └── robots.txt ├── robotb │ ├── page1.html │ ├── page2.html │ ├── page3.html │ ├── page4.html │ └── robots.txt └── robotc │ ├── page1.html │ ├── page2.html │ ├── page3.html │ ├── page4.html │ └── robots.txt ├── urlcontext.go ├── urlcontext_test.go ├── worker.go └── worker_test.go /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/.gitignore -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/.travis.yml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/README.md -------------------------------------------------------------------------------- /assert_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/assert_test.go -------------------------------------------------------------------------------- /basetag_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/basetag_test.go -------------------------------------------------------------------------------- /cmd/example/main.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/cmd/example/main.go -------------------------------------------------------------------------------- /complex_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/complex_test.go -------------------------------------------------------------------------------- /crawler.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/crawler.go -------------------------------------------------------------------------------- /errors.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/errors.go -------------------------------------------------------------------------------- /examples_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/examples_test.go -------------------------------------------------------------------------------- /ext.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/ext.go -------------------------------------------------------------------------------- /fileext_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/fileext_test.go -------------------------------------------------------------------------------- /logger.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/logger.go -------------------------------------------------------------------------------- /options.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/options.go -------------------------------------------------------------------------------- /popchannel.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/popchannel.go -------------------------------------------------------------------------------- /spyext_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/spyext_test.go -------------------------------------------------------------------------------- /tbldef_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/tbldef_test.go -------------------------------------------------------------------------------- /tblrun_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/tblrun_test.go -------------------------------------------------------------------------------- /testdata/hosta/page1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/hosta/page1.html -------------------------------------------------------------------------------- /testdata/hosta/page2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/hosta/page2.html -------------------------------------------------------------------------------- /testdata/hosta/page3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/hosta/page3.html -------------------------------------------------------------------------------- /testdata/hosta/page4.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/hosta/page4.html -------------------------------------------------------------------------------- /testdata/hosta/page5.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/hosta/page5.html -------------------------------------------------------------------------------- /testdata/hosta/robots.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /testdata/hostb/page1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/hostb/page1.html -------------------------------------------------------------------------------- /testdata/hostb/page2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/hostb/page2.html -------------------------------------------------------------------------------- /testdata/hostb/pageunlinked.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/hostb/pageunlinked.html -------------------------------------------------------------------------------- /testdata/hostb/robots.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /testdata/hostc/page1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/hostc/page1.html -------------------------------------------------------------------------------- /testdata/hostc/page2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/hostc/page2.html -------------------------------------------------------------------------------- /testdata/hostc/page3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/hostc/page3.html -------------------------------------------------------------------------------- /testdata/hostd/index.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/hostd/index.html -------------------------------------------------------------------------------- /testdata/hostd/page3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/hostd/page3.html -------------------------------------------------------------------------------- /testdata/hostd/subdir/page1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/hostd/subdir/page1.html -------------------------------------------------------------------------------- /testdata/hostd/subdir/page2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/hostd/subdir/page2.html -------------------------------------------------------------------------------- /testdata/hostd/subdir/pagea.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/hostd/subdir/pagea.html -------------------------------------------------------------------------------- /testdata/hostd/subdir/pageb.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/hostd/subdir/pageb.html -------------------------------------------------------------------------------- /testdata/robota/page1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/robota/page1.html -------------------------------------------------------------------------------- /testdata/robota/page2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/robota/page2.html -------------------------------------------------------------------------------- /testdata/robota/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: / 3 | -------------------------------------------------------------------------------- /testdata/robotb/page1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/robotb/page1.html -------------------------------------------------------------------------------- /testdata/robotb/page2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/robotb/page2.html -------------------------------------------------------------------------------- /testdata/robotb/page3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/robotb/page3.html -------------------------------------------------------------------------------- /testdata/robotb/page4.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/robotb/page4.html -------------------------------------------------------------------------------- /testdata/robotb/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: Googlebot 2 | Disallow: /page2.html 3 | -------------------------------------------------------------------------------- /testdata/robotc/page1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/robotc/page1.html -------------------------------------------------------------------------------- /testdata/robotc/page2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/robotc/page2.html -------------------------------------------------------------------------------- /testdata/robotc/page3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/robotc/page3.html -------------------------------------------------------------------------------- /testdata/robotc/page4.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/robotc/page4.html -------------------------------------------------------------------------------- /testdata/robotc/robots.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/testdata/robotc/robots.txt -------------------------------------------------------------------------------- /urlcontext.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/urlcontext.go -------------------------------------------------------------------------------- /urlcontext_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/urlcontext_test.go -------------------------------------------------------------------------------- /worker.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/worker.go -------------------------------------------------------------------------------- /worker_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IntelligenceX/gocrawl/HEAD/worker_test.go --------------------------------------------------------------------------------