├── go.mod ├── go.sum ├── main.go ├── tests └── utils_test.go └── utils └── utils.go /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/mottet-dev/medium-go-colly-basics 2 | 3 | go 1.12 4 | 5 | require ( 6 | github.com/PuerkitoBio/goquery v1.5.0 // indirect 7 | github.com/antchfx/htmlquery v1.0.0 // indirect 8 | github.com/antchfx/xmlquery v1.0.0 // indirect 9 | github.com/antchfx/xpath v1.0.0 // indirect 10 | github.com/gobwas/glob v0.2.3 // indirect 11 | github.com/gocolly/colly v1.2.0 12 | github.com/kennygrant/sanitize v1.2.4 // indirect 13 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect 14 | github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea // indirect 15 | golang.org/x/net v0.0.0-20190607181551-461777fb6f67 // indirect 16 | google.golang.org/appengine v1.6.1 // indirect 17 | ) 18 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk= 2 | github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= 3 | github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= 4 | github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= 5 | github.com/antchfx/htmlquery v1.0.0 h1:O5IXz8fZF3B3MW+B33MZWbTHBlYmcfw0BAxgErHuaMA= 6 | github.com/antchfx/htmlquery v1.0.0/go.mod h1:MS9yksVSQXls00iXkiMqXr0J+umL/AmxXKuP28SUJM8= 7 | github.com/antchfx/xmlquery v1.0.0 h1:YuEPqexGG2opZKNc9JU3Zw6zFXwC47wNcy6/F8oKsrM= 8 | github.com/antchfx/xmlquery v1.0.0/go.mod h1:/+CnyD/DzHRnv2eRxrVbieRU/FIF6N0C+7oTtyUtCKk= 9 | github.com/antchfx/xpath v1.0.0 h1:Q5gFgh2O40VTSwMOVbFE7nFNRBu3tS21Tn0KAWeEjtk= 10 | github.com/antchfx/xpath v1.0.0/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= 11 | github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= 12 | github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= 13 | github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI= 14 | github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA= 15 | github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= 16 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 17 | github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o= 18 | github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak= 19 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI= 20 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= 21 | github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea h1:hH8P1IiDpzRU6ZDbDh/RDnVuezi2oOXJpApa06M0zyI= 22 | github.com/temoto/robotstxt v0.0.0-20180810133444-97ee4a9ee6ea/go.mod h1:aOux3gHPCftJ3KHq6Pz/AlDjYJ7Y+yKfm1gU/3B0u04= 23 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 24 | golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 25 | golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 26 | golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 27 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 28 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 29 | golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= 30 | golang.org/x/net v0.0.0-20190607181551-461777fb6f67 h1:rJJxsykSlULwd2P2+pg/rtnwN2FrWp4IuCxOSyS0V00= 31 | golang.org/x/net v0.0.0-20190607181551-461777fb6f67/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 32 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 33 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 34 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 35 | golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 36 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 37 | golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= 38 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 39 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 40 | golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= 41 | google.golang.org/appengine v1.6.1 h1:QzqyMA1tlu6CgqCDUtU9V+ZKhLFT2dkJuANu5QaxI3I= 42 | google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= 43 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/gocolly/colly" 7 | "github.com/mottet-dev/medium-go-colly-basics/utils" 8 | ) 9 | 10 | func main() { 11 | c := colly.NewCollector() 12 | 13 | c.OnRequest(func(r *colly.Request) { 14 | fmt.Println("Visiting", r.URL) 15 | }) 16 | 17 | c.OnHTML("div.s-result-list.s-search-results.sg-row", func(e *colly.HTMLElement) { 18 | e.ForEach("div.a-section.a-spacing-medium", func(_ int, e *colly.HTMLElement) { 19 | var productName, stars, price string 20 | 21 | productName = e.ChildText("span.a-size-medium.a-color-base.a-text-normal") 22 | 23 | if productName == "" { 24 | // If we can't get any name, we return and go directly to the next element 25 | return 26 | } 27 | 28 | stars = e.ChildText("span.a-icon-alt") 29 | utils.FormatStars(&stars) 30 | 31 | price = e.ChildText("span.a-price > span.a-offscreen") 32 | utils.FormatPrice(&price) 33 | 34 | fmt.Printf("Product Name: %s \nStars: %s \nPrice: %s \n", productName, stars, price) 35 | }) 36 | }) 37 | 38 | c.Visit("https://www.amazon.com/s?k=nintendo+switch&ref=nb_sb_noss_1") 39 | } 40 | -------------------------------------------------------------------------------- /tests/utils_test.go: -------------------------------------------------------------------------------- 1 | package tests 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/mottet-dev/medium-go-colly-basics/utils" 7 | ) 8 | 9 | func TestFormatPriceWithOnePrice(t *testing.T) { 10 | input := "$299.00" 11 | 12 | expectedOutput := "299.00" 13 | 14 | utils.FormatPrice(&input) 15 | 16 | if input != expectedOutput { 17 | t.Errorf("The input: %s is not equal to the expected output: %s", input, expectedOutput) 18 | } 19 | } 20 | 21 | func TestFormatPriceWithTwoPrices(t *testing.T) { 22 | input := "$299.00$480.00" 23 | 24 | expectedOutput := "299.00" 25 | 26 | utils.FormatPrice(&input) 27 | 28 | if input != expectedOutput { 29 | t.Errorf("The input: %s is not equal to the expected output: %s", input, expectedOutput) 30 | } 31 | } 32 | 33 | func TestFormatPriceWithThreePrices(t *testing.T) { 34 | input := "$299.00$480.00$489.25" 35 | 36 | expectedOutput := "299.00" 37 | 38 | utils.FormatPrice(&input) 39 | 40 | if input != expectedOutput { 41 | t.Errorf("The input: %s is not equal to the expected output: %s", input, expectedOutput) 42 | } 43 | } 44 | 45 | func TestFormatPriceWithFourDigitLenght(t *testing.T) { 46 | input := "$1234.58" 47 | 48 | expectedOutput := "1234.58" 49 | 50 | utils.FormatPrice(&input) 51 | 52 | if input != expectedOutput { 53 | t.Errorf("The input: %s is not equal to the expected output: %s", input, expectedOutput) 54 | } 55 | } 56 | 57 | func TestFormatPriceWithFourDigitLenghtTwoPrices(t *testing.T) { 58 | input := "$1234.58$4895.49" 59 | 60 | expectedOutput := "1234.58" 61 | 62 | utils.FormatPrice(&input) 63 | 64 | if input != expectedOutput { 65 | t.Errorf("The input: %s is not equal to the expected output: %s", input, expectedOutput) 66 | } 67 | } 68 | 69 | func TestFormatPriceWithoutCent(t *testing.T) { 70 | input := "$1234" 71 | 72 | expectedOutput := "1234" 73 | 74 | utils.FormatPrice(&input) 75 | 76 | if input != expectedOutput { 77 | t.Errorf("The input: %s is not equal to the expected output: %s", input, expectedOutput) 78 | } 79 | } 80 | 81 | func TestFormatStars(t *testing.T) { 82 | input := "4.7 out of 5 stars" 83 | 84 | expectedOutput := "4.7" 85 | 86 | utils.FormatStars(&input) 87 | 88 | if input != expectedOutput { 89 | t.Errorf("The input: %s is not equal to the expected output: %s", input, expectedOutput) 90 | } 91 | } 92 | 93 | func TestFormatStarsTwo(t *testing.T) { 94 | input := "3.2 out of 5 stars" 95 | 96 | expectedOutput := "3.2" 97 | 98 | utils.FormatStars(&input) 99 | 100 | if input != expectedOutput { 101 | t.Errorf("The input: %s is not equal to the expected output: %s", input, expectedOutput) 102 | } 103 | } 104 | 105 | func TestFormatStarsThree(t *testing.T) { 106 | input := "1.0 out of 5 stars" 107 | 108 | expectedOutput := "1.0" 109 | 110 | utils.FormatStars(&input) 111 | 112 | if input != expectedOutput { 113 | t.Errorf("The input: %s is not equal to the expected output: %s", input, expectedOutput) 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /utils/utils.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import "regexp" 4 | 5 | func FormatPrice(price *string) { 6 | r := regexp.MustCompile(`\$(\d+(\.\d+)?).*$`) 7 | 8 | newPrices := r.FindStringSubmatch(*price) 9 | 10 | if len(newPrices) > 1 { 11 | *price = newPrices[1] 12 | } else { 13 | *price = "Unknown" 14 | } 15 | 16 | } 17 | 18 | func FormatStars(stars *string) { 19 | if len(*stars) >= 3 { 20 | *stars = (*stars)[0:3] 21 | } else { 22 | *stars = "Unknown" 23 | } 24 | } 25 | --------------------------------------------------------------------------------