├── .gitignore ├── LICENSE ├── README.md ├── food ├── Scrape.go └── scrape_test.go └── main.go /.gitignore: -------------------------------------------------------------------------------- 1 | still-good-food 2 | .DS_Store 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Elliot Iddon 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Still Good Food 2 | =============== 3 | 4 | What? 5 | ----- 6 | This is a scraper for the BBC Good Food website, written in Go. 
The program will produce a JSON blob representing what I consider the important (pull requests welcome if you disagree) fields of a recipe to be from a URL that's a recipe. It exposes functions and types that you could use directly as a library if you don't want to convert back from a JSON blob/do your own IO. 7 | 8 | Why? 9 | ---- 10 | Earlier this year an article implied that the [bbc good food website](http://www.bbcgoodfood.com/) would be taken down. 11 | This turned out to be [false](https://twitter.com/bbcgoodfood/status/732504825930223616) but by the time I realised that I'd already written this, so here you are. 12 | 13 | License 14 | ------- 15 | This entire repository is available under the terms in LICENSE, which is the MIT license. 16 | 17 | -------------------------------------------------------------------------------- /food/Scrape.go: -------------------------------------------------------------------------------- 1 | package food 2 | 3 | import ( 4 | "fmt" 5 | "github.com/PuerkitoBio/goquery" 6 | "strings" 7 | ) 8 | 9 | type NutritionInfo struct { 10 | Kcal string 11 | Fat string 12 | Saturates string 13 | Carbs string 14 | Sugars string 15 | Fibre string 16 | Protein string 17 | Salt string 18 | } 19 | 20 | type Recipe struct { 21 | Name string 22 | Ingredients []string 23 | Steps []string 24 | Yield string 25 | Difficulty string 26 | Preparation string 27 | Cook string 28 | Nutrition NutritionInfo 29 | } 30 | 31 | func Scrape(recipe string) (Recipe, error) { 32 | doc, err := goquery.NewDocument(recipe) 33 | if err != nil { 34 | return Recipe{}, err 35 | } 36 | 37 | name := doc.Find(".recipe-header__title").First().Text() 38 | 39 | var ingredients []string 40 | doc.Find(".ingredients-list__item").Each(func(i int, s *goquery.Selection) { 41 | s.Find("span").Remove() 42 | ingredients = append(ingredients, strings.TrimSpace(s.Text())) 43 | }) 44 | 45 | var steps []string 46 | doc.Find(".method__item").Each(func(i int, s *goquery.Selection) { 47 | 
steps = append(steps, strings.TrimSpace(s.Text())) 48 | }) 49 | 50 | yield := extract("recipeYield", doc) 51 | 52 | difficulty := strings.TrimSpace(doc.Find("section.recipe-details__item--skill-level").Text()) 53 | 54 | preparationSpan := doc.Find(".recipe-details__cooking-time-prep") 55 | preparationSpan.Find("strong").Remove() 56 | 57 | preparation := strings.TrimSpace(preparationSpan.Text()) 58 | 59 | cookSpan := doc.Find(".recipe-details__cooking-time-cook") 60 | cookSpan.Find("strong").Remove() 61 | 62 | cook := strings.TrimSpace(cookSpan.Text()) 63 | 64 | nutrition := NutritionInfo{ 65 | Kcal: extract("calories", doc), 66 | Fat: extract("fatContent", doc), 67 | Saturates: extract("saturatedFatContent", doc), 68 | Sugars: extract("sugarContent", doc), 69 | Fibre: extract("fiberContent", doc), 70 | Protein: extract("proteinContent", doc), 71 | Salt: extract("sodiumContent", doc), 72 | } 73 | 74 | r := Recipe{ 75 | Name: name, 76 | Ingredients: ingredients, 77 | Steps: steps, 78 | Yield: yield, 79 | Difficulty: difficulty, 80 | Preparation: preparation, 81 | Cook: cook, 82 | Nutrition: nutrition, 83 | } 84 | 85 | return r, nil 86 | } 87 | func extract(itemprop string, doc *goquery.Document) string { 88 | return strings.TrimSpace(doc.Find(fmt.Sprintf("span[itemprop='%s']", itemprop)).Text()) 89 | } 90 | -------------------------------------------------------------------------------- /food/scrape_test.go: -------------------------------------------------------------------------------- 1 | package food 2 | 3 | import "testing" 4 | 5 | func TestScrape(t *testing.T) { 6 | r := "Gooseberry \u0026 custard pies" 7 | cases := []struct { 8 | in string 9 | want string 10 | }{ 11 | {"http://www.bbcgoodfood.com/recipes/gooseberry-custard-pies", r}, 12 | } 13 | for _, c := range cases { 14 | got, _ := Scrape(c.in) 15 | if got.Name != c.want { 16 | t.Errorf("Scrape%q) == %q, want %q", c.in, got, c.want) 17 | } 18 | } 19 | } 20 | 
-------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "github.com/sleepypikachu/still-good-food/food" 7 | "os" 8 | ) 9 | 10 | func main() { 11 | if len(os.Args) != 2 { 12 | fmt.Fprintf(os.Stderr, "usage: %s \n", os.Args[0]) 13 | return 14 | } 15 | 16 | r, err := food.Scrape(os.Args[1]) 17 | if err != nil { 18 | panic(err) 19 | } 20 | 21 | b, err := json.Marshal(r) 22 | if err != nil { 23 | panic(err) 24 | } 25 | 26 | fmt.Printf("%s\n", b) 27 | } 28 | --------------------------------------------------------------------------------