├── tests ├── library │ ├── __init__.py │ ├── test_exceptions.py │ └── test_utils.py ├── test_data │ ├── .gitkeep │ ├── test_settings_module │ │ ├── __init__.py │ │ └── test_settings.py │ └── schemaorg.testhtml ├── conftest.py ├── __init__.py ├── test__settings_module.py ├── test_schemaorg.py ├── test_mykitchen101.py ├── test_cookeatshare.py ├── test_thewoksoflife.py └── test_acouplecooks.py ├── recipe_scrapers ├── __version__.py ├── settings │ ├── template.py │ ├── v12_settings.py │ └── default.py ├── bonappetit.py ├── cookeatshare.py ├── amazingribs.py ├── globo.py ├── kochbar.py ├── abril.py ├── cdkitchen.py ├── foodandwine.py ├── acouplecooks.py ├── budgetbytes.py ├── foodnetwork.py ├── eatwhattonight.py ├── archanaskitchen.py ├── justonecookbook.py ├── giallozafferano.py ├── thenutritiouskitchen.py ├── practicalselfreliance.py ├── plugins │ ├── __init__.py │ ├── _interface.py │ ├── bcp47_validate.py │ ├── normalize_string.py │ ├── template.py │ ├── opengraph_image_fetch.py │ ├── schemaorg_priority.py │ └── schemaorg_fill.py ├── g750g.py ├── misya.py ├── tasty.py ├── chefkoch.py ├── cookpad.py ├── gousto.py ├── blueapron.py ├── cuisineaz.py ├── kuchniadomowa.py ├── marmiton.py ├── myrecipes.py ├── zenbelly.py ├── averiecooks.py ├── cybercook.py ├── hassenchef.py ├── justataste.py ├── livelytable.py ├── nytimes.py ├── ohsheglows.py ├── purelypope.py ├── skinnytaste.py ├── bakingsense.py ├── castironketo.py ├── hostthetoast.py ├── bakingmischeif.py ├── domesticateme.py ├── gimmesomeoven.py ├── littlespicejar.py ├── lovingitvegan.py ├── recipietineats.py ├── simplywhisked.py ├── sweetcsdesigns.py ├── thewoksoflife.py ├── vanillaandbean.py ├── watchwhatueat.py ├── ambitiouskitchen.py ├── bowlofdelicious.py ├── fifteenspatulas.py ├── halfbakedharvest.py ├── lecremedelacrumb.py ├── melskitchencafe.py ├── minimalistbaker.py ├── primaledgehealth.py ├── spendwithpennies.py ├── theclevercarrot.py ├── thekitchenmagpie.py ├── atelierdeschefs.py ├── eatsmarter.py ├── 
wholefoods.py ├── nourishedbynutrition.py ├── sallysbakingaddiction.py ├── yemek.py ├── alltomat.py ├── purplecarrot.py ├── bbcgoodfood.py ├── redhousespice.py ├── downshiftology.py ├── eatingbirdfood.py ├── realfoodtesco.py ├── jimcooksfoodgood.py ├── rainbowplantlife.py ├── headbangerskitchen.py ├── paleorunningmomma.py ├── vegrecipesofindia.py ├── indianhealthyrecipes.py ├── dr.py ├── hellofresh.py ├── innit.py ├── bettycrocker.py ├── steamykitchen.py ├── seriouseats.py ├── inspiralized.py ├── food.py ├── tudogostoso.py ├── franzoesischkochen.py ├── marthastewart.py ├── closetcooking.py ├── copykat.py ├── whatsgabycooking.py ├── rachlmansfield.py ├── heinzbrasil.py ├── przepisy.py ├── hundredandonecookbooks.py ├── springlane.py ├── thekitchn.py ├── paninihappy.py ├── thepioneerwoman.py ├── foodrepublic.py ├── tasteofhome.py ├── tastesoflizzyt.py ├── cookinglight.py ├── cucchiaio.py ├── kennymcgovern.py ├── timesofindia.py ├── eatingwell.py ├── jamieoliver.py ├── countryliving.py ├── reishunger.py ├── food52.py ├── onehundredonecookbooks.py ├── realsimple.py ├── wikicookbook.py ├── tastykitchen.py ├── cookieandkate.py ├── simplyquinoa.py ├── mybakingaddiction.py ├── saveur.py ├── forksoverknives.py ├── thevintagemixer.py ├── sallysblog.py ├── _factory.py ├── comidinhasdochef.py ├── mykitchen101.py ├── kingarthur.py ├── _exceptions.py ├── motherthyme.py ├── twopeasandtheirpod.py ├── simplyrecipes.py ├── momswithcrockpots.py ├── kwestiasmaku.py ├── streetkitchen.py ├── fitmencook.py ├── geniuskitchen.py ├── mykitchen101en.py ├── ig.py ├── bigoven.py ├── woop.py ├── yummly.py ├── southernliving.py ├── cookstr.py ├── joyfoodsunshine.py ├── nutritionbynathalie.py ├── heb.py ├── allrecipes.py ├── zeitwochenmarkt.py ├── finedininglovers.py ├── rezeptwelt.py ├── panelinha.py ├── bbcfood.py ├── justbento.py └── afghankitchenrecipes.py ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── new_scraper.md │ └── scraper_bug_report.md └── workflows │ ├── linters.yaml │ ├── 
publish.yaml │ └── unittests.yaml ├── MANIFEST.in ├── requirements-dev.txt ├── .flake8 ├── run_tests.py ├── .coveragerc ├── .pre-commit-config.yaml ├── templates ├── scraper.py └── test_scraper.py ├── LICENSE └── setup.py /tests/library/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_data/test_settings_module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /recipe_scrapers/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = "13.12.1" 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | 4 | exclude requirements-dev.txt 5 | recursive-exclude tests * 6 | -------------------------------------------------------------------------------- /tests/test_data/test_settings_module/test_settings.py: -------------------------------------------------------------------------------- 1 | SUPPRESS_EXCEPTIONS = True 2 | TEST_MODE = True 3 | META_HTTP_EQUIV = True 4 | # LOG_LEVEL = 20 5 | -------------------------------------------------------------------------------- /requirements-dev.txt: 
-------------------------------------------------------------------------------- 1 | -e . 2 | black>=21.4b2 3 | coverage>=4.5.1 4 | flake8>=3.8.3 5 | flake8-printf-formatting>=1.1.0 6 | pre-commit>=2.6.0 7 | pytest>=6.1.1 8 | unittest-parallel>=1.5.0 9 | # language-tags>=1.0.0 10 | # tld>=0.12.3 11 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | # FS002 comes from plugin "flake8-use-fstring" 3 | # and would error on `str.format()` usage 4 | ignore = E203, E266, E501, W503, FS002 5 | max-line-length = 88 6 | max-complexity = 18 7 | select = B,C,E,F,W,T4,B9 8 | exclude = tests/test_data/* 9 | -------------------------------------------------------------------------------- /run_tests.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | if __name__ == "__main__": 4 | run_tests_command = ( 5 | "unittest-parallel -t . 
-s tests --coverage --coverage-rcfile .coveragerc" 6 | ) 7 | subprocess.run(run_tests_command.split(" "), check=True, text=True) 8 | -------------------------------------------------------------------------------- /recipe_scrapers/settings/template.py: -------------------------------------------------------------------------------- 1 | SUPPRESS_EXCEPTIONS = True 2 | 3 | 4 | # the most powerful feature is adding custom plugins 5 | # for example to add "inner-most" plugin: 6 | # PLUGINS += ( 7 | # "path.to.my.custom_plugin", 8 | # ) 9 | # 10 | # and to add "outer-most" plugin: 11 | # PLUGINS = ( 12 | # "path.to.my.custom_plugin", 13 | # ) + PLUGINS 14 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = recipe_scrapers 4 | relative_files = True 5 | 6 | omit = recipe_scrapers/_abstract.py 7 | recipe_scrapers/__init__.py 8 | recipe_scrapers/__version__.py 9 | 10 | [report] 11 | exclude_lines = 12 | pragma: no cover 13 | 14 | # Don't complain if tests don't hit defensive assertion code: 15 | raise AttributeError 16 | raise NotImplementedError 17 | 18 | ignore_errors = True 19 | -------------------------------------------------------------------------------- /recipe_scrapers/settings/v12_settings.py: -------------------------------------------------------------------------------- 1 | # Settings that will make recipe-scrapers>=13.0.0 act almost identically to recipe-scrapers<13.0.0 2 | SUPPRESS_EXCEPTIONS = True 3 | META_HTTP_EQUIV = True 4 | ON_EXCEPTION_RETURN_VALUES = { 5 | "title": "", 6 | "total_time": 0, 7 | "yields": "", 8 | "image": "", 9 | "ingredients": [], 10 | "instructions": "", 11 | "ratings": -1, 12 | "reviews": None, 13 | "links": [], 14 | "language": "en", 15 | "nutrients": {}, 16 | } 17 | -------------------------------------------------------------------------------- 
/recipe_scrapers/bonappetit.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class BonAppetit(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "bonappetit.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return None 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def ingredients(self): 19 | return self.schema.ingredients() 20 | 21 | def instructions(self): 22 | return self.schema.instructions() 23 | -------------------------------------------------------------------------------- /recipe_scrapers/cookeatshare.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class CookEatShare(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "cookeatshare.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return None 14 | 15 | def image(self): 16 | return self.schema.image() 17 | 18 | def ingredients(self): 19 | return self.schema.ingredients() 20 | 21 | def instructions(self): 22 | return self.schema.instructions() 23 | -------------------------------------------------------------------------------- /recipe_scrapers/amazingribs.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class AmazingRibs(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "amazingribs.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def ingredients(self): 19 | return self.schema.ingredients() 20 | 21 | def instructions(self): 22 | return self.schema.instructions() 23 | 
-------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - repo: https://gitlab.com/pycqa/flake8.git 9 | rev: 3.8.3 10 | hooks: 11 | - id: flake8 12 | additional_dependencies: [flake8-use-fstring] 13 | - repo: https://github.com/pycqa/isort 14 | rev: 5.7.0 15 | hooks: 16 | - id: isort 17 | args: ["--profile", "black", "--filter-files"] 18 | - repo: https://github.com/psf/black 19 | rev: 19.3b0 20 | hooks: 21 | - id: black 22 | -------------------------------------------------------------------------------- /recipe_scrapers/globo.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Globo(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "receitas.globo.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/kochbar.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Kochbar(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "kochbar.de" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return 
self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/abril.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Abril(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "claudia.abril.com.br" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/cdkitchen.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class CdKitchen(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "cdkitchen.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def ingredients(self): 19 | return self.schema.ingredients() 20 | 21 | def instructions(self): 22 | return self.schema.instructions() 23 | 24 | def image(self): 25 | return self.schema.image() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/foodandwine.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class FoodAndWine(AbstractScraper): 5 | 
@classmethod 6 | def host(cls): 7 | return "foodandwine.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/acouplecooks.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class ACoupleCooks(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "acouplecooks.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def instructions(self): 22 | return self.schema.instructions() 23 | 24 | def ingredients(self): 25 | return self.schema.ingredients() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/budgetbytes.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class BudgetBytes(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "budgetbytes.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def ingredients(self): 19 | return self.schema.ingredients() 20 | 21 | def instructions(self): 22 | return self.schema.instructions() 23 | 24 | def ratings(self): 25 | return self.schema.ratings() 26 | 
-------------------------------------------------------------------------------- /recipe_scrapers/foodnetwork.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class FoodNetwork(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "foodnetwork.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def author(self): 13 | return self.schema.author() 14 | 15 | def total_time(self): 16 | return self.schema.total_time() 17 | 18 | def yields(self): 19 | return self.schema.yields() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/eatwhattonight.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class EatWhatTonight(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "eatwhattonight.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/archanaskitchen.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class ArchanasKitchen(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "archanaskitchen.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 
| def yields(self): 16 | return self.schema.yields() 17 | 18 | def ingredients(self): 19 | return self.schema.ingredients() 20 | 21 | def instructions(self): 22 | return self.schema.instructions() 23 | 24 | def ratings(self): 25 | return self.schema.ratings() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/justonecookbook.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class JustOneCookbook(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "justonecookbook.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | -------------------------------------------------------------------------------- /tests/library/test_exceptions.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from recipe_scrapers import ( 4 | NoSchemaFoundInWildMode, 5 | WebsiteNotImplementedError, 6 | scrape_me, 7 | ) 8 | 9 | 10 | class TestExceptions(unittest.TestCase): 11 | def test_WebsiteNotImplementedError(self): 12 | with self.assertRaises(WebsiteNotImplementedError): 13 | scrape_me("https://example.com/recipe") 14 | 15 | def test_NoSchemaFoundInWildMode(self): 16 | exception = NoSchemaFoundInWildMode("example.com") 17 | 18 | self.assertEqual(exception.url, "example.com") 19 | self.assertEqual(exception.message, "No Recipe Schema found at example.com.") 20 | -------------------------------------------------------------------------------- /recipe_scrapers/giallozafferano.py: 
-------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class GialloZafferano(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "ricette.giallozafferano.it" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def ingredients(self): 19 | return self.schema.ingredients() 20 | 21 | def instructions(self): 22 | return self.schema.instructions() 23 | 24 | def ratings(self): 25 | return self.schema.ratings() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/thenutritiouskitchen.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class TheNutritiousKitchen(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "thenutritiouskitchen.co" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | -------------------------------------------------------------------------------- /.github/workflows/linters.yaml: -------------------------------------------------------------------------------- 1 | name: linters 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | linters: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Set up Python 17 | uses: actions/setup-python@v2 18 | with: 19 | python-version: "3.x" 20 | - name: Install dependencies 21 | run: | 22 | python -m pip 
install --upgrade pip 23 | pip install -r requirements-dev.txt 24 | - name: black and flake checks 25 | run: | 26 | black --check . 27 | flake8 --count . 28 | -------------------------------------------------------------------------------- /recipe_scrapers/practicalselfreliance.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class PracticalSelfReliance(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "practicalselfreliance.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | # from .bcp47_validate import Bcp47ValidatePlugin 2 | from .exception_handling import ExceptionHandlingPlugin 3 | from .html_tags_stripper import HTMLTagStripperPlugin 4 | from .normalize_string import NormalizeStringPlugin 5 | from .opengraph_image_fetch import OpenGraphImageFetchPlugin 6 | from .schemaorg_fill import SchemaOrgFillPlugin 7 | from .schemaorg_priority import SchemaOrgPriorityPlugin 8 | 9 | __all__ = [ 10 | # "Bcp47ValidatePlugin", 11 | "ExceptionHandlingPlugin", 12 | "HTMLTagStripperPlugin", 13 | "NormalizeStringPlugin", 14 | "OpenGraphImageFetchPlugin", 15 | "SchemaOrgFillPlugin", 16 | "SchemaOrgPriorityPlugin", 17 | ] 18 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | 
def pytest_addoption(parser): 5 | parser.addoption( 6 | "--online", 7 | action="store_true", 8 | default=False, 9 | help="run unit tests against online web content", 10 | ) 11 | 12 | 13 | @pytest.fixture(scope="session", autouse=True) 14 | def configure_online(request): 15 | seen = {None} 16 | session = request.node 17 | online = request.config.getoption("--online") 18 | for item in session.items: 19 | cls = item.getparent(pytest.Class) 20 | if cls not in seen: 21 | if hasattr(cls.obj, "online"): 22 | cls.obj.online = online 23 | seen.add(cls) 24 | -------------------------------------------------------------------------------- /recipe_scrapers/g750g.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class G750g(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "750g.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/misya.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Misya(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "misya.info" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 
| 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/tasty.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Tasty(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "tasty.co" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/chefkoch.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Chefkoch(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "chefkoch.de" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/cookpad.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class 
CookPad(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "cookpad.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/gousto.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Gousto(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "gousto.co.uk" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/blueapron.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class BlueApron(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "blueapron.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return 
self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/cuisineaz.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class CuisineAZ(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "cuisineaz.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/kuchniadomowa.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class KuchniaDomowa(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "kuchnia-domowa.pl" 8 | 9 | def title(self): 10 | return self.soup.find("h2").get_text().strip() 11 | 12 | def image(self): 13 | urls = self.soup.findAll("img", {"class": "article-img", "id": "article-img-1"}) 14 | return f"https:{urls[1]['src']}" 15 | 16 | def instructions(self): 17 | instructions = self.soup.find("div", {"id": "recipe-instructions"}).findAll( 18 | "li" 19 | ) 20 | instructions = [x.text for x in instructions] 21 | return "\n".join(instructions) 22 | -------------------------------------------------------------------------------- /recipe_scrapers/marmiton.py: -------------------------------------------------------------------------------- 1 | from ._abstract import 
AbstractScraper 2 | 3 | 4 | class Marmiton(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "marmiton.org" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/myrecipes.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class MyRecipes(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "myrecipes.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/zenbelly.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class ZenBelly(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "zenbelly.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def 
ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/averiecooks.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class AverieCooks(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "averiecooks.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/cybercook.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Cybercook(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "cybercook.com.br" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/hassenchef.py: 
-------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Hassanchef(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "hassanchef.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/justataste.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class JustATaste(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "justataste.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/livelytable.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class LivelyTable(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "livelytable.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def 
yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/nytimes.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class NYTimes(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "cooking.nytimes.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/ohsheglows.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class OhSheGlows(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "ohsheglows.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | 
-------------------------------------------------------------------------------- /recipe_scrapers/purelypope.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class PurelyPope(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "purelypope.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/skinnytaste.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class SkinnyTaste(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "skinnytaste.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/bakingsense.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class BakingSense(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "baking-sense.com" 8 | 9 | def title(self): 10 | 
return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/castironketo.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class CastIronKeto(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "castironketo.net" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/hostthetoast.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Hostthetoast(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "hostthetoast.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def 
ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/bakingmischeif.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class BakingMischeif(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "bakingmischief.com" 8 | 9 | def author(self): 10 | return self.schema.author() 11 | 12 | def title(self): 13 | return self.schema.title() 14 | 15 | def total_time(self): 16 | return self.schema.total_time() 17 | 18 | def yields(self): 19 | return self.schema.yields() 20 | 21 | def image(self): 22 | return self.schema.image() 23 | 24 | def ingredients(self): 25 | return self.schema.ingredients() 26 | 27 | def instructions(self): 28 | return self.schema.instructions() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/domesticateme.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class DomesticateMe(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "domesticate-me.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/gimmesomeoven.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class GimmeSomeOven(AbstractScraper): 5 | @classmethod 6 | 
def host(cls): 7 | return "gimmesomeoven.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/littlespicejar.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class LittleSpiceJar(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "littlespicejar.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/lovingitvegan.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Lovingitvegan(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "lovingitvegan.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 
| 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/recipietineats.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class RecipieTinEats(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "recipietineats.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/simplywhisked.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class SimplyWhisked(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "simplywhisked.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/sweetcsdesigns.py: -------------------------------------------------------------------------------- 1 | from ._abstract import 
AbstractScraper 2 | 3 | 4 | class SweetCsDesigns(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "sweetcsdesigns.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/thewoksoflife.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Thewoksoflife(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "thewoksoflife.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/vanillaandbean.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class VanillaAndBean(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "vanillaandbean.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return 
self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/watchwhatueat.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class WatchWhatUEat(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "watchwhatueat.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/ambitiouskitchen.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class AmbitiousKitchen(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "ambitiouskitchen.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/bowlofdelicious.py: 
-------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class BowlOfDelicious(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "bowlofdelicious.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/fifteenspatulas.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class FifteenSpatulas(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "fifteenspatulas.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/halfbakedharvest.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class HalfBakedHarvest(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "halfbakedharvest.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return 
self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/lecremedelacrumb.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class LeCremeDeLaCrumb(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "lecremedelacrumb.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/melskitchencafe.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class MelsKitchenCafe(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "melskitchencafe.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 
| -------------------------------------------------------------------------------- /recipe_scrapers/minimalistbaker.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Minimalistbaker(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "minimalistbaker.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/primaledgehealth.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class PrimalEdgeHealth(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "primaledgehealth.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/spendwithpennies.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class SpendWithPennies(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return 
"spendwithpennies.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/theclevercarrot.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class TheCleverCarrot(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "theclevercarrot.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/thekitchenmagpie.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class TheKitchenMagPie(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "thekitchenmagpie.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def 
instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/atelierdeschefs.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class AtelierDesChefs(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "atelierdeschefs.fr" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | yields = self.soup.find("option", {"class": "yield"}) 17 | return f"{yields.get('value')} Servings" 18 | 19 | def ingredients(self): 20 | return self.schema.ingredients() 21 | 22 | def instructions(self): 23 | return self.schema.instructions() 24 | 25 | def ratings(self): 26 | return self.schema.ratings() 27 | -------------------------------------------------------------------------------- /recipe_scrapers/eatsmarter.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Eatsmarter(AbstractScraper): 5 | @classmethod 6 | def host(self, domain="com"): 7 | return f"eatsmarter.{domain}" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: publish 
2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | publish: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - name: Set up Python 13 | uses: actions/setup-python@v2 14 | with: 15 | python-version: "3.x" 16 | - name: Build the package 17 | run: | 18 | python -m pip install --user --upgrade setuptools wheel 19 | python setup.py sdist bdist_wheel 20 | - name: Publish a Python distribution to PyPI 21 | uses: pypa/gh-action-pypi-publish@master 22 | with: 23 | user: __token__ 24 | password: ${{ secrets.PYPI_API_TOKEN }} 25 | verbose: true 26 | -------------------------------------------------------------------------------- /recipe_scrapers/wholefoods.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class WholeFoods(AbstractScraper): 5 | @classmethod 6 | def host(self, domain="com"): 7 | return f"wholefoodsmarket.{domain}" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/nourishedbynutrition.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class NourishedByNutrition(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "nourishedbynutrition.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def 
class Yemek(AbstractScraper):
    """Scraper for yemek.com; all fields are delegated to ``self.schema``."""

    @classmethod
    def host(cls):
        """Return the host name this scraper is registered for."""
        return "yemek.com"

    # --- descriptive fields -------------------------------------------------

    def title(self):
        """Return the recipe title from the schema data."""
        return self.schema.title()

    def author(self):
        """Return the recipe author from the schema data."""
        return self.schema.author()

    def image(self):
        """Return the recipe image from the schema data."""
        return self.schema.image()

    def ratings(self):
        """Return the recipe ratings from the schema data."""
        return self.schema.ratings()

    # --- preparation fields -------------------------------------------------

    def total_time(self):
        """Return the total time from the schema data."""
        return self.schema.total_time()

    def yields(self):
        """Return the recipe yield from the schema data."""
        return self.schema.yields()

    def ingredients(self):
        """Return the ingredients from the schema data."""
        return self.schema.ingredients()

    def instructions(self):
        """Return the instructions from the schema data."""
        return self.schema.instructions()
class BBCGoodFood(AbstractScraper):
    """Scraper for bbcgoodfood.com built on the page's schema data."""

    @classmethod
    def host(cls):
        """Return the host name this scraper is registered for."""
        return "bbcgoodfood.com"

    def title(self):
        """Return the recipe title from the schema data."""
        return self.schema.title()

    def total_time(self):
        """Return the total time from the schema data."""
        return self.schema.total_time()

    def yields(self):
        """Return the recipe yield from the schema data."""
        return self.schema.yields()

    def image(self):
        """Return the recipe image from the schema data."""
        return self.schema.image()

    def ingredients(self):
        """Return the ingredients from the schema data."""
        return self.schema.ingredients()

    def instructions(self):
        """Return the schema instructions reduced to their text content.

        The schema value is converted to a string and parsed with
        BeautifulSoup's ``html.parser`` so that only the text of the parsed
        document is returned.
        """
        raw_markup = str(self.schema.instructions())
        parsed = BeautifulSoup(raw_markup, features="html.parser")
        return parsed.text
class EatingBirdFood(AbstractScraper):
    """Scraper for eatingbirdfood.com; all fields come from ``self.schema``."""

    @classmethod
    def host(cls):
        """Return the host name this scraper is registered for."""
        return "eatingbirdfood.com"

    # --- descriptive fields -------------------------------------------------

    def title(self):
        """Return the recipe title from the schema data."""
        return self.schema.title()

    def author(self):
        """Return the recipe author from the schema data."""
        return self.schema.author()

    def image(self):
        """Return the recipe image from the schema data."""
        return self.schema.image()

    def ratings(self):
        """Return the recipe ratings from the schema data."""
        return self.schema.ratings()

    # --- preparation fields -------------------------------------------------

    def total_time(self):
        """Return the total time from the schema data."""
        return self.schema.total_time()

    def yields(self):
        """Return the recipe yield from the schema data."""
        return self.schema.yields()

    def ingredients(self):
        """Return the ingredients from the schema data."""
        return self.schema.ingredients()

    def instructions(self):
        """Return the instructions from the schema data."""
        return self.schema.instructions()
class JimCooksFoodGood(AbstractScraper):
    """Scraper for jimcooksfoodgood.com; all fields come from ``self.schema``."""

    @classmethod
    def host(cls):
        """Return the host name this scraper is registered for."""
        return "jimcooksfoodgood.com"

    # --- descriptive fields -------------------------------------------------

    def title(self):
        """Return the recipe title from the schema data."""
        return self.schema.title()

    def author(self):
        """Return the recipe author from the schema data."""
        return self.schema.author()

    def image(self):
        """Return the recipe image from the schema data."""
        return self.schema.image()

    def ratings(self):
        """Return the recipe ratings from the schema data."""
        return self.schema.ratings()

    # --- preparation fields -------------------------------------------------

    def total_time(self):
        """Return the total time from the schema data."""
        return self.schema.total_time()

    def yields(self):
        """Return the recipe yield from the schema data."""
        return self.schema.yields()

    def ingredients(self):
        """Return the ingredients from the schema data."""
        return self.schema.ingredients()

    def instructions(self):
        """Return the instructions from the schema data."""
        return self.schema.instructions()
class HeadbangersKitchen(AbstractScraper):
    """Scraper for headbangerskitchen.com; all fields come from ``self.schema``."""

    @classmethod
    def host(cls):
        """Return the host name this scraper is registered for."""
        return "headbangerskitchen.com"

    # --- descriptive fields -------------------------------------------------

    def title(self):
        """Return the recipe title from the schema data."""
        return self.schema.title()

    def author(self):
        """Return the recipe author from the schema data."""
        return self.schema.author()

    def image(self):
        """Return the recipe image from the schema data."""
        return self.schema.image()

    def ratings(self):
        """Return the recipe ratings from the schema data."""
        return self.schema.ratings()

    # --- preparation fields -------------------------------------------------

    def total_time(self):
        """Return the total time from the schema data."""
        return self.schema.total_time()

    def yields(self):
        """Return the recipe yield from the schema data."""
        return self.schema.yields()

    def ingredients(self):
        """Return the ingredients from the schema data."""
        return self.schema.ingredients()

    def instructions(self):
        """Return the instructions from the schema data."""
        return self.schema.instructions()
class TestUtils(unittest.TestCase):
    """Checks that ``get_minutes`` converts duration strings to minutes.

    Each test feeds one textual duration format to ``get_minutes`` and
    compares the result with the expected number of minutes.
    """

    def test_get_minutes_english_description(self):
        """A spelled-out English duration is converted to minutes."""
        text = "1 hour 15 mins"
        # Use the TestCase assertion (not a bare ``assert``) so the check is
        # never stripped under ``python -O`` and failures report both values,
        # matching the other tests in this class.
        self.assertEqual(75, get_minutes(text))

    def test_get_minutes_english_abbreviation(self):
        """A compact ``<h>h<m>m`` duration is converted to minutes."""
        text = "3h10m"
        self.assertEqual(190, get_minutes(text))

    def test_get_minutes_short_iso_format(self):
        """An ISO-style ``PT..H..M`` duration is converted to minutes."""
        text = "PT2H30M"
        self.assertEqual(150, get_minutes(text))

    def test_get_minutes_long_iso_format(self):
        """An ISO-style duration with a day component is converted to minutes."""
        text = "P0DT1H10M"
        self.assertEqual(70, get_minutes(text))

    def test_get_minutes_int_in_string_literal(self):
        """A bare integer string is taken as a number of minutes."""
        text = "90"
        self.assertEqual(90, get_minutes(text))
class HelloFresh(AbstractScraper):
    """Scraper for hellofresh recipe pages.

    Every recipe field is read from the page's parsed schema data
    (``self.schema``).
    """

    @classmethod
    def host(cls, domain="com"):
        """Return the host name handled by this scraper.

        :param domain: top-level domain appended to ``hellofresh.``;
            defaults to ``"com"``.
        :return: the host name as a string, e.g. ``"hellofresh.com"``.
        """
        # The implicit first argument of a classmethod is the class itself,
        # so it is named ``cls`` like in the other scrapers.
        return f"hellofresh.{domain}"

    def title(self):
        """Return the recipe title from the schema data."""
        return self.schema.title()

    def total_time(self):
        """Return the total time from the schema data."""
        return self.schema.total_time()

    def yields(self):
        """Return the recipe yield from the schema data."""
        return self.schema.yields()

    def ingredients(self):
        """Return the ingredients from the schema data."""
        return self.schema.ingredients()

    def instructions(self):
        """Return the instructions from the schema data."""
        return self.schema.instructions()

    def image(self):
        """Return the recipe image from the schema data."""
        return self.schema.image()

    def nutrients(self):
        """Return the nutrient information from the schema data."""
        return self.schema.nutrients()

    def cuisine(self):
        """Return the cuisine from the schema data."""
        return self.schema.cuisine()

    def category(self):
        """Return the category from the schema data."""
        return self.schema.category()
class BettyCrocker(AbstractScraper):
    """Scraper for bettycrocker.com; all fields come from ``self.schema``."""

    @classmethod
    def host(cls):
        """Return the host name this scraper is registered for."""
        return "bettycrocker.com"

    # --- descriptive fields -------------------------------------------------

    def title(self):
        """Return the recipe title from the schema data."""
        return self.schema.title()

    def image(self):
        """Return the recipe image from the schema data."""
        return self.schema.image()

    def ratings(self):
        """Return the recipe ratings from the schema data."""
        return self.schema.ratings()

    # --- preparation fields -------------------------------------------------

    def total_time(self):
        """Return the total time from the schema data."""
        return self.schema.total_time()

    def yields(self):
        """Return the recipe yield from the schema data."""
        return self.schema.yields()

    def ingredients(self):
        """Return the ingredients from the schema data."""
        return self.schema.ingredients()

    def instructions(self):
        """Return the instructions from the schema data."""
        return self.schema.instructions()
class SeriousEats(AbstractScraper):
    """Scraper for seriouseats.com.

    Most fields come from ``self.schema``; the yield is read from the page
    markup instead.
    """

    @classmethod
    def host(cls):
        """Return the host name this scraper is registered for."""
        return "seriouseats.com"

    def author(self):
        """Return the recipe author from the schema data."""
        return self.schema.author()

    def title(self):
        """Return the recipe title from the schema data."""
        return self.schema.title()

    def total_time(self):
        """Return the total time from the schema data."""
        return self.schema.total_time()

    def yields(self):
        """Return the yield parsed from the page's serving/yield block.

        The ``recipe-serving`` div is preferred; when it is absent the
        ``recipe-yield`` div is used. The ``meta-text__data`` span inside the
        chosen div is handed to ``get_yields``.
        """
        # Both lookups are performed up front; the second acts as a fallback.
        container = self.soup.find("div", {"class": "recipe-serving"})
        fallback = self.soup.find("div", {"class": "recipe-yield"})
        if not container:
            container = fallback

        amount = container.find("span", {"class": "meta-text__data"})
        return get_yields(amount)

    def ingredients(self):
        """Return the ingredients from the schema data."""
        return self.schema.ingredients()

    def instructions(self):
        """Return the instructions from the schema data."""
        return self.schema.instructions()

    def ratings(self):
        """Return the recipe ratings from the schema data."""
        return self.schema.ratings()
class Food(AbstractScraper):
    """Scraper for food.com that reads every field from the page markup."""

    @classmethod
    def host(cls):
        """Return the host name this scraper is registered for."""
        return "food.com"

    def title(self):
        """Return the text of the page's first ``h1`` element."""
        heading = self.soup.find("h1")
        return heading.get_text()

    def total_time(self):
        """Return the minutes parsed from the ``recipe-facts__time`` div."""
        time_block = self.soup.find("div", {"class": "recipe-facts__time"})
        return get_minutes(time_block)

    def yields(self):
        """Return the yield parsed from the ``recipe-facts__servings`` div text."""
        servings_block = self.soup.find("div", {"class": "recipe-facts__servings"})
        return get_yields(servings_block.get_text())

    def ingredients(self):
        """Return the normalized text of every ``recipe-ingredients__item``."""
        items = self.soup.findAll("li", {"class": "recipe-ingredients__item"})
        return [normalize_string(item.get_text()) for item in items]

    def instructions(self):
        """Return the ``recipe-directions__step`` texts joined by newlines."""
        steps = self.soup.findAll("li", {"class": "recipe-directions__step"})
        step_texts = [step.get_text() for step in steps]
        return "\n".join(step_texts)
name: unittests

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  test:
    # Run each matrix combination on the operating system it names. With a
    # fixed runner here, every `os` entry of the matrix would execute on the
    # same platform and the macOS/Windows jobs would only duplicate work.
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        python-version: ["3.7", "3.8", "3.9", "3.10"]
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements-dev.txt
      - name: Unittest and Coverage Report
        run: |
          python run_tests.py
      - name: coveralls.io
        # NOTE(review): this action appears to be container-based, and
        # container actions are only supported on Linux runners, so the
        # coverage upload is limited to the Linux jobs - confirm.
        if: runner.os == 'Linux'
        uses: AndreMiras/coveralls-python-action@develop
        with:
          # coveralls repo token
          # NOTE(review): this credential is committed in plain text; consider
          # moving it to an encrypted repository secret if it does not need
          # to be readable by workflows triggered from forks.
          github-token: "SmlfzlVJy4ow55rduU7IU5GmmFCfAdGeq"
class MarthaStewart(AbstractScraper):
    """Scraper for marthastewart.com.

    Time and yield are read from the page markup; the remaining fields come
    from ``self.schema``.
    """

    @classmethod
    def host(cls):
        """Return the host name this scraper is registered for."""
        return "marthastewart.com"

    def title(self):
        """Return the recipe title from the schema data."""
        return self.schema.title()

    def total_time(self):
        """Return the minutes parsed from the first meta wrapper on the page.

        The first ``two-subcol-content-wrapper`` div is located, and the
        stripped text of its ``recipe-meta-item-body`` div is passed to
        ``get_minutes``.
        """
        wrappers = self.soup.findAll("div", {"class": "two-subcol-content-wrapper"})
        time_body = wrappers[0].find("div", {"class": "recipe-meta-item-body"})
        time_text = time_body.text.strip()
        return get_minutes(time_text)

    def yields(self):
        """Return the stripped text of the second meta wrapper's body div."""
        wrappers = self.soup.findAll("div", {"class": "two-subcol-content-wrapper"})
        yield_body = wrappers[1].find("div", {"class": "recipe-meta-item-body"})
        return yield_body.text.strip()

    def ingredients(self):
        """Return the ingredients from the schema data."""
        return self.schema.ingredients()

    def instructions(self):
        """Return the instructions from the schema data."""
        return self.schema.instructions()

    def ratings(self):
        """Return the recipe ratings from the schema data."""
        return self.schema.ratings()
get_minutes(self.soup.find(itemprop="totalTime").parent) 17 | 18 | def yields(self): 19 | return get_yields(self.soup.find(itemprop="recipeYield").parent) 20 | 21 | def ingredients(self): 22 | ingredients = self.soup.findAll("li", {"itemprop": "recipeIngredient"}) 23 | 24 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 25 | 26 | def instructions(self): 27 | instructions = self.soup.findAll("li", {"itemprop": "recipeInstructions"}) 28 | 29 | return "\n".join( 30 | [normalize_string(instruction.get_text()) for instruction in instructions] 31 | ) 32 | -------------------------------------------------------------------------------- /recipe_scrapers/copykat.py: -------------------------------------------------------------------------------- 1 | # copykat.py 2 | # Written by G.D. Wallters 3 | # Freely released the code to recipe_scraper group 4 | # 8 February, 2020 5 | # ======================================================= 6 | 7 | 8 | from ._abstract import AbstractScraper 9 | from ._utils import normalize_string 10 | 11 | 12 | class CopyKat(AbstractScraper): 13 | @classmethod 14 | def host(cls): 15 | return "copykat.com" 16 | 17 | def title(self): 18 | return self.schema.title() 19 | 20 | def total_time(self): 21 | return self.schema.total_time() 22 | 23 | def yields(self): 24 | return self.schema.yields() 25 | 26 | def image(self): 27 | return self.schema.image() 28 | 29 | def ingredients(self): 30 | return self.schema.ingredients() 31 | 32 | def instructions(self): 33 | return self.schema.instructions() 34 | 35 | def ratings(self): 36 | return self.schema.ratings() 37 | 38 | def description(self): 39 | d = normalize_string(self.soup.find("span", {"style": "display: block;"}).text) 40 | 41 | return d if d else None 42 | -------------------------------------------------------------------------------- /recipe_scrapers/plugins/bcp47_validate.py: -------------------------------------------------------------------------------- 1 | import 
functools 2 | 3 | from language_tags import tags 4 | 5 | from ._interface import PluginInterface 6 | 7 | 8 | class Bcp47ValidatePlugin(PluginInterface): 9 | """ 10 | If you wish to use this plugin make sure you 11 | pip install "language-tags>=1.0.0" 12 | 13 | Validates that the value returned by .language() is a valid BCP 47 language tag; returns None when it is not 14 | For more info read https://github.com/OnroerendErfgoed/language-tags and the corresponding links there 15 | 16 | - https://tools.ietf.org/html/bcp47 17 | - https://tools.ietf.org/html/rfc5646 18 | - https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry 19 | """ 20 | 21 | run_on_hosts = ("*",) 22 | run_on_methods = ("language",) 23 | 24 | @classmethod 25 | def run(cls, decorated): 26 | @functools.wraps(decorated) 27 | def decorated_method_wrapper(self, *args, **kwargs): 28 | tag = tags.tag(decorated(self, *args, **kwargs)) 29 | return str(tag) if tag.valid else None 30 | 31 | return decorated_method_wrapper 32 | -------------------------------------------------------------------------------- /recipe_scrapers/whatsgabycooking.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, normalize_string 3 | 4 | 5 | class WhatsGabyCooking(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "whatsgabycooking.com" 9 | 10 | def title(self): 11 | return self.soup.find("h1", {"class": "entry-title"}).get_text() 12 | 13 | def total_time(self): 14 | return get_minutes(self.soup.find("p", {"class": "header-recipe-time"})) 15 | 16 | def yields(self): 17 | return self.schema.yields() 18 | 19 | def ingredients(self): 20 | ingredients = self.soup.findAll("li", {"class": "wprm-recipe-ingredient"}) 21 | 22 | return [ 23 | normalize_string(ingredient.get_text()) 24 | for ingredient in ingredients 25 | if len(ingredient) > 0 26 | ] 27 | 28 | def instructions(self): 29 | instructions =
self.soup.findAll("li", {"class": "wprm-recipe-instruction"}) 30 | 31 | return "\n".join( 32 | [normalize_string(instruction.get_text()) for instruction in instructions] 33 | ) 34 | -------------------------------------------------------------------------------- /recipe_scrapers/rachlmansfield.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes 3 | 4 | 5 | class RachlMansfield(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "rachlmansfield.com" 9 | 10 | def author(self): 11 | return self.schema.author() 12 | 13 | def title(self): 14 | return self.schema.title() 15 | 16 | def total_time(self): 17 | # Total time is not reported correctly by the schema data. Using the sum of the prep and cook times 18 | # as a workaround instead. 19 | prep_time = get_minutes(self.schema.data.get("prepTime")) or 0 20 | cook_time = get_minutes(self.schema.data.get("cookTime")) or 0 21 | return prep_time + cook_time 22 | 23 | def yields(self): 24 | return self.schema.yields() 25 | 26 | def image(self): 27 | return self.schema.image() 28 | 29 | def ingredients(self): 30 | return self.schema.ingredients() 31 | 32 | def instructions(self): 33 | return self.schema.instructions() 34 | 35 | def ratings(self): 36 | return self.schema.ratings() 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Hristo Harsev 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the 
Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /recipe_scrapers/heinzbrasil.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import normalize_string 3 | 4 | 5 | class HeinzBrasil(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "heinzbrasil.com.br" 9 | 10 | def title(self): 11 | return self.soup.find("h1", {"class": "krRDPrecName"}).get_text() 12 | 13 | def total_time(self): 14 | return 0 15 | 16 | def image(self): 17 | return self.soup.find("img", {"class": "krBanImg"})["src"] 18 | 19 | def ingredients(self): 20 | ingredients = self.soup.findAll("div", {"class": "krRDPIngreListText"}) 21 | 22 | return [ 23 | normalize_string( 24 | "{} {}".format(ingredient["qty"], ingredient["ingredientname"]) 25 | ) 26 | for ingredient in ingredients 27 | ] 28 | 29 | def instructions(self): 30 | instructions = ( 31 | self.soup.find("div", {"class": "krRecipeMakeItText"}) 32 | .findNext("div", {"class": "class"}) 33 | .nextSibling 34 | ) 35 | return normalize_string(instructions.get_text()) 36 | -------------------------------------------------------------------------------- /recipe_scrapers/przepisy.py: 
-------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class Przepisy(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "przepisy.pl" 9 | 10 | def title(self): 11 | return self.soup.find("h1", {"class": "title"}).get_text() 12 | 13 | def total_time(self): 14 | return get_minutes(self.soup.find("div", {"class": "time-count"})) 15 | 16 | def yields(self): 17 | return get_yields(self.soup.find("div", {"class": "person-count"})) 18 | 19 | def ingredients(self): 20 | ingredients = self.soup.findAll("span", {"class": "text-bg-white"}) 21 | 22 | return [ 23 | normalize_string(i.get_text()) + " " + normalize_string(j.get_text()) 24 | for i, j in zip(ingredients[0::2], ingredients[1::2]) 25 | ] 26 | 27 | def instructions(self): 28 | instructions = self.soup.findAll("p", {"class": "step-info-description"}) 29 | 30 | return "\n".join( 31 | [normalize_string(instruction.get_text()) for instruction in instructions] 32 | ) 33 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | 4 | import pytest 5 | 6 | 7 | class ScraperTest(unittest.TestCase): 8 | 9 | maxDiff = None 10 | online = False 11 | test_file_name = None 12 | 13 | def setUp(self): 14 | os.environ[ 15 | "RECIPE_SCRAPERS_SETTINGS" 16 | ] = "tests.test_data.test_settings_module.test_settings" 17 | 18 | test_file_name = ( 19 | self.test_file_name 20 | if self.test_file_name 21 | else self.scraper_class.__name__.lower() 22 | ) 23 | with open( 24 | "tests/test_data/{}.testhtml".format(test_file_name), encoding="utf-8" 25 | ) as testfile: 26 | self.harvester_class = self.scraper_class(testfile) 27 | canonical_url = self.harvester_class.canonical_url() 28 | if self.online: 29 | if not canonical_url: 30 
| pytest.skip( 31 | f"could not find canonical url for online test of scraper '{self.scraper_class.__name__}'" 32 | ) 33 | self.harvester_class = self.scraper_class(url=canonical_url) 34 | -------------------------------------------------------------------------------- /recipe_scrapers/hundredandonecookbooks.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class HundredAndOneCookbooks(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "101cookbooks.com" 9 | 10 | def title(self): 11 | return self.soup.find("h1").get_text() 12 | 13 | def total_time(self): 14 | return get_minutes( 15 | self.soup.findAll("div", {"class": "wprm-recipe-time"})[-1].get_text() 16 | ) 17 | 18 | def yields(self): 19 | return get_yields( 20 | self.soup.findAll("div", {"class": "wprm-recipe-time"})[0].get_text() 21 | ) 22 | 23 | def ingredients(self): 24 | ingredients = self.soup.findAll("li", {"class": "wprm-recipe-ingredient"}) 25 | 26 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 27 | 28 | def instructions(self): 29 | instructions = self.soup.findAll("li", {"class": "wprm-recipe-instruction"}) 30 | 31 | return "\n".join( 32 | [normalize_string(instruction.get_text()) for instruction in instructions] 33 | ) 34 | -------------------------------------------------------------------------------- /recipe_scrapers/plugins/normalize_string.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import logging 3 | 4 | from recipe_scrapers.settings import settings 5 | 6 | from .._utils import normalize_string 7 | from ._interface import PluginInterface 8 | 9 | logging.basicConfig() 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class NormalizeStringPlugin(PluginInterface): 14 | """ 15 | Explicitly run the output from the methods listed 
through normalize_string 16 | """ 17 | 18 | decorate_hosts = ("*",) 19 | run_on_methods = ("title",) 20 | 21 | @classmethod 22 | def run(cls, decorated): 23 | @functools.wraps(decorated) 24 | def decorated_method_wrapper(self, *args, **kwargs): 25 | # TODO: write logging. Configure logging. 26 | logger.setLevel(settings.LOG_LEVEL) 27 | class_name = self.__class__.__name__ 28 | method_name = decorated.__name__ 29 | logger.debug( 30 | f"Decorating: {class_name}.{method_name}() with NormalizeStringPlugin" 31 | ) 32 | 33 | return normalize_string(decorated(self, *args, **kwargs)) 34 | 35 | return decorated_method_wrapper 36 | -------------------------------------------------------------------------------- /recipe_scrapers/springlane.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Springlane(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "springlane.de" 8 | 9 | def author(self): 10 | return self.schema.author() 11 | 12 | def title(self): 13 | return self.schema.title() 14 | 15 | def category(self): 16 | return self.schema.category() 17 | 18 | def cuisine(self): 19 | return self.schema.cuisine() 20 | 21 | def total_time(self): 22 | return self.schema.total_time() 23 | 24 | def cook_time(self): 25 | return self.schema.cook_time() 26 | 27 | def prep_time(self): 28 | return self.schema.prep_time() 29 | 30 | def yields(self): 31 | return self.schema.yields() 32 | 33 | def image(self): 34 | return self.schema.image() 35 | 36 | def nutrients(self): 37 | return self.schema.nutrients() 38 | 39 | def ingredients(self): 40 | return self.schema.ingredients() 41 | 42 | def instructions(self): 43 | return self.schema.instructions() 44 | 45 | def ratings(self): 46 | return self.schema.ratings() 47 | -------------------------------------------------------------------------------- /recipe_scrapers/thekitchn.py: 
-------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class TheKitchn(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "thekitchn.com" 9 | 10 | def title(self): 11 | return self.soup.find("h2", {"class": "Recipe__title"}).get_text() 12 | 13 | def total_time(self): 14 | elements = self.soup.findAll("p", {"class": "Recipe__timeEntry"}) 15 | return sum([get_minutes(element) for element in elements]) 16 | 17 | def yields(self): 18 | return get_yields( 19 | self.soup.find("p", {"class": "jsx-1778438071 Recipe__yield"}) 20 | ) 21 | 22 | def ingredients(self): 23 | ingredients = self.soup.findAll("li", {"class": "Recipe__ingredient"}) 24 | 25 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 26 | 27 | def instructions(self): 28 | instructions = self.soup.findAll("li", {"class": "Recipe__instructionStep"}) 29 | 30 | return "\n".join( 31 | [normalize_string(instruction.get_text()) for instruction in instructions] 32 | ) 33 | -------------------------------------------------------------------------------- /recipe_scrapers/paninihappy.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class PaniniHappy(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "paninihappy.com" 9 | 10 | def title(self): 11 | return self.soup.find("h1", {"class": "entry-title"}).get_text() 12 | 13 | def total_time(self): 14 | return get_minutes(self.soup.find("span", {"class": "duration"})) 15 | 16 | def yields(self): 17 | return get_yields(self.soup.find("span", {"class": "yield"})) 18 | 19 | def image(self): 20 | image = self.soup.find("img", {"class": "post_image", "src": True}) 21 | return image["src"] if image else None 22 | 23 | def 
ingredients(self): 24 | ingredients = self.soup.findAll("li", {"class": "ingredient"}) 25 | 26 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 27 | 28 | def instructions(self): 29 | instructions = self.soup.findAll("li", {"class": "instruction"}) 30 | 31 | return "\n".join( 32 | [normalize_string(instruction.get_text()) for instruction in instructions] 33 | ) 34 | -------------------------------------------------------------------------------- /recipe_scrapers/thepioneerwoman.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class ThePioneerWoman(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "thepioneerwoman.com" 9 | 10 | def title(self): 11 | return self.soup.find("h3", {"class": "recipe-title"}).get_text() 12 | 13 | def total_time(self): 14 | return sum( 15 | [ 16 | get_minutes(dd) 17 | for dd in self.soup.find( 18 | "div", {"class": "recipe-summary-time"} 19 | ).findAll("dd") 20 | ] 21 | ) 22 | 23 | def yields(self): 24 | return get_yields(self.soup.find("span", {"itemprop": "recipeYield"})) 25 | 26 | def ingredients(self): 27 | ingredients = self.soup.find("ul", {"class": "list-ingredients"}).findAll("li") 28 | 29 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 30 | 31 | def instructions(self): 32 | instructions = self.soup.findAll("div", {"class": "panel-body"})[-1] 33 | 34 | return normalize_string(instructions.get_text()).replace(".", ".\n") 35 | -------------------------------------------------------------------------------- /recipe_scrapers/foodrepublic.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class FoodRepublic(AbstractScraper): 6 | @classmethod 7 | def 
host(cls): 8 | return "foodrepublic.com" 9 | 10 | def title(self): 11 | return self.soup.find("h3", {"class": "recipe-title"}).get_text() 12 | 13 | def total_time(self): 14 | return sum( 15 | [ 16 | get_minutes(self.soup.find("li", {"class": "prep-time"})), 17 | get_minutes(self.soup.find("li", {"class": "cook-time"})), 18 | ] 19 | ) 20 | 21 | def yields(self): 22 | return get_yields(self.soup.find("span", {"itemprop": "recipeYield"})) 23 | 24 | def ingredients(self): 25 | ingredients = self.soup.findAll("li", {"itemprop": "recipeIngredient"}) 26 | 27 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 28 | 29 | def instructions(self): 30 | instructions = self.soup.find("div", {"class": "directions"}).findAll("li") 31 | 32 | return "\n".join( 33 | [normalize_string(instruction.get_text()) for instruction in instructions] 34 | ) 35 | -------------------------------------------------------------------------------- /recipe_scrapers/tasteofhome.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import normalize_string 3 | 4 | 5 | class TasteOfHome(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "tasteofhome.com" 9 | 10 | def title(self): 11 | return self.schema.title() 12 | 13 | def total_time(self): 14 | return self.schema.total_time() 15 | 16 | def yields(self): 17 | return self.schema.yields() 18 | 19 | def image(self): 20 | return self.schema.image() 21 | 22 | def ingredients(self): 23 | return self.schema.ingredients() 24 | 25 | def instructions(self): 26 | instructions = self.soup.findAll("li", {"class": "recipe-directions__item"}) 27 | if instructions: 28 | return "\n".join( 29 | [ 30 | normalize_string(instruction.get_text()) 31 | for instruction in instructions 32 | ] 33 | ) 34 | else: 35 | # In case our HTML parsing doesn't find any instructions, fall back to what the schema provides. 
36 | return self.schema.instructions() 37 | 38 | def ratings(self): 39 | return self.schema.ratings() 40 | -------------------------------------------------------------------------------- /recipe_scrapers/tastesoflizzyt.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class TastesOfLizzyT(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "tastesoflizzyt.com" 9 | 10 | def title(self): 11 | return self.soup.find("h2", {"class": "wprm-recipe-name"}).get_text() 12 | 13 | def total_time(self): 14 | return get_minutes( 15 | self.soup.find("div", {"class": "wprm-recipe-total-time-container"}) 16 | ) 17 | 18 | def yields(self): 19 | return get_yields(self.soup.find("span", {"class": "wprm-recipe-servings"})) 20 | 21 | def ingredients(self): 22 | ingredients = self.soup.find( 23 | "ul", {"class": "wprm-recipe-ingredients"} 24 | ).findAll("li") 25 | 26 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 27 | 28 | def instructions(self): 29 | instructions = self.soup.find( 30 | "ul", {"class": "wprm-recipe-instructions"} 31 | ).findAll("li") 32 | 33 | return "\n".join( 34 | [normalize_string(instruction.get_text()) for instruction in instructions] 35 | ) 36 | -------------------------------------------------------------------------------- /recipe_scrapers/cookinglight.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_yields, normalize_string 3 | 4 | 5 | class CookingLight(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "cookinglight.com" 9 | 10 | def title(self): 11 | return self.schema.title() 12 | 13 | def total_time(self): 14 | return self.schema.total_time() 15 | 16 | def yields(self): 17 | return get_yields(self.schema.yields()) 18 | 19 | def 
image(self): 20 | return self.schema.image() 21 | 22 | def ingredients(self): 23 | ingredients = self.soup.find("div", {"class": "ingredients"}).ul.findAll("li") 24 | return "\n".join( 25 | [normalize_string(ingredient.get_text()) for ingredient in ingredients] 26 | ) 27 | 28 | def instructions(self): 29 | instructions = self.soup.find("div", {"class": "recipe-instructions"}).findAll( 30 | "div", {"class": "step"} 31 | ) 32 | return "\n".join([normalize_string(instr.get_text()) for instr in instructions]) 33 | 34 | def ratings(self): 35 | try: 36 | return self.schema.ratings() 37 | except Exception: 38 | return None 39 | -------------------------------------------------------------------------------- /recipe_scrapers/cucchiaio.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields 3 | 4 | 5 | class Cucchiaio(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "cucchiaio.it" 9 | 10 | def author(self): 11 | return self.schema.author() 12 | 13 | def title(self): 14 | return self.schema.title() 15 | 16 | def total_time(self): 17 | block = self.soup.find("div", {"class": "scheda-ricetta-new"}) 18 | if block: 19 | return sum(map(get_minutes, block.findAll("tr"))) 20 | return 0 21 | 22 | def yields(self): 23 | header = self.soup.find("td", text="PORZIONI") 24 | if header: 25 | value = header.find_next("td") 26 | return get_yields(value) 27 | return None 28 | 29 | def image(self): 30 | data = self.soup.find("div", {"class": "auto"}).find("img", {"class": "image"}) 31 | if data: 32 | data = data.get("src") 33 | return data 34 | 35 | def ingredients(self): 36 | return self.schema.ingredients() 37 | 38 | def instructions(self): 39 | return self.schema.instructions() 40 | 41 | def ratings(self): 42 | return None 43 | -------------------------------------------------------------------------------- /recipe_scrapers/kennymcgovern.py: 
-------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class KennyMcGovern(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "kennymcgovern.com" 9 | 10 | def title(self): 11 | return self.soup.find("div", {"class": "wprm-recipe-name"}).get_text() 12 | 13 | def total_time(self): 14 | return get_minutes( 15 | self.soup.find("span", {"class": "wprm-recipe-total_time"}).parent 16 | ) 17 | 18 | def yields(self): 19 | yields = self.soup.find("span", {"class": "wprm-recipe-servings"}).get_text() 20 | 21 | return get_yields("{} servings".format(yields)) 22 | 23 | def ingredients(self): 24 | ingredients = self.soup.findAll("li", {"class": "wprm-recipe-ingredient"}) 25 | 26 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 27 | 28 | def instructions(self): 29 | instructions = self.soup.findAll( 30 | "div", {"class": "wprm-recipe-instruction-text"} 31 | ) 32 | 33 | return "\n".join( 34 | [normalize_string(instruction.get_text()) for instruction in instructions] 35 | ) 36 | -------------------------------------------------------------------------------- /recipe_scrapers/timesofindia.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import normalize_string 3 | 4 | 5 | class TimesOfIndia(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "recipes.timesofindia.com" 9 | 10 | def author(self): 11 | return self.schema.author() 12 | 13 | def title(self): 14 | return self.schema.title() 15 | 16 | def total_time(self): 17 | return self.schema.total_time() 18 | 19 | def yields(self): 20 | return self.schema.yields() 21 | 22 | def image(self): 23 | return self.schema.image() 24 | 25 | def ingredients(self): 26 | ingredients = self.soup.find_all("label", attrs={"class": "clearfix"}) 27 | 28 | 
return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 29 | 30 | def instructions(self): 31 | return self.schema.instructions() 32 | 33 | def ratings(self): 34 | return self.schema.ratings() 35 | 36 | def language(self): 37 | meta_language = self.soup.find("meta", attrs={"http-equiv": "content-language"}) 38 | 39 | return normalize_string(meta_language.get("content")) 40 | 41 | def cuisine(self): 42 | return self.schema.cuisine() 43 | -------------------------------------------------------------------------------- /tests/test__settings_module.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | 4 | from recipe_scrapers.settings import settings 5 | 6 | 7 | class SettingsModuleTest(unittest.TestCase): 8 | def test_default_settings(self): 9 | 10 | os.environ["RECIPE_SCRAPERS_SETTINGS"] = "recipe_scrapers.settings.default" 11 | 12 | self.assertTrue( 13 | len(settings.PLUGINS) > 0, 14 | "There should be some plugins in the default project's settings", 15 | ) 16 | 17 | self.assertFalse( 18 | settings.SUPPRESS_EXCEPTIONS, 19 | "SUPPRESS_EXCEPTIONS should be set to False in the project's default settings", 20 | ) 21 | 22 | def test_settings_change_when_new_module_set(self): 23 | self.assertFalse( 24 | settings.SUPPRESS_EXCEPTIONS, 25 | "SUPPRESS_EXCEPTIONS should be set to False in the project's default settings", 26 | ) 27 | 28 | os.environ[ 29 | "RECIPE_SCRAPERS_SETTINGS" 30 | ] = "tests.test_data.test_settings_module.test_settings" 31 | 32 | self.assertTrue( 33 | settings.SUPPRESS_EXCEPTIONS, 34 | "SUPPRESS_EXCEPTIONS should be set to True after settings are changed with the testing ones", 35 | ) 36 | -------------------------------------------------------------------------------- /recipe_scrapers/eatingwell.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import normalize_string, 
get_minutes, get_yields 3 | 4 | 5 | class EatingWell(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "eatingwell.com" 9 | 10 | def title(self): 11 | return self.schema.title() 12 | 13 | def image(self): 14 | return self.schema.image() 15 | 16 | def ingredients(self): 17 | return self.schema.ingredients() 18 | 19 | def instructions(self): 20 | return self.schema.instructions() 21 | 22 | def total_time(self): 23 | div = self.soup.findAll("div", {"class": "recipe-meta-item"}) 24 | d = { 25 | normalize_string(key): normalize_string(value) 26 | for key, value in [i.text.split(":") for i in div] 27 | if value is not None 28 | } 29 | return get_minutes(d.get("total")) 30 | 31 | def yields(self): 32 | div = self.soup.findAll("div", {"class": "recipe-meta-item"}) 33 | d = { 34 | normalize_string(key): normalize_string(value) 35 | for key, value in (i.text.split(":") for i in div) 36 | if value is not None 37 | } 38 | return get_yields(d.get("Servings")) 39 | -------------------------------------------------------------------------------- /recipe_scrapers/jamieoliver.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class JamieOliver(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "jamieoliver.com" 9 | 10 | def title(self): 11 | return self.soup.find("h1").get_text() 12 | 13 | def total_time(self): 14 | return get_minutes(self.soup.find("div", {"class": "time"})) 15 | 16 | def yields(self): 17 | return get_yields(self.soup.find("div", {"class": "recipe-detail serves"})) 18 | 19 | def image(self): 20 | container = self.soup.find("div", {"class": "recipe-header-left"}) 21 | if not container: 22 | return None 23 | 24 | image = container.find("img", {"src": True}) 25 | return image["src"] if image else None 26 | 27 | def ingredients(self): 28 | ingredients = self.soup.find("ul", {"class", 
"ingred-list"}).findAll("li") 29 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 30 | 31 | def instructions(self): 32 | instructions = self.soup.find("ol", {"class": "recipeSteps"}).findAll("li") 33 | return "\n".join([normalize_string(inst.get_text()) for inst in instructions]) 34 | -------------------------------------------------------------------------------- /recipe_scrapers/countryliving.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class CountryLiving(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "countryliving.com" 9 | 10 | def title(self): 11 | return self.soup.find("h1", {"class": "content-hed recipe-hed"}).get_text() 12 | 13 | def total_time(self): 14 | return get_minutes( 15 | self.soup.find("span", {"class": "total-time-amount"}).parent 16 | ) 17 | 18 | def yields(self): 19 | yields = self.soup.find( 20 | "div", {"class": "recipe-details-item yields"} 21 | ).get_text() 22 | 23 | return get_yields("{} servings".format(yields)) 24 | 25 | def ingredients(self): 26 | ingredients = self.soup.findAll("div", {"class": "ingredient-item"}) 27 | 28 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 29 | 30 | def instructions(self): 31 | instructions = self.soup.find("div", {"class": "direction-lists"}).find_all( 32 | "li" 33 | ) 34 | 35 | return "\n".join( 36 | [normalize_string(instruction.get_text()) for instruction in instructions] 37 | ) 38 | -------------------------------------------------------------------------------- /recipe_scrapers/reishunger.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import normalize_string 3 | 4 | 5 | class Reishunger(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return 
"reishunger.de" 9 | 10 | def author(self): 11 | return self.schema.author() 12 | 13 | def title(self): 14 | return self.schema.title() 15 | 16 | def total_time(self): 17 | return self.schema.total_time() 18 | 19 | def yields(self): 20 | return self.schema.yields() 21 | 22 | def image(self): 23 | return self.schema.image() 24 | 25 | def ingredients(self): 26 | return self.schema.ingredients() 27 | 28 | def instructions(self): 29 | result = self.soup.find("section", {"class": "recipe-preparation"}) 30 | if result: 31 | result = "\n".join( 32 | normalize_string(i.get_text()) for i in result.findAll("p") 33 | ) 34 | return result 35 | 36 | def ratings(self): 37 | block = self.soup.find("div", {"id": "recipe-header"}).find( 38 | "div", {"class": "nrating"} 39 | ) 40 | if block: 41 | cnt = len(block.findAll("span", {"class": "fa-star"})) 42 | return cnt 43 | return block 44 | -------------------------------------------------------------------------------- /recipe_scrapers/food52.py: -------------------------------------------------------------------------------- 1 | from recipe_scrapers._abstract import AbstractScraper 2 | from recipe_scrapers._utils import get_minutes, normalize_string 3 | 4 | 5 | class Food52(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "food52.com" 9 | 10 | def title(self): 11 | return self.schema.title() 12 | 13 | def total_time(self): 14 | ul = self.soup.find("ul", {"class": "recipe__details"}) 15 | total = 0 16 | for li in ul.find_all("li"): 17 | if li.span.get_text().lower() in ["prep time", "cook time"]: 18 | total += get_minutes(list(li.children)[2].strip()) 19 | return total 20 | 21 | def yields(self): 22 | return self.schema.yields() 23 | 24 | def image(self): 25 | return self.schema.image() 26 | 27 | def ingredients(self): 28 | return self.schema.ingredients() 29 | 30 | def instructions(self): 31 | instructions = self.soup.findAll("li", {"class": "recipe__list-step"}) 32 | 33 | return "\n".join( 34 | [ 35 | 
normalize_string(instruction.span.get_text()) 36 | for instruction in instructions 37 | ] 38 | ) 39 | 40 | def ratings(self): 41 | return self.schema.ratings() 42 | -------------------------------------------------------------------------------- /recipe_scrapers/onehundredonecookbooks.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from ._abstract import AbstractScraper 4 | 5 | 6 | class OneHundredOneCookBooks(AbstractScraper): 7 | def __init__(self, *args, **kwargs): 8 | super().__init__(*args, **kwargs) 9 | self.soup = self.soup.find("div", id="recipe") 10 | 11 | @classmethod 12 | def host(cls): 13 | return "101cookbooks.com" 14 | 15 | def author(self): 16 | return self.schema.author() 17 | 18 | def title(self): 19 | return self.soup.find("h1").get_text() 20 | 21 | def total_time(self): 22 | return self.schema.total_time() 23 | 24 | def yields(self): 25 | data = self.soup.find_all("p", limit=3, recursive=False)[-1].get_text() 26 | extraction = re.search("([0-9]+) servings", data) 27 | return extraction.group(1) if extraction else None 28 | 29 | def image(self): 30 | return self.schema.image() 31 | 32 | def ingredients(self): 33 | ingredients = self.soup.find("blockquote").p.stripped_strings 34 | return list(ingredients) 35 | 36 | def instructions(self): 37 | return self.soup.find_all("p", limit=2, recursive=False)[1].get_text( 38 | "\n", strip=True 39 | ) 40 | 41 | def ratings(self): 42 | return None 43 | -------------------------------------------------------------------------------- /recipe_scrapers/realsimple.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class RealSimple(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "realsimple.com" 9 | 10 | def title(self): 11 | return self.soup.find("h1").get_text(strip=True) 12 | 13 | def 
total_time(self): 14 | return get_minutes(self.soup.findAll("div", {"class": "recipe-meta-item"})[1]) 15 | 16 | def yields(self): 17 | return get_yields( 18 | self.soup.findAll("div", {"class": "recipe-meta-item"})[2] 19 | .find("div", {"class": "recipe-meta-item-body"}) 20 | .get_text() 21 | ) 22 | 23 | def ingredients(self): 24 | ingredients = self.soup.find("div", {"class": "ingredients"}).findAll("li") 25 | 26 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 27 | 28 | def instructions(self): 29 | instructions = self.soup.findAll("div", {"class": "step"}) 30 | 31 | return "\n".join( 32 | [ 33 | normalize_string(instruction.find("p").get_text()) 34 | for instruction in instructions 35 | if instruction.find("p") is not None 36 | ] 37 | ) 38 | -------------------------------------------------------------------------------- /recipe_scrapers/plugins/template.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import logging 3 | 4 | from recipe_scrapers.plugins._interface import PluginInterface 5 | from recipe_scrapers.settings import settings 6 | 7 | logging.basicConfig() 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class TemplatePlugin(PluginInterface): 12 | """ 13 | Sample starting point to write your custom plugin. 14 | 15 | Check the available plugins implementations for more details. 16 | """ 17 | 18 | run_on_hosts = ("*",) 19 | run_on_methods = ( 20 | "title", 21 | # ... others 22 | ) 23 | 24 | @classmethod 25 | def run(cls, decorated): 26 | @functools.wraps(decorated) 27 | def decorated_method_wrapper(self, *args, **kwargs): 28 | # in here you'll have self.soup, self.schema and the other 29 | # instance attributes/methods you can work with. 
30 | # check other plugins for examples 31 | logger.setLevel(settings.LOG_LEVEL) 32 | class_name = self.__class__.__name__ 33 | method_name = decorated.__name__ 34 | logger.debug( 35 | f"Decorating: {class_name}.{method_name}() with TemplatePlugin" 36 | ) 37 | return decorated(self, *args, **kwargs) 38 | 39 | return decorated_method_wrapper 40 | -------------------------------------------------------------------------------- /recipe_scrapers/wikicookbook.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class WikiCookbook(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "en.wikibooks.org" 9 | 10 | def title(self): 11 | return self.soup.find("h1").get_text().replace("Cookbook:", "") 12 | 13 | def total_time(self): 14 | return get_minutes(self.soup.find("th", string="Time").find_next_sibling("td")) 15 | 16 | def yields(self): 17 | return get_yields( 18 | self.soup.find("th", string="Servings").find_next_sibling("td") 19 | ) 20 | 21 | def image(self): 22 | image = self.soup.find("a", {"class": "image"}).find("img", {"src": True}) 23 | return image["src"] if image else None 24 | 25 | def ingredients(self): 26 | ingredients = ( 27 | self.soup.find("span", {"id": "Ingredients"}).find_next("ul").findAll("li") 28 | ) 29 | 30 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 31 | 32 | def instructions(self): 33 | instructions = ( 34 | self.soup.find("span", {"id": "Procedure"}).find_next("ol").findAll("li") 35 | ) 36 | 37 | return "\n".join( 38 | [normalize_string(instruction.get_text()) for instruction in instructions] 39 | ) 40 | -------------------------------------------------------------------------------- /recipe_scrapers/tastykitchen.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from 
._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class TastyKitchen(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "tastykitchen.com" 9 | 10 | def title(self): 11 | return self.soup.find("h1", {"itemprop": "name"}).get_text() 12 | 13 | def total_time(self): 14 | return sum( 15 | [ 16 | get_minutes(self.soup.find("time", {"itemprop": "prepTime"})), 17 | get_minutes(self.soup.find("time", {"itemprop": "cookTime"})), 18 | ] 19 | ) 20 | 21 | def yields(self): 22 | return get_yields(self.soup.find("span", {"itemprop": "yield"})) 23 | 24 | def image(self): 25 | image = self.soup.find("img", {"class": "the_recipe_image", "src": True}) 26 | return image["src"] if image else None 27 | 28 | def ingredients(self): 29 | ingredients = self.soup.find("ul", {"class": "ingredients"}).findAll("li") 30 | 31 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 32 | 33 | def instructions(self): 34 | instructions = self.soup.find("span", {"itemprop": "instructions"}).findAll("p") 35 | 36 | return "\n".join( 37 | [normalize_string(direction.get_text()) for direction in instructions] 38 | ) 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/scraper_bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Scraper bug report 3 | about: Report a scraper that is not working correctly 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | Thanks for filing a bug report with us! 11 | 12 | If your request is about a website that is not supported, please open a 'new scraper' issue request instead. 13 | 14 | To help get the issue fixed, please fill in the information below. 
15 | 16 | **Pre-filing checks** 17 | 18 | - [ ] I have searched for open issues that report the same problem 19 | - [ ] I have checked that the bug affects the latest version of the library 20 | 21 | **The URL of the recipe(s) that are not being scraped correctly** 22 | 23 | - https:// ... 24 | 25 | **The version of Python you're using** 26 | 27 | ... 28 | 29 | **The operating system of your environment** 30 | 31 | ... 32 | 33 | **The results you expect to see** 34 | 35 | ... 36 | 37 | **The results (including any Python error messages) that you are seeing** 38 | 39 | ... 40 | 41 | Can you write Python and would you like to help fix the scraper yourself? We'd be glad for your assistance! We can provide you with guidance and code review in return. If so, tick any of the relevant boxes below: 42 | 43 | - [ ] I'd like to try fixing this scraper myself 44 | - [ ] I'd like guidance to help me develop a fix 45 | - [ ] I'd prefer if the `recipe-scrapers` team try to fix this 46 | -------------------------------------------------------------------------------- /recipe_scrapers/cookieandkate.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class CookieAndKate(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "cookieandkate.com" 9 | 10 | def title(self): 11 | return self.soup.find("h1", {"class": "entry-title"}).get_text() 12 | 13 | def total_time(self): 14 | return get_minutes( 15 | self.soup.find("span", {"class": "tasty-recipes-total-time"}) 16 | ) 17 | 18 | def yields(self): 19 | yields = self.soup.find("span", {"class": "tasty-recipes-yield"}).get_text() 20 | 21 | return get_yields("{} servings".format(yields)) 22 | 23 | def ingredients(self): 24 | ingredients = self.soup.find( 25 | "div", {"class": "tasty-recipe-ingredients"} 26 | ).find_all("li") 27 | 28 | return 
[normalize_string(ingredient.get_text()) for ingredient in ingredients] 29 | 30 | def instructions(self): 31 | instructions = self.soup.find( 32 | "div", {"class": "tasty-recipe-instructions"} 33 | ).find_all("li") 34 | 35 | return "\n".join( 36 | [normalize_string(instruction.get_text()) for instruction in instructions] 37 | ) 38 | 39 | def ratings(self): 40 | return round(float(self.soup.find("span", {"class": "average"}).get_text()), 2) 41 | -------------------------------------------------------------------------------- /recipe_scrapers/simplyquinoa.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class SimplyQuinoa(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "simplyquinoa.com" 9 | 10 | def title(self): 11 | return self.soup.find("h2", {"class": "wprm-recipe-name"}).get_text() 12 | 13 | def total_time(self): 14 | return get_minutes( 15 | self.soup.find("span", {"class": "wprm-recipe-total_time"}).parent 16 | ) 17 | 18 | def yields(self): 19 | yields = self.soup.find("span", {"class": "wprm-recipe-servings"}).get_text() 20 | 21 | return get_yields("{} servings".format(yields)) 22 | 23 | def ingredients(self): 24 | ingredients = self.soup.findAll("li", {"class": "wprm-recipe-ingredient"}) 25 | 26 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 27 | 28 | def instructions(self): 29 | instructions = self.soup.findAll( 30 | "div", {"class": "wprm-recipe-instruction-text"} 31 | ) 32 | 33 | return "\n".join( 34 | [normalize_string(instruction.get_text()) for instruction in instructions] 35 | ) 36 | 37 | def ratings(self): 38 | data = self.soup.find("span", {"class": "wprm-recipe-rating-average"}) 39 | return round(float(data.get_text()), 2) if data else None 40 | -------------------------------------------------------------------------------- 
/recipe_scrapers/mybakingaddiction.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class MyBakingAddiction(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "mybakingaddiction.com" 9 | 10 | def title(self): 11 | return self.soup.find("h1").get_text() 12 | 13 | def total_time(self): 14 | return get_minutes( 15 | self.soup.find("div", {"class": "mv-create-time-total"}).get_text() 16 | ) 17 | 18 | def yields(self): 19 | return get_yields(self.soup.find("div", {"class": "mv-create-time-yield"})) 20 | 21 | def ingredients(self): 22 | ingredients = self.soup.find("div", {"class": "mv-create-ingredients"}).findAll( 23 | "li" 24 | ) 25 | 26 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 27 | 28 | def instructions(self): 29 | instructions = self.soup.find( 30 | "div", {"class": "mv-create-instructions"} 31 | ).findAll("li") 32 | 33 | return "\n".join( 34 | [normalize_string(instruction.get_text()) for instruction in instructions] 35 | ) 36 | 37 | def ratings(self): 38 | rating = self.soup.find("div", {"class": "mv-create-reviews"}).attrs.get( 39 | "data-mv-create-rating", None 40 | ) 41 | 42 | return round(float(rating), 2) 43 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from setuptools import find_packages, setup 4 | 5 | about = {} 6 | here = os.path.abspath(os.path.dirname(__file__)) 7 | with open( 8 | os.path.join(here, "recipe_scrapers", "__version__.py"), "r", encoding="utf-8" 9 | ) as f: 10 | exec(f.read(), about) 11 | 12 | README = open(os.path.join(os.path.dirname(__file__), "README.rst")).read() 13 | 14 | os.chdir(os.path.normpath(os.path.join(os.path.abspath(__file__), os.pardir))) 15 | 16 | setup( 17 | 
name="recipe_scrapers", 18 | url="https://github.com/hhursev/recipe-scrapers/", 19 | version=about["__version__"], 20 | author="Hristo Harsev", 21 | author_email="r+pypi@hharsev.com", 22 | description="Python package, scraping recipes from all over the internet", 23 | keywords="python recipes scraper harvest recipe-scraper recipe-scrapers", 24 | long_description=README, 25 | long_description_content_type="text/x-rst", 26 | install_requires=["beautifulsoup4>=4.6.0", "extruct>=0.8.0", "requests>=2.19.1"], 27 | packages=find_packages(), 28 | package_data={"": ["LICENSE"]}, 29 | include_package_data=True, 30 | classifiers=[ 31 | "Programming Language :: Python :: 3", 32 | "License :: OSI Approved :: MIT License", 33 | "Intended Audience :: Developers", 34 | "Operating System :: OS Independent", 35 | "Topic :: Internet :: WWW/HTTP", 36 | ], 37 | python_requires=">=3.6", 38 | ) 39 | -------------------------------------------------------------------------------- /recipe_scrapers/saveur.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class Saveur(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "saveur.com" 9 | 10 | def author(self): 11 | return self.schema.author() 12 | 13 | def title(self): 14 | return self.soup.find("h1").get_text() 15 | 16 | def total_time(self): 17 | prep_time = self.soup.find("meta", {"property": "prepTime"}) 18 | cook_time = self.soup.find("meta", {"property": "cookTime"}) 19 | return sum( 20 | [ 21 | get_minutes(prep_time.get("content")) if prep_time else 0, 22 | get_minutes(cook_time.get("content")) if cook_time else 0, 23 | ] 24 | ) 25 | 26 | def yields(self): 27 | return get_yields( 28 | self.soup.find("span", {"property": "recipeYield"}).get_text() 29 | ) 30 | 31 | def ingredients(self): 32 | ingredients = self.soup.findAll("li", {"property": "recipeIngredient"}) 33 | 34 | 
return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 35 | 36 | def instructions(self): 37 | instructions = self.soup.findAll("li", {"property": "recipeInstructions"}) 38 | 39 | return "\n".join( 40 | [normalize_string(instruction.get_text()) for instruction in instructions] 41 | ) 42 | -------------------------------------------------------------------------------- /recipe_scrapers/forksoverknives.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import normalize_string 3 | 4 | 5 | class ForksOverKnives(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "forksoverknives.com" 9 | 10 | def author(self): 11 | author = self.soup.find("div", attrs={"class": "post-info"}).find("a") 12 | return normalize_string(author.get_text()) 13 | 14 | def title(self): 15 | return self.schema.title() 16 | 17 | def total_time(self): 18 | return self.schema.total_time() 19 | 20 | def yields(self): 21 | yields = normalize_string( 22 | self.soup.find("i", attrs={"class": "icon-serving"}).next_sibling.get_text() 23 | ) 24 | # Get the first string after "Makes". 
25 | return yields.split(" ", 1)[1] 26 | 27 | def image(self): 28 | return self.schema.image() 29 | 30 | def ingredients(self): 31 | return self.schema.ingredients() 32 | 33 | def instructions(self): 34 | return self.schema.instructions() 35 | 36 | def ratings(self): 37 | ratings = normalize_string( 38 | self.soup.find("div", attrs={"class": "headline"}) 39 | .find("span", attrs={"class": "rated-count"}) 40 | .get_text() 41 | ) 42 | # Unwrap parens 43 | ratings = ratings[1:] 44 | # return the first element 45 | return float(ratings.split()[0]) 46 | -------------------------------------------------------------------------------- /recipe_scrapers/thevintagemixer.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, normalize_string 3 | 4 | 5 | class TheVintageMixer(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "thevintagemixer.com" 9 | 10 | def title(self): 11 | return self.soup.find("h2", {"class": "wprm-recipe-name"}).get_text() 12 | 13 | def total_time(self): 14 | return get_minutes( 15 | self.soup.find("span", {"class": "wprm-recipe-total_time-minutes"}).parent 16 | ) 17 | 18 | def image(self): 19 | container = self.soup.find("div", {"class": "wprm-recipe-image"}) 20 | if not container: 21 | return None 22 | 23 | image = container.find("img", {"src": True}) 24 | return image["src"] if image else None 25 | 26 | def ingredients(self): 27 | ingredients = self.soup.findAll("li", {"class": "wprm-recipe-ingredient"}) 28 | 29 | return [ 30 | normalize_string(ingredient.get_text()) 31 | for ingredient in ingredients 32 | if len(normalize_string(ingredient.get_text())) > 0 33 | ] 34 | 35 | def instructions(self): 36 | instructions = self.soup.findAll( 37 | "div", {"class": "wprm-recipe-instruction-text"} 38 | ) 39 | 40 | return "\n".join( 41 | [normalize_string(instruction.get_text()) for instruction in instructions] 42 | ) 43 | 
-------------------------------------------------------------------------------- /templates/test_scraper.py: -------------------------------------------------------------------------------- 1 | from recipe_scrapers.template import Template 2 | from tests import ScraperTest 3 | 4 | 5 | class TestTemplateScraper(ScraperTest): 6 | 7 | scraper_class = Template 8 | 9 | def test_host(self): 10 | self.assertEqual("example.com", self.harvester_class.host()) 11 | 12 | def test_author(self): 13 | self.assertEqual(None, self.harvester_class.author()) 14 | 15 | def test_title(self): 16 | self.assertEqual(None, self.harvester_class.title()) 17 | 18 | def test_category(self): 19 | self.assertEqual(None, self.harvester_class.category()) 20 | 21 | def test_total_time(self): 22 | self.assertEqual(None, self.harvester_class.total_time()) 23 | 24 | def test_yields(self): 25 | self.assertEqual(None, self.harvester_class.yields()) 26 | 27 | def test_image(self): 28 | self.assertEqual(None, self.harvester_class.image()) 29 | 30 | def test_ingredients(self): 31 | self.assertEqual(None, self.harvester_class.ingredients()) 32 | 33 | def test_instructions(self): 34 | self.assertEqual(None, self.harvester_class.instructions()) 35 | 36 | def test_ratings(self): 37 | self.assertEqual(None, self.harvester_class.ratings()) 38 | 39 | def test_cuisine(self): 40 | self.assertEqual(None, self.harvester_class.cuisine()) 41 | 42 | def test_description(self): 43 | self.assertEqual(None, self.harvester_class.description()) 44 | -------------------------------------------------------------------------------- /recipe_scrapers/sallysblog.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, normalize_string 3 | 4 | 5 | class SallysBlog(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "sallys-blog.de" 9 | 10 | def title(self): 11 | return normalize_string( 12 | self.soup.find("h1", 
{"class": "blog--detail-headline"}).get_text() 13 | ) 14 | 15 | def total_time(self): 16 | return get_minutes(self.soup.find("span", {"id": "zubereitungszeit"})) 17 | 18 | def yields(self): 19 | amount = self.soup.find("input", {"class": "float-left"}).get("value") 20 | unit = self.soup.find("span", {"id": "is_singular"}).get_text() 21 | 22 | return f"{amount} {unit}" 23 | 24 | def ingredients(self): 25 | ingredients = self.soup.findAll("li", {"class": "quantity"}) 26 | 27 | return [normalize_string(i.get_text()) for i in ingredients] 28 | 29 | def instructions(self): 30 | instructionBlock = self.soup.find( 31 | "div", {"class": "blog--detail-description block"} 32 | ) 33 | instructions = instructionBlock.findAll( 34 | "div", {"class": ["content_type_2", "content_type_3", "content_type_4"]} 35 | ) 36 | 37 | return "\n".join( 38 | [ 39 | normalize_string(instruction.find("p").get_text()) 40 | for instruction in instructions 41 | ] 42 | ) 43 | -------------------------------------------------------------------------------- /recipe_scrapers/_factory.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_host_name 3 | 4 | 5 | class SchemaScraperFactory: 6 | class SchemaScraper(AbstractScraper): 7 | def host(self) -> str: 8 | return get_host_name(self.url) 9 | 10 | def title(self): 11 | return self.schema.title() 12 | 13 | def category(self): 14 | return self.schema.category() 15 | 16 | def total_time(self): 17 | return self.schema.total_time() 18 | 19 | def cook_time(self): 20 | return self.schema.cook_time() 21 | 22 | def prep_time(self): 23 | return self.schema.prep_time() 24 | 25 | def yields(self): 26 | return self.schema.yields() 27 | 28 | def image(self): 29 | return self.schema.image() 30 | 31 | def ingredients(self): 32 | return self.schema.ingredients() 33 | 34 | def instructions(self): 35 | return self.schema.instructions() 36 | 37 | def ratings(self): 38 | 
return self.schema.ratings() 39 | 40 | def author(self): 41 | return self.schema.author() 42 | 43 | def cuisine(self): 44 | return self.schema.cuisine() 45 | 46 | def description(self): 47 | return self.schema.description() 48 | 49 | @classmethod 50 | def generate(cls, url, **options): 51 | return cls.SchemaScraper(url, **options) 52 | -------------------------------------------------------------------------------- /recipe_scrapers/comidinhasdochef.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import normalize_string 3 | 4 | 5 | class ComidinhasDoChef(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "comidinhasdochef.com" 9 | 10 | def author(self): 11 | return self.soup.find("span", {"class": "theauthor"}).get_text(strip=True) 12 | 13 | def title(self): 14 | return self.soup.find("h1", {"class": "title"}).get_text() 15 | 16 | def total_time(self): 17 | return self.schema.total_time() 18 | 19 | def yields(self): 20 | yields = self.soup.find("span", {"itemprop": "recipeYield"}) 21 | return yields.get_text() if yields else None 22 | 23 | def image(self): 24 | return self.schema.image() 25 | 26 | def ingredients(self): 27 | return [ 28 | normalize_string(ingredient.get_text()) 29 | for ingredient in self.soup.find_all("li", {"itemprop": "recipeIngredient"}) 30 | ] 31 | 32 | def instructions(self): 33 | instructions = [ 34 | normalize_string(instruction.get_text(strip=True)) 35 | for instruction in self.soup.find_all( 36 | "li", {"itemprop": "recipeInstructions"} 37 | ) 38 | ] 39 | return "\n".join(instructions) 40 | 41 | def ratings(self): 42 | rating = self.soup.find("span", {"itemprop": "ratingValue"}).get_text() 43 | return round(float(rating), 2) 44 | -------------------------------------------------------------------------------- /recipe_scrapers/mykitchen101.py: -------------------------------------------------------------------------------- 1 | 
import re 2 | 3 | from bs4 import BeautifulSoup 4 | 5 | from ._abstract import AbstractScraper 6 | from ._utils import get_yields, normalize_string 7 | 8 | 9 | class MyKitchen101(AbstractScraper): 10 | @classmethod 11 | def host(cls): 12 | return "mykitchen101.com" 13 | 14 | def author(self): 15 | return self.soup.find("a", {"rel": "author"}).get_text() 16 | 17 | def title(self): 18 | return self.soup.find("h1", {"class": "entry-title"}).get_text() 19 | 20 | def yields(self): 21 | return get_yields(self.soup.find("p", text=re.compile("分量:")).get_text()) 22 | 23 | def image(self): 24 | return self.schema.image() 25 | 26 | def ingredients(self): 27 | soup = BeautifulSoup(str(self.soup), features="html.parser") 28 | ingredients = ( 29 | soup.find(name="p", text=re.compile("材料:")).find_next("ul").find_all("li") 30 | ) 31 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 32 | 33 | def instructions(self): 34 | soup = BeautifulSoup(str(self.soup), features="html.parser") 35 | instructions = soup.find(name="p", text=re.compile("做法:")).find_all_next("p") 36 | return "\n".join( 37 | [ 38 | normalize_string(instruction.get_text()) 39 | for instruction in instructions 40 | if instruction.get_text()[:1].isdigit() 41 | ] 42 | ) 43 | -------------------------------------------------------------------------------- /recipe_scrapers/kingarthur.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from ._abstract import AbstractScraper 4 | from ._utils import normalize_string 5 | 6 | 7 | class KingArthur(AbstractScraper): 8 | @classmethod 9 | def host(cls): 10 | return "kingarthurbaking.com" 11 | 12 | def title(self): 13 | return self.schema.title() 14 | 15 | def total_time(self): 16 | return self.schema.total_time() 17 | 18 | def yields(self): 19 | return self.schema.yields() 20 | 21 | def image(self): 22 | return self.schema.image() 23 | 24 | def ingredients(self): 25 | return 
self.schema.ingredients() 26 | 27 | def instructions(self): 28 | """ 29 | King Arthur updated how they format their instructions to include html (instructions wrapped in <p></p>) in the 30 | `recipeInstructions`, parse the instructions assuming each step is wrapped in a <p></p>
first, and fallback to just 31 | returning the schema instructions in case this is changed, again. 32 | """ 33 | schema_instructions = self.schema.instructions() 34 | soup = BeautifulSoup(schema_instructions, "html.parser") 35 | instruction_elms = soup.findAll("p") 36 | if instruction_elms: 37 | return "\n".join( 38 | [normalize_string(elm.get_text()) for elm in instruction_elms] 39 | ) 40 | return schema_instructions 41 | 42 | def ratings(self): 43 | return self.schema.ratings() 44 | -------------------------------------------------------------------------------- /recipe_scrapers/_exceptions.py: -------------------------------------------------------------------------------- 1 | class RecipeScrapersExceptions(Exception): 2 | def __init__(self, message): 3 | self.message = message 4 | super().__init__(message) 5 | 6 | def __str__(self): 7 | return f"recipe-scrapers exception: {self.message}" 8 | 9 | 10 | class WebsiteNotImplementedError(RecipeScrapersExceptions): 11 | """Error when website is not supported by this library.""" 12 | 13 | def __init__(self, domain): 14 | self.domain = domain 15 | message = f"Website ({self.domain}) not supported." 16 | super().__init__(message) 17 | 18 | 19 | class NoSchemaFoundInWildMode(RecipeScrapersExceptions): 20 | """Error when wild_mode fails to locate schema at the url""" 21 | 22 | def __init__(self, url): 23 | self.url = url 24 | message = f"No Recipe Schema found at {self.url}." 25 | super().__init__(message) 26 | 27 | 28 | class ElementNotFoundInHtml(RecipeScrapersExceptions): 29 | """Error when we cannot locate the HTML element on the page""" 30 | 31 | def __init__(self, element): 32 | self.element = element 33 | message = ( 34 | "Element not found in html (self.soup.find returned None). Check traceback." 
35 | ) 36 | super().__init__(message) 37 | 38 | 39 | class SchemaOrgException(RecipeScrapersExceptions): 40 | """Error in parsing or missing portion of the Schema.org data org the page""" 41 | 42 | def __init__(self, message): 43 | super().__init__(message) 44 | -------------------------------------------------------------------------------- /recipe_scrapers/motherthyme.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class MotherThyme(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "motherthyme.com" 9 | 10 | def title(self): 11 | return self.soup.find("h2", {"class": "wprm-recipe-name"}).get_text() 12 | 13 | def total_time(self): 14 | return get_minutes( 15 | self.soup.find("span", {"class": "wprm-recipe-total_time"}).parent 16 | ) 17 | 18 | def yields(self): 19 | yields = self.soup.find("span", {"class": "wprm-recipe-servings"}).get_text() 20 | 21 | return get_yields("{} servings".format(yields)) 22 | 23 | def ingredients(self): 24 | ingredients = self.soup.findAll("li", {"class": "wprm-recipe-ingredient"}) 25 | 26 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 27 | 28 | def instructions(self): 29 | instructions = self.soup.findAll( 30 | "div", {"class": "wprm-recipe-instruction-text"} 31 | ) 32 | 33 | return "\n".join( 34 | [normalize_string(instruction.get_text()) for instruction in instructions] 35 | ) 36 | 37 | def ratings(self): 38 | return round( 39 | float( 40 | self.soup.find( 41 | "span", {"class": "wprm-recipe-rating-average"} 42 | ).get_text() 43 | ), 44 | 2, 45 | ) 46 | -------------------------------------------------------------------------------- /recipe_scrapers/twopeasandtheirpod.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, 
get_yields, normalize_string 3 | 4 | 5 | class TwoPeasAndTheirPod(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "twopeasandtheirpod.com" 9 | 10 | def title(self): 11 | return self.soup.find("h2", {"class": "wprm-recipe-name"}).get_text() 12 | 13 | def total_time(self): 14 | minutes = self.soup.select_one(".wprm-recipe-total_time").get_text() 15 | unit = self.soup.select_one(".wprm-recipe-total_time-unit").get_text() 16 | 17 | return get_minutes("{} {}".format(minutes, unit)) 18 | 19 | def yields(self): 20 | return get_yields( 21 | self.soup.select_one( 22 | "div.wprm-recipe-details-container dl:nth-of-type(5) dd" 23 | ).get_text() 24 | ) 25 | 26 | def ingredients(self): 27 | ingredients = self.soup.findAll("li", {"class": "wprm-recipe-ingredient"}) 28 | 29 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 30 | 31 | def instructions(self): 32 | instructions = self.soup.select(".wprm-recipe-instruction-text") 33 | 34 | return "\n".join( 35 | [normalize_string(instruction.get_text()) for instruction in instructions] 36 | ) 37 | 38 | def image(self): 39 | image = self.soup.find("div", {"class": "wprm-recipe-image"}).find("img") 40 | 41 | return image["src"] if image else None 42 | -------------------------------------------------------------------------------- /recipe_scrapers/simplyrecipes.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class SimplyRecipes(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "simplyrecipes.com" 9 | 10 | def title(self): 11 | return self.soup.find("h1").get_text() 12 | 13 | def total_time(self): 14 | return get_minutes( 15 | self.soup.find("div", {"class": "total-time"}) 16 | .find("span", {"class": "meta-text__data"}) 17 | .text 18 | ) 19 | 20 | def yields(self): 21 | return get_yields( 22 | normalize_string( 23 | 
self.soup.find("div", {"class": "recipe-serving"}) 24 | .find("span", {"class": "meta-text__data"}) 25 | .text 26 | ) 27 | ) 28 | 29 | def ingredients(self): 30 | ingredients = self.soup.find("ul", {"class": "ingredient-list"}).findAll("li") 31 | 32 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 33 | 34 | def instructions(self): 35 | steps = self.soup.find( 36 | "div", {"class": "structured-project__steps"} 37 | ).ol.findAll("li") 38 | 39 | return "\n".join( 40 | [ 41 | normalize_string( 42 | step.div.text + ": " + "".join([p.text for p in step.findAll("p")]) 43 | ) 44 | for step in steps 45 | ] 46 | ) 47 | -------------------------------------------------------------------------------- /tests/test_data/schemaorg.testhtml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /recipe_scrapers/settings/default.py: -------------------------------------------------------------------------------- 1 | from recipe_scrapers.plugins import ( # SchemaOrgPriorityPlugin,; Bcp47ValidatePlugin, 2 | ExceptionHandlingPlugin, 3 | HTMLTagStripperPlugin, 4 | NormalizeStringPlugin, 5 | OpenGraphImageFetchPlugin, 6 | SchemaOrgFillPlugin, 7 | ) 8 | 9 | # Plugins to be attached. 10 | # The upper most plugin is the "outer most" executed. 11 | # Check recipe_scrapers.settings.template.py for ways to extend. 12 | PLUGINS = ( 13 | ExceptionHandlingPlugin, 14 | HTMLTagStripperPlugin, 15 | NormalizeStringPlugin, 16 | OpenGraphImageFetchPlugin, 17 | SchemaOrgFillPlugin, 18 | # Bcp47ValidatePlugin, 19 | # SchemaOrgPriorityPlugin, 20 | ) 21 | 22 | META_HTTP_EQUIV = True 23 | 24 | 25 | SUPPRESS_EXCEPTIONS = False 26 | # Applicable only if SUPPRESS_EXCEPTIONS is True, otherwise ignored 27 | # silence .[method]() exception and return the value 28 | # as listed in the config here. 
# recipe_scrapers/momswithcrockpots.py
from ._abstract import AbstractScraper
from ._utils import get_minutes, get_yields, normalize_string


class MomsWithCrockPots(AbstractScraper):
    """Scraper for momswithcrockpots.com, reading the page's ``wprm-recipe-*`` markup."""

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "momswithcrockpots.com"

    def title(self):
        """Return the text of the ``h2.wprm-recipe-name`` element."""
        name_tag = self.soup.find("h2", {"class": "wprm-recipe-name"})
        return name_tag.get_text()

    def total_time(self):
        """Return ``get_minutes`` applied to the parent of the total-time span."""
        time_span = self.soup.find("span", {"class": "wprm-recipe-total_time"})
        # The parent element, not the span itself, is handed to get_minutes.
        return get_minutes(time_span.parent)

    def yields(self):
        """Return ``get_yields`` applied to ``"<servings text> servings"``."""
        servings_tag = self.soup.find("span", {"class": "wprm-recipe-servings"})
        return get_yields("{} servings".format(servings_tag.get_text()))

    def ingredients(self):
        """Return the normalized text of every ``li.wprm-recipe-ingredient``."""
        return [
            normalize_string(item.get_text())
            for item in self.soup.findAll("li", {"class": "wprm-recipe-ingredient"})
        ]

    def instructions(self):
        """Return the normalized instruction texts joined with newlines."""
        steps = []
        for block in self.soup.findAll(
            "div", {"class": "wprm-recipe-instruction-text"}
        ):
            steps.append(normalize_string(block.get_text()))
        return "\n".join(steps)

    def ratings(self):
        """Return the average rating as a float rounded to two decimals."""
        average_tag = self.soup.find("span", {"class": "wprm-recipe-rating-average"})
        return round(float(average_tag.get_text()), 2)
"entry-title"}).get_text() 12 | 13 | def total_time(self): 14 | return None 15 | 16 | def image(self): 17 | return ( 18 | self.soup.find("div", {"class": "article-featured-image-bg"}) 19 | .find("noscript") 20 | .find("img")["src"] 21 | ) 22 | 23 | def ingredients(self): 24 | ingredients = [] 25 | ingredient_group = self.soup.find("div", {"class": "ingredient-group"}).findAll( 26 | "dd" 27 | ) 28 | 29 | for ingredient in ingredient_group: 30 | ingredients.append(normalize_string(ingredient.get_text()).strip()) 31 | 32 | return ingredients 33 | 34 | def instructions(self): 35 | instructions = self.soup.find("div", {"class": "the-content-div"}).findAll("p")[ 36 | :-1 37 | ] # the last paragraph is advertisement, not instructions 38 | instructions_arr = [] 39 | for instruction in instructions: 40 | instructions_arr.append(instruction.get_text()) 41 | return "\n".join(instructions_arr) 42 | 43 | def yields(self): 44 | return get_yields( 45 | self.soup.find("span", {"class": "quantity-number"}).get_text() 46 | ) 47 | -------------------------------------------------------------------------------- /recipe_scrapers/fitmencook.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class FitMenCook(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "fitmencook.com" 9 | 10 | def title(self): 11 | raw_title = self.soup.find("h2", {"class": "gap-none"}).get_text() 12 | title = raw_title.replace("\t", "") 13 | title = title.replace("\n", "") 14 | 15 | return title 16 | 17 | def total_time(self): 18 | return get_minutes(self.soup.find("span", {"class": "total-time"})) 19 | 20 | def yields(self): 21 | for h4 in self.soup.findAll("h4"): 22 | for strong in h4.findAll("strong"): 23 | raw_yield = strong.text 24 | for word in raw_yield.split(): 25 | if word.isdigit(): 26 | yields = word 27 | 28 | return get_yields("{} 
servings".format(yields)) 29 | 30 | def ingredients(self): 31 | ingredients_parent = self.soup.find("div", {"class": "recipe-ingredients"}) 32 | ingredients = ingredients_parent.findAll("li") 33 | 34 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 35 | 36 | def instructions(self): 37 | instructions_parent = self.soup.find("div", {"class": "recipe-steps"}) 38 | instructions = instructions_parent.findAll("li") 39 | 40 | return "\n".join( 41 | [normalize_string(instruction.get_text()) for instruction in instructions] 42 | ) 43 | -------------------------------------------------------------------------------- /recipe_scrapers/geniuskitchen.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class GeniusKitchen(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "geniuskitchen.com" 9 | 10 | def title(self): 11 | return ( 12 | self.soup.find("title").get_text().replace(" Recipe - Genius Kitchen", "") 13 | ) 14 | 15 | def total_time(self): 16 | return get_minutes(self.soup.find("td", {"class": "time"})) 17 | 18 | def yields(self): 19 | return get_yields( 20 | self.soup.find("td", {"class": "servings"}).find("span", {"class": "count"}) 21 | ) 22 | 23 | def ingredients(self): 24 | ingredients = self.soup.find("ul", {"class": "ingredient-list"}).findAll("li") 25 | 26 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 27 | 28 | def instructions(self): 29 | raw_directions = ( 30 | self.soup.find("div", {"class": "directions-inner container-xs"}) 31 | .find("ol") 32 | .findAll("li") 33 | ) 34 | 35 | directions = [] 36 | 37 | for direction in raw_directions: 38 | if "Submit a Correction" not in direction.get_text(): 39 | directions.append(normalize_string(direction.get_text())) 40 | 41 | return "\n".join(directions) 42 | 43 | def ratings(self): 44 | 
# recipe_scrapers/ig.py
from ._abstract import AbstractScraper
from ._utils import get_yields, normalize_string


class IG(AbstractScraper):
    """Scraper for recipe pages served from receitas.ig.com.br."""

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "receitas.ig.com.br"

    def title(self):
        """Return the text of the ``<h2 itemprop="name">`` element."""
        return self.soup.find("h2", {"itemprop": "name"}).get_text()

    def total_time(self):
        """Return the ``span.valor`` text of the preparation box as an int.

        Returns ``None`` when the ``box-info-preparacao`` container is absent.
        """
        container = self.soup.find("div", {"class": "box-info-preparacao"})
        if not container:
            return None
        return int(container.find("span", {"class": "valor"}).get_text())

    def yields(self):
        """Return ``get_yields`` for the ``span.valor`` text of the yield box.

        Returns ``None`` when the ``box-info-rendimento`` container is absent.
        """
        container = self.soup.find("div", {"class": "box-info-rendimento"})
        if not container:
            return None
        return get_yields(container.find("span", {"class": "valor"}).get_text())

    def image(self):
        """Return the ``src`` of the first image in ``div.box-img-receita``, or ``None``."""
        container = self.soup.find("div", {"class": "box-img-receita"})
        if not container:
            return None

        image = container.find("img", {"src": True})
        return image["src"] if image else None

    def ingredients(self):
        """Return the normalized text of every ``<li>`` in ``ul.lista-ingredientes``."""
        # The attrs argument was previously the set literal
        # {"class", "lista-ingredientes"} -- a typo for this dict.
        ingredients = self.soup.find("ul", {"class": "lista-ingredientes"}).findAll(
            "li"
        )

        return [normalize_string(ingredient.get_text()) for ingredient in ingredients]

    def instructions(self):
        """Return the normalized text of the ``div.box-preparo`` element."""
        instructions = self.soup.find("div", {"class": "box-preparo"})
        return normalize_string(instructions.get_text())
| normalize_string(row.span.text) 26 | for row in rows 27 | if "ingHeading" not in row.span["class"] 28 | ] 29 | 30 | def instructions(self): 31 | ps = self.soup.find("div", {"class": "instructions"}).findAll("p") 32 | return "\n".join([normalize_string(p.text) for p in ps]) 33 | 34 | def ratings(self): 35 | try: 36 | cnt = ( 37 | self.soup.find("div", {"class": "recipe-rating"}) 38 | .find("span", {"class": "count"}) 39 | .text 40 | ) 41 | rating = ( 42 | self.soup.find("div", {"class": "recipe-rating"}) 43 | .find("span", {"class": "rating"}) 44 | .text 45 | ) 46 | return {"count": int(cnt), "rating": round(float(rating), 2)} 47 | except Exception: 48 | return None 49 | -------------------------------------------------------------------------------- /tests/test_schemaorg.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from recipe_scrapers._exceptions import SchemaOrgException 4 | from recipe_scrapers._schemaorg import SchemaOrg 5 | 6 | 7 | class TestSchemaOrg(unittest.TestCase): 8 | def setUp(self): 9 | with open("tests/test_data/schemaorg.testhtml", encoding="utf-8") as pagedata: 10 | self.schema = SchemaOrg(pagedata.read()) 11 | 12 | def test_total_time_with_schema_missing_all_data_should_raise_exception(self): 13 | keys = ["totalTime", "cookTime", "prepTime"] 14 | for k in keys: 15 | if k in self.schema.data: 16 | del self.schema.data[k] 17 | with self.assertRaises(SchemaOrgException): 18 | self.assertEqual(self.schema.total_time(), None) 19 | 20 | def test_total_time_with_schema__all_zeros(self): 21 | keys = ["totalTime", "cookTime", "prepTime"] 22 | for k in keys: 23 | self.schema.data[k] = "PT0M" 24 | self.assertEqual(self.schema.total_time(), 0) 25 | del self.schema.data["totalTime"] 26 | self.assertEqual(self.schema.total_time(), 0) 27 | 28 | def test_nutrient_retrieval(self): 29 | expected_nutrients = { 30 | "calories": "240 calories", 31 | "fatContent": "9 grams fat", 32 | } 33 | 
# recipe_scrapers/woop.py
from ._abstract import AbstractScraper
from ._utils import get_minutes, get_yields, normalize_string


class Woop(AbstractScraper):
    """Scraper for recipe pages served from woop.co.nz."""

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "woop.co.nz"

    def title(self):
        """Return the normalized ``content`` of the ``og:title`` meta tag."""
        found = self.soup.find("meta", {"property": "og:title"})
        return normalize_string(found["content"])

    def ingredients(self):
        """Return the normalized text of each non-empty ``<li>`` in the first ``div.ingredients``."""
        div = self.soup.findAll("div", {"class": "ingredients"})[0]
        li = div.findChildren("li")
        return [normalize_string(i.text) for i in li if i.text]

    def yields(self):
        """Return ``get_yields`` for the first ``div.value`` inside ``div.serving-amount``."""
        div = self.soup.findAll("div", {"class": "serving-amount"})[0]
        value = div.findChildren("div", {"class": "value"})[0]
        return get_yields(value.text)

    def total_time(self):
        """Return ``get_minutes`` for the first ``<p>`` inside ``div.cook-time``."""
        div = self.soup.findAll("div", {"class": "cook-time"})[0]
        return get_minutes(div.findChildren("p")[0])

    def instructions(self):
        """Return the non-empty normalized ``<li>`` texts of ``div.cooking-instructions``, newline-joined."""
        div = self.soup.findAll("div", {"class": "cooking-instructions"})[0]
        li = div.findChildren("li")
        normalized = [normalize_string(i.text) for i in li]
        return "\n".join([i for i in normalized if i])

    def nutrients(self):
        """Return a ``{name: value}`` dict built from ``div.nutritional-info`` list items.

        Each ``<li>`` text is split at its first colon. Items without a colon
        are skipped, and any further colons stay in the value. Previously the
        text was unpacked from ``split(":")``, which raised ``ValueError`` for
        both cases, and the ``value is not None`` filter never excluded
        anything.
        """
        div = self.soup.findAll("div", {"class": "nutritional-info"})[0]

        nutrients = {}
        for item in div.findChildren("li"):
            name, separator, value = item.text.partition(":")
            if not separator:
                # No "name: value" pair on this line.
                continue
            nutrients[normalize_string(name)] = normalize_string(value)
        return nutrients
# recipe_scrapers/yummly.py
from ._abstract import AbstractScraper
from ._utils import get_minutes, get_yields, normalize_string


class Yummly(AbstractScraper):
    """Scraper for recipe pages served from yummly.com."""

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "yummly.com"

    def title(self):
        """Return the text of the first ``<h1>``, or ``None`` if there is none."""
        found = self.soup.find("h1")
        return found.get_text() if found else None

    def total_time(self):
        """Return ``get_minutes`` for the second ``div.recipe-summary-item``.

        Returns ``None`` when fewer than two summary items exist. Previously
        a page with exactly one item raised ``IndexError``, because only
        emptiness was checked before indexing position 1.
        """
        data = self.soup.findAll("div", {"class": "recipe-summary-item"}, limit=2)
        if len(data) < 2:
            return None
        return get_minutes(data[1])

    def yields(self):
        """Return ``get_yields`` applied to the ``div.servings`` element."""
        return get_yields(self.soup.find("div", {"class": "servings"}))

    def ingredients(self):
        """Return one string per ``li.IngredientLine``, or ``None`` if none are found.

        Each string is the space-joined, normalized text of the line's
        amount, unit, ingredient and remainder spans.
        """
        ingredients = self.soup.findAll("li", {"class": "IngredientLine"})

        return (
            [
                " ".join(
                    normalize_string(span.get_text())
                    for span in ingredient.select(
                        """
                        span[class^=amount],
                        span[class^=unit],
                        span[class^=ingredient],
                        span[class^=remainder]"""
                    )
                )
                for ingredient in ingredients
            ]
            if ingredients
            else None
        )

    def instructions(self):
        """Return the normalized ``li.prep-step`` texts newline-joined, or ``None`` if none."""
        instructions = self.soup.findAll("li", attrs={"class": "prep-step"})
        return (
            "\n".join(normalize_string(instr.get_text()) for instr in instructions)
            if instructions
            else None
        )
# recipe_scrapers/southernliving.py
# southernliving.com scraper
# Written by G.D. Wallters
# Freely released the code to recipe_scraper group
# 9 February, 2020
# =======================================================


from ._abstract import AbstractScraper
from ._utils import get_minutes, get_yields, normalize_string


class SouthernLiving(AbstractScraper):
    """Scraper for southernliving.com.

    Title, image, ingredients and ratings are delegated to ``self.schema``;
    instructions and description are read from the page markup.
    """

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "southernliving.com"

    def title(self):
        """Return the title reported by ``self.schema``."""
        return self.schema.title()

    def total_time(self):
        """Return ``get_minutes`` applied to the schema's total time."""
        schema_time = self.schema.total_time()
        return get_minutes(schema_time)

    def yields(self):
        """Return ``get_yields`` applied to the schema's yields."""
        schema_yields = self.schema.yields()
        return get_yields(schema_yields)

    def image(self):
        """Return the image reported by ``self.schema``."""
        return self.schema.image()

    def ingredients(self):
        """Return the ingredients reported by ``self.schema``."""
        return self.schema.ingredients()

    def instructions(self):
        """Return the ``div.paragraph`` text of each instruction item, newline-joined."""
        section = self.soup.find("ul", {"class": "instructions-section"})
        items = section.findAll("li", {"class": "instructions-section-item"})

        steps = []
        for item in items:
            paragraph = item.find("div", {"class": "paragraph"})
            steps.append(normalize_string(paragraph.get_text()))
        return "\n".join(steps)

    def ratings(self):
        """Return the ratings reported by ``self.schema``."""
        return self.schema.ratings()

    def description(self):
        """Return the normalized text of the first ``div`` whose class starts with ``recipe-summary``."""

        def is_summary_class(css_class):
            # The filter can be called with an empty/missing class value.
            if not css_class:
                return False
            return css_class.startswith("recipe-summary")

        summary = self.soup.find("div", attrs={"class": is_summary_class})
        return normalize_string(summary.get_text())
# recipe_scrapers/joyfoodsunshine.py
from ._abstract import AbstractScraper
from ._utils import get_minutes, get_yields, normalize_string


class Joyfoodsunshine(AbstractScraper):
    """Scraper for joyfoodsunshine.com, reading the page's ``wprm-recipe-*`` markup."""

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "joyfoodsunshine.com"

    def author(self):
        """Return the text of the ``span.entry-author-name`` element."""
        return self.soup.find("span", {"class": "entry-author-name"}).get_text()

    def title(self):
        """Return the text of the ``h2.wprm-recipe-name`` element."""
        return self.soup.find("h2", {"class": "wprm-recipe-name"}).get_text()

    def total_time(self):
        """Return ``get_minutes`` for the ``wprm-recipe-total_time-minutes`` span text."""
        # NOTE(review): only the "-minutes" span is read; confirm whether the
        # page also exposes a separate hours span that should be included.
        return get_minutes(
            self.soup.find("span", {"class": "wprm-recipe-total_time-minutes"}).text
        )

    def yields(self):
        """Return ``get_yields`` for the normalized ``span.wprm-recipe-servings`` text."""
        # An unreachable second ``return self.schema.ingredients()`` that
        # followed this statement has been removed.
        return get_yields(
            normalize_string(
                self.soup.find("span", {"class": "wprm-recipe-servings"}).text
            )
        )

    def image(self):
        """Return the image reported by ``self.schema``."""
        # Markup alternative noted by the original author:
        # span class = wprm-recipe-image find src
        return self.schema.image()

    def ingredients(self):
        """Return the normalized text of every ``li.wprm-recipe-ingredient``."""
        ingredients = self.soup.findAll("li", {"class": "wprm-recipe-ingredient"})

        return [normalize_string(ingredient.get_text()) for ingredient in ingredients]

    def instructions(self):
        """Return the normalized instruction texts joined with newlines."""
        instructions = self.soup.findAll(
            "div", {"class": "wprm-recipe-instruction-text"}
        )

        return "\n".join(
            [normalize_string(instruction.get_text()) for instruction in instructions]
        )

    def ratings(self):
        """Return the average rating rounded to two decimals, or ``None`` if absent."""
        data = self.soup.find("span", {"class": "wprm-recipe-rating-average"})
        return round(float(data.get_text()), 2) if data else None
instructions(self): 49 | title = self.soup.find(text="Directions:").find_parent("p") 50 | 51 | instructions = [] 52 | for child in title.nextSibling.find_all("li"): 53 | instructions.append(child.get_text()) 54 | 55 | return "\n".join(instructions) 56 | 57 | def ratings(self): 58 | return None 59 | -------------------------------------------------------------------------------- /recipe_scrapers/heb.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class HEB(AbstractScraper): 6 | @classmethod 7 | def host(self, domain="com"): 8 | return f"heb.{domain}" 9 | 10 | def title(self): 11 | return self.soup.find("h1", {"class": "title"}).get_text() 12 | 13 | def total_time(self): 14 | minutes_tag = self.soup.find("div", {"itemprop": "totalTime"}) 15 | return get_minutes(minutes_tag.parent.get_text()) 16 | 17 | def yields(self): 18 | yields_tag = self.soup.find("div", {"itemprop": "recipeYield"}) 19 | return get_yields(yields_tag.parent.get_text()) 20 | 21 | def ingredients(self): 22 | ingredients_container = self.soup.find(class_="ingredientswrapper") 23 | ingredients = ingredients_container.findAll("div", {"class": "recipestepstxt"}) 24 | 25 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 26 | 27 | def _instructions_list(self): 28 | instructions_container = self.soup.find("div", {"class": "instructions"}) 29 | instructions = instructions_container.findAll( 30 | "span", {"class": "instructiontxt"} 31 | ) 32 | return [ 33 | normalize_string(instruction.get_text()) for instruction in instructions 34 | ] 35 | 36 | def instructions(self): 37 | data = self._instructions_list() 38 | return "\n".join(data) if data else None 39 | 40 | def image(self): 41 | container = self.soup.find("div", {"class": "recipeimage"}) 42 | if not container: 43 | return None 44 | 45 | image = container.find("img", 
{"src": True}) 46 | return image["src"] if image else None 47 | -------------------------------------------------------------------------------- /recipe_scrapers/plugins/opengraph_image_fetch.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import logging 3 | 4 | from recipe_scrapers.settings import settings 5 | 6 | from ._interface import PluginInterface 7 | 8 | logging.basicConfig() 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class OpenGraphImageFetchPlugin(PluginInterface): 13 | """ 14 | If .image() method on whatever scraper return exception for some reason, 15 | do try to fetch the recipe image from the og:image on the page. 16 | 17 | Apply to .image() method on all scrapers if plugin is active. 18 | """ 19 | 20 | run_on_hosts = ("*",) 21 | run_on_methods = ("image",) 22 | 23 | @classmethod 24 | def run(cls, decorated): 25 | @functools.wraps(decorated) 26 | def decorated_method_wrapper(self, *args, **kwargs): 27 | logger.setLevel(settings.LOG_LEVEL) 28 | class_name = self.__class__.__name__ 29 | method_name = decorated.__name__ 30 | logger.debug( 31 | f"Decorating: {class_name}.{method_name}() with OpenGraphImageFetchPlugin" 32 | ) 33 | 34 | image = None 35 | try: 36 | image = decorated(self, *args, **kwargs) 37 | except Exception: 38 | pass 39 | 40 | if image: 41 | return image 42 | else: 43 | logger.info( 44 | f"{class_name}.{method_name}() did not manage to find recipe image. OpenGraphImageFetchPlugin will attempt to do its magic." 
45 | ) 46 | image = self.soup.find( 47 | "meta", {"property": "og:image", "content": True} 48 | ) 49 | return image.get("content") 50 | 51 | return decorated_method_wrapper 52 | -------------------------------------------------------------------------------- /recipe_scrapers/allrecipes.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class AllRecipes(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "allrecipes.com" 8 | 9 | def author(self): 10 | # NB: In the schema.org 'Recipe' type, the 'author' property is a 11 | # single-value type, not an ItemList. 12 | # allrecipes.com seems to render the author property as a list 13 | # containing a single item under some circumstances. 14 | # In those cases, the SchemaOrg class will fail due to the unexpected 15 | # type, and this method is called as a fallback. 16 | # Rather than implement non-standard handling in SchemaOrg, this code 17 | # provides a (hopefully temporary!) allrecipes-specific workaround. 
18 | author = self.schema.data.get("author") 19 | if author and isinstance(author, list) and len(author) == 1: 20 | author = author[0].get("name") 21 | return author 22 | 23 | def title(self): 24 | return self.schema.title() 25 | 26 | def cook_time(self): 27 | return self.schema.cook_time() 28 | 29 | def prep_time(self): 30 | return self.schema.prep_time() 31 | 32 | def total_time(self): 33 | return self.schema.total_time() 34 | 35 | def yields(self): 36 | return self.schema.yields() 37 | 38 | def image(self): 39 | return self.schema.image() 40 | 41 | def ingredients(self): 42 | return self.schema.ingredients() 43 | 44 | def instructions(self): 45 | return self.schema.instructions() 46 | 47 | def ratings(self): 48 | return self.schema.ratings() 49 | 50 | def cuisine(self): 51 | return self.schema.cuisine() 52 | 53 | def category(self): 54 | return self.schema.category() 55 | -------------------------------------------------------------------------------- /recipe_scrapers/zeitwochenmarkt.py: -------------------------------------------------------------------------------- 1 | import extruct 2 | 3 | from ._abstract import AbstractScraper 4 | from ._utils import normalize_string 5 | 6 | 7 | class ZeitWochenmarkt(AbstractScraper): 8 | def __init__(self, url, **kwargs): 9 | AbstractScraper.__init__(self, url, **kwargs) 10 | data = extruct.extract( 11 | self.soup.prettify(), syntaxes=["json-ld"], errors="log", uniform=True 12 | ) 13 | for item in data["json-ld"]: 14 | if item.get("@type") == "ItemList": 15 | self.schema.data = item["itemListElement"][0]["item"] 16 | 17 | @classmethod 18 | def host(cls): 19 | return "zeit.de" 20 | 21 | def title(self): 22 | return self.schema.title() 23 | 24 | def total_time(self): 25 | return self.schema.total_time() 26 | 27 | def yields(self): 28 | return self.schema.yields() 29 | 30 | def image(self): 31 | return self.schema.image() 32 | 33 | def ingredients(self): 34 | class_name = "recipe-list-collection__special-ingredient" 35 | 
# recipe_scrapers/finedininglovers.py
from ._abstract import AbstractScraper
from ._utils import get_minutes, get_yields, normalize_string


class FineDiningLovers(AbstractScraper):
    """Scraper for recipe pages served from finedininglovers.com."""

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "finedininglovers.com"

    def title(self):
        """Return the text of the ``<h3>`` inside ``div.recipe-detail``."""
        return self.soup.find("div", {"class": "recipe-detail"}).find("h3").get_text()

    def total_time(self):
        """Return ``get_minutes`` applied to the ``div.timing`` element."""
        return get_minutes(self.soup.find("div", {"class": "timing"}))

    def yields(self):
        """Return ``get_yields`` for ``"<serving text> servings"``.

        The serving element's text is used. Previously the tag object itself
        was formatted, which put its whole markup into the string. When the
        element is missing, the formatted value is still ``"None servings"``,
        as before.
        """
        servings = self.soup.find(
            "div", {"class": "field--name-field-recipe-serving-num"}
        )
        if servings is not None:
            servings = servings.get_text()

        return get_yields("{} servings".format(servings))

    def ingredients(self):
        """Return the normalized text of each ingredient paragraph in ``div.ingredients-box``."""
        ingredients_parent = self.soup.find("div", {"class": "ingredients-box"})
        ingredients = ingredients_parent.findAll(
            "div", {"class": "paragraph--type--recipe-ingredient"}
        )

        return [normalize_string(ingredient.get_text()) for ingredient in ingredients]

    def instructions(self):
        """Return the normalized text of each recipe-step paragraph, newline-joined."""
        instructions_parent = self.soup.find(
            "div", {"class": "field--name-field-recipe-para-steps"}
        )
        instructions = instructions_parent.findAll(
            "div", {"class": "paragraph--type--recipe-step"}
        )

        return "\n".join(
            [normalize_string(instruction.get_text()) for instruction in instructions]
        )

    def image(self):
        """Return the absolute image URL, or ``None`` when no image element is found.

        The ``data-src`` attribute is stripped of its query string and
        prefixed with the site's base URL. The missing-image check now runs
        before the attribute is read; previously it ran afterwards and could
        never take effect.
        """
        image = self.soup.select_one(".image-zone picture img")
        if image is None:
            return None

        image_url = image["data-src"].split("?")[0]
        image_base_url = "https://www.finedininglovers.com"

        return "{}{}".format(image_base_url, image_url)
self.schema.description().replace( 54 | " Mehr Thermomix ® Rezepte auf www.rezeptwelt.de", "" 55 | ) 56 | 57 | def language(self): 58 | return self.soup.find("meta", {"property": "og:locale"})["content"] 59 | -------------------------------------------------------------------------------- /tests/test_mykitchen101.py: -------------------------------------------------------------------------------- 1 | from recipe_scrapers.mykitchen101 import MyKitchen101 2 | from tests import ScraperTest 3 | 4 | 5 | class TestMyKitchen101Scraper(ScraperTest): 6 | 7 | scraper_class = MyKitchen101 8 | 9 | def test_host(self): 10 | self.assertEqual("mykitchen101.com", self.harvester_class.host()) 11 | 12 | def test_author(self): 13 | self.assertEqual("清闲廚房 团队", self.harvester_class.author()) 14 | 15 | def test_title(self): 16 | self.assertEqual("古早味迷你烤鸡蛋糕", self.harvester_class.title()) 17 | 18 | def test_yields(self): 19 | self.assertEqual("30 serving(s)", self.harvester_class.yields()) 20 | 21 | def test_image(self): 22 | self.assertEqual( 23 | "https://mykitchen101.com/wp-content/uploads/2020/11/mini-baked-egg-sponge-cake-mykitchen101-feature1.jpg", 24 | self.harvester_class.image(), 25 | ) 26 | 27 | def test_ingredients(self): 28 | self.assertEqual( 29 | [ 30 | "4 个 蛋 (A级)", 31 | "125 克 细砂糖", 32 | "⅛ 茶匙 细盐", 33 | "115 克 普通面粉", 34 | "20 克 玉米淀粉", 35 | "30 克 溶化牛油", 36 | ], 37 | self.harvester_class.ingredients(), 38 | ) 39 | 40 | def test_instructions(self): 41 | self.assertEqual( 42 | "1 把烤炉预热至200°C/395°F。\n2 把蛋轻轻打散,加入盐和和糖,打至混合。\n3 把1公升的清水和400毫升的热水(热水炉取出的热水)混合在比搅拌碗大的钢盆以调出约45°C/113°F的温水。\n4 把搅拌碗浸泡在温水里,以中高速把蛋打至浓稠 (约5分钟)。(温馨提示:隔着温水打蛋糊可以缩短打发蛋糊的时间。)\n5 把普通面粉和玉米淀粉混合过筛2次。\n6 慢慢把粉类加入蛋糊里,以低速混合,再用刮刀翻拌至均匀。慢慢加入溶化牛油,用刮刀轻轻翻拌至混合。\n7 把面糊倒入裱花袋里。\n8 把迷你玛芬蛋糕模 (直径 = 4.8 cm,深 = 2.2 cm) 铺上杯子蛋糕纸托,装入面糊直到80%满。\n9 放入已预热的烤炉,以190°C/375°F 烘烤20-22分钟,直到呈金黄色。(温馨提示:不同烤炉的温度不一样,烘烤时间只供参考,可依个自的烤炉调整烘烤的时间。如果第一批烤24个蛋糕,第二批只有6个,那么第二批的烘烤时间可以缩短,只要蛋糕表面呈金黄色就可以了。)\n10 把鸡蛋糕脱模后放在铁架上至冷却。", 43 | self.harvester_class.instructions(), 
44 | ) 45 | -------------------------------------------------------------------------------- /recipe_scrapers/plugins/schemaorg_priority.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | from recipe_scrapers.settings import settings 4 | 5 | from .._exceptions import SchemaOrgException 6 | from ._interface import PluginInterface 7 | 8 | 9 | class SchemaOrgPriorityPlugin(PluginInterface): 10 | """ 11 | Plugin that if put into use will check if there's Schema.org 12 | available on the page being scraped. 13 | 14 | If Schema.org is available, it will ignore the methods in the 15 | site-specific scraper and use the SchemaOrg instead. 16 | 17 | If SchemaOrg raises SchemaOrgException for some reason, the plugin 18 | will fallback to the method used in the site-specific scraper. 19 | 20 | In the ideal future every site implements Schema.org, but we are no there yet. 21 | """ 22 | 23 | run_on_hosts = ("*",) 24 | run_on_methods = ( 25 | "author", 26 | "title", 27 | "total_time", 28 | "yields", 29 | "image", 30 | "ingredients", 31 | "instructions", 32 | "ratings", 33 | "reviews", 34 | "links", 35 | "language", 36 | "nutrients", 37 | ) 38 | 39 | @classmethod 40 | def run(cls, decorated): 41 | @functools.wraps(decorated) 42 | def decorated_method_wrapper(self, *args, **kwargs): 43 | function = getattr(self.schema, decorated.__name__) 44 | if self.schema.data and function: 45 | # TODO: write logging. Configure logging. 
46 | settings.logger.debug("TODO: write", exc_info=True) 47 | try: 48 | return function(*args, **kwargs) 49 | except SchemaOrgException: 50 | return decorated(self, *args, **kwargs) 51 | else: 52 | return decorated(self, *args, **kwargs) 53 | 54 | return decorated_method_wrapper 55 | -------------------------------------------------------------------------------- /recipe_scrapers/panelinha.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from ._abstract import AbstractScraper 4 | from ._utils import get_minutes, normalize_string 5 | 6 | INSTRUCTIONS_NUMBERING_REGEX = re.compile(r"^\d{1,2}\.\s*") # noqa 7 | 8 | 9 | class Panelinha(AbstractScraper): 10 | @classmethod 11 | def host(cls): 12 | return "panelinha.com.br" 13 | 14 | def title(self): 15 | return normalize_string(self.soup.find("h1").get_text()) 16 | 17 | def total_time(self): 18 | return get_minutes( 19 | self.soup.find("span", string="Tempo de preparo").nextSibling 20 | ) 21 | 22 | def ingredients(self): 23 | ingredients = self.soup.find("h4", string="Ingredientes").nextSibling.findAll( 24 | "li" 25 | ) 26 | 27 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 28 | 29 | def instructions(self): 30 | instructions = self.soup.find( 31 | "h4", string="Modo de preparo" 32 | ).nextSibling.findAll("li") 33 | 34 | instructions = [ 35 | normalize_string(instruction.get_text()) for instruction in instructions 36 | ] 37 | 38 | # Some recipes pages have a different html structure. 
39 | if not instructions: 40 | instructions = self.soup.find( 41 | "h4", string="Modo de preparo" 42 | ).nextSibling.p.strings 43 | 44 | instructions = ( 45 | normalize_string(instruction) for instruction in instructions 46 | ) 47 | 48 | instructions = map( 49 | lambda step: INSTRUCTIONS_NUMBERING_REGEX.sub("", step), instructions 50 | ) 51 | 52 | return "\n".join(instructions) 53 | 54 | def yields(self): 55 | return normalize_string( 56 | self.soup.find("span", string="Serve").nextSibling.get_text() 57 | ) 58 | -------------------------------------------------------------------------------- /tests/test_cookeatshare.py: -------------------------------------------------------------------------------- 1 | from recipe_scrapers.cookeatshare import CookEatShare 2 | from tests import ScraperTest 3 | 4 | 5 | class TestCookEatShare(ScraperTest): 6 | 7 | scraper_class = CookEatShare 8 | 9 | def test_host(self): 10 | self.assertEqual("cookeatshare.com", self.harvester_class.host()) 11 | 12 | def test_title(self): 13 | self.assertEqual("Pork Steak Vegetable Bake", self.harvester_class.title()) 14 | 15 | def test_image(self): 16 | self.assertEqual( 17 | "https://assets.cookeatshare.com/assets/recipe-art/stock/3/full-f1e2882559f1146065432f8bdd182440.png", 18 | self.harvester_class.image(), 19 | ) 20 | 21 | def test_total_time(self): 22 | self.assertEqual(None, self.harvester_class.total_time()) 23 | 24 | def test_ingredients(self): 25 | self.assertCountEqual( 26 | [ 27 | '4 med. potatoes, pared & cut lengthwise, in 1/4" slices', 28 | "1 lg. carrot, pared & sliced", 29 | '4 pork steaks, cut 1/2" thick', 30 | "1/2 c. water", 31 | "1 env. dry onion soup mix or possibly 1 can cream of onion soup", 32 | "2 tbsp. soy sauce", 33 | ], 34 | self.harvester_class.ingredients(), 35 | ) 36 | 37 | def test_instructions(self): 38 | return self.assertEqual( 39 | 'Place potatoes and carrots in bottom of 11" x 7 1/2" x 1 3/4" baking dish. Trim fat from steaks. 
In large skillet, cook trimmings till about 2 Tbsp. oil accumulates; throw away trimmings.\nIn warm oil, brown steaks well on both sides. In small saucepan, bring to boil. Spoon 1/2 of onion soup mix over the potatoes and carrots; top with steaks. Spoon remaining soup mix over. Cover, bake in 350 degree oven for 1 hour. Uncover and bake 10 min more. Makes 4 servings.', 40 | self.harvester_class.instructions(), 41 | ) 42 | -------------------------------------------------------------------------------- /tests/test_thewoksoflife.py: -------------------------------------------------------------------------------- 1 | from recipe_scrapers.thewoksoflife import Thewoksoflife 2 | from tests import ScraperTest 3 | 4 | 5 | class TestThewoksoflifeScraper(ScraperTest): 6 | 7 | scraper_class = Thewoksoflife 8 | 9 | def test_host(self): 10 | self.assertEqual("thewoksoflife.com", self.harvester_class.host()) 11 | 12 | def test_canonical_url(self): 13 | self.assertEqual( 14 | "https://thewoksoflife.com/whole-wheat-mantou/", 15 | self.harvester_class.canonical_url(), 16 | ) 17 | 18 | def test_title(self): 19 | self.assertEqual( 20 | self.harvester_class.title(), "The Perfect Whole Wheat Mantou Recipe" 21 | ) 22 | 23 | def test_yields(self): 24 | self.assertEqual("12 serving(s)", self.harvester_class.yields()) 25 | 26 | def test_image(self): 27 | self.assertEqual( 28 | "https://thewoksoflife.com/wp-content/uploads/2018/01/whole-wheat-mantou-9-1.jpg", 29 | self.harvester_class.image(), 30 | ) 31 | 32 | def test_ingredients(self): 33 | self.assertCountEqual( 34 | [ 35 | "1 \u2154 cups warm milk ((400 ml))", 36 | "1 teaspoon active dry yeast ((3 grams))", 37 | "1 tablespoon sugar ((12 grams))", 38 | "2 \u00be cups all-purpose flour ((400 grams))", 39 | "1\u00bc to 1\u00bd cups whole wheat flour ((about 170-200 grams; how much you\u2019ll need is dependent on the humidity in your kitchen))", 40 | ], 41 | self.harvester_class.ingredients(), 42 | ) 43 | 44 | def test_instructions(self): 
45 | self.assertTrue( 46 | self.harvester_class.instructions().startswith( 47 | "Heat the milk until warm to the touch (not hot). Then " 48 | ) 49 | ) 50 | self.assertEqual(len(self.harvester_class.instructions()), 1786) 51 | -------------------------------------------------------------------------------- /recipe_scrapers/bbcfood.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, get_yields, normalize_string 3 | 4 | 5 | class BBCFood(AbstractScraper): 6 | @classmethod 7 | def host(self, domain="com"): 8 | return f"bbc.{domain}" 9 | 10 | def title(self): 11 | return normalize_string(self.soup.find("h1").get_text()) 12 | 13 | def total_time(self): 14 | return sum( 15 | [ 16 | get_minutes( 17 | self.soup.find("p", {"class": "recipe-metadata__prep-time"}) 18 | ), 19 | get_minutes( 20 | self.soup.find("p", {"class": "recipe-metadata__cook-time"}) 21 | ), 22 | ] 23 | ) 24 | 25 | def yields(self): 26 | return get_yields(self.soup.find("p", {"class": "recipe-metadata__serving"})) 27 | 28 | def author(self): 29 | container = self.soup.find("div", {"class": "chef__name"}) 30 | if not container: 31 | return None 32 | 33 | author = container.a 34 | return author.text if author else None 35 | 36 | def image(self): 37 | container = self.soup.find(True, {"class": "recipe-media__image"}) 38 | if not container: 39 | return None 40 | 41 | image = container.parent.find("img", {"src": True}) 42 | return image["src"] if image else None 43 | 44 | def ingredients(self): 45 | ingredients = self.soup.findAll( 46 | "li", {"class": "recipe-ingredients__list-item"} 47 | ) 48 | 49 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 50 | 51 | def instructions(self): 52 | instructions = self.soup.findAll( 53 | "p", {"class": "recipe-method__list-item-text"} 54 | ) 55 | 56 | return "\n".join( 57 | [normalize_string(instruction.get_text()) for instruction 
in instructions] 58 | ) 59 | -------------------------------------------------------------------------------- /recipe_scrapers/justbento.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | from ._utils import get_minutes, normalize_string 3 | 4 | 5 | class JustBento(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "justbento.com" 9 | 10 | def title(self): 11 | expected_prefix = "Recipe: " 12 | title = self.soup.find("meta", {"property": "og:title", "content": True}) 13 | return title.get("content").replace(expected_prefix, "") 14 | 15 | def total_time(self): 16 | time = self.soup.find( 17 | "div", {"class": "field-name-taxonomy-vocabulary-2"} 18 | ).find("a", {"typeof": "skos:Concept"}) 19 | return get_minutes(time) 20 | 21 | def yields(self): 22 | return "1" 23 | 24 | def ingredients(self): 25 | ingredients = ( 26 | self.soup.find("div", {"class": "field-name-body"}).find("ul").findAll("li") 27 | ) 28 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 29 | 30 | def instructions(self): 31 | elements_after_title = ( 32 | self.soup.find("div", {"class": "field-name-body"}) 33 | .find("h3") 34 | .find_next_sibling("ul") 35 | .find_next_siblings() 36 | ) 37 | 38 | instructions = [] 39 | for element in elements_after_title: 40 | if instructions and element.name != "p": 41 | break 42 | if element.name == "p": 43 | instructions.append(element.get_text()) 44 | instructions = [ 45 | normalize_string(instruction) for instruction in instructions 46 | ] 47 | 48 | return "\n".join(instructions) if instructions else None 49 | 50 | def image(self): 51 | image = self.soup.find("div", {"class": "field-name-body"}).find( 52 | "img", {"class": "centerimg", "src": True} 53 | ) 54 | return image["src"] if image else None 55 | -------------------------------------------------------------------------------- /recipe_scrapers/plugins/schemaorg_fill.py: 
# --------------------------------------------------------------------------------
import functools
import logging

from recipe_scrapers.settings import settings

from ._interface import PluginInterface

logging.basicConfig()
logger = logging.getLogger(__name__)


class SchemaOrgFillPlugin(PluginInterface):
    """
    If any of the methods listed is invoked on a scraper class
    that happens not to implement it and Schema.org data is available,
    attempt to return the result from the available schema.
    """

    run_on_hosts = ("*",)
    run_on_methods = (
        "author",
        "title",
        "total_time",
        "yields",
        "image",
        "ingredients",
        "instructions",
        "ratings",
        "reviews",
        "links",
        "language",
        "nutrients",
    )

    @classmethod
    def run(cls, decorated):
        """Wrap ``decorated`` with a fallback to the same-named ``self.schema`` method.

        The fallback is used only when ``decorated`` raises
        ``NotImplementedError``, the schema has data, and the schema object
        has a method of that name; otherwise the original
        ``NotImplementedError`` propagates.
        """

        @functools.wraps(decorated)
        def decorated_method_wrapper(self, *args, **kwargs):
            logger.setLevel(settings.LOG_LEVEL)
            class_name = self.__class__.__name__
            method_name = decorated.__name__
            logger.debug(
                f"Decorating: {class_name}.{method_name}() with SchemaOrgFillPlugin"
            )
            try:
                return decorated(self, *args, **kwargs)
            except NotImplementedError:
                # Default of None: without it a name missing on the schema
                # object raised AttributeError here, masking the
                # NotImplementedError and making the check below pointless.
                function = getattr(self.schema, method_name, None)
                if self.schema.data and function:
                    logger.info(
                        f"{class_name}.{method_name}() seems to not be implemented but .schema is available! Attempting to return result from .schema."
                    )
                    return function(*args, **kwargs)
                # Bare raise re-raises the active exception with its traceback intact.
                raise

        return decorated_method_wrapper

# --------------------------------------------------------------------------------
# /recipe_scrapers/afghankitchenrecipes.py:
# --------------------------------------------------------------------------------
from ._abstract import AbstractScraper
from ._utils import get_yields


class AfghanKitchenRecipes(AbstractScraper):
    """Scraper for afghankitchenrecipes.com; title and image come from the schema."""

    @classmethod
    def host(cls):
        return "afghankitchenrecipes.com"

    def author(self):
        """Return the author link text, or None when the block or link is missing."""
        given_name = self.soup.find("h5", {"class": "given-name"})
        if not given_name:
            return None
        link = given_name.find("a", {"rel": "author"})
        return link.get_text() if link else None

    def title(self):
        return self.schema.title()

    def total_time(self):
        """Return the "ready in" time in minutes, or None when unavailable.

        The value is expected to look like ``H:MM`` followed by an ``h``
        suffix; text without that suffix yields None.
        """
        ready_in = self.soup.find("li", {"class": "ready-in"})
        if not ready_in:
            return None
        ready_text = ready_in.find("span", {"class": "value"}).get_text()
        if not ready_text or not ready_text.endswith("h"):
            return None
        # Strip exactly the verified "h" suffix plus any whitespace before it.
        # The original removed two characters unconditionally, which loses a
        # digit when the "h" is not preceded by a space.
        hours, minutes = ready_text[:-1].strip().split(":")
        return int(hours) * 60 + int(minutes)

    def yields(self):
        servings = self.soup.find("li", {"class": "servings"})
        if not servings:
            return None
        return get_yields(servings)

    def image(self):
        return self.schema.image()

    def ingredients(self):
        """Return the text of every non-empty ingredient item."""
        ingredient_elements = self.soup.findAll("li", {"class": "ingredient"})
        return [
            element.get_text() for element in ingredient_elements if element.get_text()
        ]

    def instructions(self):
        """Return the stripped text of every non-empty instruction paragraph, newline-joined."""
        instruction_elements = self.soup.findAll("p", {"class": "instructions"})
        return "\n".join(
            element.get_text().strip()
            for element in instruction_elements
            if element.get_text()
        )

    def ratings(self):
        """Return the rating rounded to two decimals, or a falsy value when absent."""
        rating = self.soup.find("meta", {"itemprop": "ratingValue"})
        # The and-chain short-circuits to the first falsy operand (None or an
        # empty content string), so callers may receive either.
        return rating and rating.get("content") and round(float(rating["content"]), 2)
# --------------------------------------------------------------------------------
# /tests/test_acouplecooks.py:
# --------------------------------------------------------------------------------
from recipe_scrapers.acouplecooks import ACoupleCooks
from tests import ScraperTest


# test recipe's URL
# https://www.acouplecooks.com/garlic-butter-shrimp/
class TestACoupleCooks(ScraperTest):

    scraper_class = ACoupleCooks

    def test_host(self):
        expected = "acouplecooks.com"
        self.assertEqual(expected, self.harvester_class.host())

    def test_canonical_url(self):
        expected = "https://www.acouplecooks.com/garlic-butter-shrimp/"
        self.assertEqual(expected, self.harvester_class.canonical_url())

    def test_title(self):
        expected = "Garlic Butter Shrimp (Fast & Easy Dinner!)"
        self.assertEqual(expected, self.harvester_class.title())

    def test_total_time(self):
        expected = 8
        self.assertEqual(expected, self.harvester_class.total_time())

    def test_yields(self):
        expected = "4 serving(s)"
        self.assertEqual(expected, self.harvester_class.yields())

    def test_ingredients(self):
        # Order matters here: assertEqual compares the lists element by element.
        expected = [
            "1 pound large shrimp, deveined (peeled or unpeeled)",
            "3 garlic cloves",
            "1/2 teaspoon kosher salt",
            "3 tablespoons butter",
            "2 lemon wedges",
            "Fresh cilantro or parsley, for garnish",
        ]
        self.assertEqual(expected, self.harvester_class.ingredients())

    def test_instructions(self):
        expected = "If frozen, thaw the shrimp (see the notes above).\nMince the garlic.\nPat the shrimp dry. In a medium bowl, mix the shrimp with the garlic and salt.\nIn a large skillet, heat the butter on medium high heat. Cook the shrimp for 1 to 2 minutes per side until opaque and cooked through, turning them with tongs.\nSpritz with juice of the lemon wedges and serve immediately."
        self.assertEqual(expected, self.harvester_class.instructions())

# --------------------------------------------------------------------------------