├── tests ├── library │ ├── __init__.py │ ├── test_exceptions.py │ └── test_utils.py ├── test_data │ ├── .gitkeep │ ├── test_settings_module │ │ ├── __init__.py │ │ └── test_settings.py │ └── schemaorg.testhtml ├── conftest.py ├── __init__.py ├── test__settings_module.py ├── test_schemaorg.py ├── test_mykitchen101.py ├── test_cookeatshare.py ├── test_thewoksoflife.py └── test_acouplecooks.py ├── recipe_scrapers ├── __version__.py ├── settings │ ├── template.py │ ├── v12_settings.py │ └── default.py ├── bonappetit.py ├── cookeatshare.py ├── amazingribs.py ├── globo.py ├── kochbar.py ├── abril.py ├── cdkitchen.py ├── foodandwine.py ├── acouplecooks.py ├── budgetbytes.py ├── foodnetwork.py ├── eatwhattonight.py ├── archanaskitchen.py ├── justonecookbook.py ├── giallozafferano.py ├── thenutritiouskitchen.py ├── practicalselfreliance.py ├── plugins │ ├── __init__.py │ ├── _interface.py │ ├── bcp47_validate.py │ ├── normalize_string.py │ ├── template.py │ ├── opengraph_image_fetch.py │ ├── schemaorg_priority.py │ └── schemaorg_fill.py ├── g750g.py ├── misya.py ├── tasty.py ├── chefkoch.py ├── cookpad.py ├── gousto.py ├── blueapron.py ├── cuisineaz.py ├── kuchniadomowa.py ├── marmiton.py ├── myrecipes.py ├── zenbelly.py ├── averiecooks.py ├── cybercook.py ├── hassenchef.py ├── justataste.py ├── livelytable.py ├── nytimes.py ├── ohsheglows.py ├── purelypope.py ├── skinnytaste.py ├── bakingsense.py ├── castironketo.py ├── hostthetoast.py ├── bakingmischeif.py ├── domesticateme.py ├── gimmesomeoven.py ├── littlespicejar.py ├── lovingitvegan.py ├── recipietineats.py ├── simplywhisked.py ├── sweetcsdesigns.py ├── thewoksoflife.py ├── vanillaandbean.py ├── watchwhatueat.py ├── ambitiouskitchen.py ├── bowlofdelicious.py ├── fifteenspatulas.py ├── halfbakedharvest.py ├── lecremedelacrumb.py ├── melskitchencafe.py ├── minimalistbaker.py ├── primaledgehealth.py ├── spendwithpennies.py ├── theclevercarrot.py ├── thekitchenmagpie.py ├── atelierdeschefs.py ├── eatsmarter.py ├── 
wholefoods.py ├── nourishedbynutrition.py ├── sallysbakingaddiction.py ├── yemek.py ├── alltomat.py ├── purplecarrot.py ├── bbcgoodfood.py ├── redhousespice.py ├── downshiftology.py ├── eatingbirdfood.py ├── realfoodtesco.py ├── jimcooksfoodgood.py ├── rainbowplantlife.py ├── headbangerskitchen.py ├── paleorunningmomma.py ├── vegrecipesofindia.py ├── indianhealthyrecipes.py ├── dr.py ├── hellofresh.py ├── innit.py ├── bettycrocker.py ├── steamykitchen.py ├── seriouseats.py ├── inspiralized.py ├── food.py ├── tudogostoso.py ├── franzoesischkochen.py ├── marthastewart.py ├── closetcooking.py ├── copykat.py ├── whatsgabycooking.py ├── rachlmansfield.py ├── heinzbrasil.py ├── przepisy.py ├── hundredandonecookbooks.py ├── springlane.py ├── thekitchn.py ├── paninihappy.py ├── thepioneerwoman.py ├── foodrepublic.py ├── tasteofhome.py ├── tastesoflizzyt.py ├── cookinglight.py ├── cucchiaio.py ├── kennymcgovern.py ├── timesofindia.py ├── eatingwell.py ├── jamieoliver.py ├── countryliving.py ├── reishunger.py ├── food52.py ├── onehundredonecookbooks.py ├── realsimple.py ├── wikicookbook.py ├── tastykitchen.py ├── cookieandkate.py ├── simplyquinoa.py ├── mybakingaddiction.py ├── saveur.py ├── forksoverknives.py ├── thevintagemixer.py ├── sallysblog.py ├── _factory.py ├── comidinhasdochef.py ├── mykitchen101.py ├── kingarthur.py ├── _exceptions.py ├── motherthyme.py ├── twopeasandtheirpod.py ├── simplyrecipes.py ├── momswithcrockpots.py ├── kwestiasmaku.py ├── streetkitchen.py ├── fitmencook.py ├── geniuskitchen.py ├── mykitchen101en.py ├── ig.py ├── bigoven.py ├── woop.py ├── yummly.py ├── southernliving.py ├── cookstr.py ├── joyfoodsunshine.py ├── nutritionbynathalie.py ├── heb.py ├── allrecipes.py ├── zeitwochenmarkt.py ├── finedininglovers.py ├── rezeptwelt.py ├── panelinha.py ├── bbcfood.py ├── justbento.py └── afghankitchenrecipes.py ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── new_scraper.md │ └── scraper_bug_report.md └── workflows │ ├── linters.yaml │ ├── 
publish.yaml │ └── unittests.yaml ├── MANIFEST.in ├── requirements-dev.txt ├── .flake8 ├── run_tests.py ├── .coveragerc ├── .pre-commit-config.yaml ├── templates ├── scraper.py └── test_scraper.py ├── LICENSE └── setup.py /tests/library/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_data/test_settings_module/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /recipe_scrapers/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = "13.12.1" 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | 4 | exclude requirements-dev.txt 5 | recursive-exclude tests * 6 | -------------------------------------------------------------------------------- /tests/test_data/test_settings_module/test_settings.py: -------------------------------------------------------------------------------- 1 | SUPPRESS_EXCEPTIONS = True 2 | TEST_MODE = True 3 | META_HTTP_EQUIV = True 4 | # LOG_LEVEL = 20 5 | -------------------------------------------------------------------------------- /requirements-dev.txt: 
-------------------------------------------------------------------------------- 1 | -e . 2 | black>=21.4b2 3 | coverage>=4.5.1 4 | flake8>=3.8.3 5 | flake8-printf-formatting>=1.1.0 6 | pre-commit>=2.6.0 7 | pytest>=6.1.1 8 | unittest-parallel>=1.5.0 9 | # language-tags>=1.0.0 10 | # tld>=0.12.3 11 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | # FS002 comes from plugin "flake8-use-fstring" 3 | # and would error on `str.format()` usage 4 | ignore = E203, E266, E501, W503, FS002 5 | max-line-length = 88 6 | max-complexity = 18 7 | select = B,C,E,F,W,T4,B9 8 | exclude = tests/test_data/* 9 | -------------------------------------------------------------------------------- /run_tests.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | if __name__ == "__main__": 4 | run_tests_command = ( 5 | "unittest-parallel -t . 
-s tests --coverage --coverage-rcfile .coveragerc" 6 | ) 7 | subprocess.run(run_tests_command.split(" "), check=True, text=True) 8 | -------------------------------------------------------------------------------- /recipe_scrapers/settings/template.py: -------------------------------------------------------------------------------- 1 | SUPPRESS_EXCEPTIONS = True 2 | 3 | 4 | # the most powerful feature is adding custom plugins 5 | # for example to add "inner-most" plugin: 6 | # PLUGINS += ( 7 | # "path.to.my.custom_plugin", 8 | # ) 9 | # 10 | # and to add "outer-most" plugin: 11 | # PLUGINS = ( 12 | # "path.to.my.custom_plugin", 13 | # ) + PLUGINS 14 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = recipe_scrapers 4 | relative_files = True 5 | 6 | omit = recipe_scrapers/_abstract.py 7 | recipe_scrapers/__init__.py 8 | recipe_scrapers/__version__.py 9 | 10 | [report] 11 | exclude_lines = 12 | pragma: no cover 13 | 14 | # Don't complain if tests don't hit defensive assertion code: 15 | raise AttributeError 16 | raise NotImplementedError 17 | 18 | ignore_errors = True 19 | -------------------------------------------------------------------------------- /recipe_scrapers/settings/v12_settings.py: -------------------------------------------------------------------------------- 1 | # Settings that will make recipe-scrapers>=13.0.0 act almost identically to recipe-scrapers<13.0.0 2 | SUPPRESS_EXCEPTIONS = True 3 | META_HTTP_EQUIV = True 4 | ON_EXCEPTION_RETURN_VALUES = { 5 | "title": "", 6 | "total_time": 0, 7 | "yields": "", 8 | "image": "", 9 | "ingredients": [], 10 | "instructions": "", 11 | "ratings": -1, 12 | "reviews": None, 13 | "links": [], 14 | "language": "en", 15 | "nutrients": {}, 16 | } 17 | -------------------------------------------------------------------------------- 
/recipe_scrapers/bonappetit.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class BonAppetit(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "bonappetit.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return None 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def ingredients(self): 19 | return self.schema.ingredients() 20 | 21 | def instructions(self): 22 | return self.schema.instructions() 23 | -------------------------------------------------------------------------------- /recipe_scrapers/cookeatshare.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class CookEatShare(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "cookeatshare.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return None 14 | 15 | def image(self): 16 | return self.schema.image() 17 | 18 | def ingredients(self): 19 | return self.schema.ingredients() 20 | 21 | def instructions(self): 22 | return self.schema.instructions() 23 | -------------------------------------------------------------------------------- /recipe_scrapers/amazingribs.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class AmazingRibs(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "amazingribs.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def ingredients(self): 19 | return self.schema.ingredients() 20 | 21 | def instructions(self): 22 | return self.schema.instructions() 23 | 
-------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - repo: https://gitlab.com/pycqa/flake8.git 9 | rev: 3.8.3 10 | hooks: 11 | - id: flake8 12 | additional_dependencies: [flake8-use-fstring] 13 | - repo: https://github.com/pycqa/isort 14 | rev: 5.7.0 15 | hooks: 16 | - id: isort 17 | args: ["--profile", "black", "--filter-files"] 18 | - repo: https://github.com/psf/black 19 | rev: 19.3b0 20 | hooks: 21 | - id: black 22 | -------------------------------------------------------------------------------- /recipe_scrapers/globo.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Globo(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "receitas.globo.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/kochbar.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Kochbar(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "kochbar.de" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return 
self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/abril.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Abril(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "claudia.abril.com.br" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/cdkitchen.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class CdKitchen(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "cdkitchen.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def ingredients(self): 19 | return self.schema.ingredients() 20 | 21 | def instructions(self): 22 | return self.schema.instructions() 23 | 24 | def image(self): 25 | return self.schema.image() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/foodandwine.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class FoodAndWine(AbstractScraper): 5 | 
@classmethod 6 | def host(cls): 7 | return "foodandwine.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/acouplecooks.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class ACoupleCooks(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "acouplecooks.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def instructions(self): 22 | return self.schema.instructions() 23 | 24 | def ingredients(self): 25 | return self.schema.ingredients() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/budgetbytes.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class BudgetBytes(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "budgetbytes.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def ingredients(self): 19 | return self.schema.ingredients() 20 | 21 | def instructions(self): 22 | return self.schema.instructions() 23 | 24 | def ratings(self): 25 | return self.schema.ratings() 26 | 
-------------------------------------------------------------------------------- /recipe_scrapers/foodnetwork.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class FoodNetwork(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "foodnetwork.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def author(self): 13 | return self.schema.author() 14 | 15 | def total_time(self): 16 | return self.schema.total_time() 17 | 18 | def yields(self): 19 | return self.schema.yields() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/eatwhattonight.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class EatWhatTonight(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "eatwhattonight.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/archanaskitchen.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class ArchanasKitchen(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "archanaskitchen.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 
| def yields(self): 16 | return self.schema.yields() 17 | 18 | def ingredients(self): 19 | return self.schema.ingredients() 20 | 21 | def instructions(self): 22 | return self.schema.instructions() 23 | 24 | def ratings(self): 25 | return self.schema.ratings() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/justonecookbook.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class JustOneCookbook(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "justonecookbook.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | -------------------------------------------------------------------------------- /tests/library/test_exceptions.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from recipe_scrapers import ( 4 | NoSchemaFoundInWildMode, 5 | WebsiteNotImplementedError, 6 | scrape_me, 7 | ) 8 | 9 | 10 | class TestExceptions(unittest.TestCase): 11 | def test_WebsiteNotImplementedError(self): 12 | with self.assertRaises(WebsiteNotImplementedError): 13 | scrape_me("https://example.com/recipe") 14 | 15 | def test_NoSchemaFoundInWildMode(self): 16 | exception = NoSchemaFoundInWildMode("example.com") 17 | 18 | self.assertEqual(exception.url, "example.com") 19 | self.assertEqual(exception.message, "No Recipe Schema found at example.com.") 20 | -------------------------------------------------------------------------------- /recipe_scrapers/giallozafferano.py: 
-------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class GialloZafferano(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "ricette.giallozafferano.it" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def ingredients(self): 19 | return self.schema.ingredients() 20 | 21 | def instructions(self): 22 | return self.schema.instructions() 23 | 24 | def ratings(self): 25 | return self.schema.ratings() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/thenutritiouskitchen.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class TheNutritiousKitchen(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "thenutritiouskitchen.co" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | -------------------------------------------------------------------------------- /.github/workflows/linters.yaml: -------------------------------------------------------------------------------- 1 | name: linters 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | linters: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Set up Python 17 | uses: actions/setup-python@v2 18 | with: 19 | python-version: "3.x" 20 | - name: Install dependencies 21 | run: | 22 | python -m pip 
install --upgrade pip 23 | pip install -r requirements-dev.txt 24 | - name: black and flake checks 25 | run: | 26 | black --check . 27 | flake8 --count . 28 | -------------------------------------------------------------------------------- /recipe_scrapers/practicalselfreliance.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class PracticalSelfReliance(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "practicalselfreliance.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | -------------------------------------------------------------------------------- /recipe_scrapers/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | # from .bcp47_validate import Bcp47ValidatePlugin 2 | from .exception_handling import ExceptionHandlingPlugin 3 | from .html_tags_stripper import HTMLTagStripperPlugin 4 | from .normalize_string import NormalizeStringPlugin 5 | from .opengraph_image_fetch import OpenGraphImageFetchPlugin 6 | from .schemaorg_fill import SchemaOrgFillPlugin 7 | from .schemaorg_priority import SchemaOrgPriorityPlugin 8 | 9 | __all__ = [ 10 | # "Bcp47ValidatePlugin", 11 | "ExceptionHandlingPlugin", 12 | "HTMLTagStripperPlugin", 13 | "NormalizeStringPlugin", 14 | "OpenGraphImageFetchPlugin", 15 | "SchemaOrgFillPlugin", 16 | "SchemaOrgPriorityPlugin", 17 | ] 18 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | 
def pytest_addoption(parser): 5 | parser.addoption( 6 | "--online", 7 | action="store_true", 8 | default=False, 9 | help="run unit tests against online web content", 10 | ) 11 | 12 | 13 | @pytest.fixture(scope="session", autouse=True) 14 | def configure_online(request): 15 | seen = {None} 16 | session = request.node 17 | online = request.config.getoption("--online") 18 | for item in session.items: 19 | cls = item.getparent(pytest.Class) 20 | if cls not in seen: 21 | if hasattr(cls.obj, "online"): 22 | cls.obj.online = online 23 | seen.add(cls) 24 | -------------------------------------------------------------------------------- /recipe_scrapers/g750g.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class G750g(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "750g.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/misya.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Misya(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "misya.info" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 
| 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/tasty.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Tasty(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "tasty.co" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/chefkoch.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Chefkoch(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "chefkoch.de" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/cookpad.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class 
CookPad(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "cookpad.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/gousto.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Gousto(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "gousto.co.uk" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/blueapron.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class BlueApron(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "blueapron.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return 
self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/cuisineaz.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class CuisineAZ(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "cuisineaz.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/kuchniadomowa.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class KuchniaDomowa(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "kuchnia-domowa.pl" 8 | 9 | def title(self): 10 | return self.soup.find("h2").get_text().strip() 11 | 12 | def image(self): 13 | urls = self.soup.findAll("img", {"class": "article-img", "id": "article-img-1"}) 14 | return f"https:{urls[1]['src']}" 15 | 16 | def instructions(self): 17 | instructions = self.soup.find("div", {"id": "recipe-instructions"}).findAll( 18 | "li" 19 | ) 20 | instructions = [x.text for x in instructions] 21 | return "\n".join(instructions) 22 | -------------------------------------------------------------------------------- /recipe_scrapers/marmiton.py: -------------------------------------------------------------------------------- 1 | from ._abstract import 
AbstractScraper 2 | 3 | 4 | class Marmiton(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "marmiton.org" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/myrecipes.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class MyRecipes(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "myrecipes.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/zenbelly.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class ZenBelly(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "zenbelly.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def 
ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/averiecooks.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class AverieCooks(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "averiecooks.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/cybercook.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Cybercook(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "cybercook.com.br" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/hassenchef.py: 
-------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Hassanchef(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "hassanchef.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/justataste.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class JustATaste(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "justataste.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/livelytable.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class LivelyTable(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "livelytable.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def 
yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/nytimes.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class NYTimes(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "cooking.nytimes.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/ohsheglows.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class OhSheGlows(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "ohsheglows.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | 
-------------------------------------------------------------------------------- /recipe_scrapers/purelypope.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class PurelyPope(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "purelypope.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/skinnytaste.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class SkinnyTaste(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "skinnytaste.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/bakingsense.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class BakingSense(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "baking-sense.com" 8 | 9 | def title(self): 10 | 
return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/castironketo.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class CastIronKeto(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "castironketo.net" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/hostthetoast.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Hostthetoast(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "hostthetoast.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def 
ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/bakingmischeif.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class BakingMischeif(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "bakingmischief.com" 8 | 9 | def author(self): 10 | return self.schema.author() 11 | 12 | def title(self): 13 | return self.schema.title() 14 | 15 | def total_time(self): 16 | return self.schema.total_time() 17 | 18 | def yields(self): 19 | return self.schema.yields() 20 | 21 | def image(self): 22 | return self.schema.image() 23 | 24 | def ingredients(self): 25 | return self.schema.ingredients() 26 | 27 | def instructions(self): 28 | return self.schema.instructions() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/domesticateme.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class DomesticateMe(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "domesticate-me.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/gimmesomeoven.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class GimmeSomeOven(AbstractScraper): 5 | @classmethod 6 | 
def host(cls): 7 | return "gimmesomeoven.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/littlespicejar.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class LittleSpiceJar(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "littlespicejar.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/lovingitvegan.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Lovingitvegan(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "lovingitvegan.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 
| 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/recipietineats.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class RecipieTinEats(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "recipetineats.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/simplywhisked.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class SimplyWhisked(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "simplywhisked.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/sweetcsdesigns.py: -------------------------------------------------------------------------------- 1 | from ._abstract import
AbstractScraper 2 | 3 | 4 | class SweetCsDesigns(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "sweetcsdesigns.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/thewoksoflife.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Thewoksoflife(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "thewoksoflife.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/vanillaandbean.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class VanillaAndBean(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "vanillaandbean.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return 
self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/watchwhatueat.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class WatchWhatUEat(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "watchwhatueat.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/ambitiouskitchen.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class AmbitiousKitchen(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "ambitiouskitchen.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/bowlofdelicious.py: 
-------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class BowlOfDelicious(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "bowlofdelicious.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/fifteenspatulas.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class FifteenSpatulas(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "fifteenspatulas.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/halfbakedharvest.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class HalfBakedHarvest(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "halfbakedharvest.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return 
self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/lecremedelacrumb.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class LeCremeDeLaCrumb(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "lecremedelacrumb.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/melskitchencafe.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class MelsKitchenCafe(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "melskitchencafe.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 
| -------------------------------------------------------------------------------- /recipe_scrapers/minimalistbaker.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Minimalistbaker(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "minimalistbaker.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/primaledgehealth.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class PrimalEdgeHealth(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "primaledgehealth.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/spendwithpennies.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class SpendWithPennies(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return 
"spendwithpennies.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/theclevercarrot.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class TheCleverCarrot(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "theclevercarrot.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/thekitchenmagpie.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class TheKitchenMagPie(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "thekitchenmagpie.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def 
instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/atelierdeschefs.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class AtelierDesChefs(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "atelierdeschefs.fr" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | yields = self.soup.find("option", {"class": "yield"}) 17 | return f"{yields.get('value')} Servings" 18 | 19 | def ingredients(self): 20 | return self.schema.ingredients() 21 | 22 | def instructions(self): 23 | return self.schema.instructions() 24 | 25 | def ratings(self): 26 | return self.schema.ratings() 27 | -------------------------------------------------------------------------------- /recipe_scrapers/eatsmarter.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class Eatsmarter(AbstractScraper): 5 | @classmethod 6 | def host(self, domain="com"): 7 | return f"eatsmarter.{domain}" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: publish 
2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | publish: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - name: Set up Python 13 | uses: actions/setup-python@v2 14 | with: 15 | python-version: "3.x" 16 | - name: Build the package 17 | run: | 18 | python -m pip install --user --upgrade setuptools wheel 19 | python setup.py sdist bdist_wheel 20 | - name: Publish a Python distribution to PyPI 21 | uses: pypa/gh-action-pypi-publish@master 22 | with: 23 | user: __token__ 24 | password: ${{ secrets.PYPI_API_TOKEN }} 25 | verbose: true 26 | -------------------------------------------------------------------------------- /recipe_scrapers/wholefoods.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class WholeFoods(AbstractScraper): 5 | @classmethod 6 | def host(self, domain="com"): 7 | return f"wholefoodsmarket.{domain}" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def image(self): 19 | return self.schema.image() 20 | 21 | def ingredients(self): 22 | return self.schema.ingredients() 23 | 24 | def instructions(self): 25 | return self.schema.instructions() 26 | 27 | def ratings(self): 28 | return self.schema.ratings() 29 | -------------------------------------------------------------------------------- /recipe_scrapers/nourishedbynutrition.py: -------------------------------------------------------------------------------- 1 | from ._abstract import AbstractScraper 2 | 3 | 4 | class NourishedByNutrition(AbstractScraper): 5 | @classmethod 6 | def host(cls): 7 | return "nourishedbynutrition.com" 8 | 9 | def title(self): 10 | return self.schema.title() 11 | 12 | def total_time(self): 13 | return self.schema.total_time() 14 | 15 | def yields(self): 16 | return self.schema.yields() 17 | 18 | def 
class Yemek(AbstractScraper):
    """Scraper for yemek.com; each accessor forwards to ``self.schema``."""

    @classmethod
    def host(cls):
        """Return the site's host name."""
        hostname = "yemek.com"
        return hostname

    def title(self):
        """Forward to ``self.schema.title()``."""
        schema = self.schema
        return schema.title()

    def author(self):
        """Forward to ``self.schema.author()``."""
        schema = self.schema
        return schema.author()

    def image(self):
        """Forward to ``self.schema.image()``."""
        schema = self.schema
        return schema.image()

    def total_time(self):
        """Forward to ``self.schema.total_time()``."""
        schema = self.schema
        return schema.total_time()

    def yields(self):
        """Forward to ``self.schema.yields()``."""
        schema = self.schema
        return schema.yields()

    def ingredients(self):
        """Forward to ``self.schema.ingredients()``."""
        schema = self.schema
        return schema.ingredients()

    def instructions(self):
        """Forward to ``self.schema.instructions()``."""
        schema = self.schema
        return schema.instructions()

    def ratings(self):
        """Forward to ``self.schema.ratings()``."""
        schema = self.schema
        return schema.ratings()
class PurpleCarrot(AbstractScraper):
    """Scraper for purplecarrot.com, backed by ``self.schema``."""

    @classmethod
    def host(cls):
        """Return the site's host name."""
        hostname = "purplecarrot.com"
        return hostname

    def instructions(self):
        """Return the schema instructions after passing them through ``normalize_string``."""
        raw_instructions = self.schema.instructions()
        return normalize_string(raw_instructions)

    def ingredients(self):
        """Return ``self.schema.ingredients()``."""
        schema = self.schema
        return schema.ingredients()

    def nutrients(self):
        """Return ``self.schema.nutrients()``."""
        schema = self.schema
        return schema.nutrients()

    def title(self):
        """Return ``self.schema.title()``."""
        schema = self.schema
        return schema.title()

    def image(self):
        """Return ``self.schema.image()``."""
        schema = self.schema
        return schema.image()

    def total_time(self):
        """Return ``self.schema.total_time()``."""
        schema = self.schema
        return schema.total_time()

    def yields(self):
        """Return ``self.schema.yields()``."""
        schema = self.schema
        return schema.yields()
class BBCGoodFood(AbstractScraper):
    """Scraper for bbcgoodfood.com, backed by ``self.schema``."""

    @classmethod
    def host(cls):
        """Return the site's host name."""
        hostname = "bbcgoodfood.com"
        return hostname

    def instructions(self):
        """Return the schema instructions with any HTML markup stripped.

        The schema value is stringified, parsed with ``html.parser`` and
        reduced to its text content.
        """
        raw_markup = str(self.schema.instructions())
        return BeautifulSoup(raw_markup, features="html.parser").text

    def ingredients(self):
        """Return ``self.schema.ingredients()``."""
        schema = self.schema
        return schema.ingredients()

    def title(self):
        """Return ``self.schema.title()``."""
        schema = self.schema
        return schema.title()

    def image(self):
        """Return ``self.schema.image()``."""
        schema = self.schema
        return schema.image()

    def total_time(self):
        """Return ``self.schema.total_time()``."""
        schema = self.schema
        return schema.total_time()

    def yields(self):
        """Return ``self.schema.yields()``."""
        schema = self.schema
        return schema.yields()
class EatingBirdFood(AbstractScraper):
    """Scraper for eatingbirdfood.com; all values are read from ``self.schema``."""

    @classmethod
    def host(cls):
        """Return the site's host name."""
        site = "eatingbirdfood.com"
        return site

    def ratings(self):
        """Return the schema's ratings value."""
        source = self.schema
        return source.ratings()

    def instructions(self):
        """Return the schema's instructions value."""
        source = self.schema
        return source.instructions()

    def ingredients(self):
        """Return the schema's ingredients value."""
        source = self.schema
        return source.ingredients()

    def image(self):
        """Return the schema's image value."""
        source = self.schema
        return source.image()

    def yields(self):
        """Return the schema's yields value."""
        source = self.schema
        return source.yields()

    def total_time(self):
        """Return the schema's total_time value."""
        source = self.schema
        return source.total_time()

    def title(self):
        """Return the schema's title value."""
        source = self.schema
        return source.title()

    def author(self):
        """Return the schema's author value."""
        source = self.schema
        return source.author()
class JimCooksFoodGood(AbstractScraper):
    """Scraper for jimcooksfoodgood.com; delegates every field to ``self.schema``."""

    @classmethod
    def host(cls):
        """Return the site's host name."""
        site = "jimcooksfoodgood.com"
        return site

    def author(self):
        """Delegate to ``self.schema.author()``."""
        result = self.schema.author()
        return result

    def title(self):
        """Delegate to ``self.schema.title()``."""
        result = self.schema.title()
        return result

    def image(self):
        """Delegate to ``self.schema.image()``."""
        result = self.schema.image()
        return result

    def total_time(self):
        """Delegate to ``self.schema.total_time()``."""
        result = self.schema.total_time()
        return result

    def yields(self):
        """Delegate to ``self.schema.yields()``."""
        result = self.schema.yields()
        return result

    def ingredients(self):
        """Delegate to ``self.schema.ingredients()``."""
        result = self.schema.ingredients()
        return result

    def instructions(self):
        """Delegate to ``self.schema.instructions()``."""
        result = self.schema.instructions()
        return result

    def ratings(self):
        """Delegate to ``self.schema.ratings()``."""
        result = self.schema.ratings()
        return result
class HeadbangersKitchen(AbstractScraper):
    """Scraper for headbangerskitchen.com; each accessor wraps the matching ``self.schema`` call."""

    @classmethod
    def host(cls):
        """Return the site's host name."""
        hostname = "headbangerskitchen.com"
        return hostname

    def title(self):
        """Return ``self.schema.title()``."""
        recipe_schema = self.schema
        return recipe_schema.title()

    def author(self):
        """Return ``self.schema.author()``."""
        recipe_schema = self.schema
        return recipe_schema.author()

    def ratings(self):
        """Return ``self.schema.ratings()``."""
        recipe_schema = self.schema
        return recipe_schema.ratings()

    def image(self):
        """Return ``self.schema.image()``."""
        recipe_schema = self.schema
        return recipe_schema.image()

    def yields(self):
        """Return ``self.schema.yields()``."""
        recipe_schema = self.schema
        return recipe_schema.yields()

    def total_time(self):
        """Return ``self.schema.total_time()``."""
        recipe_schema = self.schema
        return recipe_schema.total_time()

    def ingredients(self):
        """Return ``self.schema.ingredients()``."""
        recipe_schema = self.schema
        return recipe_schema.ingredients()

    def instructions(self):
        """Return ``self.schema.instructions()``."""
        recipe_schema = self.schema
        return recipe_schema.instructions()
class IndianHealthyRecipes(AbstractScraper):
    """Scraper for indianhealthyrecipes.com; all fields are taken from ``self.schema``."""

    @classmethod
    def host(cls):
        """Return the site's host name."""
        site = "indianhealthyrecipes.com"
        return site

    def ingredients(self):
        """Return the ingredients reported by ``self.schema``."""
        data = self.schema
        return data.ingredients()

    def instructions(self):
        """Return the instructions reported by ``self.schema``."""
        data = self.schema
        return data.instructions()

    def title(self):
        """Return the title reported by ``self.schema``."""
        data = self.schema
        return data.title()

    def author(self):
        """Return the author reported by ``self.schema``."""
        data = self.schema
        return data.author()

    def total_time(self):
        """Return the total time reported by ``self.schema``."""
        data = self.schema
        return data.total_time()

    def yields(self):
        """Return the yields reported by ``self.schema``."""
        data = self.schema
        return data.yields()

    def image(self):
        """Return the image reported by ``self.schema``."""
        data = self.schema
        return data.image()

    def ratings(self):
        """Return the ratings reported by ``self.schema``."""
        data = self.schema
        return data.ratings()
class Dr(AbstractScraper):
    """Scraper for dr.dk; most fields come from ``self.schema``."""

    @classmethod
    def host(cls):
        """Return the site's host name."""
        return "dr.dk"

    def title(self):
        """Return ``self.schema.title()``."""
        return self.schema.title()

    def total_time(self):
        """Return ``self.schema.total_time()``."""
        return self.schema.total_time()

    def yields(self):
        """Return ``self.schema.yields()``."""
        return self.schema.yields()

    def image(self):
        """Return ``self.schema.image()``."""
        return self.schema.image()

    def language(self):
        """Return the ``content`` of the page's ``<meta name="language">`` tag.

        The ``name`` attribute is matched case-insensitively and only tags
        that carry a ``content`` attribute are considered.

        Returns:
            The tag's ``content`` value, or ``None`` when the page has no
            such tag (previously this raised ``AttributeError``).
        """
        meta_language = self.soup.find(
            "meta",
            attrs={"name": lambda x: x and x.lower() == "language", "content": True},
        )
        # ``find`` yields None when nothing matches; don't dereference it.
        if meta_language is None:
            return None

        return meta_language.get("content")

    def ingredients(self):
        """Return ``self.schema.ingredients()``."""
        return self.schema.ingredients()

    def instructions(self):
        """Return ``self.schema.instructions()``."""
        return self.schema.instructions()
class HelloFresh(AbstractScraper):
    """Scraper for hellofresh.<domain>; every field comes from ``self.schema``."""

    @classmethod
    def host(cls, domain="com"):
        """Return the host name for the given top-level ``domain`` (default ``"com"``).

        The first parameter is named ``cls`` because this is a classmethod,
        matching the other scrapers in the package.
        """
        return f"hellofresh.{domain}"

    def title(self):
        """Return ``self.schema.title()``."""
        return self.schema.title()

    def total_time(self):
        """Return ``self.schema.total_time()``."""
        return self.schema.total_time()

    def yields(self):
        """Return ``self.schema.yields()``."""
        return self.schema.yields()

    def ingredients(self):
        """Return ``self.schema.ingredients()``."""
        return self.schema.ingredients()

    def instructions(self):
        """Return ``self.schema.instructions()``."""
        return self.schema.instructions()

    def image(self):
        """Return ``self.schema.image()``."""
        return self.schema.image()

    def nutrients(self):
        """Return ``self.schema.nutrients()``."""
        return self.schema.nutrients()

    def cuisine(self):
        """Return ``self.schema.cuisine()``."""
        return self.schema.cuisine()

    def category(self):
        """Return ``self.schema.category()``."""
        return self.schema.category()
class Innit(AbstractScraper):
    """Scraper for innit.<domain>; every field comes from ``self.schema``."""

    @classmethod
    def host(cls, domain="com"):
        """Return the host name for the given top-level ``domain`` (default ``"com"``).

        The first parameter is named ``cls`` because this is a classmethod,
        matching the other scrapers in the package.
        """
        return f"innit.{domain}"

    def title(self):
        """Return ``self.schema.title()``."""
        return self.schema.title()

    def total_time(self):
        """Return ``self.schema.total_time()``."""
        return self.schema.total_time()

    def yields(self):
        """Return ``self.schema.yields()``."""
        return self.schema.yields()

    def image(self):
        """Return ``self.schema.image()``."""
        return self.schema.image()

    def ingredients(self):
        """Return ``self.schema.ingredients()``."""
        return self.schema.ingredients()

    def instructions(self):
        """Return ``self.schema.instructions()``."""
        return self.schema.instructions()

    def ratings(self):
        """Return ``self.schema.ratings()``."""
        return self.schema.ratings()

    def nutrients(self):
        """Return ``self.schema.nutrients()``."""
        return self.schema.nutrients()
class SteamyKitchen(AbstractScraper):
    """Scraper for steamykitchen.com; schema-backed except for the image and ratings."""

    @classmethod
    def host(cls):
        """Return the site's host name."""
        return "steamykitchen.com"

    def author(self):
        """Return ``self.schema.author()``."""
        return self.schema.author()

    def title(self):
        """Return ``self.schema.title()``."""
        return self.schema.title()

    def total_time(self):
        """Return ``self.schema.total_time()``."""
        return self.schema.total_time()

    def yields(self):
        """Return ``self.schema.yields()``."""
        return self.schema.yields()

    def image(self):
        """Return the ``src`` of the first ``<img>`` on the page.

        Returns:
            The ``src`` attribute value, or ``None`` when the page has no
            ``<img>`` or the first one lacks ``src`` (previously these cases
            raised ``TypeError`` / ``KeyError``).
        """
        # Recipe section and schema have no image so stealing from the page
        first_img = self.soup.find("img")
        if first_img is None:
            return None
        return first_img.get("src")

    def ingredients(self):
        """Return ``self.schema.ingredients()``."""
        return self.schema.ingredients()

    def instructions(self):
        """Return ``self.schema.instructions()``."""
        return self.schema.instructions()

    def ratings(self):
        """Always return ``None``; no ratings are available for this site."""
        # Schema has no ratings and I can't see any near the recipe
        return None
class SeriousEats(AbstractScraper):
    """Scraper for seriouseats.com; schema-backed except for yields."""

    @classmethod
    def host(cls):
        """Return the site's host name."""
        return "seriouseats.com"

    def author(self):
        """Return ``self.schema.author()``."""
        return self.schema.author()

    def title(self):
        """Return ``self.schema.title()``."""
        return self.schema.title()

    def total_time(self):
        """Return ``self.schema.total_time()``."""
        return self.schema.total_time()

    def yields(self):
        """Return the yield parsed from the page's serving or yield section.

        The ``recipe-serving`` div is preferred; ``recipe-yield`` is the
        fallback. The ``meta-text__data`` span inside the chosen div is
        handed to ``get_yields``.

        Returns:
            Whatever ``get_yields`` produces, or ``None`` when the page has
            neither div (previously this raised ``AttributeError``).
        """
        container = self.soup.find(
            "div", {"class": "recipe-serving"}
        ) or self.soup.find("div", {"class": "recipe-yield"})
        if container is None:
            return None
        return get_yields(container.find("span", {"class": "meta-text__data"}))

    def ingredients(self):
        """Return ``self.schema.ingredients()``."""
        return self.schema.ingredients()

    def instructions(self):
        """Return ``self.schema.instructions()``."""
        return self.schema.instructions()

    def ratings(self):
        """Return ``self.schema.ratings()``."""
        return self.schema.ratings()
class Food(AbstractScraper):
    """Scraper for food.com that reads every field from the page markup."""

    @classmethod
    def host(cls):
        """Return the site's host name."""
        hostname = "food.com"
        return hostname

    def title(self):
        """Return the text of the first ``<h1>``."""
        heading = self.soup.find("h1")
        return heading.get_text()

    def total_time(self):
        """Return ``get_minutes`` applied to the ``recipe-facts__time`` div."""
        time_div = self.soup.find("div", {"class": "recipe-facts__time"})
        return get_minutes(time_div)

    def yields(self):
        """Return ``get_yields`` applied to the text of the ``recipe-facts__servings`` div."""
        servings_div = self.soup.find("div", {"class": "recipe-facts__servings"})
        servings_text = servings_div.get_text()
        return get_yields(servings_text)

    def ingredients(self):
        """Return the normalized text of each ``recipe-ingredients__item`` list item."""
        cleaned = []
        for item in self.soup.findAll("li", {"class": "recipe-ingredients__item"}):
            cleaned.append(normalize_string(item.get_text()))
        return cleaned

    def instructions(self):
        """Return the text of each ``recipe-directions__step`` item, one per line."""
        steps = self.soup.findAll("li", {"class": "recipe-directions__step"})
        step_texts = [step.get_text() for step in steps]
        return "\n".join(step_texts)
class TudoGostoso(AbstractScraper):
    """Scraper for tudogostoso.com.br that reads every field from the page markup."""

    @classmethod
    def host(cls):
        """Return the site's host name."""
        hostname = "tudogostoso.com.br"
        return hostname

    def title(self):
        """Return the normalized text of the first ``<h1>``."""
        heading_text = self.soup.find("h1").get_text()
        return normalize_string(heading_text)

    def total_time(self):
        """Return ``get_minutes`` applied to the ``dt-duration`` ``<time>`` element."""
        duration = self.soup.find("time", {"class": "dt-duration"})
        return get_minutes(duration)

    def ingredients(self):
        """Return the normalized text of each ``p-ingredient`` span."""
        cleaned = []
        for span in self.soup.findAll("span", {"class": "p-ingredient"}):
            cleaned.append(normalize_string(span.get_text()))
        return cleaned

    def instructions(self):
        """Return the normalized text of each instructions div, one per line."""
        blocks = self.soup.findAll("div", {"class": "instructions e-instructions"})
        lines = [normalize_string(block.get_text()) for block in blocks]
        return "\n".join(lines)
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  test:
    # Use the matrix `os` axis. This was hard-coded to ubuntu-latest, which
    # made the three `os` entries run identical Linux jobs.
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        python-version: ["3.7", "3.8", "3.9", "3.10"]
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements-dev.txt
      - name: Unittest and Coverage Report
        run: |
          python run_tests.py
      - name: coveralls.io
        # Upload coverage from Linux runners only.
        # NOTE(review): this action is presumably container-based, which
        # GitHub only supports on Linux runners - confirm.
        if: runner.os == 'Linux'
        uses: AndreMiras/coveralls-python-action@develop
        with:
          # coveralls repo token
          # NOTE(review): this credential is committed in plaintext; consider
          # moving it to a repository secret and rotating it.
          github-token: "SmlfzlVJy4ow55rduU7IU5GmmFCfAdGeq"
class MarthaStewart(AbstractScraper):
    """Scraper for marthastewart.com; time and yields are read from the page markup."""

    @classmethod
    def host(cls):
        """Return the site's host name."""
        hostname = "marthastewart.com"
        return hostname

    def _meta_item_text(self, position):
        """Return the stripped body text of the ``position``-th two-subcol wrapper."""
        wrappers = self.soup.findAll("div", {"class": "two-subcol-content-wrapper"})
        body = wrappers[position].find("div", {"class": "recipe-meta-item-body"})
        return body.text.strip()

    def title(self):
        """Return ``self.schema.title()``."""
        schema = self.schema
        return schema.title()

    def total_time(self):
        """Return ``get_minutes`` applied to the first wrapper's body text."""
        time_text = self._meta_item_text(0)
        return get_minutes(time_text)

    def yields(self):
        """Return the second wrapper's body text, stripped and otherwise unparsed."""
        return self._meta_item_text(1)

    def ingredients(self):
        """Return ``self.schema.ingredients()``."""
        schema = self.schema
        return schema.ingredients()

    def instructions(self):
        """Return ``self.schema.instructions()``."""
        schema = self.schema
        return schema.instructions()

    def ratings(self):
        """Return ``self.schema.ratings()``."""
        schema = self.schema
        return schema.ratings()
class CopyKat(AbstractScraper):
    """Scraper for copykat.com; schema-backed except for the description."""

    @classmethod
    def host(cls):
        """Return the site's host name."""
        return "copykat.com"

    def title(self):
        """Return ``self.schema.title()``."""
        return self.schema.title()

    def total_time(self):
        """Return ``self.schema.total_time()``."""
        return self.schema.total_time()

    def yields(self):
        """Return ``self.schema.yields()``."""
        return self.schema.yields()

    def image(self):
        """Return ``self.schema.image()``."""
        return self.schema.image()

    def ingredients(self):
        """Return ``self.schema.ingredients()``."""
        return self.schema.ingredients()

    def instructions(self):
        """Return ``self.schema.instructions()``."""
        return self.schema.instructions()

    def ratings(self):
        """Return ``self.schema.ratings()``."""
        return self.schema.ratings()

    def description(self):
        """Return the normalized text of the first ``display: block;`` span.

        Returns:
            The normalized text, or ``None`` when it is empty or when the
            page has no such span (previously a missing span raised
            ``AttributeError``).
        """
        block = self.soup.find("span", {"style": "display: block;"})
        if block is None:
            return None

        d = normalize_string(block.text)

        return d if d else None
class Bcp47ValidatePlugin(PluginInterface):
    """
    Validates that the value returned by .language() is a valid BCP 47
    language tag; invalid values are replaced with None.

    If you wish to use this plugin make sure you
    pip install language-tags>=1.0.0

    For more info read https://github.com/OnroerendErfgoed/language-tags and the corresponding links there

    - https://tools.ietf.org/html/bcp47
    - https://tools.ietf.org/html/rfc5646
    - https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
    """

    run_on_hosts = ("*",)
    run_on_methods = ("language",)

    @classmethod
    def run(cls, decorated):
        """Wrap ``decorated`` so its result is checked with ``language_tags``."""

        @functools.wraps(decorated)
        def decorated_method_wrapper(self, *args, **kwargs):
            raw_language = decorated(self, *args, **kwargs)
            parsed = tags.tag(raw_language)
            if not parsed.valid:
                return None
            # Hand back the tag's string form rather than the raw value.
            return str(parsed)

        return decorated_method_wrapper
class RachlMansfield(AbstractScraper):
    """Scraper for rachlmansfield.com, driven by the page's schema data."""

    @classmethod
    def host(cls):
        """Return the domain handled by this scraper."""
        return "rachlmansfield.com"

    def author(self):
        return self.schema.author()

    def title(self):
        return self.schema.title()

    def total_time(self):
        """Return prep time plus cook time, in minutes.

        The schema's own total time is not reported correctly for this site,
        so the two component durations are added up instead. A duration that
        parses to a falsy value counts as 0.
        """
        schema_data = self.schema.data
        minutes = (
            get_minutes(schema_data.get(field)) or 0
            for field in ("prepTime", "cookTime")
        )
        return sum(minutes)

    def yields(self):
        return self.schema.yields()

    def image(self):
        return self.schema.image()

    def ingredients(self):
        return self.schema.ingredients()

    def instructions(self):
        return self.schema.instructions()

    def ratings(self):
        return self.schema.ratings()
class HeinzBrasil(AbstractScraper):
    """Scraper for heinzbrasil.com.br that reads every field from the HTML."""

    @classmethod
    def host(cls):
        """Return the domain handled by this scraper."""
        return "heinzbrasil.com.br"

    def title(self):
        heading = self.soup.find("h1", {"class": "krRDPrecName"})
        return heading.get_text()

    def total_time(self):
        # This scraper always reports 0 for the total time.
        return 0

    def image(self):
        banner = self.soup.find("img", {"class": "krBanImg"})
        return banner["src"]

    def ingredients(self):
        """Return one "<qty> <ingredientname>" string per ingredient element."""
        lines = []
        for node in self.soup.findAll("div", {"class": "krRDPIngreListText"}):
            # Quantity and name are stored as attributes on the element.
            combined = "{} {}".format(node["qty"], node["ingredientname"])
            lines.append(normalize_string(combined))
        return lines

    def instructions(self):
        """Return the normalized text of the element holding the method."""
        section = self.soup.find("div", {"class": "krRecipeMakeItText"})
        marker = section.findNext("div", {"class": "class"})
        # The text lives in the node immediately after the marker <div>.
        body = marker.nextSibling
        return normalize_string(body.get_text())
class Przepisy(AbstractScraper):
    """Scraper for przepisy.pl that reads every field from the HTML."""

    @classmethod
    def host(cls):
        """Return the domain handled by this scraper."""
        return "przepisy.pl"

    def title(self):
        heading = self.soup.find("h1", {"class": "title"})
        return heading.get_text()

    def total_time(self):
        time_box = self.soup.find("div", {"class": "time-count"})
        return get_minutes(time_box)

    def yields(self):
        servings_box = self.soup.find("div", {"class": "person-count"})
        return get_yields(servings_box)

    def ingredients(self):
        """Return the ingredient lines.

        The matching ``<span>`` elements come in consecutive pairs; each pair
        is merged into a single space-separated string. An unpaired trailing
        element is dropped by ``zip``.
        """
        spans = self.soup.findAll("span", {"class": "text-bg-white"})
        merged = []
        for first, second in zip(spans[0::2], spans[1::2]):
            left = normalize_string(first.get_text())
            right = normalize_string(second.get_text())
            merged.append(left + " " + right)
        return merged

    def instructions(self):
        """Return the step descriptions joined with newlines."""
        steps = self.soup.findAll("p", {"class": "step-info-description"})
        return "\n".join(normalize_string(step.get_text()) for step in steps)
class NormalizeStringPlugin(PluginInterface):
    """
    Explicitly run the output from the methods listed through normalize_string
    """

    # Host filter, declared under the same attribute name the other plugins
    # (Bcp47ValidatePlugin, TemplatePlugin) use. It was previously declared
    # only as `decorate_hosts`.
    run_on_hosts = ("*",)
    # Kept as an alias so anything still reading the old name keeps working.
    decorate_hosts = run_on_hosts
    run_on_methods = ("title",)

    @classmethod
    def run(cls, decorated):
        """Wrap ``decorated`` so that its return value is normalized.

        :param decorated: the scraper method being decorated.
        :return: a wrapper with the same signature that logs the call at
            debug level and returns ``normalize_string(decorated(...))``.
        """

        @functools.wraps(decorated)
        def decorated_method_wrapper(self, *args, **kwargs):
            # TODO: write logging. Configure logging.
            # The level is re-read on every call so a settings change is
            # picked up.
            logger.setLevel(settings.LOG_LEVEL)
            class_name = self.__class__.__name__
            method_name = decorated.__name__
            logger.debug(
                f"Decorating: {class_name}.{method_name}() with NormalizeStringPlugin"
            )

            return normalize_string(decorated(self, *args, **kwargs))

        return decorated_method_wrapper
class TheKitchn(AbstractScraper):
    """Scraper for thekitchn.com that reads every field from the HTML."""

    @classmethod
    def host(cls):
        """Return the domain handled by this scraper."""
        return "thekitchn.com"

    def title(self):
        heading = self.soup.find("h2", {"class": "Recipe__title"})
        return heading.get_text()

    def total_time(self):
        """Return the sum, in minutes, of every time entry on the page."""
        entries = self.soup.findAll("p", {"class": "Recipe__timeEntry"})
        return sum(map(get_minutes, entries))

    def yields(self):
        yield_node = self.soup.find("p", {"class": "jsx-1778438071 Recipe__yield"})
        return get_yields(yield_node)

    def ingredients(self):
        """Return the normalized text of each ingredient list item."""
        items = self.soup.findAll("li", {"class": "Recipe__ingredient"})
        return [normalize_string(item.get_text()) for item in items]

    def instructions(self):
        """Return the instruction steps joined with newlines."""
        steps = self.soup.findAll("li", {"class": "Recipe__instructionStep"})
        return "\n".join(normalize_string(step.get_text()) for step in steps)
class ThePioneerWoman(AbstractScraper):
    """Scraper for thepioneerwoman.com that reads every field from the HTML."""

    @classmethod
    def host(cls):
        """Return the domain handled by this scraper."""
        return "thepioneerwoman.com"

    def title(self):
        heading = self.soup.find("h3", {"class": "recipe-title"})
        return heading.get_text()

    def total_time(self):
        """Return the sum, in minutes, of every ``<dd>`` in the time summary."""
        summary = self.soup.find("div", {"class": "recipe-summary-time"})
        return sum(get_minutes(entry) for entry in summary.findAll("dd"))

    def yields(self):
        yield_node = self.soup.find("span", {"itemprop": "recipeYield"})
        return get_yields(yield_node)

    def ingredients(self):
        """Return the normalized text of each item in the ingredient list."""
        listing = self.soup.find("ul", {"class": "list-ingredients"})
        return [normalize_string(item.get_text()) for item in listing.findAll("li")]

    def instructions(self):
        """Return the text of the last panel body, one sentence per line.

        A newline is inserted after every period in the normalized text.
        """
        panels = self.soup.findAll("div", {"class": "panel-body"})
        text = normalize_string(panels[-1].get_text())
        return text.replace(".", ".\n")
class TasteOfHome(AbstractScraper):
    """Scraper for tasteofhome.com.

    Most fields are delegated to the page's schema data; the instructions
    are read from the HTML first and fall back to the schema.
    """

    @classmethod
    def host(cls):
        """Return the domain handled by this scraper."""
        return "tasteofhome.com"

    def title(self):
        return self.schema.title()

    def total_time(self):
        return self.schema.total_time()

    def yields(self):
        return self.schema.yields()

    def image(self):
        return self.schema.image()

    def ingredients(self):
        return self.schema.ingredients()

    def instructions(self):
        """Return the direction steps joined with newlines."""
        steps = self.soup.findAll("li", {"class": "recipe-directions__item"})
        if not steps:
            # HTML parsing found nothing: use whatever the schema provides.
            return self.schema.instructions()

        return "\n".join(normalize_string(step.get_text()) for step in steps)

    def ratings(self):
        return self.schema.ratings()
class Cucchiaio(AbstractScraper):
    """Scraper for cucchiaio.it.

    Author, title, ingredients and instructions come from the page's schema
    data; times, yields and image are read from the HTML.
    """

    @classmethod
    def host(cls):
        """Return the domain handled by this scraper."""
        return "cucchiaio.it"

    def author(self):
        return self.schema.author()

    def title(self):
        return self.schema.title()

    def total_time(self):
        """Return the summed minutes of every row in the recipe card, or 0.

        0 is returned when the recipe card block is not on the page.
        """
        block = self.soup.find("div", {"class": "scheda-ricetta-new"})
        if block:
            return sum(map(get_minutes, block.findAll("tr")))
        return 0

    def yields(self):
        """Return the yields from the cell after the "PORZIONI" cell, or None."""
        header = self.soup.find("td", text="PORZIONI")
        if header:
            value = header.find_next("td")
            return get_yields(value)
        return None

    def image(self):
        """Return the recipe image URL, or ``None`` when it cannot be found."""
        container = self.soup.find("div", {"class": "auto"})
        if container is None:
            # The inner lookup was already guarded against a missing <img>;
            # a missing container is now treated the same way instead of
            # raising AttributeError.
            return None

        image = container.find("img", {"class": "image"})
        if image is None:
            return None
        return image.get("src")

    def ingredients(self):
        return self.schema.ingredients()

    def instructions(self):
        return self.schema.instructions()

    def ratings(self):
        # This scraper does not extract ratings.
        return None
class KennyMcGovern(AbstractScraper):
    """Scraper for kennymcgovern.com that reads every field from the HTML."""

    @classmethod
    def host(cls):
        """Return the domain handled by this scraper."""
        return "kennymcgovern.com"

    def title(self):
        name_node = self.soup.find("div", {"class": "wprm-recipe-name"})
        return name_node.get_text()

    def total_time(self):
        """Return the total time in minutes, parsed from the time span's parent."""
        time_span = self.soup.find("span", {"class": "wprm-recipe-total_time"})
        return get_minutes(time_span.parent)

    def yields(self):
        """Return the yields, built from the servings text plus " servings"."""
        servings_node = self.soup.find("span", {"class": "wprm-recipe-servings"})
        servings = servings_node.get_text()
        return get_yields("{} servings".format(servings))

    def ingredients(self):
        """Return the normalized text of each ingredient list item."""
        items = self.soup.findAll("li", {"class": "wprm-recipe-ingredient"})
        return [normalize_string(item.get_text()) for item in items]

    def instructions(self):
        """Return the instruction texts joined with newlines."""
        steps = self.soup.findAll("div", {"class": "wprm-recipe-instruction-text"})
        return "\n".join(normalize_string(step.get_text()) for step in steps)
class SettingsModuleTest(unittest.TestCase):
    """Checks `settings` against the RECIPE_SCRAPERS_SETTINGS environment variable."""

    def test_default_settings(self):
        # Point the settings at the project's default module.
        os.environ["RECIPE_SCRAPERS_SETTINGS"] = "recipe_scrapers.settings.default"

        has_plugins = len(settings.PLUGINS) > 0
        self.assertTrue(
            has_plugins,
            "There should be some plugins in the default project's settings",
        )

        suppress = settings.SUPPRESS_EXCEPTIONS
        self.assertFalse(
            suppress,
            "SUPPRESS_EXCEPTIONS should be set to False in the project's default settings",
        )

    def test_settings_change_when_new_module_set(self):
        # Before switching modules the default value is expected.
        before = settings.SUPPRESS_EXCEPTIONS
        self.assertFalse(
            before,
            "SUPPRESS_EXCEPTIONS should be set to False in the project's default settings",
        )

        # Switch to the testing settings module; the same attribute must now
        # read the testing value.
        os.environ[
            "RECIPE_SCRAPERS_SETTINGS"
        ] = "tests.test_data.test_settings_module.test_settings"

        after = settings.SUPPRESS_EXCEPTIONS
        self.assertTrue(
            after,
            "SUPPRESS_EXCEPTIONS should be set to True after settings are changed with the testing ones",
        )
class JamieOliver(AbstractScraper):
    """Scraper for jamieoliver.com that reads every field from the HTML."""

    @classmethod
    def host(cls):
        """Return the domain handled by this scraper."""
        return "jamieoliver.com"

    def title(self):
        return self.soup.find("h1").get_text()

    def total_time(self):
        return get_minutes(self.soup.find("div", {"class": "time"}))

    def yields(self):
        return get_yields(self.soup.find("div", {"class": "recipe-detail serves"}))

    def image(self):
        """Return the header image URL, or ``None`` when it cannot be found."""
        container = self.soup.find("div", {"class": "recipe-header-left"})
        if not container:
            return None

        # Only consider <img> tags that actually carry a src attribute.
        image = container.find("img", {"src": True})
        return image["src"] if image else None

    def ingredients(self):
        """Return the normalized text of each item in the ingredient list."""
        # The attrs filter must be a dict. It used to be the set literal
        # {"class", "ingred-list"}, which Beautiful Soup treats as a CSS class
        # search over both strings, so a <ul class="class"> would have matched.
        ingredients = self.soup.find("ul", {"class": "ingred-list"}).findAll("li")
        return [normalize_string(ingredient.get_text()) for ingredient in ingredients]

    def instructions(self):
        """Return the recipe steps joined with newlines."""
        instructions = self.soup.find("ol", {"class": "recipeSteps"}).findAll("li")
        return "\n".join([normalize_string(inst.get_text()) for inst in instructions])
"reishunger.de" 9 | 10 | def author(self): 11 | return self.schema.author() 12 | 13 | def title(self): 14 | return self.schema.title() 15 | 16 | def total_time(self): 17 | return self.schema.total_time() 18 | 19 | def yields(self): 20 | return self.schema.yields() 21 | 22 | def image(self): 23 | return self.schema.image() 24 | 25 | def ingredients(self): 26 | return self.schema.ingredients() 27 | 28 | def instructions(self): 29 | result = self.soup.find("section", {"class": "recipe-preparation"}) 30 | if result: 31 | result = "\n".join( 32 | normalize_string(i.get_text()) for i in result.findAll("p") 33 | ) 34 | return result 35 | 36 | def ratings(self): 37 | block = self.soup.find("div", {"id": "recipe-header"}).find( 38 | "div", {"class": "nrating"} 39 | ) 40 | if block: 41 | cnt = len(block.findAll("span", {"class": "fa-star"})) 42 | return cnt 43 | return block 44 | -------------------------------------------------------------------------------- /recipe_scrapers/food52.py: -------------------------------------------------------------------------------- 1 | from recipe_scrapers._abstract import AbstractScraper 2 | from recipe_scrapers._utils import get_minutes, normalize_string 3 | 4 | 5 | class Food52(AbstractScraper): 6 | @classmethod 7 | def host(cls): 8 | return "food52.com" 9 | 10 | def title(self): 11 | return self.schema.title() 12 | 13 | def total_time(self): 14 | ul = self.soup.find("ul", {"class": "recipe__details"}) 15 | total = 0 16 | for li in ul.find_all("li"): 17 | if li.span.get_text().lower() in ["prep time", "cook time"]: 18 | total += get_minutes(list(li.children)[2].strip()) 19 | return total 20 | 21 | def yields(self): 22 | return self.schema.yields() 23 | 24 | def image(self): 25 | return self.schema.image() 26 | 27 | def ingredients(self): 28 | return self.schema.ingredients() 29 | 30 | def instructions(self): 31 | instructions = self.soup.findAll("li", {"class": "recipe__list-step"}) 32 | 33 | return "\n".join( 34 | [ 35 | 
class OneHundredOneCookBooks(AbstractScraper):
    """Scraper for 101cookbooks.com that works inside the ``div#recipe`` element."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Every HTML lookup below is made relative to the recipe container.
        recipe_container = self.soup.find("div", id="recipe")
        self.soup = recipe_container

    @classmethod
    def host(cls):
        """Return the domain handled by this scraper."""
        return "101cookbooks.com"

    def author(self):
        return self.schema.author()

    def title(self):
        heading = self.soup.find("h1")
        return heading.get_text()

    def total_time(self):
        return self.schema.total_time()

    def yields(self):
        """Return the number found before " servings", as a string, or ``None``.

        Only the last of the first three direct ``<p>`` children is searched.
        """
        paragraphs = self.soup.find_all("p", limit=3, recursive=False)
        found = re.search("([0-9]+) servings", paragraphs[-1].get_text())
        if not found:
            return None
        return found.group(1)

    def image(self):
        return self.schema.image()

    def ingredients(self):
        """Return the stripped strings of the first paragraph in the blockquote."""
        paragraph = self.soup.find("blockquote").p
        return list(paragraph.stripped_strings)

    def instructions(self):
        """Return the text of the second direct ``<p>`` child, one string per line."""
        paragraphs = self.soup.find_all("p", limit=2, recursive=False)
        return paragraphs[1].get_text("\n", strip=True)

    def ratings(self):
        # This scraper does not extract ratings.
        return None
total_time(self): 14 | return get_minutes(self.soup.findAll("div", {"class": "recipe-meta-item"})[1]) 15 | 16 | def yields(self): 17 | return get_yields( 18 | self.soup.findAll("div", {"class": "recipe-meta-item"})[2] 19 | .find("div", {"class": "recipe-meta-item-body"}) 20 | .get_text() 21 | ) 22 | 23 | def ingredients(self): 24 | ingredients = self.soup.find("div", {"class": "ingredients"}).findAll("li") 25 | 26 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 27 | 28 | def instructions(self): 29 | instructions = self.soup.findAll("div", {"class": "step"}) 30 | 31 | return "\n".join( 32 | [ 33 | normalize_string(instruction.find("p").get_text()) 34 | for instruction in instructions 35 | if instruction.find("p") is not None 36 | ] 37 | ) 38 | -------------------------------------------------------------------------------- /recipe_scrapers/plugins/template.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import logging 3 | 4 | from recipe_scrapers.plugins._interface import PluginInterface 5 | from recipe_scrapers.settings import settings 6 | 7 | logging.basicConfig() 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class TemplatePlugin(PluginInterface): 12 | """ 13 | Sample starting point to write your custom plugin. 14 | 15 | Check the available plugins implementations for more details. 16 | """ 17 | 18 | run_on_hosts = ("*",) 19 | run_on_methods = ( 20 | "title", 21 | # ... others 22 | ) 23 | 24 | @classmethod 25 | def run(cls, decorated): 26 | @functools.wraps(decorated) 27 | def decorated_method_wrapper(self, *args, **kwargs): 28 | # in here you'll have self.soup, self.schema and the other 29 | # instance attributes/methods you can work with. 
class WikiCookbook(AbstractScraper):
    """Scraper for Wikibooks Cookbook pages (en.wikibooks.org)."""

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "en.wikibooks.org"

    def title(self):
        """Return the first ``<h1>`` text with the "Cookbook:" prefix removed."""
        return self.soup.find("h1").get_text().replace("Cookbook:", "")

    def total_time(self):
        """Return ``get_minutes`` of the ``<td>`` following the "Time" header cell."""
        return get_minutes(self.soup.find("th", string="Time").find_next_sibling("td"))

    def yields(self):
        """Return ``get_yields`` of the ``<td>`` following the "Servings" header cell."""
        return get_yields(
            self.soup.find("th", string="Servings").find_next_sibling("td")
        )

    def image(self):
        """Return the ``src`` of the image inside the first ``<a class="image">``.

        Returns None when the page has no such link or the link holds no
        ``<img>`` with a ``src`` attribute.
        """
        link = self.soup.find("a", {"class": "image"})
        # Pages without an image have no wrapper link at all; without this
        # guard the chained .find() raised AttributeError instead of
        # yielding the None the final expression is written to return.
        if link is None:
            return None
        image = link.find("img", {"src": True})
        return image["src"] if image else None

    def ingredients(self):
        """Return the normalized items of the first ``<ul>`` after the "Ingredients" span."""
        ingredients = (
            self.soup.find("span", {"id": "Ingredients"}).find_next("ul").findAll("li")
        )

        return [normalize_string(ingredient.get_text()) for ingredient in ingredients]

    def instructions(self):
        """Return the items of the first ``<ol>`` after the "Procedure" span, newline-joined."""
        instructions = (
            self.soup.find("span", {"id": "Procedure"}).find_next("ol").findAll("li")
        )

        return "\n".join(
            [normalize_string(instruction.get_text()) for instruction in instructions]
        )
class TastyKitchen(AbstractScraper):
    """Scraper for recipe pages hosted on tastykitchen.com."""

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "tastykitchen.com"

    def title(self):
        """Return the text of the ``<h1 itemprop="name">`` element."""
        heading = self.soup.find("h1", {"itemprop": "name"})
        return heading.get_text()

    def total_time(self):
        """Return the sum of the prepTime and cookTime ``<time>`` elements in minutes."""
        prep_minutes = get_minutes(self.soup.find("time", {"itemprop": "prepTime"}))
        cook_minutes = get_minutes(self.soup.find("time", {"itemprop": "cookTime"}))
        return sum((prep_minutes, cook_minutes))

    def yields(self):
        """Return ``get_yields`` of the ``<span itemprop="yield">`` element."""
        yield_span = self.soup.find("span", {"itemprop": "yield"})
        return get_yields(yield_span)

    def image(self):
        """Return the ``src`` of the "the_recipe_image" ``<img>``, or None if absent."""
        picture = self.soup.find("img", {"class": "the_recipe_image", "src": True})
        if picture:
            return picture["src"]
        return None

    def ingredients(self):
        """Return the normalized text of each ``<li>`` in the "ingredients" list."""
        listing = self.soup.find("ul", {"class": "ingredients"})
        return [normalize_string(entry.get_text()) for entry in listing.findAll("li")]

    def instructions(self):
        """Return the paragraphs of the instructions span joined by newlines."""
        container = self.soup.find("span", {"itemprop": "instructions"})
        steps = []
        for paragraph in container.findAll("p"):
            steps.append(normalize_string(paragraph.get_text()))
        return "\n".join(steps)
class CookieAndKate(AbstractScraper):
    """Scraper for recipe pages hosted on cookieandkate.com."""

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "cookieandkate.com"

    def title(self):
        """Return the text of the ``<h1 class="entry-title">`` element."""
        heading = self.soup.find("h1", {"class": "entry-title"})
        return heading.get_text()

    def total_time(self):
        """Return ``get_minutes`` of the "tasty-recipes-total-time" span."""
        time_span = self.soup.find("span", {"class": "tasty-recipes-total-time"})
        return get_minutes(time_span)

    def yields(self):
        """Return ``get_yields`` of the yield span's text suffixed with " servings"."""
        yield_span = self.soup.find("span", {"class": "tasty-recipes-yield"})
        return get_yields("{} servings".format(yield_span.get_text()))

    def ingredients(self):
        """Return the normalized ``<li>`` texts of the "tasty-recipe-ingredients" div."""
        container = self.soup.find("div", {"class": "tasty-recipe-ingredients"})
        return [normalize_string(item.get_text()) for item in container.find_all("li")]

    def instructions(self):
        """Return the ``<li>`` texts of the "tasty-recipe-instructions" div, newline-joined."""
        container = self.soup.find("div", {"class": "tasty-recipe-instructions"})
        steps = []
        for item in container.find_all("li"):
            steps.append(normalize_string(item.get_text()))
        return "\n".join(steps)

    def ratings(self):
        """Return the text of the "average" span as a float rounded to two places."""
        average = self.soup.find("span", {"class": "average"}).get_text()
        return round(float(average), 2)
class MyBakingAddiction(AbstractScraper):
    """Scraper for recipe pages hosted on mybakingaddiction.com."""

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "mybakingaddiction.com"

    def title(self):
        """Return the text of the first ``<h1>``."""
        return self.soup.find("h1").get_text()

    def total_time(self):
        """Return ``get_minutes`` of the "mv-create-time-total" div text."""
        return get_minutes(
            self.soup.find("div", {"class": "mv-create-time-total"}).get_text()
        )

    def yields(self):
        """Return ``get_yields`` of the "mv-create-time-yield" div."""
        return get_yields(self.soup.find("div", {"class": "mv-create-time-yield"}))

    def ingredients(self):
        """Return the normalized ``<li>`` texts of the "mv-create-ingredients" div."""
        ingredients = self.soup.find("div", {"class": "mv-create-ingredients"}).findAll(
            "li"
        )

        return [normalize_string(ingredient.get_text()) for ingredient in ingredients]

    def instructions(self):
        """Return the ``<li>`` texts of the "mv-create-instructions" div, newline-joined."""
        instructions = self.soup.find(
            "div", {"class": "mv-create-instructions"}
        ).findAll("li")

        return "\n".join(
            [normalize_string(instruction.get_text()) for instruction in instructions]
        )

    def ratings(self):
        """Return the ``data-mv-create-rating`` value rounded to two places.

        Returns None when the "mv-create-reviews" div or its rating
        attribute is missing.
        """
        reviews = self.soup.find("div", {"class": "mv-create-reviews"})
        if reviews is None:
            return None

        rating = reviews.attrs.get("data-mv-create-rating")
        # The attribute lookup already defaulted to None; passing that on to
        # float() raised TypeError rather than reporting "no rating".
        if rating is None:
            return None

        return round(float(rating), 2)
class Saveur(AbstractScraper):
    """Scraper for recipe pages hosted on saveur.com."""

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "saveur.com"

    def author(self):
        """Return the author reported by the page's schema data."""
        return self.schema.author()

    def title(self):
        """Return the text of the first ``<h1>``."""
        heading = self.soup.find("h1")
        return heading.get_text()

    def total_time(self):
        """Return prep plus cook minutes read from the ``<meta>`` tags.

        A missing prepTime or cookTime tag contributes 0.
        """
        prep_time = self.soup.find("meta", {"property": "prepTime"})
        cook_time = self.soup.find("meta", {"property": "cookTime"})

        total = 0
        for meta in (prep_time, cook_time):
            if meta:
                total += get_minutes(meta.get("content"))
        return total

    def yields(self):
        """Return ``get_yields`` of the ``recipeYield`` span text."""
        yield_span = self.soup.find("span", {"property": "recipeYield"})
        return get_yields(yield_span.get_text())

    def ingredients(self):
        """Return the normalized text of every ``recipeIngredient`` list item."""
        collected = []
        for item in self.soup.findAll("li", {"property": "recipeIngredient"}):
            collected.append(normalize_string(item.get_text()))
        return collected

    def instructions(self):
        """Return every ``recipeInstructions`` list item, newline-joined."""
        steps = self.soup.findAll("li", {"property": "recipeInstructions"})
        return "\n".join(normalize_string(step.get_text()) for step in steps)
class TheVintageMixer(AbstractScraper):
    """Scraper for recipe pages hosted on thevintagemixer.com."""

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "thevintagemixer.com"

    def title(self):
        """Return the text of the ``<h2 class="wprm-recipe-name">`` element."""
        heading = self.soup.find("h2", {"class": "wprm-recipe-name"})
        return heading.get_text()

    def total_time(self):
        """Return ``get_minutes`` of the parent of the total-time minutes span."""
        minutes_span = self.soup.find(
            "span", {"class": "wprm-recipe-total_time-minutes"}
        )
        return get_minutes(minutes_span.parent)

    def image(self):
        """Return the ``src`` of the recipe image, or None when it cannot be found."""
        container = self.soup.find("div", {"class": "wprm-recipe-image"})
        if container:
            picture = container.find("img", {"src": True})
            if picture:
                return picture["src"]
        return None

    def ingredients(self):
        """Return the normalized ingredient texts, dropping entries that normalize to empty."""
        items = self.soup.findAll("li", {"class": "wprm-recipe-ingredient"})
        normalized = (normalize_string(item.get_text()) for item in items)
        return [text for text in normalized if len(text) > 0]

    def instructions(self):
        """Return every "wprm-recipe-instruction-text" div, newline-joined."""
        steps = []
        for block in self.soup.findAll(
            "div", {"class": "wprm-recipe-instruction-text"}
        ):
            steps.append(normalize_string(block.get_text()))
        return "\n".join(steps)
class SallysBlog(AbstractScraper):
    """Scraper for recipe pages hosted on sallys-blog.de."""

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "sallys-blog.de"

    def title(self):
        """Return the normalized text of the "blog--detail-headline" ``<h1>``."""
        return normalize_string(
            self.soup.find("h1", {"class": "blog--detail-headline"}).get_text()
        )

    def total_time(self):
        """Return ``get_minutes`` of the ``<span id="zubereitungszeit">`` element."""
        return get_minutes(self.soup.find("span", {"id": "zubereitungszeit"}))

    def yields(self):
        """Return "<amount> <unit>" built from the amount input and the unit span."""
        amount = self.soup.find("input", {"class": "float-left"}).get("value")
        unit = self.soup.find("span", {"id": "is_singular"}).get_text()

        return f"{amount} {unit}"

    def ingredients(self):
        """Return the normalized text of every ``<li class="quantity">``."""
        ingredients = self.soup.findAll("li", {"class": "quantity"})

        return [normalize_string(i.get_text()) for i in ingredients]

    def instructions(self):
        """Return the first paragraph of each content block, newline-joined.

        Content blocks that hold no ``<p>`` are skipped.
        """
        instruction_block = self.soup.find(
            "div", {"class": "blog--detail-description block"}
        )
        content_blocks = instruction_block.findAll(
            "div", {"class": ["content_type_2", "content_type_3", "content_type_4"]}
        )

        # A matched content block is not guaranteed to contain a <p>;
        # dereferencing find("p") unconditionally raised AttributeError on
        # the first such block and lost every other step with it.
        paragraphs = (block.find("p") for block in content_blocks)
        return "\n".join(
            normalize_string(paragraph.get_text())
            for paragraph in paragraphs
            if paragraph is not None
        )
class ComidinhasDoChef(AbstractScraper):
    """Scraper for recipe pages hosted on comidinhasdochef.com."""

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "comidinhasdochef.com"

    def author(self):
        """Return the stripped text of the ``<span class="theauthor">`` element."""
        author_span = self.soup.find("span", {"class": "theauthor"})
        return author_span.get_text(strip=True)

    def title(self):
        """Return the text of the ``<h1 class="title">`` element."""
        heading = self.soup.find("h1", {"class": "title"})
        return heading.get_text()

    def total_time(self):
        """Return the total time reported by the page's schema data."""
        return self.schema.total_time()

    def yields(self):
        """Return the text of the ``recipeYield`` span, or None when it is absent."""
        yield_span = self.soup.find("span", {"itemprop": "recipeYield"})
        if yield_span:
            return yield_span.get_text()
        return None

    def image(self):
        """Return the image reported by the page's schema data."""
        return self.schema.image()

    def ingredients(self):
        """Return the normalized text of every ``recipeIngredient`` list item."""
        items = self.soup.find_all("li", {"itemprop": "recipeIngredient"})
        collected = []
        for item in items:
            collected.append(normalize_string(item.get_text()))
        return collected

    def instructions(self):
        """Return every ``recipeInstructions`` list item, newline-joined."""
        steps = self.soup.find_all("li", {"itemprop": "recipeInstructions"})
        return "\n".join(
            normalize_string(step.get_text(strip=True)) for step in steps
        )

    def ratings(self):
        """Return the ``ratingValue`` span text as a float rounded to two places."""
        rating_span = self.soup.find("span", {"itemprop": "ratingValue"})
        return round(float(rating_span.get_text()), 2)
class MyKitchen101(AbstractScraper):
    """Scraper for recipe pages hosted on mykitchen101.com."""

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "mykitchen101.com"

    def author(self):
        """Return the text of the ``<a rel="author">`` link."""
        return self.soup.find("a", {"rel": "author"}).get_text()

    def title(self):
        """Return the text of the ``<h1 class="entry-title">`` element."""
        return self.soup.find("h1", {"class": "entry-title"}).get_text()

    def yields(self):
        """Return ``get_yields`` of the paragraph whose text matches "分量:"."""
        # `string=` is the current name of the search argument that used to
        # be spelled `text=`; the old spelling is deprecated.
        return get_yields(self.soup.find("p", string=re.compile("分量:")).get_text())

    def image(self):
        """Return the image reported by the page's schema data."""
        return self.schema.image()

    def _reparsed_soup(self):
        """Return a fresh html.parser tree built from the serialized page."""
        # NOTE(review): the original code re-parsed the page before searching
        # for the section headers; the reason is not visible here, so the
        # re-parse is preserved. Confirm before replacing it with self.soup.
        return BeautifulSoup(str(self.soup), features="html.parser")

    def ingredients(self):
        """Return the normalized items of the first ``<ul>`` after the "材料:" paragraph."""
        soup = self._reparsed_soup()
        ingredients = (
            soup.find(name="p", string=re.compile("材料:"))
            .find_next("ul")
            .find_all("li")
        )
        return [normalize_string(ingredient.get_text()) for ingredient in ingredients]

    def instructions(self):
        """Return the numbered paragraphs after the "做法:" paragraph, newline-joined.

        Only paragraphs whose text begins with a digit are kept.
        """
        soup = self._reparsed_soup()
        instructions = soup.find(name="p", string=re.compile("做法:")).find_all_next(
            "p"
        )
        return "\n".join(
            [
                normalize_string(instruction.get_text())
                for instruction in instructions
                if instruction.get_text()[:1].isdigit()
            ]
        )
self.schema.ingredients() 26 | 27 | def instructions(self): 28 | """ 29 | King Arthur updated how they format their instructions to include html (instructions wrapped in
) in the 30 | `recipeInstructions`, parse the instructions assuming each step is wrapped in a
class RecipeScrapersExceptions(Exception):
    """Base class of every exception raised by this package.

    The message is kept on ``self.message`` and prefixed with
    "recipe-scrapers exception: " when the exception is rendered as text.
    """

    def __init__(self, message):
        super().__init__(message)
        self.message = message

    def __str__(self):
        return f"recipe-scrapers exception: {self.message}"


class WebsiteNotImplementedError(RecipeScrapersExceptions):
    """Raised when the requested website is not supported by this library."""

    def __init__(self, domain):
        self.domain = domain
        super().__init__(f"Website ({self.domain}) not supported.")


class NoSchemaFoundInWildMode(RecipeScrapersExceptions):
    """Raised when wild_mode fails to locate a recipe schema at the url."""

    def __init__(self, url):
        self.url = url
        super().__init__(f"No Recipe Schema found at {self.url}.")


class ElementNotFoundInHtml(RecipeScrapersExceptions):
    """Raised when an expected HTML element cannot be located on the page."""

    def __init__(self, element):
        self.element = element
        # The element is stored on the instance only; the message text
        # itself is fixed and does not mention it.
        super().__init__(
            "Element not found in html (self.soup.find returned None). Check traceback."
        )


class SchemaOrgException(RecipeScrapersExceptions):
    """Raised when Schema.org data on the page is missing or cannot be parsed."""

    # No constructor of its own: the inherited one already takes the
    # message and stores it.
class TwoPeasAndTheirPod(AbstractScraper):
    """Scraper for recipe pages hosted on twopeasandtheirpod.com."""

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "twopeasandtheirpod.com"

    def title(self):
        """Return the text of the ``<h2 class="wprm-recipe-name">`` element."""
        return self.soup.find("h2", {"class": "wprm-recipe-name"}).get_text()

    def total_time(self):
        """Return ``get_minutes`` of the total-time value joined with its unit text."""
        minutes = self.soup.select_one(".wprm-recipe-total_time").get_text()
        unit = self.soup.select_one(".wprm-recipe-total_time-unit").get_text()

        return get_minutes("{} {}".format(minutes, unit))

    def yields(self):
        """Return ``get_yields`` of the fifth ``<dl>``'s ``<dd>`` in the details container."""
        return get_yields(
            self.soup.select_one(
                "div.wprm-recipe-details-container dl:nth-of-type(5) dd"
            ).get_text()
        )

    def ingredients(self):
        """Return the normalized text of every "wprm-recipe-ingredient" list item."""
        ingredients = self.soup.findAll("li", {"class": "wprm-recipe-ingredient"})

        return [normalize_string(ingredient.get_text()) for ingredient in ingredients]

    def instructions(self):
        """Return every ".wprm-recipe-instruction-text" element, newline-joined."""
        instructions = self.soup.select(".wprm-recipe-instruction-text")

        return "\n".join(
            [normalize_string(instruction.get_text()) for instruction in instructions]
        )

    def image(self):
        """Return the ``src`` of the recipe image.

        Returns None when the "wprm-recipe-image" container is missing or
        holds no ``<img>`` carrying a ``src`` attribute.
        """
        container = self.soup.find("div", {"class": "wprm-recipe-image"})
        # Without this guard a page lacking the container raised
        # AttributeError, although the method is written to return None
        # when there is no image.
        if container is None:
            return None

        # Requiring the src attribute in the search avoids a KeyError on an
        # <img> that has none.
        image = container.find("img", {"src": True})

        return image["src"] if image else None
self.soup.find("div", {"class": "recipe-serving"}) 24 | .find("span", {"class": "meta-text__data"}) 25 | .text 26 | ) 27 | ) 28 | 29 | def ingredients(self): 30 | ingredients = self.soup.find("ul", {"class": "ingredient-list"}).findAll("li") 31 | 32 | return [normalize_string(ingredient.get_text()) for ingredient in ingredients] 33 | 34 | def instructions(self): 35 | steps = self.soup.find( 36 | "div", {"class": "structured-project__steps"} 37 | ).ol.findAll("li") 38 | 39 | return "\n".join( 40 | [ 41 | normalize_string( 42 | step.div.text + ": " + "".join([p.text for p in step.findAll("p")]) 43 | ) 44 | for step in steps 45 | ] 46 | ) 47 | -------------------------------------------------------------------------------- /tests/test_data/schemaorg.testhtml: -------------------------------------------------------------------------------- 1 | 2 |
3 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /recipe_scrapers/settings/default.py: -------------------------------------------------------------------------------- 1 | from recipe_scrapers.plugins import ( # SchemaOrgPriorityPlugin,; Bcp47ValidatePlugin, 2 | ExceptionHandlingPlugin, 3 | HTMLTagStripperPlugin, 4 | NormalizeStringPlugin, 5 | OpenGraphImageFetchPlugin, 6 | SchemaOrgFillPlugin, 7 | ) 8 | 9 | # Plugins to be attached. 10 | # The upper most plugin is the "outer most" executed. 11 | # Check recipe_scrapers.settings.template.py for ways to extend. 12 | PLUGINS = ( 13 | ExceptionHandlingPlugin, 14 | HTMLTagStripperPlugin, 15 | NormalizeStringPlugin, 16 | OpenGraphImageFetchPlugin, 17 | SchemaOrgFillPlugin, 18 | # Bcp47ValidatePlugin, 19 | # SchemaOrgPriorityPlugin, 20 | ) 21 | 22 | META_HTTP_EQUIV = True 23 | 24 | 25 | SUPPRESS_EXCEPTIONS = False 26 | # Applicable only if SUPPRESS_EXCEPTIONS is True, otherwise ignored 27 | # silence