diff --git a/mealie/services/scraper/scraper.py b/mealie/services/scraper/scraper.py index bee5cc178..043a55ba0 100644 --- a/mealie/services/scraper/scraper.py +++ b/mealie/services/scraper/scraper.py @@ -1,5 +1,4 @@ import json -from typing import List import requests import scrape_schema_recipe @@ -34,19 +33,15 @@ def create_from_url(url: str) -> Recipe: def extract_recipe_from_html(html: str, url: str) -> dict: - try: - scraped_recipes: List[dict] = scrape_schema_recipe.loads(html, python_objects=True) - dump_last_json(scraped_recipes) + scraped_recipes: list[dict] - if not scraped_recipes: - scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(url, python_objects=True) + try: + scraped_recipes = scrape_schema_recipe.scrape_url(url) except Exception as e: print(e) - scraped_recipes: List[dict] = scrape_schema_recipe.loads(html) - dump_last_json(scraped_recipes) + scraped_recipes = scrape_schema_recipe.loads(html, python_objects=True) - if not scraped_recipes: - scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(url) + dump_last_json(scraped_recipes) if scraped_recipes: new_recipe: dict = scraped_recipes[0] diff --git a/poetry.lock b/poetry.lock index 302e32393..cbf907df6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -939,6 +939,19 @@ python-versions = "*" [package.dependencies] rdflib = ">=4.2.2" +[[package]] +name = "recipe-scrapers" +version = "13.2.7" +description = "Python package, scraping recipes from all over the internet" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +beautifulsoup4 = ">=4.6.0" +extruct = ">=0.8.0" +requests = ">=2.19.1" + [[package]] name = "regex" version = "2021.4.4" @@ -1236,7 +1249,7 @@ python-versions = "*" [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "73bac73c62e64c90a29816dde9ef1d896e8ca0b4271e67cde6ca8cc56bd87efd" +content-hash = "8a123b6b0cf37c1d4a66ea4f137f79bba79f373c7019af879e1b06fb5ded0ed4" [metadata.files] aiofiles = [ @@ -1893,6 +1906,10 @@ rdflib = [ rdflib-jsonld = [ {file = "rdflib-jsonld-0.5.0.tar.gz", hash = "sha256:4f7d55326405071c7bce9acf5484643bcb984eadb84a6503053367da207105ed"}, ] +recipe-scrapers = [ + {file = "recipe_scrapers-13.2.7-py3-none-any.whl", hash = "sha256:e5b2a251bbba2ef319ce32a10c4073b23f483f0ee2db83da543204549b06dffe"}, + {file = "recipe_scrapers-13.2.7.tar.gz", hash = "sha256:e03d20a5c39f9c3dcb0185be1b6480ac0a086900d6aacf1699c77fa090944901"}, +] regex = [ {file = "regex-2021.4.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:619d71c59a78b84d7f18891fe914446d07edd48dc8328c8e149cbe0929b4e000"}, {file = "regex-2021.4.4-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:47bf5bf60cf04d72bf6055ae5927a0bd9016096bf3d742fa50d9bf9f45aa0711"}, diff --git a/pyproject.toml b/pyproject.toml index e4861c602..2099eb6a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ lxml = "4.6.2" Pillow = "^8.2.0" pathvalidate = "^2.4.1" apprise = "^0.9.2" +recipe-scrapers = "^13.2.7" [tool.poetry.dev-dependencies]