diff --git a/frontend/src/App.vue b/frontend/src/App.vue
index 0caaacc0c..a79169141 100644
--- a/frontend/src/App.vue
+++ b/frontend/src/App.vue
@@ -63,7 +63,7 @@ export default {
   },
   created() {
     window.addEventListener("keyup", e => {
-      if (e.key == "/") {
+      if (e.key == "/" && !document.activeElement.id.startsWith('input')) {
        this.search = !this.search;
       }
     });
diff --git a/frontend/src/api/backup.js b/frontend/src/api/backup.js
index 70bf529c8..dd9d80b57 100644
--- a/frontend/src/api/backup.js
+++ b/frontend/src/api/backup.js
@@ -8,9 +8,9 @@ const backupURLs = {
   // Backup
   available: `${backupBase}available`,
   createBackup: `${backupBase}export/database`,
-  importBackup: (fileName) => `${backupBase}${fileName}/import`,
-  deleteBackup: (fileName) => `${backupBase}${fileName}/delete`,
-  downloadBackup: (fileName) => `${backupBase}${fileName}/download`,
+  importBackup: fileName => `${backupBase}${fileName}/import`,
+  deleteBackup: fileName => `${backupBase}${fileName}/delete`,
+  downloadBackup: fileName => `${backupBase}${fileName}/download`,
 };

 export default {
diff --git a/frontend/src/components/Settings/Backup/AvailableBackupCard.vue b/frontend/src/components/Settings/Backup/AvailableBackupCard.vue
index 55cbd9ac1..2f30e5823 100644
--- a/frontend/src/components/Settings/Backup/AvailableBackupCard.vue
+++ b/frontend/src/components/Settings/Backup/AvailableBackupCard.vue
@@ -69,10 +69,9 @@ export default {
       this.$emit("loading");
       let response = await api.backups.import(data.name, data);

-      let failed = response.data.failed;
-      let succesful = response.data.successful;
+      let importData = response.data;

-      this.$emit("finished", succesful, failed);
+      this.$emit("finished", importData);
     },
     deleteBackup(data) {
       this.$emit("loading");
diff --git a/frontend/src/components/Settings/Backup/ImportSummaryDialog/DataTable.vue b/frontend/src/components/Settings/Backup/ImportSummaryDialog/DataTable.vue
new file mode 100644
index 000000000..604615572
--- /dev/null
+++ b/frontend/src/components/Settings/Backup/ImportSummaryDialog/DataTable.vue
@@ -0,0 +1,47 @@
+
+
+
+
+
+          {{ item.status ? "Imported" : "Failed" }}
+
+
+
+
+
+
+          {{ item.exception }}
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
"Imported" : "Failed" }} + + + + + + + {{ item.exception }} + + + + + + + + + + \ No newline at end of file diff --git a/frontend/src/components/Settings/Backup/ImportSummaryDialog/index.vue b/frontend/src/components/Settings/Backup/ImportSummaryDialog/index.vue new file mode 100644 index 000000000..718a2dfd2 --- /dev/null +++ b/frontend/src/components/Settings/Backup/ImportSummaryDialog/index.vue @@ -0,0 +1,152 @@ + + + + + Import Summary + + + + + + Recipes + + + Success: {{ recipeNumbers.success }} + + + Failed: {{ recipeNumbers.failure }} + + + + + + + Themes + + + Success: {{ themeNumbers.success }} + + + Failed: {{ themeNumbers.failure }} + + + + + + + Settings + + + Success: {{ settingsNumbers.success }} + + + Failed: {{ settingsNumbers.failure }} + + + + + + + Recipes + Themes + Settings + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/frontend/src/components/Settings/Backup/index.vue b/frontend/src/components/Settings/Backup/index.vue index d28f64615..56d2998f6 100644 --- a/frontend/src/components/Settings/Backup/index.vue +++ b/frontend/src/components/Settings/Backup/index.vue @@ -41,6 +41,7 @@ :failed-header="$t('settings.backup.failed-imports')" :failed="failedImports" /> + @@ -48,6 +49,7 @@ diff --git a/mealie/models/import_models.py b/mealie/models/import_models.py new file mode 100644 index 000000000..5b183572f --- /dev/null +++ b/mealie/models/import_models.py @@ -0,0 +1,20 @@ +from typing import Optional + +from pydantic.main import BaseModel + + +class RecipeImport(BaseModel): + name: Optional[str] + slug: str + status: bool + exception: Optional[str] + +class ThemeImport(BaseModel): + name: str + status: bool + exception: Optional[str] + +class SettingsImport(BaseModel): + name: str + status: bool + exception: Optional[str] \ No newline at end of file diff --git a/mealie/routes/recipe/recipe_crud_routes.py b/mealie/routes/recipe/recipe_crud_routes.py index 613b2d216..142c8d3a4 100644 --- a/mealie/routes/recipe/recipe_crud_routes.py +++ b/mealie/routes/recipe/recipe_crud_routes.py @@ -1,10 +1,11 @@ from db.db_setup import generate_session -from fastapi import APIRouter, Depends, File, Form, HTTPException, Query +from fastapi import APIRouter, Depends, File, Form, HTTPException +from fastapi.logger import logger from fastapi.responses import FileResponse from models.recipe_models import RecipeURLIn from services.image_services import read_image, write_image from services.recipe_services import Recipe -from services.scrape_services import create_from_url +from services.scraper.scraper import create_from_url from sqlalchemy.orm.session import Session from utils.snackbar import SnackResponse @@ -27,6 +28,7 @@ def parse_recipe_url(url: RecipeURLIn, db: Session = Depends(generate_session)): """ Takes in a URL and attempts to scrape data and load it into the database """ recipe = create_from_url(url.url) + recipe.save_to_db(db) return recipe.slug diff --git a/mealie/services/backups/imports.py b/mealie/services/backups/imports.py index 85c7f345d..71810e0fd 100644 --- a/mealie/services/backups/imports.py +++ b/mealie/services/backups/imports.py @@ -1,12 +1,13 @@ import json import shutil import zipfile -from logging import error +from logging import error, exception from pathlib import Path from typing import List from app_config import BACKUP_DIR, IMG_DIR, TEMP_DIR from db.database import db +from models.import_models import RecipeImport, SettingsImport, ThemeImport from models.theme_models import SiteTheme from services.recipe_services 
+        return {
+            "recipeImports": recipe_report,
+            "settingsReport": settings_report,
+            "themeReport": theme_report,
+        }

     def import_recipes(self):
         recipe_dir: Path = self.import_dir.joinpath("recipes")

+        imports = []
         successful_imports = []
-        failed_imports = []

         for recipe in recipe_dir.glob("*.json"):
             with open(recipe, "r") as f:
@@ -82,16 +89,27 @@ class ImportDatabase:
             try:
                 recipe_obj = Recipe(**recipe_dict)
                 recipe_obj.save_to_db(self.session)
+                import_status = RecipeImport(
+                    name=recipe_obj.name, slug=recipe_obj.slug, status=True
+                )
+                imports.append(import_status)
                 successful_imports.append(recipe.stem)
                 logger.info(f"Imported: {recipe.stem}")
+
             except Exception as inst:
                 logger.error(inst)
                 logger.info(f"Failed Import: {recipe.stem}")
-                failed_imports.append(recipe.stem)
+                import_status = RecipeImport(
+                    name=recipe.stem,
+                    slug=recipe.stem,
+                    status=False,
+                    exception=str(inst),
+                )
+                imports.append(import_status)

         self._import_images(successful_imports)

-        return {"successful": successful_imports, "failed": failed_imports}
+        return imports

     @staticmethod
     def _recipe_migration(recipe_dict: dict) -> dict:
@@ -130,7 +148,7 @@ class ImportDatabase:

     def import_themes(self):
         themes_file = self.import_dir.joinpath("themes", "themes.json")
-
+        theme_imports = []
         with open(themes_file, "r") as f:
             themes: list[dict] = json.loads(f.read())
             for theme in themes:
@@ -138,17 +156,38 @@ class ImportDatabase:
                     continue
                 new_theme = SiteTheme(**theme)
                 try:
+
                     db.themes.create(self.session, new_theme.dict())
-                except:
+                    theme_imports.append(ThemeImport(name=new_theme.name, status=True))
+                except Exception as inst:
                     logger.info(f"Unable Import Theme {new_theme.name}")
+                    theme_imports.append(
+                        ThemeImport(name=new_theme.name, status=False, exception=str(inst))
+                    )
+
+        return theme_imports

     def import_settings(self):
         settings_file = self.import_dir.joinpath("settings", "settings.json")
+        settings_imports = []

         with open(settings_file, "r") as f:
             settings: dict = json.loads(f.read())
-            db.settings.update(self.session, settings.get("name"), settings)
+            name = settings.get("name")
+
+            try:
+                db.settings.update(self.session, name, settings)
+                import_status = SettingsImport(name=name, status=True)
+
+            except Exception as inst:
+                import_status = SettingsImport(
+                    name=name, status=False, exception=str(inst)
+                )
+
+            settings_imports.append(import_status)
+
+        return settings_imports

     def clean_up(self):
         shutil.rmtree(TEMP_DIR)
diff --git a/mealie/services/migrations/nextcloud.py b/mealie/services/migrations/nextcloud.py
index 5e356cd03..155d1d120 100644
--- a/mealie/services/migrations/nextcloud.py
+++ b/mealie/services/migrations/nextcloud.py
@@ -6,7 +6,7 @@ from pathlib import Path

 from app_config import IMG_DIR, MIGRATION_DIR, TEMP_DIR
 from services.recipe_services import Recipe
-from services.scrape_services import normalize_data, process_recipe_data
+from services.scraper.cleaner import Cleaner

 from app_config import IMG_DIR, TEMP_DIR

@@ -34,8 +34,7 @@ def import_recipes(recipe_dir: Path) -> Recipe:
     with open(recipe_file, "r") as f:
         recipe_dict = json.loads(f.read())

-    recipe_dict = process_recipe_data(recipe_dict)
-    recipe_data = normalize_data(recipe_dict)
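+    # Cleaner.clean performs both of the old steps shown above: the
+    # process_recipe_data bookkeeping and the normalize_data field cleanup.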
+    recipe_data = Cleaner.clean(recipe_dict)

     image_name = None
     if image:
diff --git a/mealie/services/recipe_services.py b/mealie/services/recipe_services.py
index 1d0c254fd..913ad785d 100644
--- a/mealie/services/recipe_services.py
+++ b/mealie/services/recipe_services.py
@@ -38,8 +38,8 @@ class Recipe(BaseModel):
     tags: Optional[List[str]] = []
     dateAdded: Optional[datetime.date]
     notes: Optional[List[RecipeNote]] = []
-    rating: Optional[int]
-    orgURL: Optional[str]
+    rating: Optional[int] = 0
+    orgURL: Optional[str] = ""
     extras: Optional[dict] = {}

     class Config:
diff --git a/mealie/services/scrape_services.py b/mealie/services/scrape_services.py
deleted file mode 100644
index caa955972..000000000
--- a/mealie/services/scrape_services.py
+++ /dev/null
@@ -1,246 +0,0 @@
-import html
-import json
-import re
-from typing import List, Tuple
-
-import extruct
-import requests
-import scrape_schema_recipe
-from app_config import DEBUG_DIR
-from slugify import slugify
-from utils.logger import logger
-from w3lib.html import get_base_url
-
-from services.image_services import scrape_image
-from services.recipe_services import Recipe
-
-LAST_JSON = DEBUG_DIR.joinpath("last_recipe.json")
-
-
-def cleanhtml(raw_html):
-    cleanr = re.compile("<.*?>")
-    cleantext = re.sub(cleanr, "", raw_html)
-    return cleantext
-
-
-def normalize_image_url(image) -> str:
-    if type(image) == list:
-        return image[0]
-    elif type(image) == dict:
-        return image["url"]
-    elif type(image) == str:
-        return image
-    else:
-        raise Exception(f"Unrecognised image URL format: {image}")
-
-
-def normalize_instructions(instructions) -> List[dict]:
-    if not instructions:
-        return []
-
-    # One long string split by (possibly multiple) new lines
-    if type(instructions) == str:
-        return [
-            {"text": normalize_instruction(line)}
-            for line in instructions.splitlines()
-            if line
-        ]
-
-    # Plain strings in a list
-    elif type(instructions) == list and type(instructions[0]) == str:
-        return [{"text": normalize_instruction(step)} for step in instructions]
-
-    # Dictionaries (let's assume it's a HowToStep) in a list
-    elif type(instructions) == list and type(instructions[0]) == dict:
-        try:
-            # If HowToStep is under HowToSection
-            sectionSteps = []
-            for step in instructions:
-                if step["@type"] == "HowToSection":
-                    for item in step["itemListElement"]:
-                        sectionSteps.append(item)
-
-            if len(sectionSteps) > 0:
-                return [
-                    {"text": normalize_instruction(step["text"])}
-                    for step in sectionSteps
-                    if step["@type"] == "HowToStep"
-                ]
-
-            return [
-                {"text": normalize_instruction(step["text"])}
-                for step in instructions
-                if step["@type"] == "HowToStep"
-            ]
-        except Exception as e:
-            # Not "@type", try "type"
-            return [
-                {"text": normalize_instruction(step["properties"]["text"])}
-                for step in instructions
-                if step["type"].find("HowToStep") > -1
-            ]
-
-    else:
-        raise Exception(f"Unrecognised instruction format: {instructions}")
-
-
-def normalize_instruction(line) -> str:
-    l = cleanhtml(line.strip())
-    # Some sites erroneously escape their strings on multiple levels
-    while not l == (l := html.unescape(l)):
-        pass
-    return l
-
-
-def normalize_ingredient(ingredients: list) -> str:
-
-    return [cleanhtml(html.unescape(ing)) for ing in ingredients]
-
-
-def normalize_yield(yld) -> str:
-    if type(yld) == list:
-        return yld[-1]
-    else:
-        return yld
-
-
-def normalize_time(time_entry) -> str:
-    if type(time_entry) == type(None):
-        return None
-    elif type(time_entry) != str:
-        return str(time_entry)
-
-
-def normalize_data(recipe_data: dict) -> dict:
-    recipe_data["totalTime"] = normalize_time(recipe_data.get("totalTime"))
-    recipe_data["description"] = cleanhtml(recipe_data.get("description", ""))
-    recipe_data["prepTime"] = normalize_time(recipe_data.get("prepTime"))
-    recipe_data["performTime"] = normalize_time(recipe_data.get("performTime"))
-    recipe_data["recipeYield"] = normalize_yield(recipe_data.get("recipeYield"))
-    recipe_data["recipeIngredient"] = normalize_ingredient(
-        recipe_data.get("recipeIngredient")
-    )
-    recipe_data["recipeInstructions"] = normalize_instructions(
-        recipe_data["recipeInstructions"]
-    )
-    recipe_data["image"] = normalize_image_url(recipe_data["image"])
-    return recipe_data
-
-
-def process_recipe_data(new_recipe: dict, url=None) -> dict:
-    slug = slugify(new_recipe["name"])
-    mealie_tags = {
-        "slug": slug,
-        "orgURL": url,
-        "categories": [],
-        "tags": [],
-        "dateAdded": None,
-        "notes": [],
-        "extras": [],
-    }
-
-    new_recipe.update(mealie_tags)
-
-    return new_recipe
-
-
-def extract_recipe_from_html(html: str, url: str) -> dict:
-    try:
-        scraped_recipes: List[dict] = scrape_schema_recipe.loads(
-            html, python_objects=True
-        )
-        dump_last_json(scraped_recipes)
-
-        if not scraped_recipes:
-            scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(
-                url, python_objects=True
-            )
-    except Exception as e:
-        # trying without python_objects
-        scraped_recipes: List[dict] = scrape_schema_recipe.loads(html)
-        dump_last_json(scraped_recipes)
-
-        if not scraped_recipes:
-            scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(url)
-
-    if scraped_recipes:
-        new_recipe: dict = scraped_recipes[0]
-        logger.info(f"Recipe Scraped From Web: {new_recipe}")
-
-        if not new_recipe:
-            return "fail"  # TODO: Return Better Error Here
-
-        new_recipe = process_recipe_data(new_recipe, url=url)
-        new_recipe = normalize_data(new_recipe)
-    else:
-        new_recipe = basic_recipe_from_opengraph(html, url)
-        logger.info(f"Recipe Scraped from opengraph metadata: {new_recipe}")
-
-    return new_recipe
-
-
-def download_image_for_recipe(recipe: dict) -> dict:
-    try:
-        img_path = scrape_image(recipe.get("image"), recipe.get("slug"))
-        recipe["image"] = img_path.name
-    except:
-        recipe["image"] = "no image"
-
-    return recipe
-
-
-def og_field(properties: dict, field_name: str) -> str:
-    return next((val for name, val in properties if name == field_name), None)
-
-
-def og_fields(properties: List[Tuple[str, str]], field_name: str) -> List[str]:
-    return list({val for name, val in properties if name == field_name})
-
-
-def basic_recipe_from_opengraph(html: str, url: str) -> dict:
-    base_url = get_base_url(html, url)
-    data = extruct.extract(html, base_url=base_url)
-    try:
-        properties = data["opengraph"][0]["properties"]
-    except:
-        return
-
-    return {
-        "name": og_field(properties, "og:title"),
-        "description": og_field(properties, "og:description"),
-        "image": og_field(properties, "og:image"),
-        "recipeYield": "",
-        # FIXME: If recipeIngredient is an empty list, mongodb's data verification fails.
-        "recipeIngredient": ["Could not detect ingredients"],
-        # FIXME: recipeInstructions is allowed to be empty but message this is added for user sanity.
- "recipeInstructions": [{"text": "Could not detect instructions"}], - "slug": slugify(og_field(properties, "og:title")), - "orgURL": og_field(properties, "og:url"), - "categories": [], - "tags": og_fields(properties, "og:article:tag"), - "dateAdded": None, - "notes": [], - "extras": [], - } - - -def dump_last_json(recipe_data: dict): - with open(LAST_JSON, "w") as f: - f.write(json.dumps(recipe_data, indent=4, default=str)) - - return - - -def process_recipe_url(url: str) -> dict: - r = requests.get(url) - new_recipe = extract_recipe_from_html(r.text, url) - new_recipe = download_image_for_recipe(new_recipe) - return new_recipe - - -def create_from_url(url: str) -> Recipe: - recipe_data = process_recipe_url(url) - - recipe = Recipe(**recipe_data) - - return recipe diff --git a/mealie/services/scraper/cleaner.py b/mealie/services/scraper/cleaner.py new file mode 100644 index 000000000..37083d9f2 --- /dev/null +++ b/mealie/services/scraper/cleaner.py @@ -0,0 +1,151 @@ +import html +import re +from typing import List + +from slugify import slugify + + +class Cleaner: + """A Namespace for utility function to clean recipe data extracted + from a url and returns a dictionary that is ready for import into + the database. Cleaner.clean is the main entrypoint + + + """ + + @staticmethod + def clean(recipe_data: dict, url=None) -> dict: + print(recipe_data) + """Main entrypoint to clean a recipe extracted from the web + and format the data into an accectable format for the database + + Args: + recipe_data (dict): raw recipe dicitonary + + Returns: + dict: cleaned recipe dictionary + """ + recipe_data["totalTime"] = Cleaner.time(recipe_data.get("totalTime")) + recipe_data["description"] = Cleaner.html(recipe_data.get("description", "")) + recipe_data["prepTime"] = Cleaner.time(recipe_data.get("prepTime")) + recipe_data["performTime"] = Cleaner.time(recipe_data.get("performTime")) + recipe_data["recipeYield"] = Cleaner.yield_amount( + recipe_data.get("recipeYield") + ) + recipe_data["recipeIngredient"] = Cleaner.ingredient( + recipe_data.get("recipeIngredient") + ) + recipe_data["recipeInstructions"] = Cleaner.instructions( + recipe_data["recipeInstructions"] + ) + recipe_data["image"] = Cleaner.image(recipe_data["image"]) + recipe_data["slug"] = slugify(recipe_data["name"]) + recipe_data["orgURL"] = url + + return recipe_data + + @staticmethod + def html(raw_html): + cleanr = re.compile("<.*?>") + cleantext = re.sub(cleanr, "", raw_html) + return cleantext + + @staticmethod + def image(image) -> str: + if type(image) == list: + return image[0] + elif type(image) == dict: + return image["url"] + elif type(image) == str: + return image + else: + raise Exception(f"Unrecognised image URL format: {image}") + + @staticmethod + def instructions(instructions) -> List[dict]: + if not instructions: + return [] + + # One long string split by (possibly multiple) new lines + print(instructions) + if type(instructions) == str: + return [ + {"text": Cleaner._instruction(line)} + for line in instructions.splitlines() + if line + ] + + # Plain strings in a list + elif type(instructions) == list and type(instructions[0]) == str: + return [{"text": Cleaner._instruction(step)} for step in instructions] + + # Dictionaries (let's assume it's a HowToStep) in a list + elif type(instructions) == list and type(instructions[0]) == dict: + # Try List of Dictionary without "@type" or "type" + if not instructions[0].get("@type", False) and not instructions[0].get( + "type", False + ): + return [ + {"text": 
+        if not instructions:
+            return []
+
+        # One long string split by (possibly multiple) new lines
+        if type(instructions) == str:
+            return [
+                {"text": Cleaner._instruction(line)}
+                for line in instructions.splitlines()
+                if line
+            ]
+
+        # Plain strings in a list
+        elif type(instructions) == list and type(instructions[0]) == str:
+            return [{"text": Cleaner._instruction(step)} for step in instructions]
+
+        # Dictionaries (let's assume it's a HowToStep) in a list
+        elif type(instructions) == list and type(instructions[0]) == dict:
+            # Try List of Dictionary without "@type" or "type"
+            if not instructions[0].get("@type", False) and not instructions[0].get(
+                "type", False
+            ):
+                return [
+                    {"text": Cleaner._instruction(step["text"])}
+                    for step in instructions
+                ]
+
+            try:
+                # If HowToStep is under HowToSection
+                sectionSteps = []
+                for step in instructions:
+                    if step["@type"] == "HowToSection":
+                        for item in step["itemListElement"]:
+                            sectionSteps.append(item)
+
+                if len(sectionSteps) > 0:
+                    return [
+                        {"text": Cleaner._instruction(step["text"])}
+                        for step in sectionSteps
+                        if step["@type"] == "HowToStep"
+                    ]
+
+                return [
+                    {"text": Cleaner._instruction(step["text"])}
+                    for step in instructions
+                    if step["@type"] == "HowToStep"
+                ]
+            except Exception as e:
+                # Not "@type", try "type"
+                try:
+                    return [
+                        {"text": Cleaner._instruction(step["properties"]["text"])}
+                        for step in instructions
+                        if step["type"].find("HowToStep") > -1
+                    ]
+                except:
+                    pass
+
+        else:
+            raise Exception(f"Unrecognised instruction format: {instructions}")
+
+    @staticmethod
+    def _instruction(line) -> str:
+        l = Cleaner.html(line.strip())
+        # Some sites erroneously escape their strings on multiple levels
+        while not l == (l := html.unescape(l)):
+            pass
+        return l
+
+    @staticmethod
+    def ingredient(ingredients: list) -> List[str]:
+        return [Cleaner.html(html.unescape(ing)) for ing in ingredients]
+
+    @staticmethod
+    def yield_amount(yld) -> str:
+        if type(yld) == list:
+            return yld[-1]
+        else:
+            return yld
+
+    @staticmethod
+    def time(time_entry) -> str:
+        if type(time_entry) == type(None):
+            return None
+        elif type(time_entry) != str:
+            return str(time_entry)
+        else:
+            return time_entry
diff --git a/mealie/services/scraper/open_graph.py b/mealie/services/scraper/open_graph.py
new file mode 100644
index 000000000..49d1072ef
--- /dev/null
+++ b/mealie/services/scraper/open_graph.py
@@ -0,0 +1,43 @@
+from typing import Tuple
+
+import extruct
+from app_config import DEBUG_DIR
+from slugify import slugify
+from w3lib.html import get_base_url
+
+LAST_JSON = DEBUG_DIR.joinpath("last_recipe.json")
+
+
+def og_field(properties: list[Tuple[str, str]], field_name: str) -> str:
+    return next((val for name, val in properties if name == field_name), None)
+
+
+def og_fields(properties: list[Tuple[str, str]], field_name: str) -> list[str]:
+    return list({val for name, val in properties if name == field_name})
+
+
+def basic_recipe_from_opengraph(html: str, url: str) -> dict:
+    base_url = get_base_url(html, url)
+    data = extruct.extract(html, base_url=base_url)
+    try:
+        properties = data["opengraph"][0]["properties"]
+    except:
+        return
+
+    return {
+        "name": og_field(properties, "og:title"),
+        "description": og_field(properties, "og:description"),
+        "image": og_field(properties, "og:image"),
+        "recipeYield": "",
+        # FIXME: If recipeIngredient is an empty list, mongodb's data verification fails.
+        "recipeIngredient": ["Could not detect ingredients"],
+        # FIXME: recipeInstructions is allowed to be empty, but this message is added for user sanity.
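+    # Illustrative usage, mirroring the recipe POST route earlier in this diff:
+    #   recipe = create_from_url("https://example.com/some-recipe")
+    #   recipe.save_to_db(session)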
+ "recipeInstructions": [{"text": "Could not detect instructions"}], + "slug": slugify(og_field(properties, "og:title")), + "orgURL": og_field(properties, "og:url"), + "categories": [], + "tags": og_fields(properties, "og:article:tag"), + "dateAdded": None, + "notes": [], + "extras": [], + } diff --git a/mealie/services/scraper/scraper.py b/mealie/services/scraper/scraper.py new file mode 100644 index 000000000..a13ed79e2 --- /dev/null +++ b/mealie/services/scraper/scraper.py @@ -0,0 +1,84 @@ +import json +from typing import List + +import requests +import scrape_schema_recipe +from app_config import DEBUG_DIR +from services.image_services import scrape_image +from services.recipe_services import Recipe +from services.scraper import open_graph +from services.scraper.cleaner import Cleaner +from utils.logger import logger + +LAST_JSON = DEBUG_DIR.joinpath("last_recipe.json") + + +def create_from_url(url: str) -> Recipe: + """Main entry point for generating a recipe from a URL. Pass in a URL and + a Recipe object will be returned if successful. + + Args: + url (str): a valid string representing a URL + + Returns: + Recipe: Recipe Object + """ + r = requests.get(url) + new_recipe = extract_recipe_from_html(r.text, url) + new_recipe = Cleaner.clean(new_recipe) + new_recipe = download_image_for_recipe(new_recipe) + + recipe = Recipe(**new_recipe) + + return recipe + + +def extract_recipe_from_html(html: str, url: str) -> dict: + try: + scraped_recipes: List[dict] = scrape_schema_recipe.loads( + html, python_objects=True + ) + dump_last_json(scraped_recipes) + + if not scraped_recipes: + scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url( + url, python_objects=True + ) + except Exception as e: + # trying without python_objects + scraped_recipes: List[dict] = scrape_schema_recipe.loads(html) + dump_last_json(scraped_recipes) + + if not scraped_recipes: + scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(url) + + if scraped_recipes: + new_recipe: dict = scraped_recipes[0] + logger.info(f"Recipe Scraped From Web: {new_recipe}") + + if not new_recipe: + return "fail" # TODO: Return Better Error Here + + new_recipe = Cleaner.clean(new_recipe, url) + else: + new_recipe = open_graph.basic_recipe_from_opengraph(html, url) + logger.info(f"Recipe Scraped from opengraph metadata: {new_recipe}") + + return new_recipe + + +def download_image_for_recipe(recipe: dict) -> dict: + try: + img_path = scrape_image(recipe.get("image"), recipe.get("slug")) + recipe["image"] = img_path.name + except: + recipe["image"] = "no image" + + return recipe + + +def dump_last_json(recipe_data: dict): + with open(LAST_JSON, "w") as f: + f.write(json.dumps(recipe_data, indent=4, default=str)) + + return diff --git a/mealie/tests/test_services/test_scraper/test_scraper.py b/mealie/tests/test_services/test_scraper/test_scraper.py index 817db7535..c348bb5bd 100644 --- a/mealie/tests/test_services/test_scraper/test_scraper.py +++ b/mealie/tests/test_services/test_scraper/test_scraper.py @@ -2,11 +2,8 @@ import json import re import pytest -from services.scrape_services import ( - extract_recipe_from_html, - normalize_data, - normalize_instructions, -) +from services.scraper.cleaner import Cleaner +from services.scraper.scraper import extract_recipe_from_html from tests.test_config import TEST_RAW_HTML, TEST_RAW_RECIPES # https://github.com/django/django/blob/stable/1.3.x/django/core/validators.py#L45 @@ -42,7 +39,7 @@ url_validation_regex = re.compile( ], ) def test_normalize_data(json_file, num_steps): - 
+    try:
+        img_path = scrape_image(recipe.get("image"), recipe.get("slug"))
+        recipe["image"] = img_path.name
+    except:
+        recipe["image"] = "no image"
+
+    return recipe
+
+
+def dump_last_json(recipe_data: dict):
+    with open(LAST_JSON, "w") as f:
+        f.write(json.dumps(recipe_data, indent=4, default=str))
+
+    return
diff --git a/mealie/tests/test_services/test_scraper/test_scraper.py b/mealie/tests/test_services/test_scraper/test_scraper.py
index 817db7535..c348bb5bd 100644
--- a/mealie/tests/test_services/test_scraper/test_scraper.py
+++ b/mealie/tests/test_services/test_scraper/test_scraper.py
@@ -2,11 +2,8 @@ import json
 import re

 import pytest
-from services.scrape_services import (
-    extract_recipe_from_html,
-    normalize_data,
-    normalize_instructions,
-)
+from services.scraper.cleaner import Cleaner
+from services.scraper.scraper import extract_recipe_from_html
 from tests.test_config import TEST_RAW_HTML, TEST_RAW_RECIPES

 # https://github.com/django/django/blob/stable/1.3.x/django/core/validators.py#L45
@@ -42,7 +39,7 @@ url_validation_regex = re.compile(
     ],
 )
 def test_normalize_data(json_file, num_steps):
-    recipe_data = normalize_data(json.load(open(TEST_RAW_RECIPES.joinpath(json_file))))
+    recipe_data = Cleaner.clean(json.load(open(TEST_RAW_RECIPES.joinpath(json_file))))
     assert len(recipe_data["recipeInstructions"]) == num_steps

@@ -58,7 +55,7 @@ def test_normalize_data(json_file, num_steps):
     ],
 )
 def test_normalize_instructions(instructions):
-    assert normalize_instructions(instructions) == [
+    assert Cleaner.instructions(instructions) == [
         {"text": "A"},
         {"text": "B"},
         {"text": "C"},