refactor/scrapper (#175)

* API Endpoint

* shopping list added to the UI

* fixed category sidebar on mobile

* fix category sidebar hidden all the time

* adjust mobile view of times

* remove console.logs

* actually remove console.logs

* Fixed varying card height on smaller screens

* change style of meal planner categories

* bug/fixed search opening on '/' while an input is focused

* Improved import summary dialog

* url validation

* refactor/split recipe scraper into separate files

Co-authored-by: hay-kot <hay-kot@pm.me>
Hayden 2021-02-20 13:58:06 -09:00 committed by GitHub
commit a48547126a
17 changed files with 593 additions and 295 deletions

View file

@@ -63,7 +63,7 @@ export default {
},
created() {
window.addEventListener("keyup", e => {
if (e.key == "/") {
if (e.key == "/" && !document.activeElement.id.startsWith("input")) {
this.search = !this.search;
}
});

View file

@@ -8,9 +8,9 @@ const backupURLs = {
// Backup
available: `${backupBase}available`,
createBackup: `${backupBase}export/database`,
importBackup: (fileName) => `${backupBase}${fileName}/import`,
deleteBackup: (fileName) => `${backupBase}${fileName}/delete`,
downloadBackup: (fileName) => `${backupBase}${fileName}/download`,
importBackup: fileName => `${backupBase}${fileName}/import`,
deleteBackup: fileName => `${backupBase}${fileName}/delete`,
downloadBackup: fileName => `${backupBase}${fileName}/download`,
};
export default {

View file

@@ -69,10 +69,9 @@ export default {
this.$emit("loading");
let response = await api.backups.import(data.name, data);
let failed = response.data.failed;
let succesful = response.data.successful;
let importData = response.data;
this.$emit("finished", succesful, failed);
this.$emit("finished", importData);
},
deleteBackup(data) {
this.$emit("loading");

View file

@@ -0,0 +1,47 @@
<template>
<div>
<v-data-table
dense
:headers="dataHeaders"
:items="dataSet"
item-key="name"
class="elevation-1 mt-2"
show-expand
:expanded.sync="expanded"
:footer-props="{
'items-per-page-options': [100, 200, 300, 400, -1],
}"
:items-per-page="100"
>
<template v-slot:item.status="{ item }">
<div :class="item.status ? 'success--text' : 'error--text'">
{{ item.status ? "Imported" : "Failed" }}
</div>
</template>
<template v-slot:expanded-item="{ headers, item }">
<td :colspan="headers.length">
<div class="ma-2">
{{ item.exception }}
</div>
</td>
</template>
</v-data-table>
</div>
</template>
<script>
export default {
props: {
dataSet: Array,
dataHeaders: Array,
},
data: () => ({
singleExpand: false,
expanded: [],
}),
};
</script>
<style>
</style>

View file

@@ -0,0 +1,152 @@
<template>
<div class="text-center">
<v-dialog v-model="dialog" width="70%">
<v-card>
<v-card-title> Import Summary </v-card-title>
<v-card-text>
<v-row class="mb-n9">
<v-card flat>
<v-card-text>
<div>
<h3>Recipes</h3>
</div>
<div class="success--text">
Success: {{ recipeNumbers.success }}
</div>
<div class="error--text">
Failed: {{ recipeNumbers.failure }}
</div>
</v-card-text>
</v-card>
<v-card flat>
<v-card-text>
<div>
<h3>Themes</h3>
</div>
<div class="success--text">
Success: {{ themeNumbers.success }}
</div>
<div class="error--text">
Failed: {{ themeNumbers.failure }}
</div>
</v-card-text>
</v-card>
<v-card flat>
<v-card-text>
<div>
<h3>Settings</h3>
</div>
<div class="success--text">
Success: {{ settingsNumbers.success }}
</div>
<div class="error--text">
Failed: {{ settingsNumbers.failure }}
</div>
</v-card-text>
</v-card>
</v-row>
</v-card-text>
<v-tabs v-model="tab">
<v-tab>Recipes</v-tab>
<v-tab>Themes</v-tab>
<v-tab>Settings</v-tab>
</v-tabs>
<v-tabs-items v-model="tab">
<v-tab-item>
<v-card flat>
<DataTable :data-headers="recipeHeaders" :data-set="recipeData" />
</v-card>
</v-tab-item>
<v-tab-item>
<v-card>
<DataTable :data-headers="recipeHeaders" :data-set="themeData" />
</v-card>
</v-tab-item>
<v-tab-item>
<v-card>
<DataTable :data-headers="recipeHeaders" :data-set="settingsData" />
</v-card>
</v-tab-item>
</v-tabs-items>
</v-card>
</v-dialog>
</div>
</template>
<script>
import DataTable from "./DataTable";
export default {
components: {
DataTable,
},
data: () => ({
tab: null,
dialog: false,
recipeData: [],
themeData: [],
settingsData: [],
recipeHeaders: [
{
text: "Status",
value: "status",
},
{
text: "Name",
align: "start",
sortable: true,
value: "name",
},
{ text: "Exception", value: "data-table-expand", align: "center" },
],
allDataTables: [],
}),
computed: {
recipeNumbers() {
let numbers = { success: 0, failure: 0 };
this.recipeData.forEach(element => {
if (element.status) {
numbers.success++;
} else numbers.failure++;
});
return numbers;
},
settingsNumbers() {
let numbers = { success: 0, failure: 0 };
this.settingsData.forEach(element => {
if (element.status) {
numbers.success++;
} else numbers.failure++;
});
return numbers;
},
themeNumbers() {
let numbers = { success: 0, failure: 0 };
this.themeData.forEach(element => {
if (element.status) {
numbers.success++;
} else numbers.failure++;
});
return numbers;
},
},
methods: {
open(importData) {
this.recipeData = importData.recipeImports;
this.themeData = importData.themeReport;
this.settingsData = importData.settingsReport;
this.dialog = true;
},
},
};
</script>
<style>
</style>

View file

@@ -41,6 +41,7 @@
:failed-header="$t('settings.backup.failed-imports')"
:failed="failedImports"
/>
<ImportSummaryDialog ref="report" :import-data="importData" />
</v-card-text>
</v-card>
</template>
@@ -48,6 +49,7 @@
<script>
import api from "@/api";
import SuccessFailureAlert from "../../UI/SuccessFailureAlert";
import ImportSummaryDialog from "./ImportSummaryDialog";
import UploadBtn from "../../UI/UploadBtn";
import AvailableBackupCard from "./AvailableBackupCard";
import NewBackupCard from "./NewBackupCard";
@@ -58,6 +60,7 @@ export default {
UploadBtn,
AvailableBackupCard,
NewBackupCard,
ImportSummaryDialog,
},
data() {
return {
@@ -65,6 +68,7 @@ export default {
successfulImports: [],
backupLoading: false,
availableBackups: [],
importData: [],
};
},
mounted() {
@@ -87,12 +91,10 @@ export default {
this.backupLoading = false;
}
},
processFinished(successful = null, failed = null) {
processFinished(data) {
this.getAvailableBackups();
this.backupLoading = false;
this.successfulImports = successful;
this.failedImports = failed;
this.$refs.report.open();
this.$refs.report.open(data);
},
},
};

View file

@@ -7,10 +7,13 @@
</v-card-title>
<v-card-text>
<v-form>
<v-form ref="urlForm">
<v-text-field
v-model="recipeURL"
:label="$t('new-recipe.recipe-url')"
required
validate-on-blur
:rules="[isValidWebUrl]"
></v-text-field>
</v-form>
@@ -64,18 +67,20 @@ export default {
methods: {
async createRecipe() {
this.processing = true;
let response = await api.recipes.createByURL(this.recipeURL);
if (response.status !== 201) {
this.error = true;
this.processing = false;
return;
}
if (this.$refs.urlForm.validate()) {
this.processing = true;
let response = await api.recipes.createByURL(this.recipeURL);
if (response.status !== 201) {
this.error = true;
this.processing = false;
return;
}
this.addRecipe = false;
this.processing = false;
this.recipeURL = "";
this.$router.push(`/recipe/${response.data}`);
this.addRecipe = false;
this.processing = false;
this.recipeURL = "";
this.$router.push(`/recipe/${response.data}`);
}
},
navCreate() {
@@ -89,6 +94,10 @@ export default {
this.recipeURL = "";
this.processing = false;
},
isValidWebUrl(url) {
let regEx = /^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)$/gm;
return regEx.test(url) ? true : "Must be a Valid URL";
},
},
};
</script>

View file

@@ -0,0 +1,20 @@
from typing import Optional
from pydantic.main import BaseModel
class RecipeImport(BaseModel):
name: Optional[str]
slug: str
status: bool
exception: Optional[str]
class ThemeImport(BaseModel):
name: str
status: bool
exception: Optional[str]
class SettingsImport(BaseModel):
name: str
status: bool
exception: Optional[str]
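A quick usage sketch for these status models (field values are illustrative, not from a real import run):

from models.import_models import RecipeImport

ok = RecipeImport(name="Pad Thai", slug="pad-thai", status=True)
failed = RecipeImport(
    name="broken-recipe",
    slug="broken-recipe",
    status=False,
    exception="KeyError: 'recipeIngredient'",  # illustrative exception text
)
# pydantic's .dict() produces the plain entries that end up in the
# recipeImports list rendered by the frontend data tables
assert ok.dict()["status"] is True
assert failed.dict()["exception"].startswith("KeyError")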

View file

@@ -1,10 +1,11 @@
from db.db_setup import generate_session
from fastapi import APIRouter, Depends, File, Form, HTTPException, Query
from fastapi import APIRouter, Depends, File, Form, HTTPException
from fastapi.logger import logger
from fastapi.responses import FileResponse
from models.recipe_models import RecipeURLIn
from services.image_services import read_image, write_image
from services.recipe_services import Recipe
from services.scrape_services import create_from_url
from services.scraper.scraper import create_from_url
from sqlalchemy.orm.session import Session
from utils.snackbar import SnackResponse
@@ -27,6 +28,7 @@ def parse_recipe_url(url: RecipeURLIn, db: Session = Depends(generate_session)):
""" Takes in a URL and attempts to scrape data and load it into the database """
recipe = create_from_url(url.url)
recipe.save_to_db(db)
return recipe.slug

View file

@@ -1,12 +1,13 @@
import json
import shutil
import zipfile
from logging import error
from logging import error, exception
from pathlib import Path
from typing import List
from app_config import BACKUP_DIR, IMG_DIR, TEMP_DIR
from db.database import db
from models.import_models import RecipeImport, SettingsImport, ThemeImport
from models.theme_models import SiteTheme
from services.recipe_services import Recipe
from services.settings_services import SiteSettings
@@ -57,23 +58,29 @@ class ImportDatabase:
raise Exception("Import file does not exist")
def run(self):
report = {}
recipe_report = []
settings_report = []
theme_report = []
if self.imp_recipes:
report = self.import_recipes()
recipe_report = self.import_recipes()
if self.imp_settings:
self.import_settings()
settings_report = self.import_settings()
if self.imp_themes:
self.import_themes()
theme_report = self.import_themes()
self.clean_up()
return report if report else None
return {
"recipeImports": recipe_report,
"settingsReport": settings_report,
"themeReport": theme_report,
}
def import_recipes(self):
recipe_dir: Path = self.import_dir.joinpath("recipes")
imports = []
successful_imports = []
failed_imports = []
for recipe in recipe_dir.glob("*.json"):
with open(recipe, "r") as f:
@@ -82,16 +89,27 @@ class ImportDatabase:
try:
recipe_obj = Recipe(**recipe_dict)
recipe_obj.save_to_db(self.session)
import_status = RecipeImport(
name=recipe_obj.name, slug=recipe_obj.slug, status=True
)
imports.append(import_status)
successful_imports.append(recipe.stem)
logger.info(f"Imported: {recipe.stem}")
except Exception as inst:
logger.error(inst)
logger.info(f"Failed Import: {recipe.stem}")
failed_imports.append(recipe.stem)
import_status = RecipeImport(
name=recipe.stem,
slug=recipe.stem,
status=False,
exception=str(inst),
)
imports.append(import_status)
self._import_images(successful_imports)
return {"successful": successful_imports, "failed": failed_imports}
return imports
@staticmethod
def _recipe_migration(recipe_dict: dict) -> dict:
@@ -130,7 +148,7 @@ class ImportDatabase:
def import_themes(self):
themes_file = self.import_dir.joinpath("themes", "themes.json")
theme_imports = []
with open(themes_file, "r") as f:
themes: list[dict] = json.loads(f.read())
for theme in themes:
@@ -138,17 +156,38 @@
continue
new_theme = SiteTheme(**theme)
try:
db.themes.create(self.session, new_theme.dict())
except:
theme_imports.append(ThemeImport(name=new_theme.name, status=True))
except Exception as inst:
logger.info(f"Unable to Import Theme {new_theme.name}")
theme_imports.append(
ThemeImport(name=new_theme.name, status=False, exception=str(inst))
)
return theme_imports
def import_settings(self):
settings_file = self.import_dir.joinpath("settings", "settings.json")
settings_imports = []
with open(settings_file, "r") as f:
settings: dict = json.loads(f.read())
db.settings.update(self.session, settings.get("name"), settings)
name = settings.get("name")
try:
db.settings.update(self.session, name, settings)
import_status = SettingsImport(name=name, status=True)
except Exception as inst:
import_status = SettingsImport(
name=name, status=False, exception=str(inst)
)
settings_imports.append(import_status)
return settings_imports
def clean_up(self):
shutil.rmtree(TEMP_DIR)

View file

@@ -6,7 +6,7 @@ from pathlib import Path
from app_config import IMG_DIR, MIGRATION_DIR, TEMP_DIR
from services.recipe_services import Recipe
from services.scrape_services import normalize_data, process_recipe_data
from services.scraper.cleaner import Cleaner
from app_config import IMG_DIR, TEMP_DIR
@@ -34,8 +34,7 @@ def import_recipes(recipe_dir: Path) -> Recipe:
with open(recipe_file, "r") as f:
recipe_dict = json.loads(f.read())
recipe_dict = process_recipe_data(recipe_dict)
recipe_data = normalize_data(recipe_dict)
recipe_data = Cleaner.clean(recipe_dict)
image_name = None
if image:

View file

@@ -38,8 +38,8 @@ class Recipe(BaseModel):
tags: Optional[List[str]] = []
dateAdded: Optional[datetime.date]
notes: Optional[List[RecipeNote]] = []
rating: Optional[int]
orgURL: Optional[str]
rating: Optional[int] = 0
orgURL: Optional[str] = ""
extras: Optional[dict] = {}
class Config:

View file

@@ -1,246 +0,0 @@
import html
import json
import re
from typing import List, Tuple
import extruct
import requests
import scrape_schema_recipe
from app_config import DEBUG_DIR
from slugify import slugify
from utils.logger import logger
from w3lib.html import get_base_url
from services.image_services import scrape_image
from services.recipe_services import Recipe
LAST_JSON = DEBUG_DIR.joinpath("last_recipe.json")
def cleanhtml(raw_html):
cleanr = re.compile("<.*?>")
cleantext = re.sub(cleanr, "", raw_html)
return cleantext
def normalize_image_url(image) -> str:
if type(image) == list:
return image[0]
elif type(image) == dict:
return image["url"]
elif type(image) == str:
return image
else:
raise Exception(f"Unrecognised image URL format: {image}")
def normalize_instructions(instructions) -> List[dict]:
if not instructions:
return []
# One long string split by (possibly multiple) new lines
if type(instructions) == str:
return [
{"text": normalize_instruction(line)}
for line in instructions.splitlines()
if line
]
# Plain strings in a list
elif type(instructions) == list and type(instructions[0]) == str:
return [{"text": normalize_instruction(step)} for step in instructions]
# Dictionaries (let's assume it's a HowToStep) in a list
elif type(instructions) == list and type(instructions[0]) == dict:
try:
# If HowToStep is under HowToSection
sectionSteps = []
for step in instructions:
if step["@type"] == "HowToSection":
for item in step["itemListElement"]:
sectionSteps.append(item)
if len(sectionSteps) > 0:
return [
{"text": normalize_instruction(step["text"])}
for step in sectionSteps
if step["@type"] == "HowToStep"
]
return [
{"text": normalize_instruction(step["text"])}
for step in instructions
if step["@type"] == "HowToStep"
]
except Exception as e:
# Not "@type", try "type"
return [
{"text": normalize_instruction(step["properties"]["text"])}
for step in instructions
if step["type"].find("HowToStep") > -1
]
else:
raise Exception(f"Unrecognised instruction format: {instructions}")
def normalize_instruction(line) -> str:
l = cleanhtml(line.strip())
# Some sites erroneously escape their strings on multiple levels
while not l == (l := html.unescape(l)):
pass
return l
def normalize_ingredient(ingredients: list) -> str:
return [cleanhtml(html.unescape(ing)) for ing in ingredients]
def normalize_yield(yld) -> str:
if type(yld) == list:
return yld[-1]
else:
return yld
def normalize_time(time_entry) -> str:
if type(time_entry) == type(None):
return None
elif type(time_entry) != str:
return str(time_entry)
def normalize_data(recipe_data: dict) -> dict:
recipe_data["totalTime"] = normalize_time(recipe_data.get("totalTime"))
recipe_data["description"] = cleanhtml(recipe_data.get("description", ""))
recipe_data["prepTime"] = normalize_time(recipe_data.get("prepTime"))
recipe_data["performTime"] = normalize_time(recipe_data.get("performTime"))
recipe_data["recipeYield"] = normalize_yield(recipe_data.get("recipeYield"))
recipe_data["recipeIngredient"] = normalize_ingredient(
recipe_data.get("recipeIngredient")
)
recipe_data["recipeInstructions"] = normalize_instructions(
recipe_data["recipeInstructions"]
)
recipe_data["image"] = normalize_image_url(recipe_data["image"])
return recipe_data
def process_recipe_data(new_recipe: dict, url=None) -> dict:
slug = slugify(new_recipe["name"])
mealie_tags = {
"slug": slug,
"orgURL": url,
"categories": [],
"tags": [],
"dateAdded": None,
"notes": [],
"extras": [],
}
new_recipe.update(mealie_tags)
return new_recipe
def extract_recipe_from_html(html: str, url: str) -> dict:
try:
scraped_recipes: List[dict] = scrape_schema_recipe.loads(
html, python_objects=True
)
dump_last_json(scraped_recipes)
if not scraped_recipes:
scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(
url, python_objects=True
)
except Exception as e:
# trying without python_objects
scraped_recipes: List[dict] = scrape_schema_recipe.loads(html)
dump_last_json(scraped_recipes)
if not scraped_recipes:
scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(url)
if scraped_recipes:
new_recipe: dict = scraped_recipes[0]
logger.info(f"Recipe Scraped From Web: {new_recipe}")
if not new_recipe:
return "fail" # TODO: Return Better Error Here
new_recipe = process_recipe_data(new_recipe, url=url)
new_recipe = normalize_data(new_recipe)
else:
new_recipe = basic_recipe_from_opengraph(html, url)
logger.info(f"Recipe Scraped from opengraph metadata: {new_recipe}")
return new_recipe
def download_image_for_recipe(recipe: dict) -> dict:
try:
img_path = scrape_image(recipe.get("image"), recipe.get("slug"))
recipe["image"] = img_path.name
except:
recipe["image"] = "no image"
return recipe
def og_field(properties: dict, field_name: str) -> str:
return next((val for name, val in properties if name == field_name), None)
def og_fields(properties: List[Tuple[str, str]], field_name: str) -> List[str]:
return list({val for name, val in properties if name == field_name})
def basic_recipe_from_opengraph(html: str, url: str) -> dict:
base_url = get_base_url(html, url)
data = extruct.extract(html, base_url=base_url)
try:
properties = data["opengraph"][0]["properties"]
except:
return
return {
"name": og_field(properties, "og:title"),
"description": og_field(properties, "og:description"),
"image": og_field(properties, "og:image"),
"recipeYield": "",
# FIXME: If recipeIngredient is an empty list, mongodb's data verification fails.
"recipeIngredient": ["Could not detect ingredients"],
# FIXME: recipeInstructions is allowed to be empty but message this is added for user sanity.
"recipeInstructions": [{"text": "Could not detect instructions"}],
"slug": slugify(og_field(properties, "og:title")),
"orgURL": og_field(properties, "og:url"),
"categories": [],
"tags": og_fields(properties, "og:article:tag"),
"dateAdded": None,
"notes": [],
"extras": [],
}
def dump_last_json(recipe_data: dict):
with open(LAST_JSON, "w") as f:
f.write(json.dumps(recipe_data, indent=4, default=str))
return
def process_recipe_url(url: str) -> dict:
r = requests.get(url)
new_recipe = extract_recipe_from_html(r.text, url)
new_recipe = download_image_for_recipe(new_recipe)
return new_recipe
def create_from_url(url: str) -> Recipe:
recipe_data = process_recipe_url(url)
recipe = Recipe(**recipe_data)
return recipe

View file

@@ -0,0 +1,151 @@
import html
import re
from typing import List
from slugify import slugify
class Cleaner:
"""A Namespace for utility function to clean recipe data extracted
from a url and returns a dictionary that is ready for import into
the database. Cleaner.clean is the main entrypoint
"""
@staticmethod
def clean(recipe_data: dict, url=None) -> dict:
"""Main entrypoint to clean a recipe extracted from the web
and format the data into an acceptable format for the database
Args:
recipe_data (dict): raw recipe dictionary
Returns:
dict: cleaned recipe dictionary
"""
recipe_data["totalTime"] = Cleaner.time(recipe_data.get("totalTime"))
recipe_data["description"] = Cleaner.html(recipe_data.get("description", ""))
recipe_data["prepTime"] = Cleaner.time(recipe_data.get("prepTime"))
recipe_data["performTime"] = Cleaner.time(recipe_data.get("performTime"))
recipe_data["recipeYield"] = Cleaner.yield_amount(
recipe_data.get("recipeYield")
)
recipe_data["recipeIngredient"] = Cleaner.ingredient(
recipe_data.get("recipeIngredient")
)
recipe_data["recipeInstructions"] = Cleaner.instructions(
recipe_data["recipeInstructions"]
)
recipe_data["image"] = Cleaner.image(recipe_data["image"])
recipe_data["slug"] = slugify(recipe_data["name"])
recipe_data["orgURL"] = url
return recipe_data
@staticmethod
def html(raw_html):
cleanr = re.compile("<.*?>")
cleantext = re.sub(cleanr, "", raw_html)
return cleantext
@staticmethod
def image(image) -> str:
if type(image) == list:
return image[0]
elif type(image) == dict:
return image["url"]
elif type(image) == str:
return image
else:
raise Exception(f"Unrecognised image URL format: {image}")
@staticmethod
def instructions(instructions) -> List[dict]:
if not instructions:
return []
# One long string split by (possibly multiple) new lines
if type(instructions) == str:
return [
{"text": Cleaner._instruction(line)}
for line in instructions.splitlines()
if line
]
# Plain strings in a list
elif type(instructions) == list and type(instructions[0]) == str:
return [{"text": Cleaner._instruction(step)} for step in instructions]
# Dictionaries (let's assume it's a HowToStep) in a list
elif type(instructions) == list and type(instructions[0]) == dict:
# Try List of Dictionary without "@type" or "type"
if not instructions[0].get("@type", False) and not instructions[0].get(
"type", False
):
return [
{"text": Cleaner._instruction(step["text"])}
for step in instructions
]
try:
# If HowToStep is under HowToSection
sectionSteps = []
for step in instructions:
if step["@type"] == "HowToSection":
sectionSteps.extend(step["itemListElement"])
if len(sectionSteps) > 0:
return [
{"text": Cleaner._instruction(step["text"])}
for step in sectionSteps
if step["@type"] == "HowToStep"
]
return [
{"text": Cleaner._instruction(step["text"])}
for step in instructions
if step["@type"] == "HowToStep"
]
except Exception as e:
# Not "@type", try "type"
try:
return [
{"text": Cleaner._instruction(step["properties"]["text"])}
for step in instructions
if step["type"].find("HowToStep") > -1
]
except:
pass
else:
raise Exception(f"Unrecognised instruction format: {instructions}")
@staticmethod
def _instruction(line) -> str:
l = Cleaner.html(line.strip())
# Some sites erroneously escape their strings on multiple levels
while not l == (l := html.unescape(l)):
pass
return l
@staticmethod
def ingredient(ingredients: list) -> List[str]:
return [Cleaner.html(html.unescape(ing)) for ing in ingredients]
@staticmethod
def yield_amount(yld) -> str:
if type(yld) == list:
return yld[-1]
else:
return yld
@staticmethod
def time(time_entry) -> str:
if time_entry is None:
return None
elif type(time_entry) != str:
return str(time_entry)
else:
return time_entry
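For reference, a minimal usage sketch of Cleaner.clean, assuming the module path used elsewhere in this diff; the input mimics a pared-down schema.org/Recipe payload and every value is made up:

from services.scraper.cleaner import Cleaner

raw = {
    "name": "Chili Oil",
    "description": "A <b>very</b> hot condiment",
    "recipeInstructions": "Heat the oil.\nAdd the chili flakes.",
    "recipeIngredient": ["1 cup oil", "2 tbsp chili flakes"],
    "image": ["https://example.com/chili.jpg"],
}
clean = Cleaner.clean(raw, url="https://example.com/chili-oil")
# HTML is stripped, the instruction string is split per line, and
# slug/orgURL are derived from the name and url arguments
assert clean["description"] == "A very hot condiment"
assert clean["recipeInstructions"][0] == {"text": "Heat the oil."}
assert clean["slug"] == "chili-oil"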

View file

@@ -0,0 +1,43 @@
from typing import Tuple
import extruct
from app_config import DEBUG_DIR
from slugify import slugify
from w3lib.html import get_base_url
LAST_JSON = DEBUG_DIR.joinpath("last_recipe.json")
def og_field(properties: dict, field_name: str) -> str:
return next((val for name, val in properties if name == field_name), None)
def og_fields(properties: list[Tuple[str, str]], field_name: str) -> list[str]:
return list({val for name, val in properties if name == field_name})
def basic_recipe_from_opengraph(html: str, url: str) -> dict:
base_url = get_base_url(html, url)
data = extruct.extract(html, base_url=base_url)
try:
properties = data["opengraph"][0]["properties"]
except:
return
return {
"name": og_field(properties, "og:title"),
"description": og_field(properties, "og:description"),
"image": og_field(properties, "og:image"),
"recipeYield": "",
# FIXME: If recipeIngredient is an empty list, mongodb's data verification fails.
"recipeIngredient": ["Could not detect ingredients"],
# FIXME: recipeInstructions is allowed to be empty but message this is added for user sanity.
"recipeInstructions": [{"text": "Could not detect instructions"}],
"slug": slugify(og_field(properties, "og:title")),
"orgURL": og_field(properties, "og:url"),
"categories": [],
"tags": og_fields(properties, "og:article:tag"),
"dateAdded": None,
"notes": [],
"extras": [],
}
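The OpenGraph fallback can also be exercised in isolation; a sketch against hand-written HTML (illustrative values, and it assumes extruct picks the og: meta tags out of the head as written):

from services.scraper.open_graph import basic_recipe_from_opengraph

page = """
<html prefix="og: http://ogp.me/ns#">
<head>
<meta property="og:title" content="Miso Soup" />
<meta property="og:image" content="https://example.com/miso.jpg" />
<meta property="og:url" content="https://example.com/miso-soup" />
</head>
<body></body>
</html>
"""

recipe = basic_recipe_from_opengraph(page, "https://example.com/miso-soup")
assert recipe["name"] == "Miso Soup"
assert recipe["slug"] == "miso-soup"
# ingredients/instructions fall back to the placeholder strings above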

View file

@@ -0,0 +1,84 @@
import json
from typing import List
import requests
import scrape_schema_recipe
from app_config import DEBUG_DIR
from services.image_services import scrape_image
from services.recipe_services import Recipe
from services.scraper import open_graph
from services.scraper.cleaner import Cleaner
from utils.logger import logger
LAST_JSON = DEBUG_DIR.joinpath("last_recipe.json")
def create_from_url(url: str) -> Recipe:
"""Main entry point for generating a recipe from a URL. Pass in a URL and
a Recipe object will be returned if successful.
Args:
url (str): a valid string representing a URL
Returns:
Recipe: Recipe Object
"""
r = requests.get(url)
new_recipe = extract_recipe_from_html(r.text, url)
new_recipe = Cleaner.clean(new_recipe)
new_recipe = download_image_for_recipe(new_recipe)
recipe = Recipe(**new_recipe)
return recipe
def extract_recipe_from_html(html: str, url: str) -> dict:
try:
scraped_recipes: List[dict] = scrape_schema_recipe.loads(
html, python_objects=True
)
dump_last_json(scraped_recipes)
if not scraped_recipes:
scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(
url, python_objects=True
)
except Exception as e:
# trying without python_objects
scraped_recipes: List[dict] = scrape_schema_recipe.loads(html)
dump_last_json(scraped_recipes)
if not scraped_recipes:
scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(url)
if scraped_recipes:
new_recipe: dict = scraped_recipes[0]
logger.info(f"Recipe Scraped From Web: {new_recipe}")
if not new_recipe:
return "fail" # TODO: Return Better Error Here
new_recipe = Cleaner.clean(new_recipe, url)
else:
new_recipe = open_graph.basic_recipe_from_opengraph(html, url)
logger.info(f"Recipe Scraped from opengraph metadata: {new_recipe}")
return new_recipe
def download_image_for_recipe(recipe: dict) -> dict:
try:
img_path = scrape_image(recipe.get("image"), recipe.get("slug"))
recipe["image"] = img_path.name
except:
recipe["image"] = "no image"
return recipe
def dump_last_json(recipe_data: dict):
with open(LAST_JSON, "w") as f:
f.write(json.dumps(recipe_data, indent=4, default=str))
return
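Driving the new module end to end looks just like the route shown earlier; a sketch where the URL is illustrative and db_session stands in for a real SQLAlchemy session:

from services.scraper.scraper import create_from_url

recipe = create_from_url("https://example.com/best-pad-thai")  # network call
recipe.save_to_db(db_session)  # db_session: hypothetical open session
print(recipe.slug)  # the value parse_recipe_url returns to the client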

View file

@@ -2,11 +2,8 @@ import json
import re
import pytest
from services.scrape_services import (
extract_recipe_from_html,
normalize_data,
normalize_instructions,
)
from services.scraper.cleaner import Cleaner
from services.scraper.scraper import extract_recipe_from_html
from tests.test_config import TEST_RAW_HTML, TEST_RAW_RECIPES
# https://github.com/django/django/blob/stable/1.3.x/django/core/validators.py#L45
@@ -42,7 +39,7 @@ url_validation_regex = re.compile(
],
)
def test_normalize_data(json_file, num_steps):
recipe_data = normalize_data(json.load(open(TEST_RAW_RECIPES.joinpath(json_file))))
recipe_data = Cleaner.clean(json.load(open(TEST_RAW_RECIPES.joinpath(json_file))))
assert len(recipe_data["recipeInstructions"]) == num_steps
@@ -58,7 +55,7 @@ def test_normalize_data(json_file, num_steps):
],
)
def test_normalize_instructions(instructions):
assert normalize_instructions(instructions) == [
assert Cleaner.instructions(instructions) == [
{"text": "A"},
{"text": "B"},
{"text": "C"},