mirror of
https://github.com/hay-kot/mealie.git
synced 2025-08-22 14:33:33 -07:00
fixed instrucitons on parse
This commit is contained in:
parent
8d0604da3a
commit
9efd9399d9
3 changed files with 69 additions and 123 deletions
|
@ -1,91 +1,16 @@
|
|||
{
|
||||
"@context": "http://schema.org/",
|
||||
"@type": "Recipe",
|
||||
"name": "Jalape\u00f1o Popper Dip",
|
||||
"author": {
|
||||
"@type": "Person",
|
||||
"name": "Michelle"
|
||||
},
|
||||
"description": "Jalapeno Popper Dip is creamy, cheesy and has just the perfect amount of kick. Great appetizer for your next party or watching the big game!",
|
||||
"datePublished": "2016-02-22 00:01:37+00:00",
|
||||
"image": "jalapeno-popper-dip.jpg",
|
||||
"recipeYield": [
|
||||
"10",
|
||||
"10 to 12 servings"
|
||||
],
|
||||
"prepTime": "0:15:00",
|
||||
"cookTime": "0:30:00",
|
||||
"totalTime": "0:45:00",
|
||||
"name": "Carottes Rapp\u00e9s with Rice and Sunflower Seeds \u2014 FEED THE SWIMMERS",
|
||||
"description": " Carottes R\u00e2p\u00e9es with Rice and Sunflower Seeds thanks to @think_rice and @thefeedfeed. Carottes R\u00e2p\u00e9es is a classic French Salad found ready to go (think picnic) at every charcuterie and on most cafe menus. This is one of those insanely simple salads that explode with flavor! The carrots ar",
|
||||
"image": "carottes-rappes-with-rice-and-sunflower-seeds-feed-the-swimmers.JPG?format=1500w",
|
||||
"recipeYield": "",
|
||||
"recipeIngredient": [
|
||||
"16 ounces cream cheese (at room temperature)",
|
||||
"1 cup mayonnaise",
|
||||
"8 pieces of bacon (cooked and chopped)",
|
||||
"6 jalape\u00f1os (seeded and minced (if you can't get fresh, substitute a 4-ounce can diced jalape\u00f1o peppers, drained))",
|
||||
"2 cloves garlic (minced)",
|
||||
"\u00bd teaspoon cumin",
|
||||
"6 ounces cheddar cheese (shredded (about 1\u00bd cups))",
|
||||
"1 cup panko breadcrumbs",
|
||||
"1 cup grated Parmesan cheese",
|
||||
"4 tablespoons unsalted butter, melted"
|
||||
"Could not detect ingredients"
|
||||
],
|
||||
"recipeInstructions": [
|
||||
{
|
||||
"@type": "HowToStep",
|
||||
"text": "Preheat oven to 375 degrees F.",
|
||||
"name": "Preheat oven to 375 degrees F.",
|
||||
"url": "https://www.browneyedbaker.com/jalapeno-popper-dip/#wprm-recipe-44993-step-0-0"
|
||||
},
|
||||
{
|
||||
"@type": "HowToStep",
|
||||
"text": "Combine the cream cheese, mayonnaise, bacon, jalapenos, garlic, cumin and cheddar cheese in a mixing bowl. Transfer the mixture into 2-quart baking dish.",
|
||||
"name": "Combine the cream cheese, mayonnaise, bacon, jalapenos, garlic, cumin and cheddar cheese in a mixing bowl. Transfer the mixture into 2-quart baking dish.",
|
||||
"url": "https://www.browneyedbaker.com/jalapeno-popper-dip/#wprm-recipe-44993-step-0-1"
|
||||
},
|
||||
{
|
||||
"@type": "HowToStep",
|
||||
"text": "Combine the panko breadcrumbs, Parmesan cheese and melted butter in a small bowl, tossing with a fork until the mixture is evenly moistened. Sprinkle evenly over the cream cheese mixture.",
|
||||
"name": "Combine the panko breadcrumbs, Parmesan cheese and melted butter in a small bowl, tossing with a fork until the mixture is evenly moistened. Sprinkle evenly over the cream cheese mixture.",
|
||||
"url": "https://www.browneyedbaker.com/jalapeno-popper-dip/#wprm-recipe-44993-step-0-2"
|
||||
},
|
||||
{
|
||||
"@type": "HowToStep",
|
||||
"text": "Bake in the preheated oven for 25 to 30 minutes, until the top is golden brown and the dip is bubbling. Let rest for 5 minutes before serving. Serve with your favorite tortilla chips, crackers, vegetables, etc.",
|
||||
"name": "Bake in the preheated oven for 25 to 30 minutes, until the top is golden brown and the dip is bubbling. Let rest for 5 minutes before serving. Serve with your favorite tortilla chips, crackers, vegetables, etc.",
|
||||
"url": "https://www.browneyedbaker.com/jalapeno-popper-dip/#wprm-recipe-44993-step-0-3"
|
||||
}
|
||||
"Could not detect instructions"
|
||||
],
|
||||
"aggregateRating": {
|
||||
"@type": "AggregateRating",
|
||||
"ratingValue": "4.34",
|
||||
"ratingCount": "15"
|
||||
},
|
||||
"recipeCategory": [
|
||||
"Appetizer"
|
||||
],
|
||||
"recipeCuisine": [
|
||||
"American"
|
||||
],
|
||||
"keywords": "cheese dip, game day food, party food",
|
||||
"nutrition": {
|
||||
"@type": "NutritionInformation",
|
||||
"calories": "560 kcal",
|
||||
"carbohydrateContent": "7 g",
|
||||
"proteinContent": "14 g",
|
||||
"fatContent": "52 g",
|
||||
"saturatedFatContent": "21 g",
|
||||
"cholesterolContent": "109 mg",
|
||||
"sodiumContent": "707 mg",
|
||||
"sugarContent": "2 g",
|
||||
"servingSize": "1 serving"
|
||||
},
|
||||
"@id": "https://www.browneyedbaker.com/jalapeno-popper-dip/#recipe",
|
||||
"isPartOf": {
|
||||
"@id": "https://www.browneyedbaker.com/jalapeno-popper-dip/#article"
|
||||
},
|
||||
"mainEntityOfPage": "https://www.browneyedbaker.com/jalapeno-popper-dip/#webpage",
|
||||
"url": "https://www.browneyedbaker.com/jalapeno-popper-dip/",
|
||||
"slug": "jalapeno-popper-dip",
|
||||
"orgURL": "http://www.browneyedbaker.com/2011/08/03/jalapeno-popper-dip/",
|
||||
"slug": "carottes-rappes-with-rice-and-sunflower-seeds-feed-the-swimmers",
|
||||
"orgURL": "https://www.feedtheswimmers.com/blog/2019/6/5/carottes-rapps-with-rice-and-sunflower-seeds",
|
||||
"categories": [],
|
||||
"tags": [],
|
||||
"dateAdded": null,
|
||||
|
|
|
@ -1,15 +1,13 @@
|
|||
from typing import List, Tuple
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
from typing import List, Tuple
|
||||
|
||||
import extruct
|
||||
import requests
|
||||
from w3lib.html import get_base_url
|
||||
import scrape_schema_recipe
|
||||
from slugify import slugify
|
||||
from utils.logger import logger
|
||||
from w3lib.html import get_base_url
|
||||
|
||||
from services.image_services import scrape_image
|
||||
from services.recipe_services import Recipe
|
||||
|
@ -85,7 +83,7 @@ def process_recipe_data(new_recipe: dict, url=None) -> dict:
|
|||
return new_recipe
|
||||
|
||||
|
||||
def extract_recipe_from_html(html:str, url: str) -> dict:
|
||||
def extract_recipe_from_html(html: str, url: str) -> dict:
|
||||
scraped_recipes: List[dict] = scrape_schema_recipe.loads(html, python_objects=True)
|
||||
if scraped_recipes:
|
||||
new_recipe: dict = scraped_recipes[0]
|
||||
|
@ -116,13 +114,15 @@ def download_image_for_recipe(recipe: dict) -> dict:
|
|||
def og_field(properties: dict, field_name: str) -> str:
|
||||
return next((val for name, val in properties if name == field_name), None)
|
||||
|
||||
|
||||
def og_fields(properties: List[Tuple[str, str]], field_name: str) -> List[str]:
|
||||
return list({val for name, val in properties if name == field_name})
|
||||
|
||||
|
||||
def basic_recipe_from_opengraph(html: str, url: str) -> dict:
|
||||
base_url = get_base_url(html, url)
|
||||
data = extruct.extract(html, base_url=base_url)
|
||||
properties = data["opengraph"][0]['properties']
|
||||
properties = data["opengraph"][0]["properties"]
|
||||
return {
|
||||
"name": og_field(properties, "og:title"),
|
||||
"description": og_field(properties, "og:description"),
|
||||
|
@ -131,7 +131,7 @@ def basic_recipe_from_opengraph(html: str, url: str) -> dict:
|
|||
# FIXME: If recipeIngredient is an empty list, mongodb's data verification fails.
|
||||
"recipeIngredient": ["Could not detect ingredients"],
|
||||
# FIXME: recipeInstructions is allowed to be empty but message this is added for user sanity.
|
||||
"recipeInstructions": ["Could not detect instructions"],
|
||||
"recipeInstructions": [{"text": "Could not detect instructions"}],
|
||||
"slug": slugify(og_field(properties, "og:title")),
|
||||
"orgURL": og_field(properties, "og:url"),
|
||||
"categories": [],
|
||||
|
|
|
@ -3,7 +3,11 @@ import re
|
|||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from services.scrape_services import normalize_data, normalize_instructions, extract_recipe_from_html
|
||||
from services.scrape_services import (
|
||||
extract_recipe_from_html,
|
||||
normalize_data,
|
||||
normalize_instructions,
|
||||
)
|
||||
|
||||
CWD = Path(__file__).parent
|
||||
RAW_RECIPE_DIR = CWD.joinpath("data", "recipes-raw")
|
||||
|
@ -11,42 +15,58 @@ RAW_HTML_DIR = CWD.joinpath("data", "html-raw")
|
|||
|
||||
# https://github.com/django/django/blob/stable/1.3.x/django/core/validators.py#L45
|
||||
url_validation_regex = re.compile(
|
||||
r'^(?:http|ftp)s?://' # http:// or https://
|
||||
r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' #domain...
|
||||
r'localhost|' #localhost...
|
||||
r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip
|
||||
r'(?::\d+)?' # optional port
|
||||
r'(?:/?|[/?]\S+)$', re.IGNORECASE)
|
||||
r"^(?:http|ftp)s?://" # http:// or https://
|
||||
r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|" # domain...
|
||||
r"localhost|" # localhost...
|
||||
r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})" # ...or ip
|
||||
r"(?::\d+)?" # optional port
|
||||
r"(?:/?|[/?]\S+)$",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize("json_file,num_steps", [
|
||||
("best-homemade-salsa-recipe.json", 2),
|
||||
("blue-cheese-stuffed-turkey-meatballs-with-raspberry-balsamic-glaze-2.json", 3),
|
||||
("bon_appetit.json", 8),
|
||||
("chunky-apple-cake.json", 4),
|
||||
("dairy-free-impossible-pumpkin-pie.json", 7),
|
||||
("how-to-make-instant-pot-spaghetti.json", 8),
|
||||
("instant-pot-chicken-and-potatoes.json", 4),
|
||||
("instant-pot-kerala-vegetable-stew.json", 13),
|
||||
("jalapeno-popper-dip.json", 4),
|
||||
("microwave_sweet_potatoes_04783.json", 4),
|
||||
("moroccan-skirt-steak-with-roasted-pepper-couscous.json", 4),
|
||||
("Pizza-Knoblauch-Champignon-Paprika-vegan.html.json", 3),
|
||||
])
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"json_file,num_steps",
|
||||
[
|
||||
("best-homemade-salsa-recipe.json", 2),
|
||||
(
|
||||
"blue-cheese-stuffed-turkey-meatballs-with-raspberry-balsamic-glaze-2.json",
|
||||
3,
|
||||
),
|
||||
("bon_appetit.json", 8),
|
||||
("chunky-apple-cake.json", 4),
|
||||
("dairy-free-impossible-pumpkin-pie.json", 7),
|
||||
("how-to-make-instant-pot-spaghetti.json", 8),
|
||||
("instant-pot-chicken-and-potatoes.json", 4),
|
||||
("instant-pot-kerala-vegetable-stew.json", 13),
|
||||
("jalapeno-popper-dip.json", 4),
|
||||
("microwave_sweet_potatoes_04783.json", 4),
|
||||
("moroccan-skirt-steak-with-roasted-pepper-couscous.json", 4),
|
||||
("Pizza-Knoblauch-Champignon-Paprika-vegan.html.json", 3),
|
||||
],
|
||||
)
|
||||
def test_normalize_data(json_file, num_steps):
|
||||
recipe_data = normalize_data(json.load(open(RAW_RECIPE_DIR.joinpath(json_file))))
|
||||
assert len(recipe_data["recipeInstructions"]) == num_steps
|
||||
|
||||
|
||||
@pytest.mark.parametrize("instructions", [
|
||||
"A\n\nB\n\nC\n\n",
|
||||
"A\nB\nC\n",
|
||||
"A\r\n\r\nB\r\n\r\nC\r\n\r\n",
|
||||
"A\r\nB\r\nC\r\n",
|
||||
["A","B","C"],
|
||||
[{"@type": "HowToStep", "text": x} for x in ["A","B","C"]]
|
||||
])
|
||||
@pytest.mark.parametrize(
|
||||
"instructions",
|
||||
[
|
||||
"A\n\nB\n\nC\n\n",
|
||||
"A\nB\nC\n",
|
||||
"A\r\n\r\nB\r\n\r\nC\r\n\r\n",
|
||||
"A\r\nB\r\nC\r\n",
|
||||
["A", "B", "C"],
|
||||
[{"@type": "HowToStep", "text": x} for x in ["A", "B", "C"]],
|
||||
],
|
||||
)
|
||||
def test_normalize_instructions(instructions):
|
||||
assert normalize_instructions(instructions) == [{"text": "A"}, {"text": "B"}, {"text": "C"}]
|
||||
assert normalize_instructions(instructions) == [
|
||||
{"text": "A"},
|
||||
{"text": "B"},
|
||||
{"text": "C"},
|
||||
]
|
||||
|
||||
|
||||
def test_html_no_recipe_data():
|
||||
|
@ -59,8 +79,10 @@ def test_html_no_recipe_data():
|
|||
assert recipe_data["orgURL"] == url
|
||||
assert len(recipe_data["description"]) > 100
|
||||
assert url_validation_regex.match(recipe_data["image"])
|
||||
assert recipe_data["recipeIngredient"] == []
|
||||
assert recipe_data["recipeInstructions"] == []
|
||||
assert recipe_data["recipeIngredient"] == ["Could not detect ingredients"]
|
||||
assert recipe_data["recipeInstructions"] == [
|
||||
{"text": "Could not detect instructions"}
|
||||
]
|
||||
|
||||
|
||||
def test_html_with_recipe_data():
|
||||
|
@ -75,4 +97,3 @@ def test_html_with_recipe_data():
|
|||
assert url_validation_regex.match(recipe_data["image"])
|
||||
assert len(recipe_data["recipeIngredient"]) == 13
|
||||
assert len(recipe_data["recipeInstructions"]) == 4
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue