mirror of
https://github.com/hay-kot/mealie.git
synced 2025-08-22 14:33:33 -07:00
parent
98f5439907
commit
d8f226092e
1 changed files with 40 additions and 22 deletions
|
@ -40,11 +40,7 @@ def normalize_instructions(instructions) -> List[dict]:
|
||||||
|
|
||||||
# One long string split by (possibly multiple) new lines
|
# One long string split by (possibly multiple) new lines
|
||||||
if type(instructions) == str:
|
if type(instructions) == str:
|
||||||
return [
|
return [{"text": normalize_instruction(line)} for line in instructions.splitlines() if line]
|
||||||
{"text": normalize_instruction(line)}
|
|
||||||
for line in instructions.splitlines()
|
|
||||||
if line
|
|
||||||
]
|
|
||||||
|
|
||||||
# Plain strings in a list
|
# Plain strings in a list
|
||||||
elif type(instructions) == list and type(instructions[0]) == str:
|
elif type(instructions) == list and type(instructions[0]) == str:
|
||||||
|
@ -52,11 +48,31 @@ def normalize_instructions(instructions) -> List[dict]:
|
||||||
|
|
||||||
# Dictionaries (let's assume it's a HowToStep) in a list
|
# Dictionaries (let's assume it's a HowToStep) in a list
|
||||||
elif type(instructions) == list and type(instructions[0]) == dict:
|
elif type(instructions) == list and type(instructions[0]) == dict:
|
||||||
return [
|
try:
|
||||||
{"text": normalize_instruction(step["text"])}
|
# If HowToStep is under HowToSection
|
||||||
for step in instructions
|
sectionSteps = []
|
||||||
if step["@type"] == "HowToStep"
|
for step in instructions:
|
||||||
]
|
if step["@type"] == "HowToSection":
|
||||||
|
for item in step["itemListElement"]:
|
||||||
|
sectionSteps.append(item)
|
||||||
|
|
||||||
|
if len(sectionSteps) > 0:
|
||||||
|
return [
|
||||||
|
{"text": normalize_instruction(step["text"])}
|
||||||
|
for step in sectionSteps
|
||||||
|
if step["@type"] == "HowToStep"
|
||||||
|
]
|
||||||
|
|
||||||
|
return [
|
||||||
|
{"text": normalize_instruction(step["text"])} for step in instructions if step["@type"] == "HowToStep"
|
||||||
|
]
|
||||||
|
except Exception as e:
|
||||||
|
# Not "@type", try "type"
|
||||||
|
return [
|
||||||
|
{"text": normalize_instruction(step["properties"]["text"])}
|
||||||
|
for step in instructions
|
||||||
|
if step["type"].find("HowToStep") > -1
|
||||||
|
]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise Exception(f"Unrecognised instruction format: {instructions}")
|
raise Exception(f"Unrecognised instruction format: {instructions}")
|
||||||
|
@ -95,12 +111,8 @@ def normalize_data(recipe_data: dict) -> dict:
|
||||||
recipe_data["prepTime"] = normalize_time(recipe_data.get("prepTime"))
|
recipe_data["prepTime"] = normalize_time(recipe_data.get("prepTime"))
|
||||||
recipe_data["performTime"] = normalize_time(recipe_data.get("performTime"))
|
recipe_data["performTime"] = normalize_time(recipe_data.get("performTime"))
|
||||||
recipe_data["recipeYield"] = normalize_yield(recipe_data.get("recipeYield"))
|
recipe_data["recipeYield"] = normalize_yield(recipe_data.get("recipeYield"))
|
||||||
recipe_data["recipeIngredient"] = normalize_ingredient(
|
recipe_data["recipeIngredient"] = normalize_ingredient(recipe_data.get("recipeIngredient"))
|
||||||
recipe_data.get("recipeIngredient")
|
recipe_data["recipeInstructions"] = normalize_instructions(recipe_data["recipeInstructions"])
|
||||||
)
|
|
||||||
recipe_data["recipeInstructions"] = normalize_instructions(
|
|
||||||
recipe_data["recipeInstructions"]
|
|
||||||
)
|
|
||||||
recipe_data["image"] = normalize_image_url(recipe_data["image"])
|
recipe_data["image"] = normalize_image_url(recipe_data["image"])
|
||||||
return recipe_data
|
return recipe_data
|
||||||
|
|
||||||
|
@ -123,13 +135,19 @@ def process_recipe_data(new_recipe: dict, url=None) -> dict:
|
||||||
|
|
||||||
|
|
||||||
def extract_recipe_from_html(html: str, url: str) -> dict:
|
def extract_recipe_from_html(html: str, url: str) -> dict:
|
||||||
scraped_recipes: List[dict] = scrape_schema_recipe.loads(html, python_objects=True)
|
try:
|
||||||
dump_last_json(scraped_recipes)
|
scraped_recipes: List[dict] = scrape_schema_recipe.loads(html, python_objects=True)
|
||||||
|
dump_last_json(scraped_recipes)
|
||||||
|
|
||||||
if not scraped_recipes:
|
if not scraped_recipes:
|
||||||
scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(
|
scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(url, python_objects=True)
|
||||||
url, python_objects=True
|
except Exception as e:
|
||||||
)
|
# trying without python_objects
|
||||||
|
scraped_recipes: List[dict] = scrape_schema_recipe.loads(html)
|
||||||
|
dump_last_json(scraped_recipes)
|
||||||
|
|
||||||
|
if not scraped_recipes:
|
||||||
|
scraped_recipes: List[dict] = scrape_schema_recipe.scrape_url(url)
|
||||||
|
|
||||||
if scraped_recipes:
|
if scraped_recipes:
|
||||||
new_recipe: dict = scraped_recipes[0]
|
new_recipe: dict = scraped_recipes[0]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue