feat: Migrate from CRF++ to Ingredient Parser (a Python package) (#5061)

This commit is contained in:
Michael Genson 2025-02-28 08:17:28 -06:00 committed by GitHub
parent ec1a9d78ac
commit b12aea8272
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 367 additions and 592 deletions

View file

@ -31,6 +31,4 @@ venv
*/mealie/.temp
/mealie/frontend/
model.crfmodel
crowdin.yml

2
.gitignore vendored
View file

@ -157,10 +157,8 @@ dev/data/backups/dev_sample_data*.zip
dev/data/recipes/*
dev/scripts/output/app_routes.py
dev/scripts/output/javascriptAPI/*
mealie/services/scraper/ingredient_nlp/model.crfmodel
dev/code-generation/generated/openapi.json
dev/code-generation/generated/test_routes.py
mealie/services/parser_services/crfpp/model.crfmodel
lcov.info
dev/code-generation/openapi.json

View file

@ -61,26 +61,11 @@ tasks:
- pyproject.toml
- .pre-commit-config.yaml
setup:model:
desc: setup nlp model
vars:
MODEL_URL: https://github.com/mealie-recipes/nlp-model/releases/download/v1.0.0/model.crfmodel
OUTPUT: ./mealie/services/parser_services/crfpp/model.crfmodel
sources:
# using pyproject.toml as the dependency since this should only ever need to run once
# during setup. There is perhaps a better way to do this.
- ./pyproject.toml
generates:
- ./mealie/services/parser_services/crfpp/model.crfmodel
cmds:
- curl -L0 {{ .MODEL_URL }} --output {{ .OUTPUT }}
setup:
desc: setup all dependencies
deps:
- setup:ui
- setup:py
- setup:model
dev:generate:
desc: run code generators

View file

@ -116,13 +116,6 @@ COPY --from=packages * /dist/
RUN . $VENV_PATH/bin/activate \
&& pip install --require-hashes -r /dist/requirements.txt --find-links /dist
###############################################
# CRFPP Image
###############################################
FROM hkotel/crfpp as crfpp
RUN echo "crfpp-container"
###############################################
# Production Image
###############################################
@ -145,19 +138,9 @@ RUN apt-get update \
# create directory used for Docker Secrets
RUN mkdir -p /run/secrets
# copy CRF++ and add it to the library path
ENV LD_LIBRARY_PATH=/usr/local/lib
COPY --from=crfpp /usr/local/lib/ /usr/local/lib
COPY --from=crfpp /usr/local/bin/crf_learn /usr/local/bin/crf_learn
COPY --from=crfpp /usr/local/bin/crf_test /usr/local/bin/crf_test
# Copy venv into image. It contains a fully-installed mealie backend and frontend.
COPY --from=venv-builder $VENV_PATH $VENV_PATH
# Grab CRF++ Model Release
RUN python -m mealie.scripts.install_model
VOLUME [ "$MEALIE_HOME/data/" ]
ENV APP_PORT=9000

View file

@ -646,7 +646,6 @@
"nextStep": "Next step",
"recipe-actions": "Recipe Actions",
"parser": {
"experimental-alert-text": "Mealie uses natural language processing to parse and create units and food items for your recipe ingredients. This feature is experimental and may not always work as expected. If you prefer not to use the parsed results, you can select 'Cancel' and your changes will not be saved.",
"ingredient-parser": "Ingredient Parser",
"explanation": "To use the ingredient parser, click the 'Parse All' button to start the process. Once the processed ingredients are available, you can review the items and verify that they were parsed correctly. The model's confidence score is displayed on the right of the item title. This score is an average of all the individual scores and may not always be completely accurate.",
"alerts-explainer": "Alerts will be displayed if a matching foods or unit is found but does not exists in the database.",

View file

@ -1,13 +1,6 @@
<template>
<v-container v-if="recipe">
<v-container>
<v-alert dismissible border="left" colored-border type="warning" elevation="2" :icon="$globals.icons.alert">
<b>{{ $tc("banner-experimental.title") }}</b>
<div>
{{ $tc("recipe.parser.experimental-alert-text") }}
</div>
</v-alert>
<BaseCardSectionTitle :title="$tc('recipe.parser.ingredient-parser')">
<div class="mt-4">{{ $tc("recipe.parser.explanation") }}</div>

View file

@ -250,7 +250,7 @@ class RepositoryGeneric(Generic[Schema, Model]):
match_key = match_key or self.primary_key
result = self._query_one(value, match_key)
results_as_model = self.schema.model_validate(result)
result_as_model = self.schema.model_validate(result)
try:
self.session.delete(result)
@ -259,10 +259,10 @@ class RepositoryGeneric(Generic[Schema, Model]):
self.session.rollback()
raise e
return results_as_model
return result_as_model
def delete_many(self, values: Iterable) -> Schema:
query = self._query().filter(self.model.id.in_(values)) # type: ignore
def delete_many(self, values: Iterable) -> list[Schema]:
query = self._query().filter(self.model.id.in_(values))
results = self.session.execute(query).unique().scalars().all()
results_as_model = [self.schema.model_validate(result) for result in results]
@ -277,7 +277,7 @@ class RepositoryGeneric(Generic[Schema, Model]):
self.session.rollback()
raise e
return results_as_model # type: ignore
return results_as_model
def delete_all(self) -> None:
delete(self.model)

View file

@ -1,5 +1,5 @@
import re as re
from collections.abc import Sequence
from collections.abc import Iterable, Sequence
from random import randint
from typing import Self, cast
from uuid import UUID
@ -103,6 +103,51 @@ class RepositoryRecipes(HouseholdRepositoryGeneric[Recipe, RecipeModel]):
if i >= max_retries:
raise
def _delete_recipe(self, recipe: RecipeModel) -> Recipe:
    """
    Delete a single recipe row and return its validated schema snapshot.

    The snapshot is taken *before* deletion so the caller still gets the
    recipe's data after the row is gone. Each step commits immediately and
    rolls back on failure, re-raising the original exception.
    """
    recipe_as_model = self.schema.model_validate(recipe)

    # first remove UserToRecipe entries so we don't run into stale data errors
    try:
        user_to_recipe_delete_query = sa.delete(UserToRecipe).where(UserToRecipe.recipe_id == recipe.id)
        self.session.execute(user_to_recipe_delete_query)
        self.session.commit()
    except Exception:
        self.session.rollback()
        raise

    # remove the recipe
    try:
        self.session.delete(recipe)
        self.session.commit()
    except Exception:
        self.session.rollback()
        raise

    return recipe_as_model
def delete(self, value, match_key: str | None = None) -> Recipe:
    """
    Delete one recipe matched by `value` on `match_key` (defaults to the
    repository's primary key) and return the deleted recipe's data.
    """
    key = match_key if match_key else self.primary_key
    target = self._query_one(value, key)
    return self._delete_recipe(target)
def delete_many(self, values: Iterable) -> list[Recipe]:
    """
    Delete every recipe whose id is in `values` and return their data.

    Recipes are deleted one-by-one via _delete_recipe (which commits per
    recipe); the final commit/rollback is a safety net for any remaining
    pending state on the session.
    """
    query = self._query().filter(self.model.id.in_(values))
    recipes_in_db = self.session.execute(query).unique().scalars().all()
    results: list[Recipe] = []

    # we create a delete statement for each row
    # we don't delete the whole query in one statement because postgres doesn't cascade correctly
    for recipe_in_db in recipes_in_db:
        results.append(self._delete_recipe(recipe_in_db))

    try:
        self.session.commit()
    except Exception as e:
        self.session.rollback()
        raise e

    return results
def update_image(self, slug: str, _: str | None = None) -> int:
entry: RecipeModel = self._query_one(match_value=slug)
entry.image = randint(0, 255)

View file

@ -1,21 +0,0 @@
import requests
from mealie.services.parser_services import crfpp
MODEL_URL = "https://github.com/mealie-recipes/nlp-model/releases/download/v1.0.0/model.crfmodel"


def main():
    """
    Download the CRF++ model release and install it into the crfpp directory
    (crfpp.MODEL_PATH).
    """
    # stream=True avoids holding the whole model file in memory;
    # the timeout prevents the installer from hanging forever on a dead host
    r = requests.get(MODEL_URL, stream=True, allow_redirects=True, timeout=60)
    # fail loudly on 4xx/5xx instead of silently writing an HTML error page
    # to disk as if it were the model
    r.raise_for_status()
    with open(crfpp.MODEL_PATH, "wb") as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)


if __name__ == "__main__":
    main()

View file

@ -1 +0,0 @@
from .processor import *

View file

@ -1,69 +0,0 @@
import re
from mealie.services.parser_services.parser_utils import convert_vulgar_fractions_to_regular_fractions
replace_abbreviations = {
    "cup": " cup ",
    "g": " gram ",
    "kg": " kilogram ",
    "lb": " pound ",
    "ml": " milliliter ",
    "oz": " ounce ",
    "pint": " pint ",
    "qt": " quart ",
    "tbsp": " tablespoon ",
    "tbs": " tablespoon ",  # Order matters! 'tbs' must come after 'tbsp' in case of duplicate matches
    "tsp": " teaspoon ",
}


def replace_common_abbreviations(string: str) -> str:
    """
    Expand unit abbreviations that directly follow a quantity, e.g.
    "2 tbsp sugar" -> "2 tablespoon  sugar".
    """
    result = string
    for abbreviation, replacement in replace_abbreviations.items():
        # only match when the abbreviation follows a digit, optionally
        # separated by a single space; the optional trailing "s" catches plurals
        pattern = rf"(?<=\d)\s?({abbreviation}\bs?)"
        result = re.sub(pattern, replacement, result)
    return result
def remove_periods(string: str) -> str:
    """Remove periods that are not adjacent to a digit (keeps decimals like "1.5")."""
    # a period with no digit immediately before AND no digit immediately after
    bare_period = r"(?<!\d)\.(?!\d)"
    return re.sub(bare_period, "", string)
def wrap_or_clause(string: str):
    """
    Attempt to wrap an "or" alternative clause in parentheses.

    Examples:
        '1 tsp. diamond crystal or 1/2 tsp. morton kosher salt, plus more'
        -> '1 tsp. diamond crystal (or 1/2 tsp. morton kosher salt), plus more'

    Returns the string unchanged when it contains no " or " clause.
    """
    # TODO: Needs more adequate testing to be sure this doesn't have side effects.
    if " or " not in string:
        # the original indexed split(" or ")[1] unconditionally, which raised
        # IndexError when callers forgot to guard; guard here instead
        return string

    # maxsplit=1 keeps everything after the first " or " intact; the original
    # unbounded split used only element [1], silently dropping any text after
    # a second " or "
    before_or, after_or = string.split(" or ", maxsplit=1)
    alternative, *trailing = after_or.split(",")
    wrapped = f"{before_or} (or {alternative}),{''.join(trailing)}"
    return wrapped.strip().removesuffix(",")
def pre_process_string(string: str) -> str:
    """
    Series of preprocessing functions to make best use of the CRF++ model. The ideal string looks something like...

    {qty} {unit} {food}, {additional}
    1 tbs. wine, expensive or other white wine, plus more

    Note: the order of transformations matters — periods must be removed
    before abbreviations are expanded, and "or" wrapping runs last.
    """
    string = string.lower()
    string = convert_vulgar_fractions_to_regular_fractions(string)
    string = remove_periods(string)
    string = replace_common_abbreviations(string)

    # wrap_or_clause assumes " or " is present, so only call it when it is
    if " or " in string:
        string = wrap_or_clause(string)

    return string

View file

@ -1,63 +0,0 @@
import os
import subprocess
import tempfile
from fractions import Fraction
from pathlib import Path
from typing import Annotated
from pydantic import BaseModel, Field, field_validator
from pydantic_core.core_schema import ValidationInfo
from mealie.schema._mealie.types import NoneFloat
from . import utils
from .pre_processor import pre_process_string
# Directory containing this module; the bundled model lives alongside it.
CWD = Path(__file__).parent
# Model location, overridable via the CRF_MODEL_PATH environment variable.
# NOTE(review): the default is a Path while an env override yields a str —
# both are accepted by subprocess/open, but the types differ.
MODEL_PATH = os.getenv("CRF_MODEL_PATH", default=CWD / "model.crfmodel")
class CRFConfidence(BaseModel):
    """Per-tag confidence scores reported by crf_test for one ingredient."""

    # overall average across all parsed tags (populated by utils.import_data)
    average: float = 0.0
    comment: NoneFloat = None
    name: NoneFloat = None
    unit: NoneFloat = None
    # validate_default=True forces validation to run even when qty is omitted
    qty: Annotated[NoneFloat, Field(validate_default=True)] = None
class CRFIngredient(BaseModel):
    """One ingredient line as parsed by the CRF++ model (via utils.import_data)."""

    # the reassembled input phrase
    input: str = ""
    # the food name, e.g. "flour"
    name: str = ""
    # leftover tokens; may contain a fraction the tagger missed (see validate_qty)
    other: str = ""
    # quantity as a string; validate_default=True runs the validator even when empty
    qty: Annotated[str, Field(validate_default=True)] = ""
    comment: str = ""
    unit: str = ""
    confidence: CRFConfidence

    @field_validator("qty", mode="before")
    def validate_qty(cls, qty, info: ValidationInfo):
        """
        Keep a non-empty qty as-is; otherwise try to recover a quantity from a
        fraction in `other` (rounded to 3 places), defaulting to "0". Any
        parsing failure yields "".
        """
        if qty is not None and qty != "":
            return qty

        # Check if other contains a fraction
        try:
            if info.data["other"] is not None and info.data["other"].find("/") != -1:
                return str(round(float(Fraction(info.data["other"])), 3))
            else:
                return "0"
        except Exception:
            return ""
def _exec_crf_test(input_text):
    """
    Run the external `crf_test` binary over the given ingredient lines and
    return its raw verbose stdout as a UTF-8 string.
    """
    with tempfile.NamedTemporaryFile(mode="w") as crf_input:
        crf_input.write(utils.export_data(input_text))
        crf_input.flush()
        command = ["crf_test", "--verbose=1", "--model", MODEL_PATH, crf_input.name]
        raw_output = subprocess.check_output(command)
    return raw_output.decode("utf-8")
def convert_list_to_crf_model(list_of_ingrdeint_text: list[str]):
    """
    Pre-process raw ingredient strings, run them through the CRF++ model, and
    return the tagged results as CRFIngredient objects.
    """
    preprocessed = [pre_process_string(line) for line in list_of_ingrdeint_text]
    crf_output = _exec_crf_test(preprocessed)
    parsed_rows = utils.import_data(crf_output.split("\n"))
    return [CRFIngredient(**row) for row in parsed_rows]

View file

@ -1,38 +0,0 @@
import re
def clumpFractions(s):
    """
    Replace the whitespace between the integer and fractional part of a
    quantity with a dollar sign so the pair is treated as one token;
    the rest of the string is untouched.

    clumpFractions("aaa 1 2/3 bbb")
    # => "aaa 1$2/3 bbb"
    """
    mixed_number = re.compile(r"(\d+)\s+(\d)/(\d)")
    return mixed_number.sub(r"\1$\2/\3", s)
def tokenize(s):
    """
    Tokenize on parenthesis, punctuation, spaces and American units followed by a slash.

    Recipes sometimes give both American and metric units, e.g.:
      * 2 tablespoons/30 mililiters milk or cream
      * 2 1/2 cups/300 grams all-purpose flour
    Only one unit is stored, and the American one is preferred, so the text is
    split on "cups/" etc. to make it visible as its own token.
    """
    # expand abbreviated metric amounts like "100g" into "100 grams"
    s = re.sub(r"(\d+)g", r"\1 grams", s)
    s = re.sub(r"(\d+)oz", r"\1 ounces", s)
    s = re.sub(r"(\d+)ml", r"\1 milliliters", s, flags=re.IGNORECASE)

    # TODO: Replace american_units with list of units from database?
    # replace the slash after an American unit with a space so the metric
    # duplicate becomes a separate token
    for unit in ("cup", "tablespoon", "teaspoon", "pound", "ounce", "quart", "pint"):
        s = s.replace(f"{unit}/", f"{unit} ")
        s = s.replace(f"{unit}s/", f"{unit}s ")

    raw_tokens = re.split(r"([,()\s]{1})", clumpFractions(s))
    return [token.strip() for token in raw_tokens if token and token.strip()]

View file

@ -1,266 +0,0 @@
import re
from statistics import mean
from . import tokenizer
def joinLine(columns):
    """Join token feature columns into one tab-separated CRF input line."""
    separator = "\t"
    return separator.join(columns)
def unclump(s):
    """Replace "$" markers with spaces — the inverse of clumpFractions."""
    return s.replace("$", " ")
def getFeatures(token, index, tokens):
    """
    Build the CRF feature column list for one token: its 1-based position,
    the bucketed sentence length, capitalization, and parenthesis membership.
    """
    sentence_length = len(tokens)
    cap_flag = "Yes" if isCapitalized(token) else "No"
    paren_flag = "Yes" if insideParenthesis(token, tokens) else "No"
    return [
        f"I{index}",
        f"L{lengthGroup(sentence_length)}",
        f"{cap_flag}CAP",
        f"{paren_flag}PAREN",
    ]
def singularize(word):
    """
    Map a plural unit name to its singular form; unknown words pass through
    unchanged. A poor replacement for the pattern.en singularize function,
    but ok for now.
    """
    units = {
        "cups": "cup",
        "tablespoons": "tablespoon",
        "teaspoons": "teaspoon",
        "pounds": "pound",
        "ounces": "ounce",
        "cloves": "clove",
        "sprigs": "sprig",
        "pinches": "pinch",
        "bunches": "bunch",
        "slices": "slice",
        "grams": "gram",
        "heads": "head",
        "quarts": "quart",
        "stalks": "stalk",
        "pints": "pint",
        "pieces": "piece",
        "sticks": "stick",
        "dashes": "dash",
        "fillets": "fillet",
        "cans": "can",
        "ears": "ear",
        "packages": "package",
        "strips": "strip",
        "bulbs": "bulb",
        "bottles": "bottle",
    }
    return units.get(word, word)
def isCapitalized(token):
    """Return True if the token starts with an ASCII capital letter."""
    # re.match is already anchored at the start, so no "^" is needed
    return re.match(r"[A-Z]", token) is not None
def lengthGroup(actualLength):
    """
    Bucket the ingredient length into one of 6 buckets:
    "4", "8", "12", "16", "20", or "X" for anything 20 and over.
    """
    thresholds = (4, 8, 12, 16, 20)
    return next((str(limit) for limit in thresholds if actualLength < limit), "X")
def insideParenthesis(token, tokens):
    """Return True if the token appears inside parentheses in the phrase."""
    # parenthesis tokens themselves always count as "inside"
    if token in ("(", ")"):
        return True
    phrase = " ".join(tokens)
    # escape the token so regex metacharacters in it are matched literally
    return re.match(r".*\(.*" + re.escape(token) + r".*\).*", phrase) is not None
def displayIngredient(ingredient):
    """
    Format a list of (tag, [tokens]) tuples as an HTML string for display.

    displayIngredient([("qty", ["1"]), ("name", ["cat", "pie"])])
    # => "<span class='qty'>1</span><span class='name'>cat pie</span>"
    """
    spans = [f"<span class='{tag}'>{' '.join(tokens)}</span>" for tag, tokens in ingredient]
    return "".join(spans)
# HACK: fix this
def smartJoin(words):
    """
    Join a list of words with spaces, then collapse the padding that
    tokenization adds around commas and parentheses.
    """
    # renamed from `input`, which shadowed the builtin of the same name
    joined = " ".join(words)

    # "a , b" -> "a, b"
    joined = joined.replace(" , ", ", ")

    # "( x" -> "(x"
    joined = joined.replace("( ", "(")

    # "x )" -> "x)"
    joined = joined.replace(" )", ")")

    return joined
def import_data(lines):
    """
    Parse the verbose output of crf_test (one token per line) into a list of
    dicts, one per ingredient phrase.

    Each dict maps tag name -> joined token string, plus:
      * "input": the reassembled original phrase
      * "confidence": {tag: mean confidence, "average": overall mean}

    Input lines look like the output of `crf_test -v 1`:

        # 0.511035
        1/2       I1  L12  NoCAP  X  B-QTY/0.982850
        teaspoon  I2  L12  NoCAP  X  B-UNIT/0.982200
        fresh     I3  L12  NoCAP  X  B-COMMENT/0.716364
        ...

    with a blank line separating ingredient phrases.
    """
    data = [{}]  # per-ingredient {tag: [tokens]}
    display = [[]]  # per-ingredient [(tag, [tokens])] in original order
    prevTag = None
    confidence_all = [{}]  # per-ingredient {tag: [confidence strings]}

    for line in lines:
        # blank line starts a new ingredient
        if line in ("", "\n"):
            data.append({})
            display.append([])
            prevTag = None
            confidence_all.append({})

        # ignore comments (the "# 0.51..." overall-probability lines)
        elif line[0] == "#":
            pass

        # otherwise it's a token
        # e.g.: potato \t I2 \t L5 \t NoCAP \t B-NAME/0.978253
        else:
            columns = re.split("\t", line.strip())
            token = columns[0].strip()

            # unclump fractions ("1$2/3" -> "1 2/3")
            token = unclump(token)

            # turn B-NAME/123 back into "name"
            tag, confidence = re.split(r"/", columns[-1], maxsplit=1)
            tag = re.sub(r"^[BI]-", "", tag).lower()  # was r"^[BI]\-": invalid escape

            # ====================
            # Confidence Getter
            # (the original had an if/else on prevTag whose two branches were
            # byte-identical; collapsed into the single equivalent statement)
            confidence_all[-1].setdefault(tag, []).append(confidence)

            # ---- DISPLAY ----
            # group consecutive same-tag tokens so the original display
            # string can be rebuilt later
            if prevTag != tag:
                display[-1].append((tag, [token]))
                prevTag = tag
            else:
                display[-1][-1][1].append(token)

            # ---- DATA ----
            # build a dict grouping tokens by their tag
            if tag not in data[-1]:
                data[-1][tag] = []

            # HACK: If this token is a unit, singularize it so Scoop accepts it.
            if tag == "unit":
                token = singularize(token)

            data[-1][tag].append(token)

    # reassemble the output into a list of dicts, dropping empty ingredients.
    # NOTE(review): output drops empty ingredients while confidence_all and
    # display do not; indices could misalign if an *interior* phrase parses
    # empty — behavior preserved from the original.
    output = [{k: smartJoin(tokens) for k, tokens in ingredient.items()} for ingredient in data if len(ingredient)]

    # average the confidences per tag, and overall per ingredient
    for i, c in enumerate(confidence_all):
        avg_of_all = []
        for k, v in c.items():
            avg = round(mean(float(x) for x in v), 2)
            avg_of_all.append(avg)
            confidence_all[i][k] = avg

        if avg_of_all:
            confidence_all[i]["average"] = round(mean(avg_of_all), 2)

    # attach the raw ingredient phrase and its confidence data
    for i, _ in enumerate(output):
        output[i]["input"] = smartJoin([" ".join(tokens) for _, tokens in display[i]])
        output[i]["confidence"] = confidence_all[i]

    return output
def export_data(lines):
    """Parse "raw" ingredient lines into CRF-ready, tab-separated input."""
    output = []
    for line in lines:
        # strip any HTML tags before tokenizing
        cleaned = re.sub("<[^<]+?>", "", line)
        tokens = tokenizer.tokenize(cleaned)
        # CRF token indices are 1-based
        for position, token in enumerate(tokens, start=1):
            features = getFeatures(token, position, tokens)
            output.append(joinLine([token, *features]))
        # blank line terminates each ingredient phrase
        output.append("")
    return "\n".join(output)

View file

@ -1,12 +1,12 @@
from fractions import Fraction
from ingredient_parser import parse_ingredient
from ingredient_parser.dataclasses import CompositeIngredientAmount, IngredientAmount
from ingredient_parser.dataclasses import ParsedIngredient as IngredientParserParsedIngredient
from pydantic import UUID4
from sqlalchemy.orm import Session
from mealie.core.root_logger import get_logger
from mealie.schema.recipe import RecipeIngredient
from mealie.schema.recipe.recipe_ingredient import (
MAX_INGREDIENT_DENOMINATOR,
CreateIngredientFood,
CreateIngredientUnit,
IngredientConfidence,
@ -14,8 +14,9 @@ from mealie.schema.recipe.recipe_ingredient import (
RegisteredParser,
)
from . import brute, crfpp, openai
from . import brute, openai
from ._base import ABCIngredientParser
from .parser_utils import extract_quantity_from_string
logger = get_logger(__name__)
@ -47,50 +48,110 @@ class BruteForceParser(ABCIngredientParser):
class NLPParser(ABCIngredientParser):
"""
Class for CRFPP ingredient parsers.
Class for Ingredient Parser library
"""
def _crf_to_ingredient(self, crf_model: crfpp.CRFIngredient) -> ParsedIngredient:
ingredient = None
@staticmethod
def _extract_amount(ingredient: IngredientParserParsedIngredient) -> IngredientAmount:
if not (ingredient_amounts := ingredient.amount):
return IngredientAmount(quantity=0, quantity_max=0, unit="", text="", confidence=0, starting_index=-1)
try:
ingredient = RecipeIngredient(
title="",
note=crf_model.comment,
unit=CreateIngredientUnit(name=crf_model.unit),
food=CreateIngredientFood(name=crf_model.name),
disable_amount=False,
quantity=float(
sum(Fraction(s).limit_denominator(MAX_INGREDIENT_DENOMINATOR) for s in crf_model.qty.split())
),
)
except Exception as e:
logger.error(f"Failed to parse ingredient: {crf_model}: {e}")
# TODO: Capture some sort of state for the user to see that an exception occurred
ingredient = RecipeIngredient(
title="",
note=crf_model.input,
)
ingredient_amount = ingredient_amounts[0]
if isinstance(ingredient_amount, CompositeIngredientAmount):
ingredient_amount = ingredient_amount.amounts[0]
return ingredient_amount
@staticmethod
def _extract_quantity(ingredient_amount: IngredientAmount) -> tuple[float, float]:
    """
    Return (quantity, confidence) from a parsed amount, falling back to
    (0, 0) when the value cannot be coerced to a float.
    """
    confidence = ingredient_amount.confidence
    quantity = ingredient_amount.quantity
    if isinstance(quantity, str):
        # string quantities (e.g. ranges) go through the shared string extractor
        return extract_quantity_from_string(quantity)[0], confidence
    try:
        return float(quantity), confidence
    except ValueError:
        return 0, 0
@staticmethod
def _extract_unit(ingredient_amount: IngredientAmount) -> tuple[str, float]:
    """Return (unit text, confidence); the unit is "" when none was parsed."""
    if ingredient_amount.unit:
        return str(ingredient_amount.unit), ingredient_amount.confidence
    return "", ingredient_amount.confidence
@staticmethod
def _extract_food(ingredient: IngredientParserParsedIngredient) -> tuple[str, float]:
    """Return (food name, confidence); ("", 0) when no name was parsed."""
    if not ingredient.name:
        return "", 0
    return str(ingredient.name.text), ingredient.name.confidence
@staticmethod
def _extract_note(ingredient: IngredientParserParsedIngredient) -> tuple[str, float]:
    """
    Combine the size, preparation, and comment fields (in that order) into a
    single note string, with the confidence averaged across the parts used.
    """
    present_parts = [
        part
        for part in (ingredient.size, ingredient.preparation, ingredient.comment)
        if part
    ]
    note_parts = [part.text for part in present_parts]
    confidences = [part.confidence for part in present_parts]

    # average confidence among all note parts
    confidence = sum(confidences) / len(confidences) if confidences else 0

    note = ", ".join(note_parts)
    # drop parentheses so the note reads as plain text
    note = note.replace("(", "").replace(")", "")
    return note, confidence
def _convert_ingredient(self, ingredient: IngredientParserParsedIngredient) -> ParsedIngredient:
ingredient_amount = self._extract_amount(ingredient)
qty, qty_conf = self._extract_quantity(ingredient_amount)
unit, unit_conf = self._extract_unit(ingredient_amount)
food, food_conf = self._extract_food(ingredient)
note, note_conf = self._extract_note(ingredient)
# average confidence for components which were parsed
confidences: list[float] = []
if qty:
confidences.append(qty_conf)
if unit:
confidences.append(unit_conf)
if food:
confidences.append(food_conf)
if note:
confidences.append(note_conf)
parsed_ingredient = ParsedIngredient(
input=crf_model.input,
ingredient=ingredient,
input=ingredient.sentence,
confidence=IngredientConfidence(
quantity=crf_model.confidence.qty,
food=crf_model.confidence.name,
**crf_model.confidence.model_dump(),
average=(sum(confidences) / len(confidences)) if confidences else 0,
quantity=qty_conf,
unit=unit_conf,
food=food_conf,
comment=note_conf,
),
ingredient=RecipeIngredient(
title="",
quantity=qty,
unit=CreateIngredientUnit(name=unit) if unit else None,
food=CreateIngredientFood(name=food) if food else None,
disable_amount=False,
note=note,
),
)
return self.find_ingredient_match(parsed_ingredient)
async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]:
crf_models = crfpp.convert_list_to_crf_model(ingredients)
return [self._crf_to_ingredient(crf_model) for crf_model in crf_models]
async def parse_one(self, ingredient_string: str) -> ParsedIngredient:
items = await self.parse([ingredient_string])
return items[0]
parsed_ingredient = parse_ingredient(ingredient_string)
return self._convert_ingredient(parsed_ingredient)
async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]:
return [await self.parse_one(ingredient) for ingredient in ingredients]
__registrar: dict[RegisteredParser, type[ABCIngredientParser]] = {

199
poetry.lock generated
View file

@ -697,6 +697,42 @@ docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.
testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"]
typing = ["typing-extensions (>=4.12.2)"]
[[package]]
name = "flexcache"
version = "0.3"
description = "Saves and loads to the cache a transformed versions of a source object."
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "flexcache-0.3-py3-none-any.whl", hash = "sha256:d43c9fea82336af6e0115e308d9d33a185390b8346a017564611f1466dcd2e32"},
{file = "flexcache-0.3.tar.gz", hash = "sha256:18743bd5a0621bfe2cf8d519e4c3bfdf57a269c15d1ced3fb4b64e0ff4600656"},
]
[package.dependencies]
typing-extensions = "*"
[package.extras]
test = ["pytest", "pytest-cov", "pytest-mpl", "pytest-subtests"]
[[package]]
name = "flexparser"
version = "0.4"
description = "Parsing made fun ... using typing."
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "flexparser-0.4-py3-none-any.whl", hash = "sha256:3738b456192dcb3e15620f324c447721023c0293f6af9955b481e91d00179846"},
{file = "flexparser-0.4.tar.gz", hash = "sha256:266d98905595be2ccc5da964fe0a2c3526fbbffdc45b65b3146d75db992ef6b2"},
]
[package.dependencies]
typing-extensions = "*"
[package.extras]
test = ["pytest", "pytest-cov", "pytest-mpl", "pytest-subtests"]
[[package]]
name = "freezegun"
version = "1.5.1"
@ -737,7 +773,7 @@ description = "Lightweight in-process concurrent programming"
optional = false
python-versions = ">=3.7"
groups = ["main"]
markers = "python_version < \"3.14\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"
markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""
files = [
{file = "greenlet-3.0.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9da2bd29ed9e4f15955dd1595ad7bc9320308a3b766ef7f837e23ad4b4aac31a"},
{file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d353cadd6083fdb056bb46ed07e4340b0869c305c8ca54ef9da3421acbdf6881"},
@ -993,6 +1029,23 @@ files = [
{file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
]
[[package]]
name = "ingredient-parser-nlp"
version = "1.3.2"
description = "A Python package to parse structured information from recipe ingredient sentences"
optional = false
python-versions = "<3.14,>=3.10"
groups = ["main"]
files = [
{file = "ingredient_parser_nlp-1.3.2-py3-none-any.whl", hash = "sha256:4e9b18a977e6b93985edd5a2668e5bb4f1dd3c570374316fb7f811a21ca55523"},
{file = "ingredient_parser_nlp-1.3.2.tar.gz", hash = "sha256:12f4d34717364881b828b476bd5b5f8a72c96474883b8cbe94911a39fd71e719"},
]
[package.dependencies]
nltk = ">=3.9.1"
pint = ">=0.24.4"
python-crfsuite = "*"
[[package]]
name = "iniconfig"
version = "2.0.0"
@ -1136,6 +1189,18 @@ files = [
{file = "jiter-0.5.0.tar.gz", hash = "sha256:1d916ba875bcab5c5f7d927df998c4cb694d27dceddf3392e58beaf10563368a"},
]
[[package]]
name = "joblib"
version = "1.4.2"
description = "Lightweight pipelining with Python functions"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"},
{file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"},
]
[[package]]
name = "jstyleson"
version = "0.0.2"
@ -1645,6 +1710,32 @@ files = [
{file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
]
[[package]]
name = "nltk"
version = "3.9.1"
description = "Natural Language Toolkit"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1"},
{file = "nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868"},
]
[package.dependencies]
click = "*"
joblib = "*"
regex = ">=2021.8.3"
tqdm = "*"
[package.extras]
all = ["matplotlib", "numpy", "pyparsing", "python-crfsuite", "requests", "scikit-learn", "scipy", "twython"]
corenlp = ["requests"]
machine-learning = ["numpy", "python-crfsuite", "scikit-learn", "scipy"]
plot = ["matplotlib"]
tgrep = ["pyparsing"]
twitter = ["twython"]
[[package]]
name = "nodeenv"
version = "1.7.0"
@ -1995,13 +2086,43 @@ docs = ["sphinx (>=4.4)", "sphinx-issues (>=3.0.1)", "sphinx-rtd-theme (>=1.0)"]
tests = ["defusedxml", "numpy", "packaging", "pympler", "pytest"]
tests-min = ["defusedxml", "packaging", "pytest"]
[[package]]
name = "pint"
version = "0.24.4"
description = "Physical quantities module"
optional = false
python-versions = ">=3.9"
groups = ["main"]
files = [
{file = "Pint-0.24.4-py3-none-any.whl", hash = "sha256:aa54926c8772159fcf65f82cc0d34de6768c151b32ad1deb0331291c38fe7659"},
{file = "pint-0.24.4.tar.gz", hash = "sha256:35275439b574837a6cd3020a5a4a73645eb125ce4152a73a2f126bf164b91b80"},
]
[package.dependencies]
flexcache = ">=0.3"
flexparser = ">=0.4"
platformdirs = ">=2.1.0"
typing-extensions = ">=4.0.0"
[package.extras]
babel = ["babel (<=2.8)"]
bench = ["pytest", "pytest-codspeed"]
dask = ["dask"]
mip = ["mip (>=1.13)"]
numpy = ["numpy (>=1.23)"]
pandas = ["pint-pandas (>=0.3)"]
test = ["pytest", "pytest-benchmark", "pytest-cov", "pytest-mpl", "pytest-subtests"]
testbase = ["pytest", "pytest-benchmark", "pytest-cov", "pytest-subtests"]
uncertainties = ["uncertainties (>=3.1.6)"]
xarray = ["xarray"]
[[package]]
name = "platformdirs"
version = "4.3.6"
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
optional = false
python-versions = ">=3.8"
groups = ["dev"]
groups = ["main", "dev"]
files = [
{file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"},
{file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"},
@ -2499,6 +2620,74 @@ pytest = ">=8.2,<9"
docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1)"]
testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"]
[[package]]
name = "python-crfsuite"
version = "0.9.11"
description = "Python binding for CRFsuite"
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "python_crfsuite-0.9.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2f5ed569517e7b1fa3d32cf5d5cbe2fb6c85486195bf5cad03d52072fef7aa8a"},
{file = "python_crfsuite-0.9.11-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aed10ee4334c99173940e88318d312a4f9e70ba653b8ac0e6f3ef816431af811"},
{file = "python_crfsuite-0.9.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fd8cc52f853436bbed580ad6c17e37c3657466fdfa28ddc55efcbba28b92cdf"},
{file = "python_crfsuite-0.9.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:182fad0415697d5acbe18364333f8255016c8609d570cba78c20d8d71a392f90"},
{file = "python_crfsuite-0.9.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:05cd988aaa7ac87a54d4bd1d756455f6e3b078f07b4fcbda3bccfd91a784dd20"},
{file = "python_crfsuite-0.9.11-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:2dead957809b92b7f0fc4c03fc70af9cbcaf35518ff1fd3a3fe2862dd0bb52fa"},
{file = "python_crfsuite-0.9.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:66f24e5281b8a10091c3a9eef5a85115aea9570bcb9e0c03c738b0eab7070cb5"},
{file = "python_crfsuite-0.9.11-cp310-cp310-win32.whl", hash = "sha256:b5a9492686e3dde5739ea19a3ec37397eb7cff787362e403a411acb6431aaf84"},
{file = "python_crfsuite-0.9.11-cp310-cp310-win_amd64.whl", hash = "sha256:d2c361819ba331c48038f1b231b8863b886205e9decae2fb89f69da44b28d00a"},
{file = "python_crfsuite-0.9.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4a2f2ff5b6b0b6cf72ee476436f3926ccd0045c97e7703478a025c9badd180c6"},
{file = "python_crfsuite-0.9.11-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:83bc133fc2a411144778bb03d56a95f88a4da0386462fb99d32b45428959101f"},
{file = "python_crfsuite-0.9.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d5e52bfe54c1cb94009f1edb9c1dec3fe6d31823c60fafee04d63354c342303"},
{file = "python_crfsuite-0.9.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a23a96dc9a25a0d143430236158ca0d836b94a26d5752ffdf7efe315c14045f5"},
{file = "python_crfsuite-0.9.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:dd95a8ab9d92ac6756c17dde8150d7edcc696e49b4ca5f537e347143d19c94bc"},
{file = "python_crfsuite-0.9.11-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:800fd345f2eb822d574eeaa6099bb88a23942272f62ea3e182e8ec07f4cf5ca8"},
{file = "python_crfsuite-0.9.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4c17dc2c5ac63d10993afbab0288bb1949e4ac856361c83e8041fff4493d7dab"},
{file = "python_crfsuite-0.9.11-cp311-cp311-win32.whl", hash = "sha256:9a00f1f32203d9cb66658df75ee62ce4809b24f26b982b7f482934a683abc96c"},
{file = "python_crfsuite-0.9.11-cp311-cp311-win_amd64.whl", hash = "sha256:d255f02c890628337c970d76cba787afb7991340b3a7b201d3a158add5f78989"},
{file = "python_crfsuite-0.9.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:346a37d1ffa9f161d56c523d2386eaa5026c663e70f65db4478adb292d7c047c"},
{file = "python_crfsuite-0.9.11-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bec40a7924d2e79a06f8eb0cec613ade54d677b73c4041c6052cd890aca2db89"},
{file = "python_crfsuite-0.9.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c5b3836e8ee8d684fb9d76d287035db51039b30cadac3332664655acf970831"},
{file = "python_crfsuite-0.9.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f498cb82686dc18f7cecaf0a7ebceb4590ee2137cfa8cfe1b75f53514d0e956"},
{file = "python_crfsuite-0.9.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:29cdf55c54c388c62148ba310bf8ad1b93b352d62dd84856d15c421dae2e902d"},
{file = "python_crfsuite-0.9.11-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:7e6738ed044ba91d8284716f87525ca95bc857ece0b226910a80126a8ce6ad06"},
{file = "python_crfsuite-0.9.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1a365a70e54dbd20a9251a3b6df91e1406cab1b1b5995a9d68e8c748fc9b3af7"},
{file = "python_crfsuite-0.9.11-cp312-cp312-win32.whl", hash = "sha256:4b230ab1b69c6025e4f64e72c445f7492cccf00d94fc2c0bf2f337fafc05d5d5"},
{file = "python_crfsuite-0.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:c89d7ad4ca520a5f045c676865ec09a2accc25dc5dce387f2199e5b2c9d8f337"},
{file = "python_crfsuite-0.9.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:89b45426f28b39dfc4789d29bcd7398f177746e4ab27f6ae3c7b48a082ecb73b"},
{file = "python_crfsuite-0.9.11-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:788b6ca5fd43797f6822bb7aed8d5b0255d7d53be62746c77ca91dad5dfd2f2b"},
{file = "python_crfsuite-0.9.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:609ce1e2ea1ff36379e91a4af9f10bcaaca0b22d089ec7489181ae0d9d098419"},
{file = "python_crfsuite-0.9.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:893af206342196e37c84af73941d7c2498e3ab926a67f846f78de6f48a7cb067"},
{file = "python_crfsuite-0.9.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a387c4c4794ecccc712e01091b2887fc90b63dbc6612947232c2593116545e8a"},
{file = "python_crfsuite-0.9.11-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:00db049cc46f716cef6626fbcf5b8abc258f4740e39dcceccc706ba77200992b"},
{file = "python_crfsuite-0.9.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c0f95fd723e7a684188c541106f301a1d87104a07acd1e5687df849d2a86391a"},
{file = "python_crfsuite-0.9.11-cp313-cp313-win32.whl", hash = "sha256:5664cebdc82d20b374641f2d0e77a86e8b010fafaf8efeb8862c3fc567d41c08"},
{file = "python_crfsuite-0.9.11-cp313-cp313-win_amd64.whl", hash = "sha256:00123f42dca02897aaa1fc129ea99b815f800c2893ffb210d8b8f71235ffeef4"},
{file = "python_crfsuite-0.9.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bb02962c16e3c84bb056ed86f2227b3d0432995c047acb7eb15032c1b645044c"},
{file = "python_crfsuite-0.9.11-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f5cc941f1e22cd52e1965cd353b67edfbae06dc5ceb6556bf3176d8523113f66"},
{file = "python_crfsuite-0.9.11-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8b3ceefc199b46e562a8bfaac9ef71f86108f0435e28f40007da48618f53837"},
{file = "python_crfsuite-0.9.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b0c244c0ac04f1213576d28743dae133ca3ff2ebba98b3c4abda3327f37ed23"},
{file = "python_crfsuite-0.9.11-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8919fec4638133b3e95afe1496b5b771bb8464741bd467534cc1414ae7f0efc6"},
{file = "python_crfsuite-0.9.11-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:993705405b979047a9c66141f4ef886635278f244b5371c25db94751f4b7d326"},
{file = "python_crfsuite-0.9.11-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:48fb8b11ae294a3f5986dc4ae9a20047d850e1dc20dae3725c3a9d0c70e14418"},
{file = "python_crfsuite-0.9.11-cp38-cp38-win32.whl", hash = "sha256:f8df18614e5c6c3c95d3e20a7968f75201693a0cc1284d893f7bbc04a392f8e3"},
{file = "python_crfsuite-0.9.11-cp38-cp38-win_amd64.whl", hash = "sha256:01a0078292fff9e171ab9f4cabc67cbd2c629647b8fc67187c1335520a7a45fa"},
{file = "python_crfsuite-0.9.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e0e1fad868fe15cb5bca7c0015995bd962de2f0b100e3e5b7dd3c14273fdc806"},
{file = "python_crfsuite-0.9.11-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bcb60d6ac04e6f7e64f02aceaea88b6ad4ffdc183c5301f7fd8b8a280c3efc8e"},
{file = "python_crfsuite-0.9.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27e6e9a3439c503884d6bb4311f9e7bb34cd4c5e83da28f8c8abcfa34332b2f7"},
{file = "python_crfsuite-0.9.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3064a4902b18c8a0916e48db4f94bc323e9390b96ae41098674ceb36f107acee"},
{file = "python_crfsuite-0.9.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:cac7a8bb6f629dc42408f3df45a892010321ba539a30cecc54bdea8f05580003"},
{file = "python_crfsuite-0.9.11-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:796b6b84d4af5b848786f05c378a32f08ef6a5c67dd929f9845f0f7217177db8"},
{file = "python_crfsuite-0.9.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:92ebc0f4291b6beae87eb6b9999c3381db5299852f7bdd88cdfca62d759630db"},
{file = "python_crfsuite-0.9.11-cp39-cp39-win32.whl", hash = "sha256:d6b4705cd7657efa8fc7742b09783537595944d18c0708e362252c2a9cd2a58d"},
{file = "python_crfsuite-0.9.11-cp39-cp39-win_amd64.whl", hash = "sha256:c7aeec4be4056b0c6dd4a1357707c8d5b9c88b3f74e51d2f4d407692cad4877f"},
{file = "python_crfsuite-0.9.11.tar.gz", hash = "sha256:6eff965ca70567396d822c9a35ea74b0f7edb27d9471524997bdabe7a6da5f5a"},
]
[package.extras]
dev = ["black", "flake8", "isort", "tox"]
[[package]]
name = "python-dateutil"
version = "2.9.0"
@ -2806,7 +2995,7 @@ version = "2022.10.31"
description = "Alternative regular expression module, to replace re."
optional = false
python-versions = ">=3.6"
groups = ["dev"]
groups = ["main", "dev"]
files = [
{file = "regex-2022.10.31-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a8ff454ef0bb061e37df03557afda9d785c905dab15584860f982e88be73015f"},
{file = "regex-2022.10.31-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1eba476b1b242620c266edf6325b443a2e22b633217a9835a52d8da2b5c051f9"},
@ -3579,5 +3768,5 @@ pgsql = ["psycopg2-binary"]
[metadata]
lock-version = "2.1"
python-versions = "^3.12"
content-hash = "3442bd32ecbf82e5d49975511c3b01b0baa877712d9c786b12cfb5dfdda0c08f"
python-versions = ">=3.12,<3.13"
content-hash = "9a19a1b0f75cce3df8c69bcb8b41da14f12fc127e0d26e1a8fb0de2776666448"

View file

@ -31,7 +31,7 @@ orjson = "^3.8.0"
psycopg2-binary = { version = "^2.9.1", optional = true }
pydantic = "^2.6.1"
pyhumps = "^3.5.3"
python = "^3.12"
python = ">=3.12,<3.13"
python-dateutil = "^2.8.2"
python-dotenv = "^1.0.0"
python-ldap = "^3.3.1"
@ -54,6 +54,7 @@ pyjwt = "^2.8.0"
openai = "^1.63.0"
typing-extensions = "^4.12.2"
itsdangerous = "^2.2.0"
ingredient-parser-nlp = "^1.3.2"
[tool.poetry.group.postgres.dependencies]
psycopg2-binary = { version = "^2.9.1" }

View file

@ -2,10 +2,29 @@ import pytest
from fastapi.testclient import TestClient
from mealie.schema.recipe.recipe_ingredient import RegisteredParser
from tests.unit_tests.test_ingredient_parser import TestIngredient, crf_exists, test_ingredients
from tests.unit_tests.test_ingredient_parser import TestIngredient
from tests.utils import api_routes
from tests.utils.fixture_schemas import TestUser
# Expected parse results for the NLP backend (ingredient-parser package).
# Each TestIngredient is (input, quantity, unit, food, comments); quantities
# are compared with pytest.approx in assert_ingredient, so repeating
# fractions are given rounded to 3 decimal places (e.g. 2/3 -> 0.667).
# NOTE(review): trailing whitespace in some inputs looks intentional, to
# exercise input normalization — confirm before "cleaning" it up.
nlp_test_ingredients = [
    # Unicode vulgar fractions (½, ⅔, ⅓) and mixed numbers ("1½")
    TestIngredient("½ cup all-purpose flour", 0.5, "cup", "all-purpose flour", ""),
    TestIngredient("1½ teaspoons ground black pepper", 1.5, "teaspoon", "ground black pepper", ""),
    TestIngredient("⅔ cup unsweetened flaked coconut", 0.667, "cup", "unsweetened flaked coconut", ""),
    TestIngredient("⅓ cup panko bread crumbs", 0.333, "cup", "panko bread crumbs", ""),
    # Small ASCII fractions down to 1/32
    TestIngredient("1/8 cup all-purpose flour", 0.125, "cup", "all-purpose flour", ""),
    TestIngredient("1/32 cup all-purpose flour", 0.031, "cup", "all-purpose flour", ""),
    TestIngredient("1 1/2 cups chopped onion ", 1.5, "cup", "onion", "chopped"),
    # Multi-clause comment; note the parser drops the hyphen in "3/4-inch"
    TestIngredient(
        "2 pounds russet potatoes, peeled, and cut into 3/4-inch cubes ",
        2,
        "pound",
        "russet potatoes",
        "peeled, and cut into 3/4 inch cubes",
    ),
    # Parenthetical amounts and notes
    TestIngredient("2 tablespoons (30ml) vegetable oil ", 2, "tablespoon", "vegetable oil", ""),
    TestIngredient("2 teaspoons salt (to taste) ", 2, "teaspoon", "salt", "to taste"),
]
def assert_ingredient(api_response: dict, test_ingredient: TestIngredient):
assert api_response["ingredient"]["quantity"] == pytest.approx(test_ingredient.quantity)
@ -14,8 +33,7 @@ def assert_ingredient(api_response: dict, test_ingredient: TestIngredient):
assert api_response["ingredient"]["note"] == test_ingredient.comments
@pytest.mark.skipif(not crf_exists(), reason="CRF++ not installed")
@pytest.mark.parametrize("test_ingredient", test_ingredients)
@pytest.mark.parametrize("test_ingredient", nlp_test_ingredients)
def test_recipe_ingredient_parser_nlp(api_client: TestClient, test_ingredient: TestIngredient, unique_user: TestUser):
payload = {"parser": RegisteredParser.nlp, "ingredient": test_ingredient.input}
response = api_client.post(api_routes.parser_ingredient, json=payload, headers=unique_user.token)
@ -23,13 +41,12 @@ def test_recipe_ingredient_parser_nlp(api_client: TestClient, test_ingredient: T
assert_ingredient(response.json(), test_ingredient)
@pytest.mark.skipif(not crf_exists(), reason="CRF++ not installed")
def test_recipe_ingredients_parser_nlp(api_client: TestClient, unique_user: TestUser):
payload = {"parser": RegisteredParser.nlp, "ingredients": [x.input for x in test_ingredients]}
payload = {"parser": RegisteredParser.nlp, "ingredients": [x.input for x in nlp_test_ingredients]}
response = api_client.post(api_routes.parser_ingredients, json=payload, headers=unique_user.token)
assert response.status_code == 200
for api_ingredient, test_ingredient in zip(response.json(), test_ingredients, strict=False):
for api_ingredient, test_ingredient in zip(response.json(), nlp_test_ingredients, strict=False):
assert_ingredient(api_ingredient, test_ingredient)

View file

@ -1,8 +1,6 @@
import asyncio
import json
import shutil
from dataclasses import dataclass
from fractions import Fraction
import pytest
from pydantic import UUID4
@ -27,10 +25,6 @@ from mealie.schema.recipe.recipe_ingredient import (
from mealie.schema.user.user import GroupBase
from mealie.services.openai import OpenAIService
from mealie.services.parser_services import RegisteredParser, get_parser
from mealie.services.parser_services.crfpp.processor import (
CRFIngredient,
convert_list_to_crf_model,
)
from tests.utils.factories import random_int, random_string
@ -43,10 +37,6 @@ class TestIngredient:
comments: str
def crf_exists() -> bool:
return shutil.which("crf_test") is not None
def build_parsed_ing(food: str | None, unit: str | None) -> ParsedIngredient:
ing = RecipeIngredient(unit=None, food=None)
if food:
@ -134,32 +124,6 @@ def parsed_ingredient_data(
return foods, units
# TODO - add more robust test cases
test_ingredients = [
TestIngredient("½ cup all-purpose flour", 0.5, "cup", "all-purpose flour", ""),
TestIngredient("1½ teaspoons ground black pepper", 1.5, "teaspoon", "black pepper", "ground"),
TestIngredient("⅔ cup unsweetened flaked coconut", 0.667, "cup", "coconut", "unsweetened flaked"),
TestIngredient("⅓ cup panko bread crumbs", 0.333, "cup", "panko bread crumbs", ""),
# Small Fraction Tests - PR #1369
# Reported error is was for 1/8 - new lowest expected threshold is 1/32
TestIngredient("1/8 cup all-purpose flour", 0.125, "cup", "all-purpose flour", ""),
TestIngredient("1/32 cup all-purpose flour", 0.031, "cup", "all-purpose flour", ""),
]
@pytest.mark.skipif(not crf_exists(), reason="CRF++ not installed")
def test_nlp_parser() -> None:
models: list[CRFIngredient] = convert_list_to_crf_model([x.input for x in test_ingredients])
# Iterate over models and test_ingredients to gather
for model, test_ingredient in zip(models, test_ingredients, strict=False):
assert round(float(sum(Fraction(s) for s in model.qty.split())), 3) == pytest.approx(test_ingredient.quantity)
assert model.comment == test_ingredient.comments
assert model.name == test_ingredient.food
assert model.unit == test_ingredient.unit
@pytest.mark.parametrize(
"input, quantity, unit, food, comment",
[