From b12aea8272516a0b3749ab743c724d8aa58017e1 Mon Sep 17 00:00:00 2001 From: Michael Genson <71845777+michael-genson@users.noreply.github.com> Date: Fri, 28 Feb 2025 08:17:28 -0600 Subject: [PATCH] feat: Migrate from CRF++ to Ingredient Parser (a Python package) (#5061) --- .dockerignore | 2 - .gitignore | 2 - Taskfile.yml | 15 - docker/Dockerfile | 17 -- frontend/lang/messages/en-US.json | 1 - .../_groupSlug/r/_slug/ingredient-parser.vue | 7 - mealie/repos/repository_generic.py | 10 +- mealie/repos/repository_recipes.py | 47 +++- mealie/scripts/install_model.py | 21 -- .../parser_services/crfpp/__init__.py | 1 - .../parser_services/crfpp/pre_processor.py | 69 ----- .../parser_services/crfpp/processor.py | 63 ----- .../parser_services/crfpp/tokenizer.py | 38 --- .../services/parser_services/crfpp/utils.py | 266 ------------------ .../parser_services/ingredient_parser.py | 133 ++++++--- poetry.lock | 199 ++++++++++++- pyproject.toml | 3 +- .../test_recipe_ingredient_parser.py | 29 +- tests/unit_tests/test_ingredient_parser.py | 36 --- 19 files changed, 367 insertions(+), 592 deletions(-) delete mode 100644 mealie/scripts/install_model.py delete mode 100644 mealie/services/parser_services/crfpp/__init__.py delete mode 100644 mealie/services/parser_services/crfpp/pre_processor.py delete mode 100644 mealie/services/parser_services/crfpp/processor.py delete mode 100644 mealie/services/parser_services/crfpp/tokenizer.py delete mode 100644 mealie/services/parser_services/crfpp/utils.py diff --git a/.dockerignore b/.dockerignore index f88a2a24b..602e1dd8e 100644 --- a/.dockerignore +++ b/.dockerignore @@ -31,6 +31,4 @@ venv */mealie/.temp /mealie/frontend/ -model.crfmodel - crowdin.yml diff --git a/.gitignore b/.gitignore index 3dbf0a6bd..cd0725b3a 100644 --- a/.gitignore +++ b/.gitignore @@ -157,10 +157,8 @@ dev/data/backups/dev_sample_data*.zip dev/data/recipes/* dev/scripts/output/app_routes.py dev/scripts/output/javascriptAPI/* -mealie/services/scraper/ingredient_nlp/model.crfmodel dev/code-generation/generated/openapi.json dev/code-generation/generated/test_routes.py -mealie/services/parser_services/crfpp/model.crfmodel lcov.info dev/code-generation/openapi.json diff --git a/Taskfile.yml b/Taskfile.yml index 8c017a286..c42cb76c7 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -61,26 +61,11 @@ tasks: - pyproject.toml - .pre-commit-config.yaml - setup:model: - desc: setup nlp model - vars: - MODEL_URL: https://github.com/mealie-recipes/nlp-model/releases/download/v1.0.0/model.crfmodel - OUTPUT: ./mealie/services/parser_services/crfpp/model.crfmodel - sources: - # using pyproject.toml as the dependency since this should only ever need to run once - # during setup. There is perhaps a better way to do this. - - ./pyproject.toml - generates: - - ./mealie/services/parser_services/crfpp/model.crfmodel - cmds: - - curl -L0 {{ .MODEL_URL }} --output {{ .OUTPUT }} - setup: desc: setup all dependencies deps: - setup:ui - setup:py - - setup:model dev:generate: desc: run code generators diff --git a/docker/Dockerfile b/docker/Dockerfile index f08261129..92d9c48fe 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -116,13 +116,6 @@ COPY --from=packages * /dist/ RUN . $VENV_PATH/bin/activate \ && pip install --require-hashes -r /dist/requirements.txt --find-links /dist -############################################### -# CRFPP Image -############################################### -FROM hkotel/crfpp as crfpp - -RUN echo "crfpp-container" - ############################################### # Production Image ############################################### @@ -145,19 +138,9 @@ RUN apt-get update \ # create directory used for Docker Secrets RUN mkdir -p /run/secrets -# copy CRF++ and add it to the library path -ENV LD_LIBRARY_PATH=/usr/local/lib -COPY --from=crfpp /usr/local/lib/ /usr/local/lib -COPY --from=crfpp /usr/local/bin/crf_learn /usr/local/bin/crf_learn -COPY --from=crfpp /usr/local/bin/crf_test /usr/local/bin/crf_test - # Copy venv into image. It contains a fully-installed mealie backend and frontend. COPY --from=venv-builder $VENV_PATH $VENV_PATH - -# Grab CRF++ Model Release -RUN python -m mealie.scripts.install_model - VOLUME [ "$MEALIE_HOME/data/" ] ENV APP_PORT=9000 diff --git a/frontend/lang/messages/en-US.json b/frontend/lang/messages/en-US.json index 33e8e74c1..fb25f51f3 100644 --- a/frontend/lang/messages/en-US.json +++ b/frontend/lang/messages/en-US.json @@ -646,7 +646,6 @@ "nextStep": "Next step", "recipe-actions": "Recipe Actions", "parser": { - "experimental-alert-text": "Mealie uses natural language processing to parse and create units and food items for your recipe ingredients. This feature is experimental and may not always work as expected. If you prefer not to use the parsed results, you can select 'Cancel' and your changes will not be saved.", "ingredient-parser": "Ingredient Parser", "explanation": "To use the ingredient parser, click the 'Parse All' button to start the process. Once the processed ingredients are available, you can review the items and verify that they were parsed correctly. The model's confidence score is displayed on the right of the item title. This score is an average of all the individual scores and may not always be completely accurate.", "alerts-explainer": "Alerts will be displayed if a matching foods or unit is found but does not exists in the database.", diff --git a/frontend/pages/g/_groupSlug/r/_slug/ingredient-parser.vue b/frontend/pages/g/_groupSlug/r/_slug/ingredient-parser.vue index 0c978d9f0..a4ef0fdf1 100644 --- a/frontend/pages/g/_groupSlug/r/_slug/ingredient-parser.vue +++ b/frontend/pages/g/_groupSlug/r/_slug/ingredient-parser.vue @@ -1,13 +1,6 @@