parses datetime properly + clean category - #248

This commit is contained in:
hay-kot 2021-04-01 08:07:33 -08:00
commit 93fd42332c
2 changed files with 104 additions and 4 deletions

View file

@ -1,6 +1,6 @@
import html import html
import re import re
from datetime import datetime from datetime import datetime, timedelta
from typing import List from typing import List
from slugify import slugify from slugify import slugify
@ -10,8 +10,6 @@ class Cleaner:
"""A Namespace for utility function to clean recipe data extracted """A Namespace for utility function to clean recipe data extracted
from a url and returns a dictionary that is ready for import into from a url and returns a dictionary that is ready for import into
the database. Cleaner.clean is the main entrypoint the database. Cleaner.clean is the main entrypoint
""" """
@staticmethod @staticmethod
@ -44,7 +42,7 @@ class Cleaner:
@staticmethod @staticmethod
def category(category: str):
    """Wrap a single category value in a list.

    Returns ``[category]`` when the value is a non-empty string; any other
    value (empty string, ``None``, non-string objects) yields an empty list.
    """
    is_usable = isinstance(category, str) and category != ""
    return [category] if is_usable else []
@ -144,7 +142,100 @@ class Cleaner:
def time(time_entry):
    """Normalise a scraped time value into a display string.

    Args:
        time_entry: the raw value; ``None``, a ``timedelta``, a ``datetime``,
            a string, or any other object.

    Returns:
        ``None`` for ``None`` input; a formatted duration (via
        ``pretty_print_timedelta``) for ``timedelta`` objects and for strings
        that ``parse_duration`` accepts; the string itself when it cannot be
        parsed as a duration; ``str(time_entry)`` for any other type.
    """
    if time_entry is None:
        return None

    if isinstance(time_entry, timedelta):
        # The formatted text must be returned, not just computed.
        return pretty_print_timedelta(time_entry)

    if isinstance(time_entry, datetime):
        # NOTE(review): this branch only prints the value and yields None;
        # it looks like leftover debugging - confirm the intended result.
        print(time_entry)
        return None

    if isinstance(time_entry, str):
        try:
            parsed = parse_duration(time_entry)
        except ValueError:
            # parse_duration raises ValueError for anything that is not a
            # duration string; keep the scraped text instead of dropping it.
            return time_entry
        return pretty_print_timedelta(parsed)

    return str(time_entry)
# ! TODO: Cleanup Code Below
# Shape: P[nY][nM][nD][T[nH][nM][n[.n]S]].  The time part is optional so that
# date-only durations such as "P2D" parse; the lookahead rejects a bare "P",
# which carries no duration at all.
_ISO_DURATION_RE = re.compile(
    r"^P(?!$)"
    r"(?:(?P<years>\d+)Y)?"
    r"(?:(?P<months>\d+)M)?"
    r"(?:(?P<days>\d+)D)?"
    r"(?:T"
    r"(?:(?P<hours>\d+)H)?"
    r"(?:(?P<minutes>\d+)M)?"
    r"(?:(?P<seconds>\d+(?:\.\d+)?)S)?"
    r")?$"
)


def parse_duration(iso_duration):
    """Parse an ISO 8601 duration string into a ``datetime.timedelta``.

    Year and month components are accepted syntactically but ignored: a
    ``timedelta`` stores days, seconds and microseconds, and a year or month
    has no fixed length in days without knowing which year/month is meant.

    Args:
        iso_duration: an ISO 8601 duration string, e.g. ``"PT2H30M"`` or
            ``"P1DT1.5S"``.

    Returns:
        A ``datetime.timedelta`` built from the days, hours, minutes and
        (possibly fractional) seconds found in the string.

    Raises:
        ValueError: if the string is not a valid duration.
    """
    m = _ISO_DURATION_RE.match(iso_duration)
    if m is None:
        raise ValueError("invalid ISO 8601 duration string")

    # Absent components come back as None and count as zero.
    return timedelta(
        days=int(m["days"] or 0),
        hours=int(m["hours"] or 0),
        minutes=int(m["minutes"] or 0),
        seconds=float(m["seconds"] or 0),
    )
def pretty_print_timedelta(t, max_components=None, max_decimal_places=2):
    """Format a timedelta as a space-separated list of its components.

    For example ``timedelta(days=2, seconds=17280)`` is rendered as
    ``'2 days 4 Hours 48 Minutes'``. With ``max_components=1`` the same value
    becomes ``'2.2 days'``: the last permitted component absorbs the whole
    remainder as a fraction.

    Args:
        t: the ``timedelta`` to format.
        max_components: maximum number of units to emit; ``None`` emits every
            non-zero unit.
        max_decimal_places: rounding applied to each component (only visible
            on the fractional final component).

    Returns:
        The formatted string, or ``"none"`` when no unit fits into ``t``
        (zero or negative durations).
    """
    # Ordered largest to smallest; the label casing is part of the output.
    time_scales = (
        (timedelta(days=365), "year"),
        (timedelta(days=1), "day"),
        (timedelta(hours=1), "Hour"),
        (timedelta(minutes=1), "Minute"),
        (timedelta(seconds=1), "Second"),
        (timedelta(microseconds=1000), "millisecond"),
        (timedelta(microseconds=1), "microsecond"),
    )

    parts = []
    for scale, name in time_scales:
        if t < scale:
            continue

        is_last = len(parts) + 1 == max_components
        if is_last:
            # Final allowed component: express everything left as a fraction.
            n = t / scale
        else:
            # Exact integer arithmetic; going through a float can round up
            # for very large durations and leave a negative remainder.
            n, t = divmod(t, scale)

        n_txt = str(round(n, max_decimal_places))
        if n_txt.endswith(".0"):
            n_txt = n_txt[:-2]
        parts.append(f"{n_txt} {name}{'s' if n > 1 else ''}")

        if is_last:
            break

    return " ".join(parts) if parts else "none"

View file

@ -1,5 +1,6 @@
import json import json
import re import re
from datetime import timedelta
import pytest import pytest
from mealie.services.scraper.cleaner import Cleaner from mealie.services.scraper.cleaner import Cleaner
@ -89,3 +90,11 @@ def test_html_with_recipe_data():
assert url_validation_regex.match(recipe_data["image"]) assert url_validation_regex.match(recipe_data["image"])
assert len(recipe_data["recipeIngredient"]) == 13 assert len(recipe_data["recipeIngredient"]) == 13
assert len(recipe_data["recipeInstructions"]) == 4 assert len(recipe_data["recipeInstructions"]) == 4
def test_time_cleaner():
    """Cleaner.time turns an ISO 8601 duration string into readable text."""
    iso_duration = "PT2H30M"
    assert Cleaner.time(iso_duration) == "2 Hours 30 Minutes"