Update vendored guessit to 3.1.1

Updates python-dateutil to 2.8.2. Updates rebulk to 2.0.1.
2025-08-14 18:47:09 -07:00 · 2022-11-28 19:44:46 -05:00 · 2022-11-28 19:44:46 -05:00 · 2226a74ef8
commit 2226a74ef8
parent ebc9718117
66 changed files with 2995 additions and 1306 deletions
--- a/libs/common/dateutil/parser/__init__.py
+++ b/libs/common/dateutil/parser/__init__.py
@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-from ._parser import parse, parser, parserinfo
+from ._parser import parse, parser, parserinfo, ParserError
 from ._parser import DEFAULTPARSER, DEFAULTTZPARSER
 from ._parser import UnknownTimezoneWarning

@ -9,6 +9,7 @@ from .isoparser import isoparser, isoparse

 __all__ = ['parse', 'parser', 'parserinfo',
           'isoparse', 'isoparser',
+           'ParserError',
           'UnknownTimezoneWarning']


--- a/libs/common/dateutil/parser/_parser.py
+++ b/libs/common/dateutil/parser/_parser.py
@ -20,11 +20,11 @@ value falls back to the end of the month.
 Additional resources about date/time string formats can be found below:

 - `A summary of the international standard date and time notation
-  <http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
-- `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_
+  <https://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
+- `W3C Date and Time Formats <https://www.w3.org/TR/NOTE-datetime>`_
 - `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_
 - `CPAN ParseDate module
-  <http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
+  <https://metacpan.org/pod/release/MUIR/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
 - `Java SimpleDateFormat Class
  <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
 """
@ -40,7 +40,7 @@ from calendar import monthrange
 from io import StringIO

 import six
-from six import binary_type, integer_types, text_type
+from six import integer_types, text_type

 from decimal import Decimal

@ -49,7 +49,7 @@ from warnings import warn
 from .. import relativedelta
 from .. import tz

-__all__ = ["parse", "parserinfo"]
+__all__ = ["parse", "parserinfo", "ParserError"]


 # TODO: pandas.core.tools.datetimes imports this explicitly.  Might be worth
@ -60,14 +60,8 @@ class _timelex(object):
    _split_decimal = re.compile("([.,])")

    def __init__(self, instream):
-        if six.PY2:
-            # In Python 2, we can't duck type properly because unicode has
-            # a 'decode' function, and we'd be double-decoding
-            if isinstance(instream, (binary_type, bytearray)):
-                instream = instream.decode()
-        else:
-            if getattr(instream, 'decode', None) is not None:
-                instream = instream.decode()
+        if isinstance(instream, (bytes, bytearray)):
+            instream = instream.decode()

        if isinstance(instream, text_type):
            instream = StringIO(instream)
@ -291,7 +285,7 @@ class parserinfo(object):
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
-    UTCZONE = ["UTC", "GMT", "Z"]
+    UTCZONE = ["UTC", "GMT", "Z", "z"]
    PERTAIN = ["of"]
    TZOFFSET = {}
    # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
@ -388,7 +382,8 @@ class parserinfo(object):
        if res.year is not None:
            res.year = self.convertyear(res.year, res.century_specified)

-        if res.tzoffset == 0 and not res.tzname or res.tzname == 'Z':
+        if ((res.tzoffset == 0 and not res.tzname) or
+             (res.tzname == 'Z' or res.tzname == 'z')):
            res.tzname = "UTC"
            res.tzoffset = 0
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
@ -422,7 +417,7 @@ class _ymd(list):
        elif not self.has_month:
            return 1 <= value <= 31
        elif not self.has_year:
-            # Be permissive, assume leapyear
+            # Be permissive, assume leap year
            month = self[self.mstridx]
            return 1 <= value <= monthrange(2000, month)[1]
        else:
@ -538,7 +533,7 @@ class _ymd(list):
                    year, month, day = self
                else:
                    # 01-Jan-01
-                    # Give precendence to day-first, since
+                    # Give precedence to day-first, since
                    # two-digit years is usually hand-written.
                    day, month, year = self

@ -625,7 +620,7 @@ class parser(object):
            first element being a :class:`datetime.datetime` object, the second
            a tuple containing the fuzzy tokens.

-        :raises ValueError:
+        :raises ParserError:
            Raised for invalid or unknown string format, if the provided
            :class:`tzinfo` is not in a valid format, or if an invalid date
            would be created.
@ -645,12 +640,15 @@ class parser(object):
        res, skipped_tokens = self._parse(timestr, **kwargs)

        if res is None:
-            raise ValueError("Unknown string format:", timestr)
+            raise ParserError("Unknown string format: %s", timestr)

        if len(res) == 0:
-            raise ValueError("String does not contain a date:", timestr)
+            raise ParserError("String does not contain a date: %s", timestr)

-        ret = self._build_naive(res, default)
+        try:
+            ret = self._build_naive(res, default)
+        except ValueError as e:
+            six.raise_from(ParserError(str(e) + ": %s", timestr), e)

        if not ignoretz:
            ret = self._build_tzaware(ret, res, tzinfos)
@ -1021,7 +1019,7 @@ class parser(object):
            hms_idx = idx + 2

        elif idx > 0 and info.hms(tokens[idx-1]) is not None:
-            # There is a "h", "m", or "s" preceeding this token.  Since neither
+            # There is a "h", "m", or "s" preceding this token.  Since neither
            # of the previous cases was hit, there is no label following this
            # token, so we use the previous label.
            # e.g. the "04" in "12h04"
@ -1060,7 +1058,8 @@ class parser(object):
                tzname is None and
                tzoffset is None and
                len(token) <= 5 and
-                all(x in string.ascii_uppercase for x in token))
+                (all(x in string.ascii_uppercase for x in token)
+                 or token in self.info.UTCZONE))

    def _ampm_valid(self, hour, ampm, fuzzy):
        """
@ -1100,7 +1099,7 @@ class parser(object):
    def _parse_min_sec(self, value):
        # TODO: Every usage of this function sets res.second to the return
        # value. Are there any cases where second will be returned as None and
-        # we *dont* want to set res.second = None?
+        # we *don't* want to set res.second = None?
        minute = int(value)
        second = None

@ -1109,14 +1108,6 @@ class parser(object):
            second = int(60 * sec_remainder)
        return (minute, second)

-    def _parsems(self, value):
-        """Parse a I[.F] seconds value into (seconds, microseconds)."""
-        if "." not in value:
-            return int(value), 0
-        else:
-            i, f = value.split(".")
-            return int(i), int(f.ljust(6, "0")[:6])
-
    def _parse_hms(self, idx, tokens, info, hms_idx):
        # TODO: Is this going to admit a lot of false-positives for when we
        # just happen to have digits and "h", "m" or "s" characters in non-date
@ -1135,21 +1126,35 @@ class parser(object):

        return (new_idx, hms)

-    def _recombine_skipped(self, tokens, skipped_idxs):
-        """
-        >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
-        >>> skipped_idxs = [0, 1, 2, 5]
-        >>> _recombine_skipped(tokens, skipped_idxs)
-        ["foo bar", "baz"]
-        """
-        skipped_tokens = []
-        for i, idx in enumerate(sorted(skipped_idxs)):
-            if i > 0 and idx - 1 == skipped_idxs[i - 1]:
-                skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
-            else:
-                skipped_tokens.append(tokens[idx])
+    # ------------------------------------------------------------------
+    # Handling for individual tokens.  These are kept as methods instead
+    #  of functions for the sake of customizability via subclassing.

-        return skipped_tokens
+    def _parsems(self, value):
+        """Parse a I[.F] seconds value into (seconds, microseconds)."""
+        if "." not in value:
+            return int(value), 0
+        else:
+            i, f = value.split(".")
+            return int(i), int(f.ljust(6, "0")[:6])
+
+    def _to_decimal(self, val):
+        try:
+            decimal_value = Decimal(val)
+            # See GH 662, edge case, infinite value should not be converted
+            #  via `_to_decimal`
+            if not decimal_value.is_finite():
+                raise ValueError("Converted decimal value is infinite or NaN")
+        except Exception as e:
+            msg = "Could not convert %s to decimal" % val
+            six.raise_from(ValueError(msg), e)
+        else:
+            return decimal_value
+
+    # ------------------------------------------------------------------
+    # Post-Parsing construction of datetime output.  These are kept as
+    #  methods instead of functions for the sake of customizability via
+    #  subclassing.

    def _build_tzinfo(self, tzinfos, tzname, tzoffset):
        if callable(tzinfos):
@ -1164,6 +1169,9 @@ class parser(object):
            tzinfo = tz.tzstr(tzdata)
        elif isinstance(tzdata, integer_types):
            tzinfo = tz.tzoffset(tzname, tzdata)
+        else:
+            raise TypeError("Offset must be tzinfo subclass, tz string, "
+                            "or int offset.")
        return tzinfo

    def _build_tzaware(self, naive, res, tzinfos):
@ -1181,10 +1189,10 @@ class parser(object):
            # This is mostly relevant for winter GMT zones parsed in the UK
            if (aware.tzname() != res.tzname and
                    res.tzname in self.info.UTCZONE):
-                aware = aware.replace(tzinfo=tz.tzutc())
+                aware = aware.replace(tzinfo=tz.UTC)

        elif res.tzoffset == 0:
-            aware = naive.replace(tzinfo=tz.tzutc())
+            aware = naive.replace(tzinfo=tz.UTC)

        elif res.tzoffset:
            aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
@ -1239,17 +1247,21 @@ class parser(object):

        return dt

-    def _to_decimal(self, val):
-        try:
-            decimal_value = Decimal(val)
-            # See GH 662, edge case, infinite value should not be converted via `_to_decimal`
-            if not decimal_value.is_finite():
-                raise ValueError("Converted decimal value is infinite or NaN")
-        except Exception as e:
-            msg = "Could not convert %s to decimal" % val
-            six.raise_from(ValueError(msg), e)
-        else:
-            return decimal_value
+    def _recombine_skipped(self, tokens, skipped_idxs):
+        """
+        >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
+        >>> skipped_idxs = [0, 1, 2, 5]
+        >>> _recombine_skipped(tokens, skipped_idxs)
+        ["foo bar", "baz"]
+        """
+        skipped_tokens = []
+        for i, idx in enumerate(sorted(skipped_idxs)):
+            if i > 0 and idx - 1 == skipped_idxs[i - 1]:
+                skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
+            else:
+                skipped_tokens.append(tokens[idx])
+
+        return skipped_tokens


 DEFAULTPARSER = parser()
@ -1341,10 +1353,10 @@ def parse(timestr, parserinfo=None, **kwargs):
        first element being a :class:`datetime.datetime` object, the second
        a tuple containing the fuzzy tokens.

-    :raises ValueError:
-        Raised for invalid or unknown string format, if the provided
-        :class:`tzinfo` is not in a valid format, or if an invalid date
-        would be created.
+    :raises ParserError:
+        Raised for invalid or unknown string formats, if the provided
+        :class:`tzinfo` is not in a valid format, or if an invalid date would
+        be created.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
@ -1573,6 +1585,29 @@ DEFAULTTZPARSER = _tzparser()
 def _parsetz(tzstr):
    return DEFAULTTZPARSER.parse(tzstr)

+
+class ParserError(ValueError):
+    """Exception subclass used for any failure to parse a datetime string.
+
+    This is a subclass of :py:exc:`ValueError`, and should be raised any time
+    earlier versions of ``dateutil`` would have raised ``ValueError``.
+
+    .. versionadded:: 2.8.1
+    """
+    def __str__(self):
+        try:
+            return self.args[0] % self.args[1:]
+        except (TypeError, IndexError):
+            return super(ParserError, self).__str__()
+
+    def __repr__(self):
+        args = ", ".join("'%s'" % arg for arg in self.args)
+        return "%s(%s)" % (self.__class__.__name__, args)
+
+
 class UnknownTimezoneWarning(RuntimeWarning):
-    """Raised when the parser finds a timezone it cannot parse into a tzinfo"""
+    """Raised when the parser finds a timezone it cannot parse into a tzinfo.
+
+    .. versionadded:: 2.7.0
+    """
 # vim:ts=4:sw=4:et
--- a/libs/common/dateutil/parser/isoparser.py
+++ b/libs/common/dateutil/parser/isoparser.py
@ -88,10 +88,12 @@ class isoparser(object):
        - ``hh``
        - ``hh:mm`` or ``hhmm``
        - ``hh:mm:ss`` or ``hhmmss``
-        - ``hh:mm:ss.sss`` or ``hh:mm:ss.ssssss`` (3-6 sub-second digits)
+        - ``hh:mm:ss.ssssss`` (Up to 6 sub-second digits)

        Midnight is a special case for `hh`, as the standard supports both
-        00:00 and 24:00 as a representation.
+        00:00 and 24:00 as a representation. The decimal separator can be
+        either a dot or a comma.
+

        .. caution::

@ -137,6 +139,10 @@ class isoparser(object):
            else:
                raise ValueError('String contains unknown ISO components')

+        if len(components) > 3 and components[3] == 24:
+            components[3] = 0
+            return datetime(*components) + timedelta(days=1)
+
        return datetime(*components)

    @_takes_ascii
@ -153,7 +159,7 @@ class isoparser(object):
        components, pos = self._parse_isodate(datestr)
        if pos < len(datestr):
            raise ValueError('String contains unknown ISO ' +
-                             'components: {}'.format(datestr))
+                             'components: {!r}'.format(datestr.decode('ascii')))
        return date(*components)

    @_takes_ascii
@ -167,7 +173,10 @@ class isoparser(object):
        :return:
            Returns a :class:`datetime.time` object
        """
-        return time(*self._parse_isotime(timestr))
+        components = self._parse_isotime(timestr)
+        if components[0] == 24:
+            components[0] = 0
+        return time(*components)

    @_takes_ascii
    def parse_tzstr(self, tzstr, zero_as_utc=True):
@ -190,10 +199,9 @@ class isoparser(object):
        return self._parse_tzstr(tzstr, zero_as_utc=zero_as_utc)

    # Constants
-    _MICROSECOND_END_REGEX = re.compile(b'[-+Z]+')
    _DATE_SEP = b'-'
    _TIME_SEP = b':'
-    _MICRO_SEP = b'.'
+    _FRACTION_REGEX = re.compile(b'[\\.,]([0-9]+)')

    def _parse_isodate(self, dt_str):
        try:
@ -325,39 +333,42 @@ class isoparser(object):
        pos = 0
        comp = -1

-        if len(timestr) < 2:
+        if len_str < 2:
            raise ValueError('ISO time too short')

-        has_sep = len_str >= 3 and timestr[2:3] == self._TIME_SEP
+        has_sep = False

        while pos < len_str and comp < 5:
            comp += 1

-            if timestr[pos:pos + 1] in b'-+Z':
+            if timestr[pos:pos + 1] in b'-+Zz':
                # Detect time zone boundary
                components[-1] = self._parse_tzstr(timestr[pos:])
                pos = len_str
                break

+            if comp == 1 and timestr[pos:pos+1] == self._TIME_SEP:
+                has_sep = True
+                pos += 1
+            elif comp == 2 and has_sep:
+                if timestr[pos:pos+1] != self._TIME_SEP:
+                    raise ValueError('Inconsistent use of colon separator')
+                pos += 1
+
            if comp < 3:
                # Hour, minute, second
                components[comp] = int(timestr[pos:pos + 2])
                pos += 2
-                if (has_sep and pos < len_str and
-                        timestr[pos:pos + 1] == self._TIME_SEP):
-                    pos += 1

            if comp == 3:
-                # Microsecond
-                if timestr[pos:pos + 1] != self._MICRO_SEP:
+                # Fraction of a second
+                frac = self._FRACTION_REGEX.match(timestr[pos:])
+                if not frac:
                    continue

-                pos += 1
-                us_str = self._MICROSECOND_END_REGEX.split(timestr[pos:pos + 6],
-                                                           1)[0]
-
+                us_str = frac.group(1)[:6]  # Truncate to microseconds
                components[comp] = int(us_str) * 10**(6 - len(us_str))
-                pos += len(us_str)
+                pos += len(frac.group())

        if pos < len_str:
            raise ValueError('Unused components in ISO string')
@ -366,13 +377,12 @@ class isoparser(object):
            # Standard supports 00:00 and 24:00 as representations of midnight
            if any(component != 0 for component in components[1:4]):
                raise ValueError('Hour may only be 24 at 24:00:00.000')
-            components[0] = 0

        return components

    def _parse_tzstr(self, tzstr, zero_as_utc=True):
-        if tzstr == b'Z':
-            return tz.tzutc()
+        if tzstr == b'Z' or tzstr == b'z':
+            return tz.UTC

        if len(tzstr) not in {3, 5, 6}:
            raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters')
@ -391,7 +401,7 @@ class isoparser(object):
            minutes = int(tzstr[(4 if tzstr[3:4] == self._TIME_SEP else 3):])

        if zero_as_utc and hours == 0 and minutes == 0:
-            return tz.tzutc()
+            return tz.UTC
        else:
            if minutes > 59:
                raise ValueError('Invalid minutes in time zone offset')