Update vendored guessit to 3.1.1

Updates python-dateutil to 2.8.2. Updates rebulk to 2.0.1.
2025-08-20 13:23:18 -07:00 · 2022-11-28 19:44:46 -05:00 · 2022-11-28 19:44:46 -05:00 · 2226a74ef8
commit 2226a74ef8
parent ebc9718117
66 changed files with 2995 additions and 1306 deletions
--- a/libs/common/dateutil/parser/_parser.py
+++ b/libs/common/dateutil/parser/_parser.py
@ -20,11 +20,11 @@ value falls back to the end of the month.
 Additional resources about date/time string formats can be found below:

 - `A summary of the international standard date and time notation
-  <http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
- `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_
+  <https://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
+- `W3C Date and Time Formats <https://www.w3.org/TR/NOTE-datetime>`_
 - `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_
 - `CPAN ParseDate module
-  <http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
+  <https://metacpan.org/pod/release/MUIR/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
 - `Java SimpleDateFormat Class
  <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
 """
@ -40,7 +40,7 @@ from calendar import monthrange
 from io import StringIO

 import six
-from six import binary_type, integer_types, text_type
+from six import integer_types, text_type

 from decimal import Decimal

@ -49,7 +49,7 @@ from warnings import warn
 from .. import relativedelta
 from .. import tz

-__all__ = ["parse", "parserinfo"]
+__all__ = ["parse", "parserinfo", "ParserError"]


 # TODO: pandas.core.tools.datetimes imports this explicitly.  Might be worth
@ -60,14 +60,8 @@ class _timelex(object):
    _split_decimal = re.compile("([.,])")

    def __init__(self, instream):
-        if six.PY2:
-            # In Python 2, we can't duck type properly because unicode has
-            # a 'decode' function, and we'd be double-decoding
-            if isinstance(instream, (binary_type, bytearray)):
-                instream = instream.decode()
-        else:
-            if getattr(instream, 'decode', None) is not None:
-                instream = instream.decode()
+        if isinstance(instream, (bytes, bytearray)):
+            instream = instream.decode()

        if isinstance(instream, text_type):
            instream = StringIO(instream)
@ -291,7 +285,7 @@ class parserinfo(object):
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
-    UTCZONE = ["UTC", "GMT", "Z"]
+    UTCZONE = ["UTC", "GMT", "Z", "z"]
    PERTAIN = ["of"]
    TZOFFSET = {}
    # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
@ -388,7 +382,8 @@ class parserinfo(object):
        if res.year is not None:
            res.year = self.convertyear(res.year, res.century_specified)

-        if res.tzoffset == 0 and not res.tzname or res.tzname == 'Z':
+        if ((res.tzoffset == 0 and not res.tzname) or
+             (res.tzname == 'Z' or res.tzname == 'z')):
            res.tzname = "UTC"
            res.tzoffset = 0
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
@ -422,7 +417,7 @@ class _ymd(list):
        elif not self.has_month:
            return 1 <= value <= 31
        elif not self.has_year:
-            # Be permissive, assume leapyear
+            # Be permissive, assume leap year
            month = self[self.mstridx]
            return 1 <= value <= monthrange(2000, month)[1]
        else:
@ -538,7 +533,7 @@ class _ymd(list):
                    year, month, day = self
                else:
                    # 01-Jan-01
-                    # Give precendence to day-first, since
+                    # Give precedence to day-first, since
                    # two-digit years is usually hand-written.
                    day, month, year = self

@ -625,7 +620,7 @@ class parser(object):
            first element being a :class:`datetime.datetime` object, the second
            a tuple containing the fuzzy tokens.

-        :raises ValueError:
+        :raises ParserError:
            Raised for invalid or unknown string format, if the provided
            :class:`tzinfo` is not in a valid format, or if an invalid date
            would be created.
@ -645,12 +640,15 @@ class parser(object):
        res, skipped_tokens = self._parse(timestr, **kwargs)

        if res is None:
-            raise ValueError("Unknown string format:", timestr)
+            raise ParserError("Unknown string format: %s", timestr)

        if len(res) == 0:
-            raise ValueError("String does not contain a date:", timestr)
+            raise ParserError("String does not contain a date: %s", timestr)

-        ret = self._build_naive(res, default)
+        try:
+            ret = self._build_naive(res, default)
+        except ValueError as e:
+            six.raise_from(ParserError(str(e) + ": %s", timestr), e)

        if not ignoretz:
            ret = self._build_tzaware(ret, res, tzinfos)
@ -1021,7 +1019,7 @@ class parser(object):
            hms_idx = idx + 2

        elif idx > 0 and info.hms(tokens[idx-1]) is not None:
-            # There is a "h", "m", or "s" preceeding this token.  Since neither
+            # There is a "h", "m", or "s" preceding this token.  Since neither
            # of the previous cases was hit, there is no label following this
            # token, so we use the previous label.
            # e.g. the "04" in "12h04"
@ -1060,7 +1058,8 @@ class parser(object):
                tzname is None and
                tzoffset is None and
                len(token) <= 5 and
-                all(x in string.ascii_uppercase for x in token))
+                (all(x in string.ascii_uppercase for x in token)
+                 or token in self.info.UTCZONE))

    def _ampm_valid(self, hour, ampm, fuzzy):
        """
@ -1100,7 +1099,7 @@ class parser(object):
    def _parse_min_sec(self, value):
        # TODO: Every usage of this function sets res.second to the return
        # value. Are there any cases where second will be returned as None and
-        # we *dont* want to set res.second = None?
+        # we *don't* want to set res.second = None?
        minute = int(value)
        second = None

@ -1109,14 +1108,6 @@ class parser(object):
            second = int(60 * sec_remainder)
        return (minute, second)

-    def _parsems(self, value):
-        """Parse a I[.F] seconds value into (seconds, microseconds)."""
-        if "." not in value:
-            return int(value), 0
-        else:
-            i, f = value.split(".")
-            return int(i), int(f.ljust(6, "0")[:6])
-
    def _parse_hms(self, idx, tokens, info, hms_idx):
        # TODO: Is this going to admit a lot of false-positives for when we
        # just happen to have digits and "h", "m" or "s" characters in non-date
@ -1135,21 +1126,35 @@ class parser(object):

        return (new_idx, hms)

-    def _recombine_skipped(self, tokens, skipped_idxs):
-        """
-        >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
-        >>> skipped_idxs = [0, 1, 2, 5]
-        >>> _recombine_skipped(tokens, skipped_idxs)
-        ["foo bar", "baz"]
-        """
-        skipped_tokens = []
-        for i, idx in enumerate(sorted(skipped_idxs)):
-            if i > 0 and idx - 1 == skipped_idxs[i - 1]:
-                skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
-            else:
-                skipped_tokens.append(tokens[idx])
+    # ------------------------------------------------------------------
+    # Handling for individual tokens.  These are kept as methods instead
+    #  of functions for the sake of customizability via subclassing.

-        return skipped_tokens
+    def _parsems(self, value):
+        """Parse a I[.F] seconds value into (seconds, microseconds)."""
+        if "." not in value:
+            return int(value), 0
+        else:
+            i, f = value.split(".")
+            return int(i), int(f.ljust(6, "0")[:6])
+
+    def _to_decimal(self, val):
+        try:
+            decimal_value = Decimal(val)
+            # See GH 662, edge case, infinite value should not be converted
+            #  via `_to_decimal`
+            if not decimal_value.is_finite():
+                raise ValueError("Converted decimal value is infinite or NaN")
+        except Exception as e:
+            msg = "Could not convert %s to decimal" % val
+            six.raise_from(ValueError(msg), e)
+        else:
+            return decimal_value
+
+    # ------------------------------------------------------------------
+    # Post-Parsing construction of datetime output.  These are kept as
+    #  methods instead of functions for the sake of customizability via
+    #  subclassing.

    def _build_tzinfo(self, tzinfos, tzname, tzoffset):
        if callable(tzinfos):
@ -1164,6 +1169,9 @@ class parser(object):
            tzinfo = tz.tzstr(tzdata)
        elif isinstance(tzdata, integer_types):
            tzinfo = tz.tzoffset(tzname, tzdata)
+        else:
+            raise TypeError("Offset must be tzinfo subclass, tz string, "
+                            "or int offset.")
        return tzinfo

    def _build_tzaware(self, naive, res, tzinfos):
@ -1181,10 +1189,10 @@ class parser(object):
            # This is mostly relevant for winter GMT zones parsed in the UK
            if (aware.tzname() != res.tzname and
                    res.tzname in self.info.UTCZONE):
-                aware = aware.replace(tzinfo=tz.tzutc())
+                aware = aware.replace(tzinfo=tz.UTC)

        elif res.tzoffset == 0:
-            aware = naive.replace(tzinfo=tz.tzutc())
+            aware = naive.replace(tzinfo=tz.UTC)

        elif res.tzoffset:
            aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
@ -1239,17 +1247,21 @@ class parser(object):

        return dt

-    def _to_decimal(self, val):
-        try:
-            decimal_value = Decimal(val)
-            # See GH 662, edge case, infinite value should not be converted via `_to_decimal`
-            if not decimal_value.is_finite():
-                raise ValueError("Converted decimal value is infinite or NaN")
-        except Exception as e:
-            msg = "Could not convert %s to decimal" % val
-            six.raise_from(ValueError(msg), e)
-        else:
-            return decimal_value
+    def _recombine_skipped(self, tokens, skipped_idxs):
+        """
+        >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
+        >>> skipped_idxs = [0, 1, 2, 5]
+        >>> _recombine_skipped(tokens, skipped_idxs)
+        ["foo bar", "baz"]
+        """
+        skipped_tokens = []
+        for i, idx in enumerate(sorted(skipped_idxs)):
+            if i > 0 and idx - 1 == skipped_idxs[i - 1]:
+                skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
+            else:
+                skipped_tokens.append(tokens[idx])
+
+        return skipped_tokens


 DEFAULTPARSER = parser()
@ -1341,10 +1353,10 @@ def parse(timestr, parserinfo=None, **kwargs):
        first element being a :class:`datetime.datetime` object, the second
        a tuple containing the fuzzy tokens.

-    :raises ValueError:
-        Raised for invalid or unknown string format, if the provided
-        :class:`tzinfo` is not in a valid format, or if an invalid date
-        would be created.
+    :raises ParserError:
+        Raised for invalid or unknown string formats, if the provided
+        :class:`tzinfo` is not in a valid format, or if an invalid date would
+        be created.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
@ -1573,6 +1585,29 @@ DEFAULTTZPARSER = _tzparser()
 def _parsetz(tzstr):
    return DEFAULTTZPARSER.parse(tzstr)

+
+class ParserError(ValueError):
+    """Exception subclass used for any failure to parse a datetime string.
+
+    This is a subclass of :py:exc:`ValueError`, and should be raised any time
+    earlier versions of ``dateutil`` would have raised ``ValueError``.
+
+    .. versionadded:: 2.8.1
+    """
+    def __str__(self):
+        try:
+            return self.args[0] % self.args[1:]
+        except (TypeError, IndexError):
+            return super(ParserError, self).__str__()
+
+    def __repr__(self):
+        args = ", ".join("'%s'" % arg for arg in self.args)
+        return "%s(%s)" % (self.__class__.__name__, args)
+
+
 class UnknownTimezoneWarning(RuntimeWarning):
-    """Raised when the parser finds a timezone it cannot parse into a tzinfo"""
+    """Raised when the parser finds a timezone it cannot parse into a tzinfo.
+
+    .. versionadded:: 2.7.0
+    """
 # vim:ts=4:sw=4:et