Update vendored guessit to 3.1.1

Updates python-dateutil to 2.8.2
Updates rebulk to 2.0.1
This commit is contained in:
Labrys of Knossos 2022-11-28 19:44:46 -05:00
commit 2226a74ef8
66 changed files with 2995 additions and 1306 deletions

View file

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
from ._parser import parse, parser, parserinfo
from ._parser import parse, parser, parserinfo, ParserError
from ._parser import DEFAULTPARSER, DEFAULTTZPARSER
from ._parser import UnknownTimezoneWarning
@ -9,6 +9,7 @@ from .isoparser import isoparser, isoparse
__all__ = ['parse', 'parser', 'parserinfo',
'isoparse', 'isoparser',
'ParserError',
'UnknownTimezoneWarning']

View file

@ -20,11 +20,11 @@ value falls back to the end of the month.
Additional resources about date/time string formats can be found below:
- `A summary of the international standard date and time notation
<http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
- `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_
<https://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
- `W3C Date and Time Formats <https://www.w3.org/TR/NOTE-datetime>`_
- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_
- `CPAN ParseDate module
<http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
<https://metacpan.org/pod/release/MUIR/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
- `Java SimpleDateFormat Class
<https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
"""
@ -40,7 +40,7 @@ from calendar import monthrange
from io import StringIO
import six
from six import binary_type, integer_types, text_type
from six import integer_types, text_type
from decimal import Decimal
@ -49,7 +49,7 @@ from warnings import warn
from .. import relativedelta
from .. import tz
__all__ = ["parse", "parserinfo"]
__all__ = ["parse", "parserinfo", "ParserError"]
# TODO: pandas.core.tools.datetimes imports this explicitly. Might be worth
@ -60,14 +60,8 @@ class _timelex(object):
_split_decimal = re.compile("([.,])")
def __init__(self, instream):
if six.PY2:
# In Python 2, we can't duck type properly because unicode has
# a 'decode' function, and we'd be double-decoding
if isinstance(instream, (binary_type, bytearray)):
instream = instream.decode()
else:
if getattr(instream, 'decode', None) is not None:
instream = instream.decode()
if isinstance(instream, (bytes, bytearray)):
instream = instream.decode()
if isinstance(instream, text_type):
instream = StringIO(instream)
@ -291,7 +285,7 @@ class parserinfo(object):
("s", "second", "seconds")]
AMPM = [("am", "a"),
("pm", "p")]
UTCZONE = ["UTC", "GMT", "Z"]
UTCZONE = ["UTC", "GMT", "Z", "z"]
PERTAIN = ["of"]
TZOFFSET = {}
# TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
@ -388,7 +382,8 @@ class parserinfo(object):
if res.year is not None:
res.year = self.convertyear(res.year, res.century_specified)
if res.tzoffset == 0 and not res.tzname or res.tzname == 'Z':
if ((res.tzoffset == 0 and not res.tzname) or
(res.tzname == 'Z' or res.tzname == 'z')):
res.tzname = "UTC"
res.tzoffset = 0
elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
@ -422,7 +417,7 @@ class _ymd(list):
elif not self.has_month:
return 1 <= value <= 31
elif not self.has_year:
# Be permissive, assume leapyear
# Be permissive, assume leap year
month = self[self.mstridx]
return 1 <= value <= monthrange(2000, month)[1]
else:
@ -538,7 +533,7 @@ class _ymd(list):
year, month, day = self
else:
# 01-Jan-01
# Give precendence to day-first, since
# Give precedence to day-first, since
# two-digit years is usually hand-written.
day, month, year = self
@ -625,7 +620,7 @@ class parser(object):
first element being a :class:`datetime.datetime` object, the second
a tuple containing the fuzzy tokens.
:raises ValueError:
:raises ParserError:
Raised for invalid or unknown string format, if the provided
:class:`tzinfo` is not in a valid format, or if an invalid date
would be created.
@ -645,12 +640,15 @@ class parser(object):
res, skipped_tokens = self._parse(timestr, **kwargs)
if res is None:
raise ValueError("Unknown string format:", timestr)
raise ParserError("Unknown string format: %s", timestr)
if len(res) == 0:
raise ValueError("String does not contain a date:", timestr)
raise ParserError("String does not contain a date: %s", timestr)
ret = self._build_naive(res, default)
try:
ret = self._build_naive(res, default)
except ValueError as e:
six.raise_from(ParserError(str(e) + ": %s", timestr), e)
if not ignoretz:
ret = self._build_tzaware(ret, res, tzinfos)
@ -1021,7 +1019,7 @@ class parser(object):
hms_idx = idx + 2
elif idx > 0 and info.hms(tokens[idx-1]) is not None:
# There is a "h", "m", or "s" preceeding this token. Since neither
# There is a "h", "m", or "s" preceding this token. Since neither
# of the previous cases was hit, there is no label following this
# token, so we use the previous label.
# e.g. the "04" in "12h04"
@ -1060,7 +1058,8 @@ class parser(object):
tzname is None and
tzoffset is None and
len(token) <= 5 and
all(x in string.ascii_uppercase for x in token))
(all(x in string.ascii_uppercase for x in token)
or token in self.info.UTCZONE))
def _ampm_valid(self, hour, ampm, fuzzy):
"""
@ -1100,7 +1099,7 @@ class parser(object):
def _parse_min_sec(self, value):
# TODO: Every usage of this function sets res.second to the return
# value. Are there any cases where second will be returned as None and
# we *dont* want to set res.second = None?
# we *don't* want to set res.second = None?
minute = int(value)
second = None
@ -1109,14 +1108,6 @@ class parser(object):
second = int(60 * sec_remainder)
return (minute, second)
def _parsems(self, value):
"""Parse a I[.F] seconds value into (seconds, microseconds)."""
if "." not in value:
return int(value), 0
else:
i, f = value.split(".")
return int(i), int(f.ljust(6, "0")[:6])
def _parse_hms(self, idx, tokens, info, hms_idx):
# TODO: Is this going to admit a lot of false-positives for when we
# just happen to have digits and "h", "m" or "s" characters in non-date
@ -1135,21 +1126,35 @@ class parser(object):
return (new_idx, hms)
def _recombine_skipped(self, tokens, skipped_idxs):
"""
>>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
>>> skipped_idxs = [0, 1, 2, 5]
>>> _recombine_skipped(tokens, skipped_idxs)
["foo bar", "baz"]
"""
skipped_tokens = []
for i, idx in enumerate(sorted(skipped_idxs)):
if i > 0 and idx - 1 == skipped_idxs[i - 1]:
skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
else:
skipped_tokens.append(tokens[idx])
# ------------------------------------------------------------------
# Handling for individual tokens. These are kept as methods instead
# of functions for the sake of customizability via subclassing.
return skipped_tokens
def _parsems(self, value):
"""Parse a I[.F] seconds value into (seconds, microseconds)."""
if "." not in value:
return int(value), 0
else:
i, f = value.split(".")
return int(i), int(f.ljust(6, "0")[:6])
def _to_decimal(self, val):
try:
decimal_value = Decimal(val)
# See GH 662, edge case, infinite value should not be converted
# via `_to_decimal`
if not decimal_value.is_finite():
raise ValueError("Converted decimal value is infinite or NaN")
except Exception as e:
msg = "Could not convert %s to decimal" % val
six.raise_from(ValueError(msg), e)
else:
return decimal_value
# ------------------------------------------------------------------
# Post-Parsing construction of datetime output. These are kept as
# methods instead of functions for the sake of customizability via
# subclassing.
def _build_tzinfo(self, tzinfos, tzname, tzoffset):
if callable(tzinfos):
@ -1164,6 +1169,9 @@ class parser(object):
tzinfo = tz.tzstr(tzdata)
elif isinstance(tzdata, integer_types):
tzinfo = tz.tzoffset(tzname, tzdata)
else:
raise TypeError("Offset must be tzinfo subclass, tz string, "
"or int offset.")
return tzinfo
def _build_tzaware(self, naive, res, tzinfos):
@ -1181,10 +1189,10 @@ class parser(object):
# This is mostly relevant for winter GMT zones parsed in the UK
if (aware.tzname() != res.tzname and
res.tzname in self.info.UTCZONE):
aware = aware.replace(tzinfo=tz.tzutc())
aware = aware.replace(tzinfo=tz.UTC)
elif res.tzoffset == 0:
aware = naive.replace(tzinfo=tz.tzutc())
aware = naive.replace(tzinfo=tz.UTC)
elif res.tzoffset:
aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
@ -1239,17 +1247,21 @@ class parser(object):
return dt
def _to_decimal(self, val):
try:
decimal_value = Decimal(val)
# See GH 662, edge case, infinite value should not be converted via `_to_decimal`
if not decimal_value.is_finite():
raise ValueError("Converted decimal value is infinite or NaN")
except Exception as e:
msg = "Could not convert %s to decimal" % val
six.raise_from(ValueError(msg), e)
else:
return decimal_value
def _recombine_skipped(self, tokens, skipped_idxs):
"""
>>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
>>> skipped_idxs = [0, 1, 2, 5]
>>> _recombine_skipped(tokens, skipped_idxs)
["foo bar", "baz"]
"""
skipped_tokens = []
for i, idx in enumerate(sorted(skipped_idxs)):
if i > 0 and idx - 1 == skipped_idxs[i - 1]:
skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
else:
skipped_tokens.append(tokens[idx])
return skipped_tokens
DEFAULTPARSER = parser()
@ -1341,10 +1353,10 @@ def parse(timestr, parserinfo=None, **kwargs):
first element being a :class:`datetime.datetime` object, the second
a tuple containing the fuzzy tokens.
:raises ValueError:
Raised for invalid or unknown string format, if the provided
:class:`tzinfo` is not in a valid format, or if an invalid date
would be created.
:raises ParserError:
Raised for invalid or unknown string formats, if the provided
:class:`tzinfo` is not in a valid format, or if an invalid date would
be created.
:raises OverflowError:
Raised if the parsed date exceeds the largest valid C integer on
@ -1573,6 +1585,29 @@ DEFAULTTZPARSER = _tzparser()
def _parsetz(tzstr):
return DEFAULTTZPARSER.parse(tzstr)
class ParserError(ValueError):
"""Exception subclass used for any failure to parse a datetime string.
This is a subclass of :py:exc:`ValueError`, and should be raised any time
earlier versions of ``dateutil`` would have raised ``ValueError``.
.. versionadded:: 2.8.1
"""
def __str__(self):
try:
return self.args[0] % self.args[1:]
except (TypeError, IndexError):
return super(ParserError, self).__str__()
def __repr__(self):
args = ", ".join("'%s'" % arg for arg in self.args)
return "%s(%s)" % (self.__class__.__name__, args)
class UnknownTimezoneWarning(RuntimeWarning):
"""Raised when the parser finds a timezone it cannot parse into a tzinfo"""
"""Raised when the parser finds a timezone it cannot parse into a tzinfo.
.. versionadded:: 2.7.0
"""
# vim:ts=4:sw=4:et

View file

@ -88,10 +88,12 @@ class isoparser(object):
- ``hh``
- ``hh:mm`` or ``hhmm``
- ``hh:mm:ss`` or ``hhmmss``
- ``hh:mm:ss.sss`` or ``hh:mm:ss.ssssss`` (3-6 sub-second digits)
- ``hh:mm:ss.ssssss`` (Up to 6 sub-second digits)
Midnight is a special case for `hh`, as the standard supports both
00:00 and 24:00 as a representation.
00:00 and 24:00 as a representation. The decimal separator can be
either a dot or a comma.
.. caution::
@ -137,6 +139,10 @@ class isoparser(object):
else:
raise ValueError('String contains unknown ISO components')
if len(components) > 3 and components[3] == 24:
components[3] = 0
return datetime(*components) + timedelta(days=1)
return datetime(*components)
@_takes_ascii
@ -153,7 +159,7 @@ class isoparser(object):
components, pos = self._parse_isodate(datestr)
if pos < len(datestr):
raise ValueError('String contains unknown ISO ' +
'components: {}'.format(datestr))
'components: {!r}'.format(datestr.decode('ascii')))
return date(*components)
@_takes_ascii
@ -167,7 +173,10 @@ class isoparser(object):
:return:
Returns a :class:`datetime.time` object
"""
return time(*self._parse_isotime(timestr))
components = self._parse_isotime(timestr)
if components[0] == 24:
components[0] = 0
return time(*components)
@_takes_ascii
def parse_tzstr(self, tzstr, zero_as_utc=True):
@ -190,10 +199,9 @@ class isoparser(object):
return self._parse_tzstr(tzstr, zero_as_utc=zero_as_utc)
# Constants
_MICROSECOND_END_REGEX = re.compile(b'[-+Z]+')
_DATE_SEP = b'-'
_TIME_SEP = b':'
_MICRO_SEP = b'.'
_FRACTION_REGEX = re.compile(b'[\\.,]([0-9]+)')
def _parse_isodate(self, dt_str):
try:
@ -325,39 +333,42 @@ class isoparser(object):
pos = 0
comp = -1
if len(timestr) < 2:
if len_str < 2:
raise ValueError('ISO time too short')
has_sep = len_str >= 3 and timestr[2:3] == self._TIME_SEP
has_sep = False
while pos < len_str and comp < 5:
comp += 1
if timestr[pos:pos + 1] in b'-+Z':
if timestr[pos:pos + 1] in b'-+Zz':
# Detect time zone boundary
components[-1] = self._parse_tzstr(timestr[pos:])
pos = len_str
break
if comp == 1 and timestr[pos:pos+1] == self._TIME_SEP:
has_sep = True
pos += 1
elif comp == 2 and has_sep:
if timestr[pos:pos+1] != self._TIME_SEP:
raise ValueError('Inconsistent use of colon separator')
pos += 1
if comp < 3:
# Hour, minute, second
components[comp] = int(timestr[pos:pos + 2])
pos += 2
if (has_sep and pos < len_str and
timestr[pos:pos + 1] == self._TIME_SEP):
pos += 1
if comp == 3:
# Microsecond
if timestr[pos:pos + 1] != self._MICRO_SEP:
# Fraction of a second
frac = self._FRACTION_REGEX.match(timestr[pos:])
if not frac:
continue
pos += 1
us_str = self._MICROSECOND_END_REGEX.split(timestr[pos:pos + 6],
1)[0]
us_str = frac.group(1)[:6] # Truncate to microseconds
components[comp] = int(us_str) * 10**(6 - len(us_str))
pos += len(us_str)
pos += len(frac.group())
if pos < len_str:
raise ValueError('Unused components in ISO string')
@ -366,13 +377,12 @@ class isoparser(object):
# Standard supports 00:00 and 24:00 as representations of midnight
if any(component != 0 for component in components[1:4]):
raise ValueError('Hour may only be 24 at 24:00:00.000')
components[0] = 0
return components
def _parse_tzstr(self, tzstr, zero_as_utc=True):
if tzstr == b'Z':
return tz.tzutc()
if tzstr == b'Z' or tzstr == b'z':
return tz.UTC
if len(tzstr) not in {3, 5, 6}:
raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters')
@ -391,7 +401,7 @@ class isoparser(object):
minutes = int(tzstr[(4 if tzstr[3:4] == self._TIME_SEP else 3):])
if zero_as_utc and hours == 0 and minutes == 0:
return tz.tzutc()
return tz.UTC
else:
if minutes > 59:
raise ValueError('Invalid minutes in time zone offset')