Update vendored guessit to 3.1.1

Updates python-dateutil to 2.8.2
Updates rebulk to 2.0.1
This commit is contained in:
Labrys of Knossos 2022-11-28 19:44:46 -05:00
commit 2226a74ef8
66 changed files with 2995 additions and 1306 deletions

View file

@ -88,10 +88,12 @@ class isoparser(object):
- ``hh``
- ``hh:mm`` or ``hhmm``
- ``hh:mm:ss`` or ``hhmmss``
- ``hh:mm:ss.sss`` or ``hh:mm:ss.ssssss`` (3-6 sub-second digits)
- ``hh:mm:ss.ssssss`` (Up to 6 sub-second digits)
Midnight is a special case for `hh`, as the standard supports both
00:00 and 24:00 as a representation.
00:00 and 24:00 as a representation. The decimal separator can be
either a dot or a comma.
.. caution::
@ -137,6 +139,10 @@ class isoparser(object):
else:
raise ValueError('String contains unknown ISO components')
if len(components) > 3 and components[3] == 24:
components[3] = 0
return datetime(*components) + timedelta(days=1)
return datetime(*components)
@_takes_ascii
@ -153,7 +159,7 @@ class isoparser(object):
components, pos = self._parse_isodate(datestr)
if pos < len(datestr):
raise ValueError('String contains unknown ISO ' +
'components: {}'.format(datestr))
'components: {!r}'.format(datestr.decode('ascii')))
return date(*components)
@_takes_ascii
@ -167,7 +173,10 @@ class isoparser(object):
:return:
Returns a :class:`datetime.time` object
"""
return time(*self._parse_isotime(timestr))
components = self._parse_isotime(timestr)
if components[0] == 24:
components[0] = 0
return time(*components)
@_takes_ascii
def parse_tzstr(self, tzstr, zero_as_utc=True):
@ -190,10 +199,9 @@ class isoparser(object):
return self._parse_tzstr(tzstr, zero_as_utc=zero_as_utc)
# Constants
_MICROSECOND_END_REGEX = re.compile(b'[-+Z]+')
_DATE_SEP = b'-'
_TIME_SEP = b':'
_MICRO_SEP = b'.'
_FRACTION_REGEX = re.compile(b'[\\.,]([0-9]+)')
def _parse_isodate(self, dt_str):
try:
@ -325,39 +333,42 @@ class isoparser(object):
pos = 0
comp = -1
if len(timestr) < 2:
if len_str < 2:
raise ValueError('ISO time too short')
has_sep = len_str >= 3 and timestr[2:3] == self._TIME_SEP
has_sep = False
while pos < len_str and comp < 5:
comp += 1
if timestr[pos:pos + 1] in b'-+Z':
if timestr[pos:pos + 1] in b'-+Zz':
# Detect time zone boundary
components[-1] = self._parse_tzstr(timestr[pos:])
pos = len_str
break
if comp == 1 and timestr[pos:pos+1] == self._TIME_SEP:
has_sep = True
pos += 1
elif comp == 2 and has_sep:
if timestr[pos:pos+1] != self._TIME_SEP:
raise ValueError('Inconsistent use of colon separator')
pos += 1
if comp < 3:
# Hour, minute, second
components[comp] = int(timestr[pos:pos + 2])
pos += 2
if (has_sep and pos < len_str and
timestr[pos:pos + 1] == self._TIME_SEP):
pos += 1
if comp == 3:
# Microsecond
if timestr[pos:pos + 1] != self._MICRO_SEP:
# Fraction of a second
frac = self._FRACTION_REGEX.match(timestr[pos:])
if not frac:
continue
pos += 1
us_str = self._MICROSECOND_END_REGEX.split(timestr[pos:pos + 6],
1)[0]
us_str = frac.group(1)[:6] # Truncate to microseconds
components[comp] = int(us_str) * 10**(6 - len(us_str))
pos += len(us_str)
pos += len(frac.group())
if pos < len_str:
raise ValueError('Unused components in ISO string')
@ -366,13 +377,12 @@ class isoparser(object):
# Standard supports 00:00 and 24:00 as representations of midnight
if any(component != 0 for component in components[1:4]):
raise ValueError('Hour may only be 24 at 24:00:00.000')
components[0] = 0
return components
def _parse_tzstr(self, tzstr, zero_as_utc=True):
if tzstr == b'Z':
return tz.tzutc()
if tzstr == b'Z' or tzstr == b'z':
return tz.UTC
if len(tzstr) not in {3, 5, 6}:
raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters')
@ -391,7 +401,7 @@ class isoparser(object):
minutes = int(tzstr[(4 if tzstr[3:4] == self._TIME_SEP else 3):])
if zero_as_utc and hours == 0 and minutes == 0:
return tz.tzutc()
return tz.UTC
else:
if minutes > 59:
raise ValueError('Invalid minutes in time zone offset')