mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2025-08-14 18:47:09 -07:00
Update vendored guessit to 3.1.1
Updates python-dateutil to 2.8.2. Updates rebulk to 2.0.1.
This commit is contained in:
parent
ebc9718117
commit
2226a74ef8
66 changed files with 2995 additions and 1306 deletions
|
@ -1,5 +1,5 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from ._parser import parse, parser, parserinfo
|
||||
from ._parser import parse, parser, parserinfo, ParserError
|
||||
from ._parser import DEFAULTPARSER, DEFAULTTZPARSER
|
||||
from ._parser import UnknownTimezoneWarning
|
||||
|
||||
|
@ -9,6 +9,7 @@ from .isoparser import isoparser, isoparse
|
|||
|
||||
__all__ = ['parse', 'parser', 'parserinfo',
|
||||
'isoparse', 'isoparser',
|
||||
'ParserError',
|
||||
'UnknownTimezoneWarning']
|
||||
|
||||
|
||||
|
|
|
@ -20,11 +20,11 @@ value falls back to the end of the month.
|
|||
Additional resources about date/time string formats can be found below:
|
||||
|
||||
- `A summary of the international standard date and time notation
|
||||
<http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
|
||||
- `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_
|
||||
<https://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
|
||||
- `W3C Date and Time Formats <https://www.w3.org/TR/NOTE-datetime>`_
|
||||
- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_
|
||||
- `CPAN ParseDate module
|
||||
<http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
|
||||
<https://metacpan.org/pod/release/MUIR/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
|
||||
- `Java SimpleDateFormat Class
|
||||
<https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
|
||||
"""
|
||||
|
@ -40,7 +40,7 @@ from calendar import monthrange
|
|||
from io import StringIO
|
||||
|
||||
import six
|
||||
from six import binary_type, integer_types, text_type
|
||||
from six import integer_types, text_type
|
||||
|
||||
from decimal import Decimal
|
||||
|
||||
|
@ -49,7 +49,7 @@ from warnings import warn
|
|||
from .. import relativedelta
|
||||
from .. import tz
|
||||
|
||||
__all__ = ["parse", "parserinfo"]
|
||||
__all__ = ["parse", "parserinfo", "ParserError"]
|
||||
|
||||
|
||||
# TODO: pandas.core.tools.datetimes imports this explicitly. Might be worth
|
||||
|
@ -60,14 +60,8 @@ class _timelex(object):
|
|||
_split_decimal = re.compile("([.,])")
|
||||
|
||||
def __init__(self, instream):
|
||||
if six.PY2:
|
||||
# In Python 2, we can't duck type properly because unicode has
|
||||
# a 'decode' function, and we'd be double-decoding
|
||||
if isinstance(instream, (binary_type, bytearray)):
|
||||
instream = instream.decode()
|
||||
else:
|
||||
if getattr(instream, 'decode', None) is not None:
|
||||
instream = instream.decode()
|
||||
if isinstance(instream, (bytes, bytearray)):
|
||||
instream = instream.decode()
|
||||
|
||||
if isinstance(instream, text_type):
|
||||
instream = StringIO(instream)
|
||||
|
@ -291,7 +285,7 @@ class parserinfo(object):
|
|||
("s", "second", "seconds")]
|
||||
AMPM = [("am", "a"),
|
||||
("pm", "p")]
|
||||
UTCZONE = ["UTC", "GMT", "Z"]
|
||||
UTCZONE = ["UTC", "GMT", "Z", "z"]
|
||||
PERTAIN = ["of"]
|
||||
TZOFFSET = {}
|
||||
# TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
|
||||
|
@ -388,7 +382,8 @@ class parserinfo(object):
|
|||
if res.year is not None:
|
||||
res.year = self.convertyear(res.year, res.century_specified)
|
||||
|
||||
if res.tzoffset == 0 and not res.tzname or res.tzname == 'Z':
|
||||
if ((res.tzoffset == 0 and not res.tzname) or
|
||||
(res.tzname == 'Z' or res.tzname == 'z')):
|
||||
res.tzname = "UTC"
|
||||
res.tzoffset = 0
|
||||
elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
|
||||
|
@ -422,7 +417,7 @@ class _ymd(list):
|
|||
elif not self.has_month:
|
||||
return 1 <= value <= 31
|
||||
elif not self.has_year:
|
||||
# Be permissive, assume leapyear
|
||||
# Be permissive, assume leap year
|
||||
month = self[self.mstridx]
|
||||
return 1 <= value <= monthrange(2000, month)[1]
|
||||
else:
|
||||
|
@ -538,7 +533,7 @@ class _ymd(list):
|
|||
year, month, day = self
|
||||
else:
|
||||
# 01-Jan-01
|
||||
# Give precendence to day-first, since
|
||||
# Give precedence to day-first, since
|
||||
# two-digit years is usually hand-written.
|
||||
day, month, year = self
|
||||
|
||||
|
@ -625,7 +620,7 @@ class parser(object):
|
|||
first element being a :class:`datetime.datetime` object, the second
|
||||
a tuple containing the fuzzy tokens.
|
||||
|
||||
:raises ValueError:
|
||||
:raises ParserError:
|
||||
Raised for invalid or unknown string format, if the provided
|
||||
:class:`tzinfo` is not in a valid format, or if an invalid date
|
||||
would be created.
|
||||
|
@ -645,12 +640,15 @@ class parser(object):
|
|||
res, skipped_tokens = self._parse(timestr, **kwargs)
|
||||
|
||||
if res is None:
|
||||
raise ValueError("Unknown string format:", timestr)
|
||||
raise ParserError("Unknown string format: %s", timestr)
|
||||
|
||||
if len(res) == 0:
|
||||
raise ValueError("String does not contain a date:", timestr)
|
||||
raise ParserError("String does not contain a date: %s", timestr)
|
||||
|
||||
ret = self._build_naive(res, default)
|
||||
try:
|
||||
ret = self._build_naive(res, default)
|
||||
except ValueError as e:
|
||||
six.raise_from(ParserError(str(e) + ": %s", timestr), e)
|
||||
|
||||
if not ignoretz:
|
||||
ret = self._build_tzaware(ret, res, tzinfos)
|
||||
|
@ -1021,7 +1019,7 @@ class parser(object):
|
|||
hms_idx = idx + 2
|
||||
|
||||
elif idx > 0 and info.hms(tokens[idx-1]) is not None:
|
||||
# There is a "h", "m", or "s" preceeding this token. Since neither
|
||||
# There is a "h", "m", or "s" preceding this token. Since neither
|
||||
# of the previous cases was hit, there is no label following this
|
||||
# token, so we use the previous label.
|
||||
# e.g. the "04" in "12h04"
|
||||
|
@ -1060,7 +1058,8 @@ class parser(object):
|
|||
tzname is None and
|
||||
tzoffset is None and
|
||||
len(token) <= 5 and
|
||||
all(x in string.ascii_uppercase for x in token))
|
||||
(all(x in string.ascii_uppercase for x in token)
|
||||
or token in self.info.UTCZONE))
|
||||
|
||||
def _ampm_valid(self, hour, ampm, fuzzy):
|
||||
"""
|
||||
|
@ -1100,7 +1099,7 @@ class parser(object):
|
|||
def _parse_min_sec(self, value):
|
||||
# TODO: Every usage of this function sets res.second to the return
|
||||
# value. Are there any cases where second will be returned as None and
|
||||
# we *dont* want to set res.second = None?
|
||||
# we *don't* want to set res.second = None?
|
||||
minute = int(value)
|
||||
second = None
|
||||
|
||||
|
@ -1109,14 +1108,6 @@ class parser(object):
|
|||
second = int(60 * sec_remainder)
|
||||
return (minute, second)
|
||||
|
||||
def _parsems(self, value):
|
||||
"""Parse a I[.F] seconds value into (seconds, microseconds)."""
|
||||
if "." not in value:
|
||||
return int(value), 0
|
||||
else:
|
||||
i, f = value.split(".")
|
||||
return int(i), int(f.ljust(6, "0")[:6])
|
||||
|
||||
def _parse_hms(self, idx, tokens, info, hms_idx):
|
||||
# TODO: Is this going to admit a lot of false-positives for when we
|
||||
# just happen to have digits and "h", "m" or "s" characters in non-date
|
||||
|
@ -1135,21 +1126,35 @@ class parser(object):
|
|||
|
||||
return (new_idx, hms)
|
||||
|
||||
def _recombine_skipped(self, tokens, skipped_idxs):
|
||||
"""
|
||||
>>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
|
||||
>>> skipped_idxs = [0, 1, 2, 5]
|
||||
>>> _recombine_skipped(tokens, skipped_idxs)
|
||||
["foo bar", "baz"]
|
||||
"""
|
||||
skipped_tokens = []
|
||||
for i, idx in enumerate(sorted(skipped_idxs)):
|
||||
if i > 0 and idx - 1 == skipped_idxs[i - 1]:
|
||||
skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
|
||||
else:
|
||||
skipped_tokens.append(tokens[idx])
|
||||
# ------------------------------------------------------------------
|
||||
# Handling for individual tokens. These are kept as methods instead
|
||||
# of functions for the sake of customizability via subclassing.
|
||||
|
||||
return skipped_tokens
|
||||
def _parsems(self, value):
|
||||
"""Parse a I[.F] seconds value into (seconds, microseconds)."""
|
||||
if "." not in value:
|
||||
return int(value), 0
|
||||
else:
|
||||
i, f = value.split(".")
|
||||
return int(i), int(f.ljust(6, "0")[:6])
|
||||
|
||||
def _to_decimal(self, val):
|
||||
try:
|
||||
decimal_value = Decimal(val)
|
||||
# See GH 662, edge case, infinite value should not be converted
|
||||
# via `_to_decimal`
|
||||
if not decimal_value.is_finite():
|
||||
raise ValueError("Converted decimal value is infinite or NaN")
|
||||
except Exception as e:
|
||||
msg = "Could not convert %s to decimal" % val
|
||||
six.raise_from(ValueError(msg), e)
|
||||
else:
|
||||
return decimal_value
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Post-Parsing construction of datetime output. These are kept as
|
||||
# methods instead of functions for the sake of customizability via
|
||||
# subclassing.
|
||||
|
||||
def _build_tzinfo(self, tzinfos, tzname, tzoffset):
|
||||
if callable(tzinfos):
|
||||
|
@ -1164,6 +1169,9 @@ class parser(object):
|
|||
tzinfo = tz.tzstr(tzdata)
|
||||
elif isinstance(tzdata, integer_types):
|
||||
tzinfo = tz.tzoffset(tzname, tzdata)
|
||||
else:
|
||||
raise TypeError("Offset must be tzinfo subclass, tz string, "
|
||||
"or int offset.")
|
||||
return tzinfo
|
||||
|
||||
def _build_tzaware(self, naive, res, tzinfos):
|
||||
|
@ -1181,10 +1189,10 @@ class parser(object):
|
|||
# This is mostly relevant for winter GMT zones parsed in the UK
|
||||
if (aware.tzname() != res.tzname and
|
||||
res.tzname in self.info.UTCZONE):
|
||||
aware = aware.replace(tzinfo=tz.tzutc())
|
||||
aware = aware.replace(tzinfo=tz.UTC)
|
||||
|
||||
elif res.tzoffset == 0:
|
||||
aware = naive.replace(tzinfo=tz.tzutc())
|
||||
aware = naive.replace(tzinfo=tz.UTC)
|
||||
|
||||
elif res.tzoffset:
|
||||
aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
|
||||
|
@ -1239,17 +1247,21 @@ class parser(object):
|
|||
|
||||
return dt
|
||||
|
||||
def _to_decimal(self, val):
|
||||
try:
|
||||
decimal_value = Decimal(val)
|
||||
# See GH 662, edge case, infinite value should not be converted via `_to_decimal`
|
||||
if not decimal_value.is_finite():
|
||||
raise ValueError("Converted decimal value is infinite or NaN")
|
||||
except Exception as e:
|
||||
msg = "Could not convert %s to decimal" % val
|
||||
six.raise_from(ValueError(msg), e)
|
||||
else:
|
||||
return decimal_value
|
||||
def _recombine_skipped(self, tokens, skipped_idxs):
|
||||
"""
|
||||
>>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
|
||||
>>> skipped_idxs = [0, 1, 2, 5]
|
||||
>>> _recombine_skipped(tokens, skipped_idxs)
|
||||
["foo bar", "baz"]
|
||||
"""
|
||||
skipped_tokens = []
|
||||
for i, idx in enumerate(sorted(skipped_idxs)):
|
||||
if i > 0 and idx - 1 == skipped_idxs[i - 1]:
|
||||
skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
|
||||
else:
|
||||
skipped_tokens.append(tokens[idx])
|
||||
|
||||
return skipped_tokens
|
||||
|
||||
|
||||
DEFAULTPARSER = parser()
|
||||
|
@ -1341,10 +1353,10 @@ def parse(timestr, parserinfo=None, **kwargs):
|
|||
first element being a :class:`datetime.datetime` object, the second
|
||||
a tuple containing the fuzzy tokens.
|
||||
|
||||
:raises ValueError:
|
||||
Raised for invalid or unknown string format, if the provided
|
||||
:class:`tzinfo` is not in a valid format, or if an invalid date
|
||||
would be created.
|
||||
:raises ParserError:
|
||||
Raised for invalid or unknown string formats, if the provided
|
||||
:class:`tzinfo` is not in a valid format, or if an invalid date would
|
||||
be created.
|
||||
|
||||
:raises OverflowError:
|
||||
Raised if the parsed date exceeds the largest valid C integer on
|
||||
|
@ -1573,6 +1585,29 @@ DEFAULTTZPARSER = _tzparser()
|
|||
def _parsetz(tzstr):
|
||||
return DEFAULTTZPARSER.parse(tzstr)
|
||||
|
||||
|
||||
class ParserError(ValueError):
|
||||
"""Exception subclass used for any failure to parse a datetime string.
|
||||
|
||||
This is a subclass of :py:exc:`ValueError`, and should be raised any time
|
||||
earlier versions of ``dateutil`` would have raised ``ValueError``.
|
||||
|
||||
.. versionadded:: 2.8.1
|
||||
"""
|
||||
def __str__(self):
|
||||
try:
|
||||
return self.args[0] % self.args[1:]
|
||||
except (TypeError, IndexError):
|
||||
return super(ParserError, self).__str__()
|
||||
|
||||
def __repr__(self):
|
||||
args = ", ".join("'%s'" % arg for arg in self.args)
|
||||
return "%s(%s)" % (self.__class__.__name__, args)
|
||||
|
||||
|
||||
class UnknownTimezoneWarning(RuntimeWarning):
|
||||
"""Raised when the parser finds a timezone it cannot parse into a tzinfo"""
|
||||
"""Raised when the parser finds a timezone it cannot parse into a tzinfo.
|
||||
|
||||
.. versionadded:: 2.7.0
|
||||
"""
|
||||
# vim:ts=4:sw=4:et
|
||||
|
|
|
@ -88,10 +88,12 @@ class isoparser(object):
|
|||
- ``hh``
|
||||
- ``hh:mm`` or ``hhmm``
|
||||
- ``hh:mm:ss`` or ``hhmmss``
|
||||
- ``hh:mm:ss.sss`` or ``hh:mm:ss.ssssss`` (3-6 sub-second digits)
|
||||
- ``hh:mm:ss.ssssss`` (Up to 6 sub-second digits)
|
||||
|
||||
Midnight is a special case for `hh`, as the standard supports both
|
||||
00:00 and 24:00 as a representation.
|
||||
00:00 and 24:00 as a representation. The decimal separator can be
|
||||
either a dot or a comma.
|
||||
|
||||
|
||||
.. caution::
|
||||
|
||||
|
@ -137,6 +139,10 @@ class isoparser(object):
|
|||
else:
|
||||
raise ValueError('String contains unknown ISO components')
|
||||
|
||||
if len(components) > 3 and components[3] == 24:
|
||||
components[3] = 0
|
||||
return datetime(*components) + timedelta(days=1)
|
||||
|
||||
return datetime(*components)
|
||||
|
||||
@_takes_ascii
|
||||
|
@ -153,7 +159,7 @@ class isoparser(object):
|
|||
components, pos = self._parse_isodate(datestr)
|
||||
if pos < len(datestr):
|
||||
raise ValueError('String contains unknown ISO ' +
|
||||
'components: {}'.format(datestr))
|
||||
'components: {!r}'.format(datestr.decode('ascii')))
|
||||
return date(*components)
|
||||
|
||||
@_takes_ascii
|
||||
|
@ -167,7 +173,10 @@ class isoparser(object):
|
|||
:return:
|
||||
Returns a :class:`datetime.time` object
|
||||
"""
|
||||
return time(*self._parse_isotime(timestr))
|
||||
components = self._parse_isotime(timestr)
|
||||
if components[0] == 24:
|
||||
components[0] = 0
|
||||
return time(*components)
|
||||
|
||||
@_takes_ascii
|
||||
def parse_tzstr(self, tzstr, zero_as_utc=True):
|
||||
|
@ -190,10 +199,9 @@ class isoparser(object):
|
|||
return self._parse_tzstr(tzstr, zero_as_utc=zero_as_utc)
|
||||
|
||||
# Constants
|
||||
_MICROSECOND_END_REGEX = re.compile(b'[-+Z]+')
|
||||
_DATE_SEP = b'-'
|
||||
_TIME_SEP = b':'
|
||||
_MICRO_SEP = b'.'
|
||||
_FRACTION_REGEX = re.compile(b'[\\.,]([0-9]+)')
|
||||
|
||||
def _parse_isodate(self, dt_str):
|
||||
try:
|
||||
|
@ -325,39 +333,42 @@ class isoparser(object):
|
|||
pos = 0
|
||||
comp = -1
|
||||
|
||||
if len(timestr) < 2:
|
||||
if len_str < 2:
|
||||
raise ValueError('ISO time too short')
|
||||
|
||||
has_sep = len_str >= 3 and timestr[2:3] == self._TIME_SEP
|
||||
has_sep = False
|
||||
|
||||
while pos < len_str and comp < 5:
|
||||
comp += 1
|
||||
|
||||
if timestr[pos:pos + 1] in b'-+Z':
|
||||
if timestr[pos:pos + 1] in b'-+Zz':
|
||||
# Detect time zone boundary
|
||||
components[-1] = self._parse_tzstr(timestr[pos:])
|
||||
pos = len_str
|
||||
break
|
||||
|
||||
if comp == 1 and timestr[pos:pos+1] == self._TIME_SEP:
|
||||
has_sep = True
|
||||
pos += 1
|
||||
elif comp == 2 and has_sep:
|
||||
if timestr[pos:pos+1] != self._TIME_SEP:
|
||||
raise ValueError('Inconsistent use of colon separator')
|
||||
pos += 1
|
||||
|
||||
if comp < 3:
|
||||
# Hour, minute, second
|
||||
components[comp] = int(timestr[pos:pos + 2])
|
||||
pos += 2
|
||||
if (has_sep and pos < len_str and
|
||||
timestr[pos:pos + 1] == self._TIME_SEP):
|
||||
pos += 1
|
||||
|
||||
if comp == 3:
|
||||
# Microsecond
|
||||
if timestr[pos:pos + 1] != self._MICRO_SEP:
|
||||
# Fraction of a second
|
||||
frac = self._FRACTION_REGEX.match(timestr[pos:])
|
||||
if not frac:
|
||||
continue
|
||||
|
||||
pos += 1
|
||||
us_str = self._MICROSECOND_END_REGEX.split(timestr[pos:pos + 6],
|
||||
1)[0]
|
||||
|
||||
us_str = frac.group(1)[:6] # Truncate to microseconds
|
||||
components[comp] = int(us_str) * 10**(6 - len(us_str))
|
||||
pos += len(us_str)
|
||||
pos += len(frac.group())
|
||||
|
||||
if pos < len_str:
|
||||
raise ValueError('Unused components in ISO string')
|
||||
|
@ -366,13 +377,12 @@ class isoparser(object):
|
|||
# Standard supports 00:00 and 24:00 as representations of midnight
|
||||
if any(component != 0 for component in components[1:4]):
|
||||
raise ValueError('Hour may only be 24 at 24:00:00.000')
|
||||
components[0] = 0
|
||||
|
||||
return components
|
||||
|
||||
def _parse_tzstr(self, tzstr, zero_as_utc=True):
|
||||
if tzstr == b'Z':
|
||||
return tz.tzutc()
|
||||
if tzstr == b'Z' or tzstr == b'z':
|
||||
return tz.UTC
|
||||
|
||||
if len(tzstr) not in {3, 5, 6}:
|
||||
raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters')
|
||||
|
@ -391,7 +401,7 @@ class isoparser(object):
|
|||
minutes = int(tzstr[(4 if tzstr[3:4] == self._TIME_SEP else 3):])
|
||||
|
||||
if zero_as_utc and hours == 0 and minutes == 0:
|
||||
return tz.tzutc()
|
||||
return tz.UTC
|
||||
else:
|
||||
if minutes > 59:
|
||||
raise ValueError('Invalid minutes in time zone offset')
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue