Update arrow-1.2.0

This commit is contained in:
JonnyWong16 2021-10-14 20:43:42 -07:00
parent 3b645cf6c3
commit 9a54fb9a44
No known key found for this signature in database
GPG key ID: B1F1F9807184697A
11 changed files with 7793 additions and 2322 deletions

View file

@ -1,8 +1,39 @@
# -*- coding: utf-8 -*- from ._version import __version__
from .api import get, now, utcnow
from .arrow import Arrow from .arrow import Arrow
from .factory import ArrowFactory from .factory import ArrowFactory
from .api import get, now, utcnow from .formatter import (
FORMAT_ATOM,
FORMAT_COOKIE,
FORMAT_RFC822,
FORMAT_RFC850,
FORMAT_RFC1036,
FORMAT_RFC1123,
FORMAT_RFC2822,
FORMAT_RFC3339,
FORMAT_RSS,
FORMAT_W3C,
)
from .parser import ParserError
__version__ = '0.10.0' # https://mypy.readthedocs.io/en/stable/command_line.html#cmdoption-mypy-no-implicit-reexport
VERSION = __version__ # Mypy with --strict or --no-implicit-reexport requires an explicit reexport.
__all__ = [
"__version__",
"get",
"now",
"utcnow",
"Arrow",
"ArrowFactory",
"FORMAT_ATOM",
"FORMAT_COOKIE",
"FORMAT_RFC822",
"FORMAT_RFC850",
"FORMAT_RFC1036",
"FORMAT_RFC1123",
"FORMAT_RFC2822",
"FORMAT_RFC3339",
"FORMAT_RSS",
"FORMAT_W3C",
"ParserError",
]

1
lib/arrow/_version.py Normal file
View file

@ -0,0 +1 @@
# Package version string; imported elsewhere in this changeset via
# `from ._version import __version__`.
__version__ = "1.2.0"

View file

@ -1,55 +1,126 @@
# -*- coding: utf-8 -*- """
'''
Provides the default implementation of :class:`ArrowFactory <arrow.factory.ArrowFactory>` Provides the default implementation of :class:`ArrowFactory <arrow.factory.ArrowFactory>`
methods for use as a module API. methods for use as a module API.
''' """
from __future__ import absolute_import from datetime import date, datetime
from datetime import tzinfo as dt_tzinfo
from time import struct_time
from typing import Any, List, Optional, Tuple, Type, Union, overload
from arrow.arrow import TZ_EXPR, Arrow
from arrow.constants import DEFAULT_LOCALE
from arrow.factory import ArrowFactory from arrow.factory import ArrowFactory
# internal default factory. # internal default factory.
_factory = ArrowFactory() _factory = ArrowFactory()
# TODO: Use Positional Only Argument (https://www.python.org/dev/peps/pep-0570/)
# after Python 3.7 deprecation
def get(*args, **kwargs):
''' Implements the default :class:`ArrowFactory <arrow.factory.ArrowFactory>`
``get`` method.
''' @overload
def get(
*,
locale: str = DEFAULT_LOCALE,
tzinfo: Optional[TZ_EXPR] = None,
normalize_whitespace: bool = False,
) -> Arrow:
... # pragma: no cover
@overload
def get(
*args: int,
locale: str = DEFAULT_LOCALE,
tzinfo: Optional[TZ_EXPR] = None,
normalize_whitespace: bool = False,
) -> Arrow:
... # pragma: no cover
@overload
def get(
__obj: Union[
Arrow,
datetime,
date,
struct_time,
dt_tzinfo,
int,
float,
str,
Tuple[int, int, int],
],
*,
locale: str = DEFAULT_LOCALE,
tzinfo: Optional[TZ_EXPR] = None,
normalize_whitespace: bool = False,
) -> Arrow:
... # pragma: no cover
@overload
def get(
__arg1: Union[datetime, date],
__arg2: TZ_EXPR,
*,
locale: str = DEFAULT_LOCALE,
tzinfo: Optional[TZ_EXPR] = None,
normalize_whitespace: bool = False,
) -> Arrow:
... # pragma: no cover
@overload
def get(
__arg1: str,
__arg2: Union[str, List[str]],
*,
locale: str = DEFAULT_LOCALE,
tzinfo: Optional[TZ_EXPR] = None,
normalize_whitespace: bool = False,
) -> Arrow:
... # pragma: no cover
def get(*args: Any, **kwargs: Any) -> Arrow:
"""Calls the default :class:`ArrowFactory <arrow.factory.ArrowFactory>` ``get`` method."""
return _factory.get(*args, **kwargs) return _factory.get(*args, **kwargs)
def utcnow():
''' Implements the default :class:`ArrowFactory <arrow.factory.ArrowFactory>`
``utcnow`` method.
''' get.__doc__ = _factory.get.__doc__
def utcnow() -> Arrow:
"""Calls the default :class:`ArrowFactory <arrow.factory.ArrowFactory>` ``utcnow`` method."""
return _factory.utcnow() return _factory.utcnow()
def now(tz=None): utcnow.__doc__ = _factory.utcnow.__doc__
''' Implements the default :class:`ArrowFactory <arrow.factory.ArrowFactory>`
``now`` method.
'''
def now(tz: Optional[TZ_EXPR] = None) -> Arrow:
"""Calls the default :class:`ArrowFactory <arrow.factory.ArrowFactory>` ``now`` method."""
return _factory.now(tz) return _factory.now(tz)
def factory(type): now.__doc__ = _factory.now.__doc__
''' Returns an :class:`.ArrowFactory` for the specified :class:`Arrow <arrow.arrow.Arrow>`
def factory(type: Type[Arrow]) -> ArrowFactory:
"""Returns an :class:`.ArrowFactory` for the specified :class:`Arrow <arrow.arrow.Arrow>`
or derived type. or derived type.
:param type: the type, :class:`Arrow <arrow.arrow.Arrow>` or derived. :param type: the type, :class:`Arrow <arrow.arrow.Arrow>` or derived.
''' """
return ArrowFactory(type) return ArrowFactory(type)
__all__ = ['get', 'utcnow', 'now', 'factory'] __all__ = ["get", "utcnow", "now", "factory"]

File diff suppressed because it is too large Load diff

146
lib/arrow/constants.py Normal file
View file

@ -0,0 +1,146 @@
"""Constants used internally in arrow."""
import sys
from datetime import datetime
if sys.version_info < (3, 8): # pragma: no cover
from typing_extensions import Final
else:
from typing import Final # pragma: no cover
# datetime.max.timestamp() errors on Windows, so we must hardcode
# the highest possible datetime value that can output a timestamp.
# tl;dr platform-independent max timestamps are hard to form
# See: https://stackoverflow.com/q/46133223
try:
    # Get max timestamp. Works on POSIX-based systems like Linux and macOS,
    # but will trigger an OverflowError, ValueError, or OSError on Windows
    _MAX_TIMESTAMP = datetime.max.timestamp()
except (OverflowError, ValueError, OSError):  # pragma: no cover
    # Fallback for Windows and 32-bit systems if initial max timestamp call fails
    # Must get max value of ctime on Windows based on architecture (x32 vs x64)
    # https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/ctime-ctime32-ctime64-wctime-wctime32-wctime64
    # Note: this may occur on both 32-bit Linux systems (issue #930) along with Windows systems
    is_64bits = sys.maxsize > 2 ** 32
    _MAX_TIMESTAMP = (
        datetime(3000, 1, 1, 23, 59, 59, 999999).timestamp()
        if is_64bits
        else datetime(2038, 1, 1, 23, 59, 59, 999999).timestamp()
    )

# Largest timestamp (in seconds) obtained above, plus the same bound scaled
# by 1000 and by 1_000_000.
MAX_TIMESTAMP: Final[float] = _MAX_TIMESTAMP
MAX_TIMESTAMP_MS: Final[float] = MAX_TIMESTAMP * 1000
MAX_TIMESTAMP_US: Final[float] = MAX_TIMESTAMP * 1_000_000

# Proleptic Gregorian ordinal bounds: the ordinal of datetime.max, and 1.
MAX_ORDINAL: Final[int] = datetime.max.toordinal()
MIN_ORDINAL: Final[int] = 1

# Locale used when a caller does not specify one.
DEFAULT_LOCALE: Final[str] = "en-us"
# Locale codes for which dehumanize is supported.
# Laid out one language family per line; membership and order are unchanged.
DEHUMANIZE_LOCALES = {
    "en", "en-us", "en-gb", "en-au", "en-be", "en-jp", "en-za", "en-ca", "en-ph",
    "fr", "fr-fr", "fr-ca",
    "it", "it-it",
    "es", "es-es",
    "el", "el-gr",
    "ja", "ja-jp",
    "se", "se-fi", "se-no", "se-se",
    "sv", "sv-se",
    "fi", "fi-fi",
    "zh", "zh-cn", "zh-tw", "zh-hk",
    "nl", "nl-nl",
    "af",
    "de", "de-de", "de-ch", "de-at",
    "nb", "nb-no",
    "nn", "nn-no",
    "pt", "pt-pt", "pt-br",
    "tl", "tl-ph",
    "vi", "vi-vn",
    "tr", "tr-tr",
    "az", "az-az",
    "da", "da-dk",
    "ml",
    "hi",
    "fa", "fa-ir",
    "mr",
    "ca", "ca-es", "ca-ad", "ca-fr", "ca-it",
    "eo", "eo-xx",
    "bn", "bn-bd", "bn-in",
    "rm", "rm-ch",
    "ro", "ro-ro",
    "sl", "sl-si",
    "id", "id-id",
    "ne", "ne-np",
    "ee",
    "et",
    "sw", "sw-ke", "sw-tz",
    "la", "la-va",
    "lt", "lt-lt",
    "ms", "ms-my", "ms-bn",
    "or", "or-in",
    "lb", "lb-lu",
    "zu", "zu-za",
    "sq", "sq-al",
    "ta", "ta-in", "ta-lk",
}

View file

@ -1,4 +1,3 @@
# -*- coding: utf-8 -*-
""" """
Implements the :class:`ArrowFactory <arrow.factory.ArrowFactory>` class, Implements the :class:`ArrowFactory <arrow.factory.ArrowFactory>` class,
providing factory methods for common :class:`Arrow <arrow.arrow.Arrow>` providing factory methods for common :class:`Arrow <arrow.arrow.Arrow>`
@ -6,31 +5,100 @@ construction scenarios.
""" """
from __future__ import absolute_import
from arrow.arrow import Arrow
from arrow import parser
from arrow.util import is_timestamp, isstr
from datetime import datetime, tzinfo, date
from dateutil import tz as dateutil_tz
from time import struct_time
import calendar import calendar
from datetime import date, datetime
from datetime import tzinfo as dt_tzinfo
from decimal import Decimal
from time import struct_time
from typing import Any, List, Optional, Tuple, Type, Union, overload
from dateutil import tz as dateutil_tz
from arrow import parser
from arrow.arrow import TZ_EXPR, Arrow
from arrow.constants import DEFAULT_LOCALE
from arrow.util import is_timestamp, iso_to_gregorian
class ArrowFactory(object): class ArrowFactory:
''' A factory for generating :class:`Arrow <arrow.arrow.Arrow>` objects. """A factory for generating :class:`Arrow <arrow.arrow.Arrow>` objects.
:param type: (optional) the :class:`Arrow <arrow.arrow.Arrow>`-based class to construct from. :param type: (optional) the :class:`Arrow <arrow.arrow.Arrow>`-based class to construct from.
Defaults to :class:`Arrow <arrow.arrow.Arrow>`. Defaults to :class:`Arrow <arrow.arrow.Arrow>`.
''' """
def __init__(self, type=Arrow): type: Type[Arrow]
def __init__(self, type: Type[Arrow] = Arrow) -> None:
self.type = type self.type = type
def get(self, *args, **kwargs): @overload
''' Returns an :class:`Arrow <arrow.arrow.Arrow>` object based on flexible inputs. def get(
self,
*,
locale: str = DEFAULT_LOCALE,
tzinfo: Optional[TZ_EXPR] = None,
normalize_whitespace: bool = False,
) -> Arrow:
... # pragma: no cover
@overload
def get(
self,
__obj: Union[
Arrow,
datetime,
date,
struct_time,
dt_tzinfo,
int,
float,
str,
Tuple[int, int, int],
],
*,
locale: str = DEFAULT_LOCALE,
tzinfo: Optional[TZ_EXPR] = None,
normalize_whitespace: bool = False,
) -> Arrow:
... # pragma: no cover
@overload
def get(
self,
__arg1: Union[datetime, date],
__arg2: TZ_EXPR,
*,
locale: str = DEFAULT_LOCALE,
tzinfo: Optional[TZ_EXPR] = None,
normalize_whitespace: bool = False,
) -> Arrow:
... # pragma: no cover
@overload
def get(
self,
__arg1: str,
__arg2: Union[str, List[str]],
*,
locale: str = DEFAULT_LOCALE,
tzinfo: Optional[TZ_EXPR] = None,
normalize_whitespace: bool = False,
) -> Arrow:
... # pragma: no cover
def get(self, *args: Any, **kwargs: Any) -> Arrow:
"""Returns an :class:`Arrow <arrow.arrow.Arrow>` object based on flexible inputs.
:param locale: (optional) a ``str`` specifying a locale for the parser. Defaults to 'en-us'.
:param tzinfo: (optional) a :ref:`timezone expression <tz-expr>` or tzinfo object.
Replaces the timezone unless using an input form that is explicitly UTC or specifies
the timezone in a positional argument. Defaults to UTC.
:param normalize_whitespace: (optional) a ``bool`` specifying whether or not to normalize
redundant whitespace (spaces, tabs, and newlines) in a datetime string before parsing.
Defaults to false.
Usage:: Usage::
@ -41,18 +109,14 @@ class ArrowFactory(object):
>>> arrow.get() >>> arrow.get()
<Arrow [2013-05-08T05:51:43.316458+00:00]> <Arrow [2013-05-08T05:51:43.316458+00:00]>
**None** to also get current UTC time::
>>> arrow.get(None)
<Arrow [2013-05-08T05:51:43.316458+00:00]>
**One** :class:`Arrow <arrow.arrow.Arrow>` object, to get a copy. **One** :class:`Arrow <arrow.arrow.Arrow>` object, to get a copy.
>>> arw = arrow.utcnow() >>> arw = arrow.utcnow()
>>> arrow.get(arw) >>> arrow.get(arw)
<Arrow [2013-10-23T15:21:54.354846+00:00]> <Arrow [2013-10-23T15:21:54.354846+00:00]>
**One** ``str``, ``float``, or ``int``, convertible to a floating-point timestamp, to get that timestamp in UTC:: **One** ``float`` or ``int``, convertible to a floating-point timestamp, to get
that timestamp in UTC::
>>> arrow.get(1367992474.293378) >>> arrow.get(1367992474.293378)
<Arrow [2013-05-08T05:54:34.293378+00:00]> <Arrow [2013-05-08T05:54:34.293378+00:00]>
@ -60,18 +124,17 @@ class ArrowFactory(object):
>>> arrow.get(1367992474) >>> arrow.get(1367992474)
<Arrow [2013-05-08T05:54:34+00:00]> <Arrow [2013-05-08T05:54:34+00:00]>
>>> arrow.get('1367992474.293378') **One** ISO 8601-formatted ``str``, to parse it::
<Arrow [2013-05-08T05:54:34.293378+00:00]>
>>> arrow.get('1367992474')
<Arrow [2013-05-08T05:54:34+0struct_time0:00]>
**One** ISO-8601-formatted ``str``, to parse it::
>>> arrow.get('2013-09-29T01:26:43.830580') >>> arrow.get('2013-09-29T01:26:43.830580')
<Arrow [2013-09-29T01:26:43.830580+00:00]> <Arrow [2013-09-29T01:26:43.830580+00:00]>
**One** ``tzinfo``, to get the current time in that timezone:: **One** ISO 8601-formatted ``str``, in basic format, to parse it::
>>> arrow.get('20160413T133656.456289')
<Arrow [2016-04-13T13:36:56.456289+00:00]>
**One** ``tzinfo``, to get the current time **converted** to that timezone::
>>> arrow.get(tz.tzlocal()) >>> arrow.get(tz.tzlocal())
<Arrow [2013-05-07T22:57:28.484717-07:00]> <Arrow [2013-05-07T22:57:28.484717-07:00]>
@ -91,85 +154,117 @@ class ArrowFactory(object):
>>> arrow.get(date(2013, 5, 5)) >>> arrow.get(date(2013, 5, 5))
<Arrow [2013-05-05T00:00:00+00:00]> <Arrow [2013-05-05T00:00:00+00:00]>
**Two** arguments, a naive or aware ``datetime``, and a timezone expression (as above):: **One** time.struct time::
>>> arrow.get(gmtime(0))
<Arrow [1970-01-01T00:00:00+00:00]>
**One** iso calendar ``tuple``, to get that week date in UTC::
>>> arrow.get((2013, 18, 7))
<Arrow [2013-05-05T00:00:00+00:00]>
**Two** arguments, a naive or aware ``datetime``, and a replacement
:ref:`timezone expression <tz-expr>`::
>>> arrow.get(datetime(2013, 5, 5), 'US/Pacific') >>> arrow.get(datetime(2013, 5, 5), 'US/Pacific')
<Arrow [2013-05-05T00:00:00-07:00]> <Arrow [2013-05-05T00:00:00-07:00]>
**Two** arguments, a naive ``date``, and a timezone expression (as above):: **Two** arguments, a naive ``date``, and a replacement
:ref:`timezone expression <tz-expr>`::
>>> arrow.get(date(2013, 5, 5), 'US/Pacific') >>> arrow.get(date(2013, 5, 5), 'US/Pacific')
<Arrow [2013-05-05T00:00:00-07:00]> <Arrow [2013-05-05T00:00:00-07:00]>
**Two** arguments, both ``str``, to parse the first according to the format of the second:: **Two** arguments, both ``str``, to parse the first according to the format of the second::
>>> arrow.get('2013-05-05 12:30:45', 'YYYY-MM-DD HH:mm:ss') >>> arrow.get('2013-05-05 12:30:45 America/Chicago', 'YYYY-MM-DD HH:mm:ss ZZZ')
<Arrow [2013-05-05T12:30:45+00:00]> <Arrow [2013-05-05T12:30:45-05:00]>
**Two** arguments, first a ``str`` to parse and second a ``list`` of formats to try:: **Two** arguments, first a ``str`` to parse and second a ``list`` of formats to try::
>>> arrow.get('2013-05-05 12:30:45', ['MM/DD/YYYY', 'YYYY-MM-DD HH:mm:ss']) >>> arrow.get('2013-05-05 12:30:45', ['MM/DD/YYYY', 'YYYY-MM-DD HH:mm:ss'])
<Arrow [2013-05-05T12:30:45+00:00]> <Arrow [2013-05-05T12:30:45+00:00]>
**Three or more** arguments, as for the constructor of a ``datetime``:: **Three or more** arguments, as for the direct constructor of an ``Arrow`` object::
>>> arrow.get(2013, 5, 5, 12, 30, 45) >>> arrow.get(2013, 5, 5, 12, 30, 45)
<Arrow [2013-05-05T12:30:45+00:00]> <Arrow [2013-05-05T12:30:45+00:00]>
**One** time.struct time:: """
>>> arrow.get(gmtime(0))
<Arrow [1970-01-01T00:00:00+00:00]>
'''
arg_count = len(args) arg_count = len(args)
locale = kwargs.get('locale', 'en_us') locale = kwargs.pop("locale", DEFAULT_LOCALE)
tz = kwargs.get('tzinfo', None) tz = kwargs.get("tzinfo", None)
normalize_whitespace = kwargs.pop("normalize_whitespace", False)
# () -> now, @ utc. # if kwargs given, send to constructor unless only tzinfo provided
if len(kwargs) > 1:
arg_count = 3
# tzinfo kwarg is not provided
if len(kwargs) == 1 and tz is None:
arg_count = 3
# () -> now, @ tzinfo or utc
if arg_count == 0: if arg_count == 0:
if isinstance(tz, tzinfo): if isinstance(tz, str):
return self.type.now(tz) tz = parser.TzinfoParser.parse(tz)
return self.type.now(tzinfo=tz)
if isinstance(tz, dt_tzinfo):
return self.type.now(tzinfo=tz)
return self.type.utcnow() return self.type.utcnow()
if arg_count == 1: if arg_count == 1:
arg = args[0] arg = args[0]
if isinstance(arg, Decimal):
arg = float(arg)
# (None) -> now, @ utc. # (None) -> raises an exception
if arg is None: if arg is None:
return self.type.utcnow() raise TypeError("Cannot parse argument of type None.")
# try (int, float, str(int), str(float)) -> utc, from timestamp. # try (int, float) -> from timestamp @ tzinfo
if is_timestamp(arg): elif not isinstance(arg, str) and is_timestamp(arg):
return self.type.utcfromtimestamp(arg) if tz is None:
# set to UTC by default
tz = dateutil_tz.tzutc()
return self.type.fromtimestamp(arg, tzinfo=tz)
# (Arrow) -> from the object's datetime. # (Arrow) -> from the object's datetime @ tzinfo
if isinstance(arg, Arrow): elif isinstance(arg, Arrow):
return self.type.fromdatetime(arg.datetime) return self.type.fromdatetime(arg.datetime, tzinfo=tz)
# (datetime) -> from datetime. # (datetime) -> from datetime @ tzinfo
if isinstance(arg, datetime): elif isinstance(arg, datetime):
return self.type.fromdatetime(arg) return self.type.fromdatetime(arg, tzinfo=tz)
# (date) -> from date. # (date) -> from date @ tzinfo
if isinstance(arg, date): elif isinstance(arg, date):
return self.type.fromdate(arg) return self.type.fromdate(arg, tzinfo=tz)
# (tzinfo) -> now, @ tzinfo. # (tzinfo) -> now @ tzinfo
elif isinstance(arg, tzinfo): elif isinstance(arg, dt_tzinfo):
return self.type.now(arg) return self.type.now(tzinfo=arg)
# (str) -> now, @ tzinfo. # (str) -> parse @ tzinfo
elif isstr(arg): elif isinstance(arg, str):
dt = parser.DateTimeParser(locale).parse_iso(arg) dt = parser.DateTimeParser(locale).parse_iso(arg, normalize_whitespace)
return self.type.fromdatetime(dt) return self.type.fromdatetime(dt, tzinfo=tz)
# (struct_time) -> from struct_time # (struct_time) -> from struct_time
elif isinstance(arg, struct_time): elif isinstance(arg, struct_time):
return self.type.utcfromtimestamp(calendar.timegm(arg)) return self.type.utcfromtimestamp(calendar.timegm(arg))
# (iso calendar) -> convert then from date @ tzinfo
elif isinstance(arg, tuple) and len(arg) == 3:
d = iso_to_gregorian(*arg)
return self.type.fromdate(d, tzinfo=tz)
else: else:
raise TypeError('Can\'t parse single argument type of \'{0}\''.format(type(arg))) raise TypeError(f"Cannot parse single argument of type {type(arg)!r}.")
elif arg_count == 2: elif arg_count == 2:
@ -177,58 +272,57 @@ class ArrowFactory(object):
if isinstance(arg_1, datetime): if isinstance(arg_1, datetime):
# (datetime, tzinfo) -> fromdatetime @ tzinfo/string. # (datetime, tzinfo/str) -> fromdatetime @ tzinfo
if isinstance(arg_2, tzinfo) or isstr(arg_2): if isinstance(arg_2, (dt_tzinfo, str)):
return self.type.fromdatetime(arg_1, arg_2) return self.type.fromdatetime(arg_1, tzinfo=arg_2)
else: else:
raise TypeError('Can\'t parse two arguments of types \'datetime\', \'{0}\''.format( raise TypeError(
type(arg_2))) f"Cannot parse two arguments of types 'datetime', {type(arg_2)!r}."
)
# (date, tzinfo/str) -> fromdate @ tzinfo/string.
elif isinstance(arg_1, date): elif isinstance(arg_1, date):
if isinstance(arg_2, tzinfo) or isstr(arg_2): # (date, tzinfo/str) -> fromdate @ tzinfo
if isinstance(arg_2, (dt_tzinfo, str)):
return self.type.fromdate(arg_1, tzinfo=arg_2) return self.type.fromdate(arg_1, tzinfo=arg_2)
else: else:
raise TypeError('Can\'t parse two arguments of types \'date\', \'{0}\''.format( raise TypeError(
type(arg_2))) f"Cannot parse two arguments of types 'date', {type(arg_2)!r}."
)
# (str, format) -> parse. # (str, format) -> parse @ tzinfo
elif isstr(arg_1) and (isstr(arg_2) or isinstance(arg_2, list)): elif isinstance(arg_1, str) and isinstance(arg_2, (str, list)):
dt = parser.DateTimeParser(locale).parse(args[0], args[1]) dt = parser.DateTimeParser(locale).parse(
args[0], args[1], normalize_whitespace
)
return self.type.fromdatetime(dt, tzinfo=tz) return self.type.fromdatetime(dt, tzinfo=tz)
else: else:
raise TypeError('Can\'t parse two arguments of types \'{0}\', \'{1}\''.format( raise TypeError(
type(arg_1), type(arg_2))) f"Cannot parse two arguments of types {type(arg_1)!r} and {type(arg_2)!r}."
)
# 3+ args -> datetime-like via constructor. # 3+ args -> datetime-like via constructor
else: else:
return self.type(*args, **kwargs) return self.type(*args, **kwargs)
def utcnow(self): def utcnow(self) -> Arrow:
'''Returns an :class:`Arrow <arrow.arrow.Arrow>` object, representing "now" in UTC time. """Returns an :class:`Arrow <arrow.arrow.Arrow>` object, representing "now" in UTC time.
Usage:: Usage::
>>> import arrow >>> import arrow
>>> arrow.utcnow() >>> arrow.utcnow()
<Arrow [2013-05-08T05:19:07.018993+00:00]> <Arrow [2013-05-08T05:19:07.018993+00:00]>
''' """
return self.type.utcnow() return self.type.utcnow()
def now(self, tz=None): def now(self, tz: Optional[TZ_EXPR] = None) -> Arrow:
'''Returns an :class:`Arrow <arrow.arrow.Arrow>` object, representing "now". """Returns an :class:`Arrow <arrow.arrow.Arrow>` object, representing "now" in the given
timezone.
:param tz: (optional) An expression representing a timezone. Defaults to local time. :param tz: (optional) A :ref:`timezone expression <tz-expr>`. Defaults to local time.
Recognized timezone expressions:
- A ``tzinfo`` object.
- A ``str`` describing a timezone, similar to 'US/Pacific', or 'Europe/Berlin'.
- A ``str`` in ISO-8601 style, as in '+07:00'.
- A ``str``, one of the following: 'local', 'utc', 'UTC'.
Usage:: Usage::
@ -244,11 +338,11 @@ class ArrowFactory(object):
>>> arrow.now('local') >>> arrow.now('local')
<Arrow [2013-05-07T22:19:39.130059-07:00]> <Arrow [2013-05-07T22:19:39.130059-07:00]>
''' """
if tz is None: if tz is None:
tz = dateutil_tz.tzlocal() tz = dateutil_tz.tzlocal()
elif not isinstance(tz, tzinfo): elif not isinstance(tz, dt_tzinfo):
tz = parser.TzinfoParser.parse(tz) tz = parser.TzinfoParser.parse(tz)
return self.type.now(tz) return self.type.now(tz)

View file

@ -1,105 +1,152 @@
# -*- coding: utf-8 -*- """Provides the :class:`DateTimeFormatter <arrow.formatter.DateTimeFormatter>` class, an improved formatter for datetimes."""
from __future__ import absolute_import
import calendar
import re import re
import sys
from datetime import datetime, timedelta
from typing import Optional, Pattern, cast
from dateutil import tz as dateutil_tz from dateutil import tz as dateutil_tz
from arrow import util, locales
from arrow import locales
from arrow.constants import DEFAULT_LOCALE
if sys.version_info < (3, 8): # pragma: no cover
from typing_extensions import Final
else:
from typing import Final # pragma: no cover
class DateTimeFormatter(object): FORMAT_ATOM: Final[str] = "YYYY-MM-DD HH:mm:ssZZ"
FORMAT_COOKIE: Final[str] = "dddd, DD-MMM-YYYY HH:mm:ss ZZZ"
FORMAT_RFC822: Final[str] = "ddd, DD MMM YY HH:mm:ss Z"
FORMAT_RFC850: Final[str] = "dddd, DD-MMM-YY HH:mm:ss ZZZ"
FORMAT_RFC1036: Final[str] = "ddd, DD MMM YY HH:mm:ss Z"
FORMAT_RFC1123: Final[str] = "ddd, DD MMM YYYY HH:mm:ss Z"
FORMAT_RFC2822: Final[str] = "ddd, DD MMM YYYY HH:mm:ss Z"
FORMAT_RFC3339: Final[str] = "YYYY-MM-DD HH:mm:ssZZ"
FORMAT_RSS: Final[str] = "ddd, DD MMM YYYY HH:mm:ss Z"
FORMAT_W3C: Final[str] = "YYYY-MM-DD HH:mm:ssZZ"
_FORMAT_RE = re.compile('(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?dd?d?|HH?|hh?|mm?|ss?|SS?S?S?S?S?|ZZ?|a|A|X)')
def __init__(self, locale='en_us'): class DateTimeFormatter:
# Characters enclosed in square brackets are matched as
# an atomic group. For more info on atomic groups and how they are
# emulated in Python's re library, see https://stackoverflow.com/a/13577411/2701578
_FORMAT_RE: Final[Pattern[str]] = re.compile(
r"(\[(?:(?=(?P<literal>[^]]))(?P=literal))*\]|YYY?Y?|MM?M?M?|Do|DD?D?D?|d?dd?d?|HH?|hh?|mm?|ss?|SS?S?S?S?S?|ZZ?Z?|a|A|X|x|W)"
)
locale: locales.Locale
def __init__(self, locale: str = DEFAULT_LOCALE) -> None:
self.locale = locales.get_locale(locale) self.locale = locales.get_locale(locale)
def format(cls, dt, fmt): def format(cls, dt: datetime, fmt: str) -> str:
return cls._FORMAT_RE.sub(lambda m: cls._format_token(dt, m.group(0)), fmt) # FIXME: _format_token() is nullable
return cls._FORMAT_RE.sub(
lambda m: cast(str, cls._format_token(dt, m.group(0))), fmt
)
def _format_token(self, dt, token): def _format_token(self, dt: datetime, token: Optional[str]) -> Optional[str]:
if token == 'YYYY': if token and token.startswith("[") and token.endswith("]"):
return token[1:-1]
if token == "YYYY":
return self.locale.year_full(dt.year) return self.locale.year_full(dt.year)
if token == 'YY': if token == "YY":
return self.locale.year_abbreviation(dt.year) return self.locale.year_abbreviation(dt.year)
if token == 'MMMM': if token == "MMMM":
return self.locale.month_name(dt.month) return self.locale.month_name(dt.month)
if token == 'MMM': if token == "MMM":
return self.locale.month_abbreviation(dt.month) return self.locale.month_abbreviation(dt.month)
if token == 'MM': if token == "MM":
return '{0:02d}'.format(dt.month) return f"{dt.month:02d}"
if token == 'M': if token == "M":
return str(dt.month) return f"{dt.month}"
if token == 'DDDD': if token == "DDDD":
return '{0:03d}'.format(dt.timetuple().tm_yday) return f"{dt.timetuple().tm_yday:03d}"
if token == 'DDD': if token == "DDD":
return str(dt.timetuple().tm_yday) return f"{dt.timetuple().tm_yday}"
if token == 'DD': if token == "DD":
return '{0:02d}'.format(dt.day) return f"{dt.day:02d}"
if token == 'D': if token == "D":
return str(dt.day) return f"{dt.day}"
if token == 'Do': if token == "Do":
return self.locale.ordinal_number(dt.day) return self.locale.ordinal_number(dt.day)
if token == 'dddd': if token == "dddd":
return self.locale.day_name(dt.isoweekday()) return self.locale.day_name(dt.isoweekday())
if token == 'ddd': if token == "ddd":
return self.locale.day_abbreviation(dt.isoweekday()) return self.locale.day_abbreviation(dt.isoweekday())
if token == 'd': if token == "d":
return str(dt.isoweekday()) return f"{dt.isoweekday()}"
if token == 'HH': if token == "HH":
return '{0:02d}'.format(dt.hour) return f"{dt.hour:02d}"
if token == 'H': if token == "H":
return str(dt.hour) return f"{dt.hour}"
if token == 'hh': if token == "hh":
return '{0:02d}'.format(dt.hour if 0 < dt.hour < 13 else abs(dt.hour - 12)) return f"{dt.hour if 0 < dt.hour < 13 else abs(dt.hour - 12):02d}"
if token == 'h': if token == "h":
return str(dt.hour if 0 < dt.hour < 13 else abs(dt.hour - 12)) return f"{dt.hour if 0 < dt.hour < 13 else abs(dt.hour - 12)}"
if token == 'mm': if token == "mm":
return '{0:02d}'.format(dt.minute) return f"{dt.minute:02d}"
if token == 'm': if token == "m":
return str(dt.minute) return f"{dt.minute}"
if token == 'ss': if token == "ss":
return '{0:02d}'.format(dt.second) return f"{dt.second:02d}"
if token == 's': if token == "s":
return str(dt.second) return f"{dt.second}"
if token == 'SSSSSS': if token == "SSSSSS":
return str('{0:06d}'.format(int(dt.microsecond))) return f"{dt.microsecond:06d}"
if token == 'SSSSS': if token == "SSSSS":
return str('{0:05d}'.format(int(dt.microsecond / 10))) return f"{dt.microsecond // 10:05d}"
if token == 'SSSS': if token == "SSSS":
return str('{0:04d}'.format(int(dt.microsecond / 100))) return f"{dt.microsecond // 100:04d}"
if token == 'SSS': if token == "SSS":
return str('{0:03d}'.format(int(dt.microsecond / 1000))) return f"{dt.microsecond // 1000:03d}"
if token == 'SS': if token == "SS":
return str('{0:02d}'.format(int(dt.microsecond / 10000))) return f"{dt.microsecond // 10000:02d}"
if token == 'S': if token == "S":
return str(int(dt.microsecond / 100000)) return f"{dt.microsecond // 100000}"
if token == 'X': if token == "X":
return str(calendar.timegm(dt.utctimetuple())) return f"{dt.timestamp()}"
if token in ['ZZ', 'Z']: if token == "x":
separator = ':' if token == 'ZZ' else '' return f"{dt.timestamp() * 1_000_000:.0f}"
if token == "ZZZ":
return dt.tzname()
if token in ["ZZ", "Z"]:
separator = ":" if token == "ZZ" else ""
tz = dateutil_tz.tzutc() if dt.tzinfo is None else dt.tzinfo tz = dateutil_tz.tzutc() if dt.tzinfo is None else dt.tzinfo
total_minutes = int(util.total_seconds(tz.utcoffset(dt)) / 60) # `dt` must be aware object. Otherwise, this line will raise AttributeError
# https://github.com/arrow-py/arrow/pull/883#discussion_r529866834
# datetime awareness: https://docs.python.org/3/library/datetime.html#aware-and-naive-objects
total_minutes = int(cast(timedelta, tz.utcoffset(dt)).total_seconds() / 60)
sign = '+' if total_minutes >= 0 else '-' sign = "+" if total_minutes >= 0 else "-"
total_minutes = abs(total_minutes) total_minutes = abs(total_minutes)
hour, minute = divmod(total_minutes, 60) hour, minute = divmod(total_minutes, 60)
return '{0}{1:02d}{2}{3:02d}'.format(sign, hour, separator, minute) return f"{sign}{hour:02d}{separator}{minute:02d}"
if token in ('a', 'A'): if token in ("a", "A"):
return self.locale.meridian(dt.hour, token) return self.locale.meridian(dt.hour, token)
if token == "W":
year, week, day = dt.isocalendar()
return f"{year}-W{week:02d}-{day}"

File diff suppressed because it is too large Load diff

View file

@ -1,205 +1,555 @@
# -*- coding: utf-8 -*- """Provides the :class:`DateTimeParser <arrow.parser.DateTimeParser>` class, a better way to parse datetime strings."""
from __future__ import absolute_import
from __future__ import unicode_literals
from datetime import datetime
from dateutil import tz
import re import re
import sys
from datetime import datetime, timedelta
from datetime import tzinfo as dt_tzinfo
from functools import lru_cache
from typing import (
Any,
ClassVar,
Dict,
Iterable,
List,
Match,
Optional,
Pattern,
SupportsFloat,
SupportsInt,
Tuple,
Union,
cast,
overload,
)
from dateutil import tz
from arrow import locales from arrow import locales
from arrow.constants import DEFAULT_LOCALE
from arrow.util import next_weekday, normalize_timestamp
if sys.version_info < (3, 8): # pragma: no cover
from typing_extensions import Literal, TypedDict
else:
from typing import Literal, TypedDict # pragma: no cover
class ParserError(RuntimeError): class ParserError(ValueError):
pass pass
class DateTimeParser(object): # Allows for ParserErrors to be propagated from _build_datetime()
# when day_of_year errors occur.
_FORMAT_RE = re.compile('(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|X)') # Before this, the ParserErrors were caught by the try/except in
_ESCAPE_RE = re.compile('\[[^\[\]]*\]') # _parse_multiformat() and the appropriate error message was not
# transmitted to the user.
_ONE_OR_MORE_DIGIT_RE = re.compile('\d+') class ParserMatchError(ParserError):
_ONE_OR_TWO_DIGIT_RE = re.compile('\d{1,2}') pass
_FOUR_DIGIT_RE = re.compile('\d{4}')
_TWO_DIGIT_RE = re.compile('\d{2}')
_TZ_RE = re.compile('[+\-]?\d{2}:?(\d{2})?')
_TZ_NAME_RE = re.compile('\w[\w+\-/]+')
_BASE_INPUT_RE_MAP = { _WEEKDATE_ELEMENT = Union[str, bytes, SupportsInt, bytearray]
'YYYY': _FOUR_DIGIT_RE,
'YY': _TWO_DIGIT_RE, _FORMAT_TYPE = Literal[
'MM': _TWO_DIGIT_RE, "YYYY",
'M': _ONE_OR_TWO_DIGIT_RE, "YY",
'DD': _TWO_DIGIT_RE, "MM",
'D': _ONE_OR_TWO_DIGIT_RE, "M",
'HH': _TWO_DIGIT_RE, "DDDD",
'H': _ONE_OR_TWO_DIGIT_RE, "DDD",
'hh': _TWO_DIGIT_RE, "DD",
'h': _ONE_OR_TWO_DIGIT_RE, "D",
'mm': _TWO_DIGIT_RE, "HH",
'm': _ONE_OR_TWO_DIGIT_RE, "H",
'ss': _TWO_DIGIT_RE, "hh",
's': _ONE_OR_TWO_DIGIT_RE, "h",
'X': re.compile('\d+'), "mm",
'ZZZ': _TZ_NAME_RE, "m",
'ZZ': _TZ_RE, "ss",
'Z': _TZ_RE, "s",
'S': _ONE_OR_MORE_DIGIT_RE, "X",
"x",
"ZZZ",
"ZZ",
"Z",
"S",
"W",
"MMMM",
"MMM",
"Do",
"dddd",
"ddd",
"d",
"a",
"A",
]
class _Parts(TypedDict, total=False):
year: int
month: int
day_of_year: int
day: int
hour: int
minute: int
second: int
microsecond: int
timestamp: float
expanded_timestamp: int
tzinfo: dt_tzinfo
am_pm: Literal["am", "pm"]
day_of_week: int
weekdate: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]]
class DateTimeParser:
_FORMAT_RE: ClassVar[Pattern[str]] = re.compile(
r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)"
)
_ESCAPE_RE: ClassVar[Pattern[str]] = re.compile(r"\[[^\[\]]*\]")
_ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}")
_ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}")
_ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+")
_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}")
_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}")
_FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}")
_TZ_Z_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z")
_TZ_ZZ_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z")
_TZ_NAME_RE: ClassVar[Pattern[str]] = re.compile(r"\w[\w+\-/]+")
# NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will
# break cases like "15 Jul 2000" and a format list (see issue #447)
# The fractional part is optional as a whole ("(?:\.\d+)?"), so that a
# one-digit timestamp such as "5" matches; the previous "\d+\.?\d+" form
# silently required at least two digits.
_TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+(?:\.\d+)?$")
_TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+$")
_TIME_RE: ClassVar[Pattern[str]] = re.compile(
r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$"
)
_WEEK_DATE_RE: ClassVar[Pattern[str]] = re.compile(
r"(?P<year>\d{4})[\-]?W(?P<week>\d{2})[\-]?(?P<day>\d)?"
)
_BASE_INPUT_RE_MAP: ClassVar[Dict[_FORMAT_TYPE, Pattern[str]]] = {
"YYYY": _FOUR_DIGIT_RE,
"YY": _TWO_DIGIT_RE,
"MM": _TWO_DIGIT_RE,
"M": _ONE_OR_TWO_DIGIT_RE,
"DDDD": _THREE_DIGIT_RE,
"DDD": _ONE_OR_TWO_OR_THREE_DIGIT_RE,
"DD": _TWO_DIGIT_RE,
"D": _ONE_OR_TWO_DIGIT_RE,
"HH": _TWO_DIGIT_RE,
"H": _ONE_OR_TWO_DIGIT_RE,
"hh": _TWO_DIGIT_RE,
"h": _ONE_OR_TWO_DIGIT_RE,
"mm": _TWO_DIGIT_RE,
"m": _ONE_OR_TWO_DIGIT_RE,
"ss": _TWO_DIGIT_RE,
"s": _ONE_OR_TWO_DIGIT_RE,
"X": _TIMESTAMP_RE,
"x": _TIMESTAMP_EXPANDED_RE,
"ZZZ": _TZ_NAME_RE,
"ZZ": _TZ_ZZ_RE,
"Z": _TZ_Z_RE,
"S": _ONE_OR_MORE_DIGIT_RE,
"W": _WEEK_DATE_RE,
} }
MARKERS = ['YYYY', 'MM', 'DD'] SEPARATORS: ClassVar[List[str]] = ["-", "/", "."]
SEPARATORS = ['-', '/', '.']
def __init__(self, locale='en_us'): locale: locales.Locale
_input_re_map: Dict[_FORMAT_TYPE, Pattern[str]]
def __init__(self, locale: str = DEFAULT_LOCALE, cache_size: int = 0) -> None:
    """Create a parser bound to a locale.

    :param locale: name of the locale, resolved through ``locales.get_locale``.
    :param cache_size: when greater than zero, ``_generate_pattern_re`` is
        wrapped in an ``lru_cache`` of that size for this instance.
    """
    self.locale = locales.get_locale(locale)
    self._input_re_map = self._BASE_INPUT_RE_MAP.copy()

    active_locale = self.locale
    make_choice = self._generate_choice_re
    meridians = active_locale.meridians

    # Locale-dependent tokens. Index 0 of each name list is skipped
    # (the locale lists appear to be 1-indexed).
    locale_patterns = {
        "MMMM": make_choice(active_locale.month_names[1:], re.IGNORECASE),
        "MMM": make_choice(active_locale.month_abbreviations[1:], re.IGNORECASE),
        "Do": re.compile(active_locale.ordinal_day_re),
        "dddd": make_choice(active_locale.day_names[1:], re.IGNORECASE),
        "ddd": make_choice(active_locale.day_abbreviations[1:], re.IGNORECASE),
        "d": re.compile(r"[1-7]"),
        "a": make_choice((meridians["am"], meridians["pm"])),
        # note: 'A' token accepts both 'am/pm' and 'AM/PM' formats to
        # ensure backwards compatibility of this token
        "A": make_choice(meridians.values()),
    }
    self._input_re_map.update(locale_patterns)

    if cache_size > 0:
        # Shadow the method on this instance with a memoized version.
        cached_generator = lru_cache(maxsize=cache_size)(self._generate_pattern_re)
        self._generate_pattern_re = cached_generator  # type: ignore
# TODO: since we support more than ISO 8601, we should rename this function
# IDEA: break into multiple functions
def parse_iso(
    self, datetime_string: str, normalize_whitespace: bool = False
) -> datetime:
    """Parse an ISO 8601-like date or datetime string without an explicit format.

    The string is inspected to find out whether it carries a time part
    (separated from the date by exactly one space or by a ``T``), how precise
    that time is and whether a timezone follows it. A list of candidate format
    strings is built from that and handed to ``_parse_multiformat()``.

    :param datetime_string: the string to parse.
    :param normalize_whitespace: when true, strip the string and collapse
        every run of whitespace into a single space first.
    :return: the parsed ``datetime``.
    :raises ParserError: if the number of spaces is unexpected, if the time
        component does not match ``_TIME_RE``, or (from
        ``_parse_multiformat()``) if no candidate format matches.
    """
    if normalize_whitespace:
        datetime_string = re.sub(r"\s+", " ", datetime_string.strip())

    has_space_divider = " " in datetime_string
    has_t_divider = "T" in datetime_string

    num_spaces = datetime_string.count(" ")
    if has_space_divider and num_spaces != 1 or has_t_divider and num_spaces > 0:
        raise ParserError(
            f"Expected an ISO 8601-like string, but was given {datetime_string!r}. "
            "Try passing in a format string to resolve this."
        )

    has_time = has_space_divider or has_t_divider
    has_tz = False

    # date formats (ISO 8601 and others) to test against
    # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used)
    formats = [
        "YYYY-MM-DD",
        "YYYY-M-DD",
        "YYYY-M-D",
        "YYYY/MM/DD",
        "YYYY/M/DD",
        "YYYY/M/D",
        "YYYY.MM.DD",
        "YYYY.M.DD",
        "YYYY.M.D",
        "YYYYMMDD",
        "YYYY-DDDD",
        "YYYYDDDD",
        "YYYY-MM",
        "YYYY/MM",
        "YYYY.MM",
        "YYYY",
        "W",
    ]

    if has_time:
        # A space takes precedence over "T" as the date/time divider.
        divider = " " if has_space_divider else "T"
        time_string = datetime_string.split(divider, 1)[1]

        # Split the clock time from a trailing offset or "Z". maxsplit and
        # flags are passed by keyword: positional use is deprecated since
        # Python 3.13.
        time_parts = re.split(
            r"[\+\-Z]", time_string, maxsplit=1, flags=re.IGNORECASE
        )

        time_components: Optional[Match[str]] = self._TIME_RE.match(time_parts[0])

        if time_components is None:
            raise ParserError(
                "Invalid time component provided. "
                "Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format."
            )

        (
            _hours,
            minutes,
            seconds,
            subseconds_sep,
            subseconds,
        ) = time_components.groups()

        has_tz = len(time_parts) == 2
        has_minutes = minutes is not None
        has_seconds = seconds is not None
        has_subseconds = subseconds is not None

        is_basic_time_format = ":" not in time_parts[0]
        tz_format = "Z"

        # use 'ZZ' token instead since tz offset is present in non-basic format
        if has_tz and ":" in time_parts[1]:
            tz_format = "ZZ"

        time_sep = "" if is_basic_time_format else ":"

        # Build the time format with the same precision as the input.
        if has_subseconds:
            time_format = f"HH{time_sep}mm{time_sep}ss{subseconds_sep}S"
        elif has_seconds:
            time_format = f"HH{time_sep}mm{time_sep}ss"
        elif has_minutes:
            time_format = f"HH{time_sep}mm"
        else:
            time_format = "HH"

        formats = [f"{f}{divider}{time_format}" for f in formats]

    if has_time and has_tz:
        # Add "Z" or "ZZ" to the format strings to indicate to
        # _parse_token() that a timezone needs to be parsed
        formats = [f"{f}{tz_format}" for f in formats]

    return self._parse_multiformat(datetime_string, formats)
formats = [item.replace('T', ' ', 1) for item in formats]
def parse(
    self,
    datetime_string: str,
    fmt: Union[List[str], str],
    normalize_whitespace: bool = False,
) -> datetime:
    """Parse ``datetime_string`` according to one format or a list of formats.

    :param datetime_string: the string to parse.
    :param fmt: a single format string, or a list of formats that is passed
        on to ``_parse_multiformat()``.
    :param normalize_whitespace: when true, collapse every run of whitespace
        in the input into a single space before parsing.
    :return: the parsed ``datetime``.
    :raises ParserMatchError: if the pattern cannot be compiled, the input
        does not match, or a token has no captured value.
    """
    if normalize_whitespace:
        datetime_string = re.sub(r"\s+", " ", datetime_string)

    # A list of formats is tried one by one by the multi-format helper.
    if isinstance(fmt, list):
        return self._parse_multiformat(datetime_string, fmt)

    try:
        tokens, pattern = self._generate_pattern_re(fmt)
    except re.error as e:
        raise ParserMatchError(
            f"Failed to generate regular expression pattern: {e}."
        )

    found = pattern.search(datetime_string)
    if found is None:
        raise ParserMatchError(
            f"Failed to match {fmt!r} when parsing {datetime_string!r}."
        )

    collected: _Parts = {}
    for token in tokens:
        captured: Union[Tuple[str, str, str], str]
        if token == "W":
            # The week-date token captures three named groups at once.
            captured = (
                found.group("year"),
                found.group("week"),
                found.group("day"),
            )
        else:
            # The ordinal-day pattern stores its number in a "value" group.
            captured = found.group("value" if token == "Do" else token)

        if captured is None:
            raise ParserMatchError(
                f"Unable to find a match group for the specified token {token!r}."
            )

        self._parse_token(token, captured, collected)  # type: ignore

    return self._build_datetime(collected)
def _generate_pattern_re(self, fmt: str) -> Tuple[List[_FORMAT_TYPE], Pattern[str]]:
    """Turn a format string into its token list and a compiled regex.

    :param fmt: a string of tokens like 'YYYY-MM-DD'.
    :return: the tokens in order of appearance and the case-insensitive
        pattern that matches input written in that format.
    :raises ParserError: if a token has no entry in ``_input_re_map``.
    """
    # Each token is replaced by a named group holding its input pattern:
    # 'YYYY-MM-DD' -> '(?P<YYYY>\d{4})-(?P<MM>\d{2})-(?P<DD>\d{2})'

    # Escape all special RegEx chars, then swap every bracketed expression
    # for a "#" placeholder so that its text is not scanned for tokens.
    template = re.sub(self._ESCAPE_RE, "#", re.escape(fmt))

    # Any number of S is the same as one.
    # TODO: allow users to specify the number of digits to parse
    template = re.sub(r"S+", "S", template)

    # The bracketed expressions themselves, to be reinserted later.
    bracketed = re.findall(self._ESCAPE_RE, fmt)

    found_tokens: List[_FORMAT_TYPE] = []
    pieces: List[str] = []
    cursor = 0

    # finditer yields matches left to right, so the pattern can be assembled
    # from the text between tokens plus each token's named group.
    for hit in self._FORMAT_RE.finditer(template):
        token: _FORMAT_TYPE = cast(_FORMAT_TYPE, hit.group(0))
        try:
            token_re = self._input_re_map[token]
        except KeyError:
            raise ParserError(f"Unrecognized token {token!r}.")
        found_tokens.append(token)
        pieces.append(template[cursor : hit.start()])
        pieces.append(f"(?P<{token}>{token_re.pattern})")
        cursor = hit.end()
    pieces.append(template[cursor:])
    token_pattern = "".join(pieces)

    # Put the bracketed expressions (without their brackets) back where the
    # placeholders are. Splitting always yields one more chunk than there are
    # placeholders.
    rebuilt: List[str] = []
    for position, chunk in enumerate(token_pattern.split(r"\#")):
        rebuilt.append(chunk)
        if position < len(bracketed):
            rebuilt.append(bracketed[position][1:-1])
    final_fmt_pattern = "".join(rebuilt)

    # Wrap final_fmt_pattern in a custom word boundary to strictly
    # match the formatting pattern and filter out date and time formats
    # that include junk such as: blah1998-09-12 blah, blah 1998-09-12blah,
    # blah1998-09-12blah. The custom word boundary matches every character
    # that is not a whitespace character to allow for searching for a date
    # and time string in a natural language sentence. Therefore, searching
    # for a string of the form YYYY-MM-DD in "blah 1998-09-12 blah" will
    # work properly.
    # Certain punctuation before or after the target pattern such as
    # "1998-09-12," is permitted. For the full list of valid punctuation,
    # see the documentation.
    leading_boundary = (
        # No two consecutive non-whitespace characters before the pattern:
        # allows .11.25.2019 but not 1.11.25.2019 (for pattern MM.DD.YYYY)
        r"(?<!\S\S)"
        # Punctuation that is allowed directly before the pattern
        r"(?<![^\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)<>\s])"
        # \b blocks 1201912 but allows 201912 for pattern YYYYMM; ^ lets a
        # negative (before epoch) number through
        r"(\b|^)"
    )
    trailing_boundary = (
        # At most one of these punctuation marks may follow the pattern...
        r"(?=[\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)\<\>]?"
        # ...and no non-whitespace character may follow the punctuation
        r"(?!\S))"
    )

    bounded_fmt_pattern = leading_boundary + final_fmt_pattern + trailing_boundary
    return found_tokens, re.compile(bounded_fmt_pattern, flags=re.IGNORECASE)
parts['year'] = int(value)
elif token == 'YY': @overload
def _parse_token(
self,
token: Literal[
"YYYY",
"YY",
"MM",
"M",
"DDDD",
"DDD",
"DD",
"D",
"Do",
"HH",
"hh",
"h",
"H",
"mm",
"m",
"ss",
"s",
"x",
],
value: Union[str, bytes, SupportsInt, bytearray],
parts: _Parts,
) -> None:
... # pragma: no cover
@overload
def _parse_token(
self,
token: Literal["X"],
value: Union[str, bytes, SupportsFloat, bytearray],
parts: _Parts,
) -> None:
... # pragma: no cover
@overload
def _parse_token(
self,
token: Literal["MMMM", "MMM", "dddd", "ddd", "S"],
value: Union[str, bytes, bytearray],
parts: _Parts,
) -> None:
... # pragma: no cover
@overload
def _parse_token(
self,
token: Literal["a", "A", "ZZZ", "ZZ", "Z"],
value: Union[str, bytes],
parts: _Parts,
) -> None:
... # pragma: no cover
@overload
def _parse_token(
self,
token: Literal["W"],
value: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]],
parts: _Parts,
) -> None:
... # pragma: no cover
def _parse_token(
self,
token: Any,
value: Any,
parts: _Parts,
) -> None:
if token == "YYYY":
parts["year"] = int(value)
elif token == "YY":
value = int(value) value = int(value)
parts['year'] = 1900 + value if value > 68 else 2000 + value parts["year"] = 1900 + value if value > 68 else 2000 + value
elif token in ['MMMM', 'MMM']: elif token in ["MMMM", "MMM"]:
parts['month'] = self.locale.month_number(value.lower()) # FIXME: month_number() is nullable
parts["month"] = self.locale.month_number(value.lower()) # type: ignore
elif token in ['MM', 'M']: elif token in ["MM", "M"]:
parts['month'] = int(value) parts["month"] = int(value)
elif token in ['DD', 'D']: elif token in ["DDDD", "DDD"]:
parts['day'] = int(value) parts["day_of_year"] = int(value)
elif token in ['Do']: elif token in ["DD", "D"]:
parts['day'] = int(value) parts["day"] = int(value)
elif token.upper() in ['HH', 'H']: elif token == "Do":
parts['hour'] = int(value) parts["day"] = int(value)
elif token in ['mm', 'm']: elif token == "dddd":
parts['minute'] = int(value) # locale day names are 1-indexed
day_of_week = [x.lower() for x in self.locale.day_names].index(
value.lower()
)
parts["day_of_week"] = day_of_week - 1
elif token in ['ss', 's']: elif token == "ddd":
parts['second'] = int(value) # locale day abbreviations are 1-indexed
day_of_week = [x.lower() for x in self.locale.day_abbreviations].index(
value.lower()
)
parts["day_of_week"] = day_of_week - 1
elif token == 'S': elif token.upper() in ["HH", "H"]:
parts["hour"] = int(value)
elif token in ["mm", "m"]:
parts["minute"] = int(value)
elif token in ["ss", "s"]:
parts["second"] = int(value)
elif token == "S":
# We have the *most significant* digits of an arbitrary-precision integer. # We have the *most significant* digits of an arbitrary-precision integer.
# We want the six most significant digits as an integer, rounded. # We want the six most significant digits as an integer, rounded.
# FIXME: add nanosecond support somehow? # IDEA: add nanosecond support somehow? Need datetime support for it first.
value = value.ljust(7, str('0')) value = value.ljust(7, "0")
# floating-point (IEEE-754) defaults to half-to-even rounding # floating-point (IEEE-754) defaults to half-to-even rounding
seventh_digit = int(value[6]) seventh_digit = int(value[6])
@ -210,119 +560,220 @@ class DateTimeParser(object):
else: else:
rounding = 0 rounding = 0
parts['microsecond'] = int(value[:6]) + rounding parts["microsecond"] = int(value[:6]) + rounding
elif token == 'X': elif token == "X":
parts['timestamp'] = int(value) parts["timestamp"] = float(value)
elif token in ['ZZZ', 'ZZ', 'Z']: elif token == "x":
parts['tzinfo'] = TzinfoParser.parse(value) parts["expanded_timestamp"] = int(value)
elif token in ['a', 'A']: elif token in ["ZZZ", "ZZ", "Z"]:
if value in ( parts["tzinfo"] = TzinfoParser.parse(value)
self.locale.meridians['am'],
self.locale.meridians['AM'] elif token in ["a", "A"]:
): if value in (self.locale.meridians["am"], self.locale.meridians["AM"]):
parts['am_pm'] = 'am' parts["am_pm"] = "am"
elif value in ( if "hour" in parts and not 0 <= parts["hour"] <= 12:
self.locale.meridians['pm'], raise ParserMatchError(
self.locale.meridians['PM'] f"Hour token value must be between 0 and 12 inclusive for token {token!r}."
): )
parts['am_pm'] = 'pm' elif value in (self.locale.meridians["pm"], self.locale.meridians["PM"]):
parts["am_pm"] = "pm"
elif token == "W":
parts["weekdate"] = value
@staticmethod
def _build_datetime(parts: _Parts) -> datetime:
    """Assemble a ``datetime`` from the components collected while parsing.

    Handling order: an ISO week date is resolved to year/month/day; an epoch
    timestamp (plain or expanded) returns a UTC datetime immediately; a day of
    year is resolved to month/day; a day of week is used only when no day was
    given; finally am/pm, end-of-day midnight (hour 24) and microsecond
    rounding are applied. Missing date fields default to 1, time fields to 0.

    :param parts: the parsed components; date fields derived here are written
        back into this mapping.
    :return: the resulting ``datetime``.
    :raises ParserError: for an invalid ISO week date or day of year, a day
        of year without a year or combined with a month, or an hour of 24
        with non-zero minutes, seconds or microseconds.
    """
    weekdate = parts.get("weekdate")

    if weekdate is not None:
        year, week = int(weekdate[0]), int(weekdate[1])

        if weekdate[2] is not None:
            _day = int(weekdate[2])
        else:
            # day not given, default to 1
            _day = 1

        date_string = f"{year}-{week}-{_day}"

        # tokens for ISO 8601 weekdates
        try:
            dt = datetime.strptime(date_string, "%G-%V-%u")
        except ValueError as e:
            # Report out-of-range weeks/days the same way as the day-of-year
            # branch below instead of leaking a bare ValueError.
            raise ParserError(
                f"The provided ISO week date (year {year}, week {week}, day {_day}) is invalid."
            ) from e

        parts["year"] = dt.year
        parts["month"] = dt.month
        parts["day"] = dt.day

    timestamp = parts.get("timestamp")

    if timestamp is not None:
        return datetime.fromtimestamp(timestamp, tz=tz.tzutc())

    expanded_timestamp = parts.get("expanded_timestamp")

    if expanded_timestamp is not None:
        return datetime.fromtimestamp(
            normalize_timestamp(expanded_timestamp),
            tz=tz.tzutc(),
        )

    day_of_year = parts.get("day_of_year")

    if day_of_year is not None:
        _year = parts.get("year")
        month = parts.get("month")
        if _year is None:
            raise ParserError(
                "Year component is required with the DDD and DDDD tokens."
            )

        if month is not None:
            raise ParserError(
                "Month component is not allowed with the DDD and DDDD tokens."
            )

        date_string = f"{_year}-{day_of_year}"
        try:
            dt = datetime.strptime(date_string, "%Y-%j")
        except ValueError as e:
            raise ParserError(
                f"The provided day of year {day_of_year!r} is invalid."
            ) from e

        parts["year"] = dt.year
        parts["month"] = dt.month
        parts["day"] = dt.day

    day_of_week: Optional[int] = parts.get("day_of_week")
    day = parts.get("day")

    # If day is passed, ignore day of week
    if day_of_week is not None and day is None:
        year = parts.get("year", 1970)
        month = parts.get("month", 1)
        day = 1

        # dddd => first day of week after epoch
        # dddd YYYY => first day of week in specified year
        # dddd MM YYYY => first day of week in specified year and month
        # dddd MM => first day after epoch in specified month
        next_weekday_dt = next_weekday(datetime(year, month, day), day_of_week)
        parts["year"] = next_weekday_dt.year
        parts["month"] = next_weekday_dt.month
        parts["day"] = next_weekday_dt.day

    am_pm = parts.get("am_pm")
    hour = parts.get("hour", 0)

    if am_pm == "pm" and hour < 12:
        hour += 12
    elif am_pm == "am" and hour == 12:
        hour = 0

    # Support for midnight at the end of day
    if hour == 24:
        if parts.get("minute", 0) != 0:
            raise ParserError("Midnight at the end of day must not contain minutes")
        if parts.get("second", 0) != 0:
            raise ParserError("Midnight at the end of day must not contain seconds")
        if parts.get("microsecond", 0) != 0:
            raise ParserError(
                "Midnight at the end of day must not contain microseconds"
            )
        hour = 0
        day_increment = 1
    else:
        day_increment = 0

    # account for rounding up to 1000000
    microsecond = parts.get("microsecond", 0)
    if microsecond == 1000000:
        microsecond = 0
        second_increment = 1
    else:
        second_increment = 0

    # Carries are applied as a timedelta so they roll over correctly.
    increment = timedelta(days=day_increment, seconds=second_increment)

    return (
        datetime(
            year=parts.get("year", 1),
            month=parts.get("month", 1),
            day=parts.get("day", 1),
            hour=hour,
            minute=parts.get("minute", 0),
            second=parts.get("second", 0),
            microsecond=microsecond,
            tzinfo=parts.get("tzinfo"),
        )
        + increment
    )
def _parse_multiformat(self, string: str, formats: Iterable[str]) -> datetime:
    """Parse ``string`` with the first of ``formats`` that matches.

    :param string: the string to parse.
    :param formats: candidate format strings, tried in order.
    :return: the ``datetime`` produced by the first matching format.
    :raises ParserError: if no format matches. Errors from ``parse()`` other
        than ``ParserMatchError`` are not caught here and propagate.
    """
    # Remember what was tried: ``formats`` may be a one-shot iterable, which
    # could not be iterated a second time to build the error message.
    attempted: List[str] = []

    for fmt in formats:
        attempted.append(fmt)
        try:
            return self.parse(string, fmt)
        except ParserMatchError:
            # Only match failures mean "try the next format".
            continue

    supported_formats = ", ".join(attempted)
    raise ParserError(
        f"Could not match input {string!r} to any of the following formats: {supported_formats}."
    )
# generates a capture group of choices separated by an OR operator
@staticmethod @staticmethod
def _map_lookup(input_map, key): def _generate_choice_re(
choices: Iterable[str], flags: Union[int, re.RegexFlag] = 0
try: ) -> Pattern[str]:
return input_map[key] return re.compile(r"({})".format("|".join(choices)), flags=flags)
except KeyError:
raise ParserError('Could not match "{0}" to {1}'.format(key, input_map))
@staticmethod
def _try_timestamp(string):
try:
return float(string)
except:
return None
@staticmethod
def _choice_re(choices, flags=0):
return re.compile('({0})'.format('|'.join(choices)), flags=flags)
class TzinfoParser:
    """Turn a timezone expression into a ``tzinfo`` object."""

    # Optional sign, two-digit hours, optional two-digit minutes with an
    # optional colon in between; the whole string must match.
    _TZINFO_RE: ClassVar[Pattern[str]] = re.compile(
        r"^([\+\-])?(\d{2})(?:\:?(\d{2}))?$"
    )

    @classmethod
    def parse(cls, tzinfo_string: str) -> dt_tzinfo:
        """Parse ``tzinfo_string`` into a timezone.

        Accepted forms, checked in this order: the literal ``"local"``; one of
        ``"utc"``, ``"UTC"`` or ``"Z"``; a numeric offset matching
        ``_TZINFO_RE``; anything else is looked up with ``tz.gettz``.

        :param tzinfo_string: the timezone expression.
        :return: the matching ``tzinfo``.
        :raises ParserError: if ``tz.gettz`` cannot resolve the expression.
        """
        if tzinfo_string == "local":
            return tz.tzlocal()

        if tzinfo_string in ["utc", "UTC", "Z"]:
            return tz.tzutc()

        offset_match = cls._TZINFO_RE.match(tzinfo_string)
        if offset_match:
            sign: Optional[str]
            hours: str
            minutes: Optional[str]
            sign, hours, minutes = offset_match.groups()
            # Missing minutes count as zero.
            offset_seconds = int(hours) * 3600 + int(minutes or 0) * 60
            if sign == "-":
                offset_seconds *= -1
            return tz.tzoffset(None, offset_seconds)

        named_zone: Optional[dt_tzinfo] = tz.gettz(tzinfo_string)
        if named_zone is None:
            raise ParserError(f"Could not parse timezone expression {tzinfo_string!r}.")
        return named_zone

0
lib/arrow/py.typed Normal file
View file

View file

@ -1,47 +1,117 @@
# -*- coding: utf-8 -*- """Helpful functions used internally within arrow."""
from __future__ import absolute_import
import sys import datetime
from typing import Any, Optional, cast
# python 2.6 / 2.7 definitions for total_seconds function. from dateutil.rrule import WEEKLY, rrule
def _total_seconds_27(td): # pragma: no cover from arrow.constants import (
return td.total_seconds() MAX_ORDINAL,
MAX_TIMESTAMP,
def _total_seconds_26(td): MAX_TIMESTAMP_MS,
return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 1e6) / 1e6 MAX_TIMESTAMP_US,
MIN_ORDINAL,
)
def next_weekday(
    start_date: Optional[datetime.date], weekday: int
) -> datetime.datetime:
    """Return the first occurrence of ``weekday`` starting from ``start_date``.

    The start date itself is returned when it already falls on the requested
    weekday (see the Thursday example below).

    :param start_date: Datetime object representing the start date.
    :param weekday: Weekday to look for, from 0 (Monday) to 6 (Sunday).
    :return: Datetime object for the first matching weekday.
    :raises ValueError: if ``weekday`` is below 0 or above 6.

    Usage::

        # Get first Monday after epoch
        >>> next_weekday(datetime(1970, 1, 1), 0)
        1970-01-05 00:00:00

        # Get first Thursday after epoch
        >>> next_weekday(datetime(1970, 1, 1), 3)
        1970-01-01 00:00:00

        # Get first Sunday after epoch
        >>> next_weekday(datetime(1970, 1, 1), 6)
        1970-01-04 00:00:00
    """
    if weekday < 0 or weekday > 6:
        raise ValueError("Weekday must be between 0 (Monday) and 6 (Sunday).")

    # A weekly rule limited to a single occurrence on the wanted weekday.
    occurrences = rrule(freq=WEEKLY, dtstart=start_date, byweekday=weekday, count=1)
    first_occurrence = occurrences[0]
    return cast(datetime.datetime, first_occurrence)
def is_timestamp(value: Any) -> bool:
    """Check if value is a valid timestamp.

    :param value: the input to be checked.
    :return: ``True`` if ``value`` is an ``int``, ``float`` or ``str`` (but
        not a ``bool``) that ``float()`` can convert, ``False`` otherwise.
    """
    if isinstance(value, bool):
        return False
    if not isinstance(value, (int, float, str)):
        return False
    try:
        float(value)
    except (ValueError, OverflowError):
        # ValueError: non-numeric string. OverflowError: an int too large to
        # be represented as a float, which cannot be a timestamp either.
        return False
    return True
# python 2.7 / 3.0+ definitions for isstr function.
def validate_ordinal(value: Any) -> None:
    """Raise an exception if value is an invalid Gregorian ordinal.

    :param value: the input to be checked
    :raises TypeError: if ``value`` is a ``bool`` or not an ``int``.
    :raises ValueError: if ``value`` lies outside ``MIN_ORDINAL`` to
        ``MAX_ORDINAL``.

    """
    # bool is a subclass of int, so it has to be ruled out explicitly.
    is_plain_int = isinstance(value, int) and not isinstance(value, bool)
    if not is_plain_int:
        raise TypeError(f"Ordinal must be an integer (got type {type(value)}).")

    if value < MIN_ORDINAL or value > MAX_ORDINAL:
        raise ValueError(f"Ordinal {value} is out of range.")
def normalize_timestamp(timestamp: float) -> float:
    """Normalize millisecond and microsecond timestamps into normal timestamps.

    :param timestamp: a timestamp in seconds, milliseconds or microseconds.
    :return: ``timestamp`` unchanged when it does not exceed
        ``MAX_TIMESTAMP``, otherwise scaled down by 1000 or 1_000_000.
    :raises ValueError: if the value is not below ``MAX_TIMESTAMP_US``.
    """
    # Values that do not exceed the largest second-based timestamp pass through.
    if not timestamp > MAX_TIMESTAMP:
        return timestamp

    if timestamp < MAX_TIMESTAMP_MS:
        return timestamp / 1000

    if timestamp < MAX_TIMESTAMP_US:
        return timestamp / 1_000_000

    raise ValueError(f"The specified timestamp {timestamp!r} is too large.")
# Credit to https://stackoverflow.com/a/1700069
def iso_to_gregorian(iso_year: int, iso_week: int, iso_day: int) -> datetime.date:
    """Return the Gregorian date for an ISO week date.

    :param iso_year: the year
    :param iso_week: the week number, each year has either 52 or 53 weeks
    :param iso_day: the day numbered 1 through 7, beginning with Monday
    :raises ValueError: if the week is not within 1-53 or the day not within 1-7.

    """
    if not 1 <= iso_week <= 53:
        raise ValueError("ISO Calendar week value must be between 1-53.")

    if not 1 <= iso_day <= 7:
        raise ValueError("ISO Calendar day value must be between 1-7")

    # The first week of the year always contains 4 Jan, so the Monday of
    # that week is the start of the ISO year.
    jan_fourth = datetime.date(iso_year, 1, 4)
    first_monday = jan_fourth - datetime.timedelta(days=jan_fourth.weekday())

    offset = datetime.timedelta(weeks=iso_week - 1, days=iso_day - 1)
    return first_monday + offset
def validate_bounds(bounds: str) -> None:
    """Raise an exception if ``bounds`` is not a valid bounds expression.

    :param bounds: one of ``'()'``, ``'(]'``, ``'[)'`` or ``'[]'``.
    :raises ValueError: for any other value.
    """
    allowed = ("()", "(]", "[)", "[]")
    if bounds not in allowed:
        raise ValueError(
            "Invalid bounds. Please select between '()', '(]', '[)', or '[]'."
        )
# Public helpers of this module. normalize_timestamp and validate_bounds are
# listed as well: both are defined here as public functions, and
# normalize_timestamp is imported by the parser.
__all__ = [
    "next_weekday",
    "is_timestamp",
    "validate_ordinal",
    "normalize_timestamp",
    "iso_to_gregorian",
    "validate_bounds",
]