Merge branch 'nightly' into dependabot/pip/nightly/requests-2.31.0

Commit 127f2c0321 by JonnyWong16, 2023-08-23 21:39:52 -07:00 (committed by GitHub).
109 changed files with 2801 additions and 1322 deletions.


@@ -10,7 +10,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Stale
uses: actions/stale@v7
uses: actions/stale@v8
with:
stale-issue-message: >
This issue is stale because it has been open for 30 days with no activity.
@@ -30,7 +30,7 @@ jobs:
days-before-close: 5
- name: Invalid Template
uses: actions/stale@v7
uses: actions/stale@v8
with:
stale-issue-message: >
Invalid issues template.


@@ -95,7 +95,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Get Build Job Status
uses: technote-space/workflow-conclusion-action@v3.0
uses: technote-space/workflow-conclusion-action@v3
- name: Combine Job Status
id: status


@@ -68,7 +68,7 @@ jobs:
pyinstaller -y ./package/Tautulli-${{ matrix.os }}.spec
- name: Create Windows Installer
uses: joncloud/makensis-action@v3.7
uses: joncloud/makensis-action@v4
if: matrix.os == 'windows'
with:
script-file: ./package/Tautulli.nsi
@@ -100,10 +100,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Get Build Job Status
uses: technote-space/workflow-conclusion-action@v3.0
uses: technote-space/workflow-conclusion-action@v3
- name: Checkout Code
uses: actions/checkout@v3.2.0
uses: actions/checkout@v3
- name: Set Release Version
id: get_version
@@ -168,7 +168,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Get Build Job Status
uses: technote-space/workflow-conclusion-action@v3.0
uses: technote-space/workflow-conclusion-action@v3
- name: Combine Job Status
id: status


@@ -70,7 +70,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Get Build Job Status
uses: technote-space/workflow-conclusion-action@v3.0
uses: technote-space/workflow-conclusion-action@v3
- name: Combine Job Status
id: status


@@ -18,7 +18,6 @@ jobs:
with:
message: Pull requests must be made to the `nightly` branch. Thanks.
repo-token: ${{ secrets.GITHUB_TOKEN }}
repo-token-user-login: 'github-actions[bot]'
- name: Fail Workflow
if: github.base_ref != 'nightly'


@@ -1,5 +1,33 @@
# Changelog
## v2.12.5 (2023-07-13)
* Activity:
* New: Added d3d11va to list of hardware decoders.
* History:
* Fix: Incorrect grouping of play history.
* New: Added button in settings to regroup play history.
* Notifications:
* Fix: Incorrect concurrent streams notifications by IP address for IPv6 addresses (#2096) (Thanks @pooley182)
* UI:
* Fix: Occasional UI crashing on Python 3.11.
* New: Added multiselect user filters to History and Graphs pages. (#2090) (Thanks @zdimension)
* API:
* New: Added regroup_history API command.
* Change: Updated graph API commands to accept a comma separated list of user IDs.
## v2.12.4 (2023-05-23)
* History:
* Fix: Set view offset equal to duration if a stream is stopped within the last 10 sec.
* Other:
* Fix: Database import may fail for some older databases.
* Fix: Double-quoted strings for newer versions of SQLite. (#2015, #2057)
* API:
* Change: Return the ID for async API calls (export_metadata, notify, notify_newsletter).
## v2.12.3 (2023-04-14)
* Activity:
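The v2.12.5 API changes listed above can be exercised directly against Tautulli's /api/v2 endpoint. A minimal sketch, assuming a local instance; the host, port, and API key are placeholders, and get_plays_by_date stands in for any of the graph commands:

import requests

# Placeholders: point these at your own Tautulli instance.
BASE = "http://localhost:8181/api/v2"
params = {
    "apikey": "YOUR_API_KEY",
    "cmd": "get_plays_by_date",
    "user_id": "1,2,3",  # graph commands now accept a comma-separated list of user IDs
}
data = requests.get(BASE, params=params).json()
print(data["response"]["result"])  # 'success' on a healthy call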

File diff suppressed because one or more lines are too long


@@ -2914,7 +2914,7 @@ a .home-platforms-list-cover-face:hover
margin-bottom: -20px;
width: 100%;
max-width: 1750px;
overflow: hidden;
display: flow-root;
}
.table-card-back td {
font-size: 12px;


@@ -1,6 +1,7 @@
<%inherit file="base.html"/>
<%def name="headIncludes()">
<link rel="stylesheet" href="${http_root}css/bootstrap-select.min.css">
<link rel="stylesheet" href="${http_root}css/dataTables.bootstrap.min.css">
<link rel="stylesheet" href="${http_root}css/tautulli-dataTables.css">
</%def>
@@ -14,9 +15,7 @@
<div class="button-bar">
<div class="btn-group" id="user-selection">
<label>
<select name="graph-user" id="graph-user" class="btn" style="color: inherit;">
<option value="">All Users</option>
<option disabled>&#9472;&#9472;&#9472;&#9472;&#9472;&#9472;&#9472;&#9472;&#9472;&#9472;&#9472;&#9472;</option>
<select name="graph-user" id="graph-user" multiple>
</select>
</label>
</div>
@@ -225,6 +224,7 @@
</%def>
<%def name="javascriptIncludes()">
<script src="${http_root}js/bootstrap-select.min.js"></script>
<script src="${http_root}js/highcharts.min.js"></script>
<script src="${http_root}js/jquery.dataTables.min.js"></script>
<script src="${http_root}js/dataTables.bootstrap.min.js"></script>
@@ -373,14 +373,35 @@
type: 'get',
dataType: "json",
success: function (data) {
var select = $('#graph-user');
let select = $('#graph-user');
let by_id = {};
data.sort(function(a, b) {
return a.friendly_name.localeCompare(b.friendly_name);
});
data.forEach(function(item) {
select.append('<option value="' + item.user_id + '">' +
item.friendly_name + '</option>');
by_id[item.user_id] = item.friendly_name;
});
select.selectpicker({
countSelectedText: function(sel, total) {
if (sel === 0 || sel === total) {
return 'All users';
} else if (sel > 1) {
return sel + ' users';
} else {
return select.val().map(function(id) {
return by_id[id];
}).join(', ');
}
},
style: 'btn-dark',
actionsBox: true,
selectedTextFormat: 'count',
noneSelectedText: 'All users'
});
select.selectpicker('render');
select.selectpicker('selectAll');
}
});
@@ -602,11 +623,6 @@
$('#nav-tabs-total').tab('show');
}
// Set initial state
if (current_tab === '#tabs-plays') { loadGraphsTab1(current_day_range, yaxis); }
if (current_tab === '#tabs-stream') { loadGraphsTab2(current_day_range, yaxis); }
if (current_tab === '#tabs-total') { loadGraphsTab3(current_month_range, yaxis); }
// Tab1 opened
$('#nav-tabs-plays').on('shown.bs.tab', function (e) {
e.preventDefault();
@@ -652,9 +668,20 @@
$('.months').text(current_month_range);
});
let graph_user_last_id = undefined;
// User changed
$('#graph-user').on('change', function() {
selected_user_id = $(this).val() || null;
let val = $(this).val();
if (val.length === 0 || val.length === $(this).children().length) {
selected_user_id = null; // if all users are selected, just send an empty list
} else {
selected_user_id = val.join(",");
}
if (selected_user_id === graph_user_last_id) {
return;
}
graph_user_last_id = selected_user_id;
if (current_tab === '#tabs-plays') { loadGraphsTab1(current_day_range, yaxis); }
if (current_tab === '#tabs-stream') { loadGraphsTab2(current_day_range, yaxis); }
if (current_tab === '#tabs-total') { loadGraphsTab3(current_month_range, yaxis); }
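The handler above collapses the multiselect into either null (meaning all users) or a comma-joined ID string, and bails out early when the effective selection has not changed, so the graphs are not reloaded needlessly. The same normalization as a standalone Python sketch (illustrative only; this helper is not part of the commit):

def normalize_user_selection(selected, all_ids):
    """Return None when nothing or everything is selected, else 'id1,id2,...'."""
    if not selected or len(selected) == len(all_ids):
        return None  # treated server-side as "all users"
    return ",".join(str(i) for i in selected)

assert normalize_user_selection([], [1, 2, 3]) is None
assert normalize_user_selection([1, 3], [1, 2, 3]) == "1,3"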


@@ -1,6 +1,7 @@
<%inherit file="base.html"/>
<%def name="headIncludes()">
<link rel="stylesheet" href="${http_root}css/bootstrap-select.min.css">
<link rel="stylesheet" href="${http_root}css/dataTables.bootstrap.min.css">
<link rel="stylesheet" href="${http_root}css/dataTables.colVis.css">
<link rel="stylesheet" href="${http_root}css/tautulli-dataTables.css">
@@ -31,9 +32,7 @@
% if _session['user_group'] == 'admin':
<div class="btn-group" id="user-selection">
<label>
<select name="history-user" id="history-user" class="btn" style="color: inherit;">
<option value="">All Users</option>
<option disabled>&#9472;&#9472;&#9472;&#9472;&#9472;&#9472;&#9472;&#9472;&#9472;&#9472;&#9472;&#9472;</option>
<select name="history-user" id="history-user" multiple>
</select>
</label>
</div>
@@ -121,6 +120,7 @@
</%def>
<%def name="javascriptIncludes()">
<script src="${http_root}js/bootstrap-select.min.js"></script>
<script src="${http_root}js/jquery.dataTables.min.js"></script>
<script src="${http_root}js/dataTables.colVis.js"></script>
<script src="${http_root}js/dataTables.bootstrap.min.js"></script>
@@ -134,17 +134,40 @@
type: 'GET',
dataType: 'json',
success: function (data) {
var select = $('#history-user');
let select = $('#history-user');
let by_id = {};
data.sort(function (a, b) {
return a.friendly_name.localeCompare(b.friendly_name);
});
data.forEach(function (item) {
select.append('<option value="' + item.user_id + '">' +
item.friendly_name + '</option>');
by_id[item.user_id] = item.friendly_name;
});
select.selectpicker({
countSelectedText: function(sel, total) {
if (sel === 0 || sel === total) {
return 'All users';
} else if (sel > 1) {
return sel + ' users';
} else {
return select.val().map(function(id) {
return by_id[id];
}).join(', ');
}
},
style: 'btn-dark',
actionsBox: true,
selectedTextFormat: 'count',
noneSelectedText: 'All users'
});
select.selectpicker('render');
select.selectpicker('selectAll');
}
});
let history_user_last_id = undefined;
function loadHistoryTable(media_type, transcode_decision, selected_user_id) {
history_table_options.ajax = {
url: 'get_history',
@@ -187,7 +210,16 @@
});
$('#history-user').on('change', function () {
selected_user_id = $(this).val() || null;
let val = $(this).val();
if (val.length === 0 || val.length === $(this).children().length) {
selected_user_id = null; // if all users are selected, just send an empty list
} else {
selected_user_id = val.join(",");
}
if (selected_user_id === history_user_last_id) {
return;
}
history_user_last_id = selected_user_id;
history_table.draw();
});
}

File diff suppressed because one or more lines are too long


@@ -132,12 +132,6 @@
</label>
<p class="help-block">Change the "<em>Play by day of week</em>" graph to start on Monday. Default is start on Sunday.</p>
</div>
<div class="checkbox advanced-setting">
<label>
<input type="checkbox" id="group_history_tables" name="group_history_tables" value="1" ${config['group_history_tables']}> Group Play History
</label>
<p class="help-block">Group play history for the same item and user as a single entry when progress is less than the watched percent.</p>
</div>
<div class="checkbox advanced-setting">
<label>
<input type="checkbox" id="history_table_activity" name="history_table_activity" value="1" ${config['history_table_activity']}> Current Activity in History Tables
@@ -227,6 +221,25 @@
</div>
<p class="help-block">Decide whether to use end credits markers to determine the 'watched' state of video items. When markers are not available the selected threshold percentage will be used.</p>
</div>
<div class="checkbox advanced-setting">
<label>
<input type="checkbox" id="group_history_tables" name="group_history_tables" value="1" ${config['group_history_tables']}> Group Play History
</label>
<p class="help-block">Group play history for the same item and user as a single entry when progress is less than the watched percent.</p>
</div>
<div class="form-group advanced-setting">
<label>Regroup Play History</label>
<p class="help-block">
Fix grouping of play history in the database.<br />
</p>
<div class="row">
<div class="col-md-4">
<div class="btn-group">
<button class="btn btn-form" type="button" id="regroup_history">Regroup</button>
</div>
</div>
</div>
</div>
<div class="form-group advanced-setting">
<label>Flush Temporary Sessions</label>
<p class="help-block">
@@ -2484,6 +2497,12 @@ $(document).ready(function() {
confirmAjaxCall(url, msg);
});
$("#regroup_history").click(function () {
var msg = 'Are you sure you want to regroup play history in the database?<br /><br /><strong>This may take a long time for large databases.<br />Regrouping will continue in the background.</strong>';
var url = 'regroup_history';
confirmAjaxCall(url, msg);
});
$("#delete_temp_sessions").click(function () {
var msg = 'Are you sure you want to flush the temporary sessions?<br /><br /><strong>This will reset all currently active sessions.</strong>';
var url = 'delete_temp_sessions';
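The Regroup button above wires a confirmation dialog to the new regroup_history endpoint, and the changelog exposes the same operation as an API command. A sketch of triggering it remotely; host and API key are placeholders:

import requests

requests.get("http://localhost:8181/api/v2", params={
    "apikey": "YOUR_API_KEY",
    "cmd": "regroup_history",  # regrouping continues in the background
})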


@@ -15,7 +15,7 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
"""
__author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "4.11.2"
__version__ = "4.12.2"
__copyright__ = "Copyright (c) 2004-2023 Leonard Richardson"
# Use of this source code is governed by the MIT license.
__license__ = "MIT"
@@ -38,11 +38,13 @@ from .builder import (
builder_registry,
ParserRejectedMarkup,
XMLParsedAsHTMLWarning,
HTMLParserTreeBuilder
)
from .dammit import UnicodeDammit
from .element import (
CData,
Comment,
CSS,
DEFAULT_OUTPUT_ENCODING,
Declaration,
Doctype,
@@ -116,7 +118,7 @@ class BeautifulSoup(Tag):
ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"
def __init__(self, markup="", features=None, builder=None,
parse_only=None, from_encoding=None, exclude_encodings=None,
element_classes=None, **kwargs):
@@ -348,25 +350,49 @@ class BeautifulSoup(Tag):
self.markup = None
self.builder.soup = None
def __copy__(self):
"""Copy a BeautifulSoup object by converting the document to a string and parsing it again."""
copy = type(self)(
self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
)
def _clone(self):
"""Create a new BeautifulSoup object with the same TreeBuilder,
but not associated with any markup.
# Although we encoded the tree to UTF-8, that may not have
# been the encoding of the original markup. Set the copy's
# .original_encoding to reflect the original object's
# .original_encoding.
copy.original_encoding = self.original_encoding
return copy
This is the first step of the deepcopy process.
"""
clone = type(self)("", None, self.builder)
# Keep track of the encoding of the original document,
# since we won't be parsing it again.
clone.original_encoding = self.original_encoding
return clone
def __getstate__(self):
# Frequently a tree builder can't be pickled.
d = dict(self.__dict__)
if 'builder' in d and d['builder'] is not None and not self.builder.picklable:
d['builder'] = None
d['builder'] = type(self.builder)
# Store the contents as a Unicode string.
d['contents'] = []
d['markup'] = self.decode()
# If _most_recent_element is present, it's a Tag object left
# over from initial parse. It might not be picklable and we
# don't need it.
if '_most_recent_element' in d:
del d['_most_recent_element']
return d
def __setstate__(self, state):
# If necessary, restore the TreeBuilder by looking it up.
self.__dict__ = state
if isinstance(self.builder, type):
self.builder = self.builder()
elif not self.builder:
# We don't know which builder was used to build this
# parse tree, so use a default we know is always available.
self.builder = HTMLParserTreeBuilder()
self.builder.soup = self
self.reset()
self._feed()
return state
@classmethod
def _decode_markup(cls, markup):
@@ -468,6 +494,7 @@ class BeautifulSoup(Tag):
self.open_tag_counter = Counter()
self.preserve_whitespace_tag_stack = []
self.string_container_stack = []
self._most_recent_element = None
self.pushTag(self)
def new_tag(self, name, namespace=None, nsprefix=None, attrs={},
@@ -749,7 +776,7 @@ class BeautifulSoup(Tag):
def decode(self, pretty_print=False,
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
formatter="minimal"):
formatter="minimal", iterator=None):
"""Returns a string or Unicode representation of the parse tree
as an HTML or XML document.
@@ -776,7 +803,7 @@
else:
indent_level = 0
return prefix + super(BeautifulSoup, self).decode(
indent_level, eventual_encoding, formatter)
indent_level, eventual_encoding, formatter, iterator)
# Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup'
_s = BeautifulSoup
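The new __getstate__/__setstate__ pair above makes a BeautifulSoup object picklable even when its tree builder is not: the document is stored as markup plus the builder's class, and unpickling re-parses it (falling back to HTMLParserTreeBuilder when the builder is unknown). A quick round-trip check, assuming bs4 4.12:

import pickle
from bs4 import BeautifulSoup

soup = BeautifulSoup("<p>penguin</p>", "html.parser")
restored = pickle.loads(pickle.dumps(soup))
assert str(restored) == str(soup)  # same document, freshly re-parsed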


@@ -24,6 +24,7 @@ from bs4.dammit import EntitySubstitution, UnicodeDammit
from bs4.builder import (
DetectsXMLParsedAsHTML,
ParserRejectedMarkup,
HTML,
HTMLTreeBuilder,
STRICT,
@@ -70,6 +71,22 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
self._initialize_xml_detector()
def error(self, message):
# NOTE: This method is required so long as Python 3.9 is
# supported. The corresponding code is removed from HTMLParser
# in 3.5, but not removed from ParserBase until 3.10.
# https://github.com/python/cpython/issues/76025
#
# The original implementation turned the error into a warning,
# but in every case I discovered, this made HTMLParser
# immediately crash with an error message that was less
# helpful than the warning. The new implementation makes it
# more clear that html.parser just can't parse this
# markup. The 3.10 implementation does the same, though it
# raises AssertionError rather than calling a method. (We
# catch this error and wrap it in a ParserRejectedMarkup.)
raise ParserRejectedMarkup(message)
def handle_startendtag(self, name, attrs):
"""Handle an incoming empty-element tag.
@@ -359,6 +376,12 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
args, kwargs = self.parser_args
parser = BeautifulSoupHTMLParser(*args, **kwargs)
parser.soup = self.soup
parser.feed(markup)
try:
parser.feed(markup)
except AssertionError as e:
# html.parser raises AssertionError in rare cases to
# indicate a fatal problem with the markup, especially
# when there's an error in the doctype declaration.
raise ParserRejectedMarkup(e)
parser.close()
parser.already_closed_empty_element = []
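Both the new error() method and the AssertionError wrapper above funnel fatal html.parser failures into ParserRejectedMarkup, which BeautifulSoup propagates when no builder can handle the markup. Defensive parsing then looks something like this sketch (whether a given input actually triggers it depends on the parser):

from bs4 import BeautifulSoup
from bs4.builder import ParserRejectedMarkup

def parse_or_none(markup):
    try:
        return BeautifulSoup(markup, "html.parser")
    except ParserRejectedMarkup:
        return None  # markup this parser cannot handle at all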

lib/bs4/css.py (new file, 280 lines)

@@ -0,0 +1,280 @@
"""Integration code for CSS selectors using Soup Sieve (pypi: soupsieve)."""
import warnings
try:
import soupsieve
except ImportError as e:
soupsieve = None
warnings.warn(
'The soupsieve package is not installed. CSS selectors cannot be used.'
)
class CSS(object):
"""A proxy object against the soupsieve library, to simplify its
CSS selector API.
Acquire this object through the .css attribute on the
BeautifulSoup object, or on the Tag you want to use as the
starting point for a CSS selector.
The main advantage of doing this is that the tag to be selected
against doesn't need to be explicitly specified in the function
calls, since it's already scoped to a tag.
"""
def __init__(self, tag, api=soupsieve):
"""Constructor.
You don't need to instantiate this class yourself; instead,
access the .css attribute on the BeautifulSoup object, or on
the Tag you want to use as the starting point for your CSS
selector.
:param tag: All CSS selectors will use this as their starting
point.
:param api: A plug-in replacement for the soupsieve module,
designed mainly for use in tests.
"""
if api is None:
raise NotImplementedError(
"Cannot execute CSS selectors because the soupsieve package is not installed."
)
self.api = api
self.tag = tag
def escape(self, ident):
"""Escape a CSS identifier.
This is a simple wrapper around soupsieve.escape(). See the
documentation for that function for more information.
"""
if soupsieve is None:
raise NotImplementedError(
"Cannot escape CSS identifiers because the soupsieve package is not installed."
)
return self.api.escape(ident)
def _ns(self, ns, select):
"""Normalize a dictionary of namespaces."""
if not isinstance(select, self.api.SoupSieve) and ns is None:
# If the selector is a precompiled pattern, it already has
# a namespace context compiled in, which cannot be
# replaced.
ns = self.tag._namespaces
return ns
def _rs(self, results):
"""Normalize a list of results to a Resultset.
A ResultSet is more consistent with the rest of Beautiful
Soup's API, and ResultSet.__getattr__ has a helpful error
message if you try to treat a list of results as a single
result (a common mistake).
"""
# Import here to avoid circular import
from bs4.element import ResultSet
return ResultSet(None, results)
def compile(self, select, namespaces=None, flags=0, **kwargs):
"""Pre-compile a selector and return the compiled object.
:param selector: A CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will use the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.compile() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.compile() method.
:return: A precompiled selector object.
:rtype: soupsieve.SoupSieve
"""
return self.api.compile(
select, self._ns(namespaces, select), flags, **kwargs
)
def select_one(self, select, namespaces=None, flags=0, **kwargs):
"""Perform a CSS selection operation on the current Tag and return the
first result.
This uses the Soup Sieve library. For more information, see
that library's documentation for the soupsieve.select_one()
method.
:param selector: A CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will use the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.select_one() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.select_one() method.
:return: A Tag, or None if the selector has no match.
:rtype: bs4.element.Tag
"""
return self.api.select_one(
select, self.tag, self._ns(namespaces, select), flags, **kwargs
)
def select(self, select, namespaces=None, limit=0, flags=0, **kwargs):
"""Perform a CSS selection operation on the current Tag.
This uses the Soup Sieve library. For more information, see
that library's documentation for the soupsieve.select()
method.
:param selector: A string containing a CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param limit: After finding this number of results, stop looking.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.select() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.select() method.
:return: A ResultSet of Tag objects.
:rtype: bs4.element.ResultSet
"""
if limit is None:
limit = 0
return self._rs(
self.api.select(
select, self.tag, self._ns(namespaces, select), limit, flags,
**kwargs
)
)
def iselect(self, select, namespaces=None, limit=0, flags=0, **kwargs):
"""Perform a CSS selection operation on the current Tag.
This uses the Soup Sieve library. For more information, see
that library's documentation for the soupsieve.iselect()
method. It is the same as select(), but it returns a generator
instead of a list.
:param selector: A string containing a CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param limit: After finding this number of results, stop looking.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.iselect() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.iselect() method.
:return: A generator
:rtype: types.GeneratorType
"""
return self.api.iselect(
select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
)
def closest(self, select, namespaces=None, flags=0, **kwargs):
"""Find the Tag closest to this one that matches the given selector.
This uses the Soup Sieve library. For more information, see
that library's documentation for the soupsieve.closest()
method.
:param selector: A string containing a CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.closest() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.closest() method.
:return: A Tag, or None if there is no match.
:rtype: bs4.Tag
"""
return self.api.closest(
select, self.tag, self._ns(namespaces, select), flags, **kwargs
)
def match(self, select, namespaces=None, flags=0, **kwargs):
"""Check whether this Tag matches the given CSS selector.
This uses the Soup Sieve library. For more information, see
that library's documentation for the soupsieve.match()
method.
:param select: A CSS selector.
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.match() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.match() method.
:return: True if this Tag matches the selector; False otherwise.
:rtype: bool
"""
return self.api.match(
select, self.tag, self._ns(namespaces, select), flags, **kwargs
)
def filter(self, select, namespaces=None, flags=0, **kwargs):
"""Filter this Tag's direct children based on the given CSS selector.
This uses the Soup Sieve library. It works the same way as
passing this Tag into that library's soupsieve.filter()
method. For more information, see the
documentation for soupsieve.filter().
:param namespaces: A dictionary mapping namespace prefixes
used in the CSS selector to namespace URIs. By default,
Beautiful Soup will pass in the prefixes it encountered while
parsing the document.
:param flags: Flags to be passed into Soup Sieve's
soupsieve.filter() method.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.filter() method.
:return: A ResultSet of Tag objects.
:rtype: bs4.element.ResultSet
"""
return self._rs(
self.api.filter(
select, self.tag, self._ns(namespaces, select), flags, **kwargs
)
)
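In practice the proxy is reached through the .css attribute on a BeautifulSoup or Tag object, and select()/select_one() also accept the precompiled objects returned by compile(), mirroring test_precompiled in the new test file further down:

from bs4 import BeautifulSoup

soup = BeautifulSoup('<div id="main"><p class="a">x</p><p>y</p></div>', 'html.parser')
assert [t['id'] for t in soup.css.select('div')] == ['main']
sel = soup.css.compile('p.a')  # a precompiled soupsieve.SoupSieve object
assert soup.select_one(sel).string == 'x'
assert soup.div.css.match('div#main') is True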


@@ -59,21 +59,6 @@ def diagnose(data):
if hasattr(data, 'read'):
data = data.read()
elif data.startswith("http:") or data.startswith("https:"):
print(('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data))
print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
return
else:
try:
if os.path.exists(data):
print(('"%s" looks like a filename. Reading data from the file.' % data))
with open(data) as fp:
data = fp.read()
except ValueError:
# This can happen on some platforms when the 'filename' is
# too long. Assume it's data and not a filename.
pass
print("")
for parser in basic_parsers:
print(("Trying to parse your markup with %s" % parser))


@@ -8,14 +8,8 @@ except ImportError as e:
import re
import sys
import warnings
try:
import soupsieve
except ImportError as e:
soupsieve = None
warnings.warn(
'The soupsieve package is not installed. CSS selectors cannot be used.'
)
from bs4.css import CSS
from bs4.formatter import (
Formatter,
HTMLFormatter,
@@ -69,13 +63,13 @@ PYTHON_SPECIFIC_ENCODINGS = set([
"string-escape",
"string_escape",
])
class NamespacedAttribute(str):
"""A namespaced string (e.g. 'xml:lang') that remembers the namespace
('xml') and the name ('lang') that were used to create it.
"""
def __new__(cls, prefix, name=None, namespace=None):
if not name:
# This is the default namespace. Its name "has no value"
@@ -146,14 +140,19 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
return match.group(1) + encoding
return self.CHARSET_RE.sub(rewrite, self.original_value)
class PageElement(object):
"""Contains the navigational information for some part of the page:
that is, its current location in the parse tree.
NavigableString, Tag, etc. are all subclasses of PageElement.
"""
# In general, we can't tell just by looking at an element whether
# it's contained in an XML document or an HTML document. But for
# Tags (q.v.) we can store this information at parse time.
known_xml = None
def setup(self, parent=None, previous_element=None, next_element=None,
previous_sibling=None, next_sibling=None):
"""Sets up the initial relations between this element and
@@ -163,7 +162,7 @@ class PageElement(object):
:param previous_element: The element parsed immediately before
this one.
:param next_element: The element parsed immediately after
this one.
@@ -257,11 +256,11 @@ class PageElement(object):
default = object()
def _all_strings(self, strip=False, types=default):
"""Yield all strings of certain classes, possibly stripping them.
This is implemented differently in Tag and NavigableString.
"""
raise NotImplementedError()
@property
def stripped_strings(self):
"""Yield all strings in this PageElement, stripping them first.
@@ -294,11 +293,11 @@ class PageElement(object):
strip, types=types)])
getText = get_text
text = property(get_text)
def replace_with(self, *args):
"""Replace this PageElement with one or more PageElements, keeping the
"""Replace this PageElement with one or more PageElements, keeping the
rest of the tree the same.
:param args: One or more PageElements.
:return: `self`, no longer part of the tree.
"""
@@ -410,7 +409,7 @@ class PageElement(object):
This works the same way as `list.insert`.
:param position: The numeric position that should be occupied
in `self.children` by the new PageElement.
:param new_child: A PageElement.
"""
if new_child is None:
@@ -546,7 +545,7 @@ class PageElement(object):
"Element has no parent, so 'after' has no meaning.")
if any(x is self for x in args):
raise ValueError("Can't insert an element after itself.")
offset = 0
for successor in args:
# Extract first so that the index won't be screwed up if they
@@ -912,7 +911,7 @@ class PageElement(object):
:rtype: bool
"""
return getattr(self, '_decomposed', False) or False
# Old non-property versions of the generators, for backwards
# compatibility with BS3.
def nextGenerator(self):
@@ -936,16 +935,11 @@ class NavigableString(str, PageElement):
When Beautiful Soup parses the markup <b>penguin</b>, it will
create a NavigableString for the string "penguin".
"""
"""
PREFIX = ''
SUFFIX = ''
# We can't tell just by looking at a string whether it's contained
# in an XML document or an HTML document.
known_xml = None
def __new__(cls, value):
"""Create a new NavigableString.
@@ -961,12 +955,22 @@ class NavigableString(str, PageElement):
u.setup()
return u
def __copy__(self):
def __deepcopy__(self, memo, recursive=False):
"""A copy of a NavigableString has the same contents and class
as the original, but it is not connected to the parse tree.
:param recursive: This parameter is ignored; it's only defined
so that NavigableString.__deepcopy__ implements the same
signature as Tag.__deepcopy__.
"""
return type(self)(self)
def __copy__(self):
"""A copy of a NavigableString can only be a deep copy, because
only one PageElement can occupy a given place in a parse tree.
"""
return self.__deepcopy__({})
def __getnewargs__(self):
return (str(self),)
@@ -1059,10 +1063,10 @@ class PreformattedString(NavigableString):
as comments (the Comment class) and CDATA blocks (the CData
class).
"""
PREFIX = ''
SUFFIX = ''
def output_ready(self, formatter=None):
"""Make this string ready for output by adding any subclass-specific
prefix or suffix.
@@ -1144,7 +1148,7 @@
"""
pass
class Script(NavigableString):
"""A NavigableString representing an executable script (probably
Javascript).
@@ -1250,7 +1254,7 @@ class Tag(PageElement):
if ((not builder or builder.store_line_numbers)
and (sourceline is not None or sourcepos is not None)):
self.sourceline = sourceline
self.sourcepos = sourcepos
if attrs is None:
attrs = {}
elif attrs:
@@ -1308,13 +1312,49 @@ class Tag(PageElement):
self.interesting_string_types = builder.string_containers[self.name]
else:
self.interesting_string_types = self.DEFAULT_INTERESTING_STRING_TYPES
parserClass = _alias("parser_class") # BS3
def __copy__(self):
"""A copy of a Tag is a new Tag, unconnected to the parse tree.
def __deepcopy__(self, memo, recursive=True):
"""A deepcopy of a Tag is a new Tag, unconnected to the parse tree.
Its contents are a copy of the old Tag's contents.
"""
clone = self._clone()
if recursive:
# Clone this tag's descendants recursively, but without
# making any recursive function calls.
tag_stack = [clone]
for event, element in self._event_stream(self.descendants):
if event is Tag.END_ELEMENT_EVENT:
# Stop appending incoming Tags to the Tag that was
# just closed.
tag_stack.pop()
else:
descendant_clone = element.__deepcopy__(
memo, recursive=False
)
# Add to its parent's .contents
tag_stack[-1].append(descendant_clone)
if event is Tag.START_ELEMENT_EVENT:
# Add the Tag itself to the stack so that its
# children will be .appended to it.
tag_stack.append(descendant_clone)
return clone
def __copy__(self):
"""A copy of a Tag must always be a deep copy, because a Tag's
children can only have one parent at a time.
"""
return self.__deepcopy__({})
def _clone(self):
"""Create a new Tag just like this one, but with no
contents and unattached to any parse tree.
This is the first step in the deepcopy process.
"""
clone = type(self)(
None, self.builder, self.name, self.namespace,
self.prefix, self.attrs, is_xml=self._is_xml,
@@ -1326,8 +1366,6 @@ class Tag(PageElement):
)
for attr in ('can_be_empty_element', 'hidden'):
setattr(clone, attr, getattr(self, attr))
for child in self.contents:
clone.append(child.__copy__())
return clone
@property
@@ -1433,7 +1471,7 @@
i.contents = []
i._decomposed = True
i = n
def clear(self, decompose=False):
"""Wipe out all children of this PageElement by calling extract()
on them.
@@ -1521,7 +1559,7 @@
if not isinstance(value, list):
value = [value]
return value
def has_attr(self, key):
"""Does this PageElement have an attribute with the given name?"""
return key in self.attrs
@@ -1608,7 +1646,7 @@
def __repr__(self, encoding="unicode-escape"):
"""Renders this PageElement as a string.
:param encoding: The encoding to use (Python 2 only).
TODO: This is now ignored and a warning should be issued
if a value is provided.
:return: A (Unicode) string.
@@ -1650,106 +1688,212 @@
def decode(self, indent_level=None,
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
formatter="minimal"):
"""Render a Unicode representation of this PageElement and its
contents.
:param indent_level: Each line of the rendering will be
indented this many spaces. Used internally in
recursive calls while pretty-printing.
:param eventual_encoding: The tag is destined to be
encoded into this encoding. This method is _not_
responsible for performing that encoding. This information
is passed in so that it can be substituted in if the
document contains a <META> tag that mentions the document's
encoding.
:param formatter: A Formatter object, or a string naming one of
the standard formatters.
"""
formatter="minimal",
iterator=None):
pieces = []
# First off, turn a non-Formatter `formatter` into a Formatter
# object. This will stop the lookup from happening over and
# over again.
if not isinstance(formatter, Formatter):
formatter = self.formatter_for_name(formatter)
attributes = formatter.attributes(self)
attrs = []
for key, val in attributes:
if val is None:
decoded = key
if indent_level is True:
indent_level = 0
# The currently active tag that put us into string literal
# mode. Until this element is closed, children will be treated
# as string literals and not pretty-printed. String literal
# mode is turned on immediately after this tag begins, and
# turned off immediately before it's closed. This means there
# will be whitespace before and after the tag itself.
string_literal_tag = None
for event, element in self._event_stream(iterator):
if event in (Tag.START_ELEMENT_EVENT, Tag.EMPTY_ELEMENT_EVENT):
piece = element._format_tag(
eventual_encoding, formatter, opening=True
)
elif event is Tag.END_ELEMENT_EVENT:
piece = element._format_tag(
eventual_encoding, formatter, opening=False
)
if indent_level is not None:
indent_level -= 1
else:
if isinstance(val, list) or isinstance(val, tuple):
val = ' '.join(val)
elif not isinstance(val, str):
val = str(val)
elif (
isinstance(val, AttributeValueWithCharsetSubstitution)
and eventual_encoding is not None
):
val = val.encode(eventual_encoding)
piece = element.output_ready(formatter)
text = formatter.attribute_value(val)
decoded = (
str(key) + '='
+ formatter.quoted_attribute_value(text))
attrs.append(decoded)
close = ''
closeTag = ''
# Now we need to apply the 'prettiness' -- extra
# whitespace before and/or after this tag. This can get
# complicated because certain tags, like <pre> and
# <script>, can't be prettified, since adding whitespace would
# change the meaning of the content.
# The default behavior is to add whitespace before and
# after an element when string literal mode is off, and to
# leave things as they are when string literal mode is on.
if string_literal_tag:
indent_before = indent_after = False
else:
indent_before = indent_after = True
# The only time the behavior is more complex than that is
# when we encounter an opening or closing tag that might
# put us into or out of string literal mode.
if (event is Tag.START_ELEMENT_EVENT
and not string_literal_tag
and not element._should_pretty_print()):
# We are about to enter string literal mode. Add
# whitespace before this tag, but not after. We
# will stay in string literal mode until this tag
# is closed.
indent_before = True
indent_after = False
string_literal_tag = element
elif (event is Tag.END_ELEMENT_EVENT
and element is string_literal_tag):
# We are about to exit string literal mode by closing
# the tag that sent us into that mode. Add whitespace
# after this tag, but not before.
indent_before = False
indent_after = True
string_literal_tag = None
# Now we know whether to add whitespace before and/or
# after this element.
if indent_level is not None:
if (indent_before or indent_after):
if isinstance(element, NavigableString):
piece = piece.strip()
if piece:
piece = self._indent_string(
piece, indent_level, formatter,
indent_before, indent_after
)
if event == Tag.START_ELEMENT_EVENT:
indent_level += 1
pieces.append(piece)
return "".join(pieces)
# Names for the different events yielded by _event_stream
START_ELEMENT_EVENT = object()
END_ELEMENT_EVENT = object()
EMPTY_ELEMENT_EVENT = object()
STRING_ELEMENT_EVENT = object()
def _event_stream(self, iterator=None):
"""Yield a sequence of events that can be used to reconstruct the DOM
for this element.
This lets us recreate the nested structure of this element
(e.g. when formatting it as a string) without using recursive
method calls.
This is similar in concept to the SAX API, but it's a simpler
interface designed for internal use. The events are different
from SAX and the arguments associated with the events are Tags
and other Beautiful Soup objects.
:param iterator: An alternate iterator to use when traversing
the tree.
"""
tag_stack = []
iterator = iterator or self.self_and_descendants
for c in iterator:
# If the parent of the element we're about to yield is not
# the tag currently on the stack, it means that the tag on
# the stack closed before this element appeared.
while tag_stack and c.parent != tag_stack[-1]:
now_closed_tag = tag_stack.pop()
yield Tag.END_ELEMENT_EVENT, now_closed_tag
if isinstance(c, Tag):
if c.is_empty_element:
yield Tag.EMPTY_ELEMENT_EVENT, c
else:
yield Tag.START_ELEMENT_EVENT, c
tag_stack.append(c)
continue
else:
yield Tag.STRING_ELEMENT_EVENT, c
while tag_stack:
now_closed_tag = tag_stack.pop()
yield Tag.END_ELEMENT_EVENT, now_closed_tag
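To make the event vocabulary concrete, here is what the stream yields for a two-level tree. This is a sketch that pokes at the private helper defined above, so it is illustration rather than supported API:

from bs4 import BeautifulSoup
from bs4.element import Tag

soup = BeautifulSoup("<div><p>hi</p></div>", "html.parser")
labels = {Tag.START_ELEMENT_EVENT: "START", Tag.END_ELEMENT_EVENT: "END",
          Tag.EMPTY_ELEMENT_EVENT: "EMPTY", Tag.STRING_ELEMENT_EVENT: "STRING"}
for event, element in soup.div._event_stream():
    kind = element.name if isinstance(element, Tag) else repr(str(element))
    print(labels[event], kind)
# Prints: START div, START p, STRING 'hi', END p, END div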
def _indent_string(self, s, indent_level, formatter,
indent_before, indent_after):
"""Add indentation whitespace before and/or after a string.
:param s: The string to amend with whitespace.
:param indent_level: The indentation level; affects how much
whitespace goes before the string.
:param indent_before: Whether or not to add whitespace
before the string.
:param indent_after: Whether or not to add whitespace
(a newline) after the string.
"""
space_before = ''
if indent_before and indent_level:
space_before = (formatter.indent * indent_level)
space_after = ''
if indent_after:
space_after = "\n"
return space_before + s + space_after
def _format_tag(self, eventual_encoding, formatter, opening):
# A tag starts with the < character (see below).
# Then the / character, if this is a closing tag.
closing_slash = ''
if not opening:
closing_slash = '/'
# Then an optional namespace prefix.
prefix = ''
if self.prefix:
prefix = self.prefix + ":"
if self.is_empty_element:
close = formatter.void_element_close_prefix or ''
else:
closeTag = '</%s%s>' % (prefix, self.name)
# Then a list of attribute values, if this is an opening tag.
attribute_string = ''
if opening:
attributes = formatter.attributes(self)
attrs = []
for key, val in attributes:
if val is None:
decoded = key
else:
if isinstance(val, list) or isinstance(val, tuple):
val = ' '.join(val)
elif not isinstance(val, str):
val = str(val)
elif (
isinstance(val, AttributeValueWithCharsetSubstitution)
and eventual_encoding is not None
):
val = val.encode(eventual_encoding)
pretty_print = self._should_pretty_print(indent_level)
space = ''
indent_space = ''
if indent_level is not None:
indent_space = (formatter.indent * (indent_level - 1))
if pretty_print:
space = indent_space
indent_contents = indent_level + 1
else:
indent_contents = None
contents = self.decode_contents(
indent_contents, eventual_encoding, formatter
)
if self.hidden:
# This is the 'document root' object.
s = contents
else:
s = []
attribute_string = ''
text = formatter.attribute_value(val)
decoded = (
str(key) + '='
+ formatter.quoted_attribute_value(text))
attrs.append(decoded)
if attrs:
attribute_string = ' ' + ' '.join(attrs)
if indent_level is not None:
# Even if this particular tag is not pretty-printed,
# we should indent up to the start of the tag.
s.append(indent_space)
s.append('<%s%s%s%s>' % (
prefix, self.name, attribute_string, close))
if pretty_print:
s.append("\n")
s.append(contents)
if pretty_print and contents and contents[-1] != "\n":
s.append("\n")
if pretty_print and closeTag:
s.append(space)
s.append(closeTag)
if indent_level is not None and closeTag and self.next_sibling:
# Even if this particular tag is not pretty-printed,
# we're now done with the tag, and we should add a
# newline if appropriate.
s.append("\n")
s = ''.join(s)
return s
def _should_pretty_print(self, indent_level):
# Then an optional closing slash (for a void element in an
# XML document).
void_element_closing_slash = ''
if self.is_empty_element:
void_element_closing_slash = formatter.void_element_close_prefix or ''
# Put it all together.
return '<' + closing_slash + prefix + self.name + attribute_string + void_element_closing_slash + '>'
def _should_pretty_print(self, indent_level=1):
"""Should this tag be pretty-printed?
Most of them should, but some (such as <pre> in HTML
@@ -1770,7 +1914,7 @@ class Tag(PageElement):
a Unicode string will be returned.
:param formatter: A Formatter object, or a string naming one of
the standard formatters.
:return: A Unicode string (if encoding==None) or a bytestring
(otherwise).
"""
if encoding is None:
@@ -1800,33 +1944,9 @@
the standard Formatters.
"""
# First off, turn a string formatter into a Formatter object. This
# will stop the lookup from happening over and over again.
if not isinstance(formatter, Formatter):
formatter = self.formatter_for_name(formatter)
return self.decode(indent_level, eventual_encoding, formatter,
iterator=self.descendants)
pretty_print = (indent_level is not None)
s = []
for c in self:
text = None
if isinstance(c, NavigableString):
text = c.output_ready(formatter)
elif isinstance(c, Tag):
s.append(c.decode(indent_level, eventual_encoding,
formatter))
preserve_whitespace = (
self.preserve_whitespace_tags and self.name in self.preserve_whitespace_tags
)
if text and indent_level and not preserve_whitespace:
text = text.strip()
if text:
if pretty_print and not preserve_whitespace:
s.append(formatter.indent * (indent_level - 1))
s.append(text)
if pretty_print and not preserve_whitespace:
s.append("\n")
return ''.join(s)
def encode_contents(
self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING,
formatter="minimal"):
@@ -1922,6 +2042,18 @@
# return iter() to make the purpose of the method clear
return iter(self.contents) # XXX This seems to be untested.
@property
def self_and_descendants(self):
"""Iterate over this PageElement and its children in a
breadth-first sequence.
:yield: A sequence of PageElements.
"""
if not self.hidden:
yield self
for i in self.descendants:
yield i
@property
def descendants(self):
"""Iterate over all children of this PageElement in a
@@ -1948,16 +2080,13 @@
Beautiful Soup will use the prefixes it encountered while
parsing the document.
:param kwargs: Keyword arguments to be passed into SoupSieve's
:param kwargs: Keyword arguments to be passed into Soup Sieve's
soupsieve.select() method.
:return: A Tag.
:rtype: bs4.element.Tag
"""
value = self.select(selector, namespaces, 1, **kwargs)
if value:
return value[0]
return None
return self.css.select_one(selector, namespaces, **kwargs)
def select(self, selector, namespaces=None, limit=None, **kwargs):
"""Perform a CSS selection operation on the current element.
@@ -1973,27 +2102,18 @@
:param limit: After finding this number of results, stop looking.
:param kwargs: Keyword arguments to be passed into SoupSieve's
soupsieve.select() method.
:return: A ResultSet of Tags.
:rtype: bs4.element.ResultSet
"""
if namespaces is None:
namespaces = self._namespaces
if limit is None:
limit = 0
if soupsieve is None:
raise NotImplementedError(
"Cannot execute CSS selectors because the soupsieve package is not installed."
)
results = soupsieve.select(selector, self, namespaces, limit, **kwargs)
return self.css.select(selector, namespaces, limit, **kwargs)
# We do this because it's more consistent and because
# ResultSet.__getattr__ has a helpful error message.
return ResultSet(None, results)
@property
def css(self):
"""Return an interface to the CSS selector API."""
return CSS(self)
# Old names for backwards compatibility
def childGenerator(self):
@@ -2038,7 +2158,7 @@ class SoupStrainer(object):
:param attrs: A dictionary of filters on attribute values.
:param string: A filter for a NavigableString with specific text.
:kwargs: A dictionary of filters on attribute values.
"""
"""
if string is None and 'text' in kwargs:
string = kwargs.pop('text')
warnings.warn(
@@ -2137,7 +2257,7 @@ class SoupStrainer(object):
# looking at a tag with a different name.
if markup and not markup.prefix and self.name != markup.name:
return False
call_function_with_tag_data = (
isinstance(self.name, Callable)
and not isinstance(markup_name, Tag))
@@ -2223,7 +2343,7 @@ class SoupStrainer(object):
if self._matches(' '.join(markup), match_against):
return True
return False
if match_against is True:
# True matches any non-None value.
return markup is not None
@@ -2267,11 +2387,11 @@ class SoupStrainer(object):
return True
else:
return False
# Beyond this point we might need to run the test twice: once against
# the tag's name and once against its prefixed name.
match = False
if not match and isinstance(match_against, str):
# Exact string match
match = markup == match_against
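A visible consequence of the copy rework in this file: copy.copy() of a Tag is now always a deep copy, detached from the original tree. A quick check, assuming bs4 4.12:

import copy
from bs4 import BeautifulSoup

soup = BeautifulSoup("<div><p>a</p></div>", "html.parser")
dup = copy.copy(soup.div)
assert dup.parent is None          # no longer attached to soup's tree
assert str(dup) == str(soup.div)   # but with identical contents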


@@ -97,7 +97,7 @@ class Formatter(EntitySubstitution):
else:
indent = ' '
self.indent = indent
def substitute(self, ns):
"""Process a string that needs to undergo entity substitution.
This may be a string encountered in an attribute value or as


@@ -297,37 +297,11 @@ class TreeBuilderSmokeTest(object):
markup, multi_valued_attributes=multi_valued_attributes
)
assert soup.a['class'] == ['a', 'b', 'c']
def test_fuzzed_input(self):
# This test centralizes in one place the various fuzz tests
# for Beautiful Soup created by the oss-fuzz project.
# These strings superficially resemble markup, but they
# generally can't be parsed into anything. The best we can
# hope for is that parsing these strings won't crash the
# parser.
#
# n.b. This markup is commented out because these fuzz tests
# _do_ crash the parser. However the crashes are due to bugs
# in html.parser, not Beautiful Soup -- otherwise I'd fix the
# bugs!
bad_markup = [
# https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=28873
# https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/519e5b4269a01185a0d5e76295251921da2f0700
# https://bugs.python.org/issue37747
#
#b'\n<![\xff\xfe\xfe\xcd\x00',
#https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/de32aa55785be29bbc72a1a8e06b00611fb3d9f8
# https://bugs.python.org/issue34480
#
#b'<![n\x00'
]
for markup in bad_markup:
with warnings.catch_warnings(record=False):
soup = self.soup(markup)
def test_invalid_doctype(self):
markup = '<![if word]>content<![endif]>'
markup = '<!DOCTYPE html]ff>'
soup = self.soup(markup)
class HTMLTreeBuilderSmokeTest(TreeBuilderSmokeTest):
@@ -577,8 +551,8 @@
"""Whitespace must be preserved in <pre> and <textarea> tags,
even if that would mean not prettifying the markup.
"""
pre_markup = "<pre> </pre>"
textarea_markup = "<textarea> woo\nwoo </textarea>"
pre_markup = "<pre>a z</pre>\n"
textarea_markup = "<textarea> woo\nwoo </textarea>\n"
self.assert_soup(pre_markup)
self.assert_soup(textarea_markup)
@@ -589,7 +563,7 @@ Hello, world!
assert soup.textarea.prettify() == textarea_markup
soup = self.soup("<textarea></textarea>")
assert soup.textarea.prettify() == "<textarea></textarea>"
assert soup.textarea.prettify() == "<textarea></textarea>\n"
def test_nested_inline_elements(self):
"""Inline elements can be nested indefinitely."""


@@ -0,0 +1 @@
˙<!DOCTyPEV PUBLIC'''Đ'


@@ -0,0 +1 @@
)<a><math><TR><a><mI><a><p><a>


@@ -0,0 +1 @@
-<math><sElect><mi><sElect><sElect>

File diff suppressed because one or more lines are too long


@@ -0,0 +1 @@
ñ<table><svg><html>

lib/bs4/tests/test_css.py (new file, 487 lines)

@@ -0,0 +1,487 @@
import pytest
import types
from unittest.mock import MagicMock
from bs4 import (
CSS,
BeautifulSoup,
ResultSet,
)
from . import (
SoupTest,
SOUP_SIEVE_PRESENT,
)
if SOUP_SIEVE_PRESENT:
from soupsieve import SelectorSyntaxError
@pytest.mark.skipif(not SOUP_SIEVE_PRESENT, reason="Soup Sieve not installed")
class TestCSSSelectors(SoupTest):
"""Test basic CSS selector functionality.
This functionality is implemented in soupsieve, which has a much
more comprehensive test suite, so this is basically an extra check
that soupsieve works as expected.
"""
HTML = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>The title</title>
<link rel="stylesheet" href="blah.css" type="text/css" id="l1">
</head>
<body>
<custom-dashed-tag class="dashed" id="dash1">Hello there.</custom-dashed-tag>
<div id="main" class="fancy">
<div id="inner">
<h1 id="header1">An H1</h1>
<p>Some text</p>
<p class="onep" id="p1">Some more text</p>
<h2 id="header2">An H2</h2>
<p class="class1 class2 class3" id="pmulti">Another</p>
<a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
<h2 id="header3">Another H2</h2>
<a id="me" href="http://simonwillison.net/" rel="me">me</a>
<span class="s1">
<a href="#" id="s1a1">span1a1</a>
<a href="#" id="s1a2">span1a2 <span id="s1a2s1">test</span></a>
<span class="span2">
<a href="#" id="s2a1">span2a1</a>
</span>
<span class="span3"></span>
<custom-dashed-tag class="dashed" id="dash2"/>
<div data-tag="dashedvalue" id="data1"/>
</span>
</div>
<x id="xid">
<z id="zida"/>
<z id="zidab"/>
<z id="zidac"/>
</x>
<y id="yid">
<z id="zidb"/>
</y>
<p lang="en" id="lang-en">English</p>
<p lang="en-gb" id="lang-en-gb">English UK</p>
<p lang="en-us" id="lang-en-us">English US</p>
<p lang="fr" id="lang-fr">French</p>
</div>
<div id="footer">
</div>
"""
def setup_method(self):
self.soup = BeautifulSoup(self.HTML, 'html.parser')
def assert_selects(self, selector, expected_ids, **kwargs):
results = self.soup.select(selector, **kwargs)
assert isinstance(results, ResultSet)
el_ids = [el['id'] for el in results]
el_ids.sort()
expected_ids.sort()
assert expected_ids == el_ids, "Selector %s, expected [%s], got [%s]" % (
selector, ', '.join(expected_ids), ', '.join(el_ids)
)
assertSelect = assert_selects
def assert_select_multiple(self, *tests):
for selector, expected_ids in tests:
self.assert_selects(selector, expected_ids)
def test_precompiled(self):
sel = self.soup.css.compile('div')
els = self.soup.select(sel)
assert len(els) == 4
for div in els:
assert div.name == 'div'
el = self.soup.select_one(sel)
assert 'main' == el['id']
def test_one_tag_one(self):
els = self.soup.select('title')
assert len(els) == 1
assert els[0].name == 'title'
assert els[0].contents == ['The title']
def test_one_tag_many(self):
els = self.soup.select('div')
assert len(els) == 4
for div in els:
assert div.name == 'div'
el = self.soup.select_one('div')
assert 'main' == el['id']
def test_select_one_returns_none_if_no_match(self):
match = self.soup.select_one('nonexistenttag')
assert None == match
def test_tag_in_tag_one(self):
els = self.soup.select('div div')
self.assert_selects('div div', ['inner', 'data1'])
def test_tag_in_tag_many(self):
for selector in ('html div', 'html body div', 'body div'):
self.assert_selects(selector, ['data1', 'main', 'inner', 'footer'])
def test_limit(self):
self.assert_selects('html div', ['main'], limit=1)
self.assert_selects('html body div', ['inner', 'main'], limit=2)
self.assert_selects('body div', ['data1', 'main', 'inner', 'footer'],
limit=10)
def test_tag_no_match(self):
assert len(self.soup.select('del')) == 0
def test_invalid_tag(self):
with pytest.raises(SelectorSyntaxError):
self.soup.select('tag%t')
def test_select_dashed_tag_ids(self):
self.assert_selects('custom-dashed-tag', ['dash1', 'dash2'])
def test_select_dashed_by_id(self):
dashed = self.soup.select('custom-dashed-tag[id=\"dash2\"]')
assert dashed[0].name == 'custom-dashed-tag'
assert dashed[0]['id'] == 'dash2'
def test_dashed_tag_text(self):
assert self.soup.select('body > custom-dashed-tag')[0].text == 'Hello there.'
def test_select_dashed_matches_find_all(self):
assert self.soup.select('custom-dashed-tag') == self.soup.find_all('custom-dashed-tag')
def test_header_tags(self):
self.assert_select_multiple(
('h1', ['header1']),
('h2', ['header2', 'header3']),
)
def test_class_one(self):
for selector in ('.onep', 'p.onep', 'html p.onep'):
els = self.soup.select(selector)
assert len(els) == 1
assert els[0].name == 'p'
assert els[0]['class'] == ['onep']
def test_class_mismatched_tag(self):
els = self.soup.select('div.onep')
assert len(els) == 0
def test_one_id(self):
for selector in ('div#inner', '#inner', 'div div#inner'):
self.assert_selects(selector, ['inner'])
def test_bad_id(self):
els = self.soup.select('#doesnotexist')
assert len(els) == 0
def test_items_in_id(self):
els = self.soup.select('div#inner p')
assert len(els) == 3
for el in els:
assert el.name == 'p'
assert els[1]['class'] == ['onep']
assert not els[0].has_attr('class')
def test_a_bunch_of_emptys(self):
for selector in ('div#main del', 'div#main div.oops', 'div div#main'):
assert len(self.soup.select(selector)) == 0
def test_multi_class_support(self):
for selector in ('.class1', 'p.class1', '.class2', 'p.class2',
'.class3', 'p.class3', 'html p.class2', 'div#inner .class2'):
self.assert_selects(selector, ['pmulti'])
def test_multi_class_selection(self):
for selector in ('.class1.class3', '.class3.class2',
'.class1.class2.class3'):
self.assert_selects(selector, ['pmulti'])
def test_child_selector(self):
self.assert_selects('.s1 > a', ['s1a1', 's1a2'])
self.assert_selects('.s1 > a span', ['s1a2s1'])
def test_child_selector_id(self):
self.assert_selects('.s1 > a#s1a2 span', ['s1a2s1'])
def test_attribute_equals(self):
self.assert_select_multiple(
('p[class="onep"]', ['p1']),
('p[id="p1"]', ['p1']),
('[class="onep"]', ['p1']),
('[id="p1"]', ['p1']),
('link[rel="stylesheet"]', ['l1']),
('link[type="text/css"]', ['l1']),
('link[href="blah.css"]', ['l1']),
('link[href="no-blah.css"]', []),
('[rel="stylesheet"]', ['l1']),
('[type="text/css"]', ['l1']),
('[href="blah.css"]', ['l1']),
('[href="no-blah.css"]', []),
('p[href="no-blah.css"]', []),
('[href="no-blah.css"]', []),
)
def test_attribute_tilde(self):
self.assert_select_multiple(
('p[class~="class1"]', ['pmulti']),
('p[class~="class2"]', ['pmulti']),
('p[class~="class3"]', ['pmulti']),
('[class~="class1"]', ['pmulti']),
('[class~="class2"]', ['pmulti']),
('[class~="class3"]', ['pmulti']),
('a[rel~="friend"]', ['bob']),
('a[rel~="met"]', ['bob']),
('[rel~="friend"]', ['bob']),
('[rel~="met"]', ['bob']),
)
def test_attribute_startswith(self):
self.assert_select_multiple(
('[rel^="style"]', ['l1']),
('link[rel^="style"]', ['l1']),
('notlink[rel^="notstyle"]', []),
('[rel^="notstyle"]', []),
('link[rel^="notstyle"]', []),
('link[href^="bla"]', ['l1']),
('a[href^="http://"]', ['bob', 'me']),
('[href^="http://"]', ['bob', 'me']),
('[id^="p"]', ['pmulti', 'p1']),
('[id^="m"]', ['me', 'main']),
('div[id^="m"]', ['main']),
('a[id^="m"]', ['me']),
('div[data-tag^="dashed"]', ['data1'])
)
def test_attribute_endswith(self):
self.assert_select_multiple(
('[href$=".css"]', ['l1']),
('link[href$=".css"]', ['l1']),
('link[id$="1"]', ['l1']),
('[id$="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1', 'dash1']),
('div[id$="1"]', ['data1']),
('[id$="noending"]', []),
)
def test_attribute_contains(self):
self.assert_select_multiple(
# From test_attribute_startswith
('[rel*="style"]', ['l1']),
('link[rel*="style"]', ['l1']),
('notlink[rel*="notstyle"]', []),
('[rel*="notstyle"]', []),
('link[rel*="notstyle"]', []),
('link[href*="bla"]', ['l1']),
('[href*="http://"]', ['bob', 'me']),
('[id*="p"]', ['pmulti', 'p1']),
('div[id*="m"]', ['main']),
('a[id*="m"]', ['me']),
# From test_attribute_endswith
('[href*=".css"]', ['l1']),
('link[href*=".css"]', ['l1']),
('link[id*="1"]', ['l1']),
('[id*="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1', 'dash1']),
('div[id*="1"]', ['data1']),
('[id*="noending"]', []),
# New for this test
('[href*="."]', ['bob', 'me', 'l1']),
('a[href*="."]', ['bob', 'me']),
('link[href*="."]', ['l1']),
('div[id*="n"]', ['main', 'inner']),
('div[id*="nn"]', ['inner']),
('div[data-tag*="edval"]', ['data1'])
)
def test_attribute_exact_or_hypen(self):
self.assert_select_multiple(
('p[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
('[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
('p[lang|="fr"]', ['lang-fr']),
('p[lang|="gb"]', []),
)
def test_attribute_exists(self):
self.assert_select_multiple(
('[rel]', ['l1', 'bob', 'me']),
('link[rel]', ['l1']),
('a[rel]', ['bob', 'me']),
('[lang]', ['lang-en', 'lang-en-gb', 'lang-en-us', 'lang-fr']),
('p[class]', ['p1', 'pmulti']),
('[blah]', []),
('p[blah]', []),
('div[data-tag]', ['data1'])
)
def test_quoted_space_in_selector_name(self):
html = """<div style="display: wrong">nope</div>
<div style="display: right">yes</div>
"""
soup = BeautifulSoup(html, 'html.parser')
[chosen] = soup.select('div[style="display: right"]')
assert "yes" == chosen.string
def test_unsupported_pseudoclass(self):
with pytest.raises(NotImplementedError):
self.soup.select("a:no-such-pseudoclass")
with pytest.raises(SelectorSyntaxError):
self.soup.select("a:nth-of-type(a)")
def test_nth_of_type(self):
# Try to select first paragraph
els = self.soup.select('div#inner p:nth-of-type(1)')
assert len(els) == 1
assert els[0].string == 'Some text'
# Try to select third paragraph
els = self.soup.select('div#inner p:nth-of-type(3)')
assert len(els) == 1
assert els[0].string == 'Another'
# Try to select (non-existent!) fourth paragraph
els = self.soup.select('div#inner p:nth-of-type(4)')
assert len(els) == 0
# Zero will select no tags.
els = self.soup.select('div p:nth-of-type(0)')
assert len(els) == 0
def test_nth_of_type_direct_descendant(self):
els = self.soup.select('div#inner > p:nth-of-type(1)')
assert len(els) == 1
assert els[0].string == 'Some text'
def test_id_child_selector_nth_of_type(self):
self.assert_selects('#inner > p:nth-of-type(2)', ['p1'])
def test_select_on_element(self):
# Other tests operate on the tree; this operates on an element
# within the tree.
inner = self.soup.find("div", id="main")
selected = inner.select("div")
# The <div id="inner"> tag was selected. The <div id="footer">
# tag was not.
self.assert_selects_ids(selected, ['inner', 'data1'])
def test_overspecified_child_id(self):
self.assert_selects(".fancy #inner", ['inner'])
self.assert_selects(".normal #inner", [])
def test_adjacent_sibling_selector(self):
self.assert_selects('#p1 + h2', ['header2'])
self.assert_selects('#p1 + h2 + p', ['pmulti'])
self.assert_selects('#p1 + #header2 + .class1', ['pmulti'])
assert [] == self.soup.select('#p1 + p')
def test_general_sibling_selector(self):
self.assert_selects('#p1 ~ h2', ['header2', 'header3'])
self.assert_selects('#p1 ~ #header2', ['header2'])
self.assert_selects('#p1 ~ h2 + a', ['me'])
self.assert_selects('#p1 ~ h2 + [rel="me"]', ['me'])
assert [] == self.soup.select('#inner ~ h2')
def test_dangling_combinator(self):
with pytest.raises(SelectorSyntaxError):
self.soup.select('h1 >')
def test_sibling_combinator_wont_select_same_tag_twice(self):
self.assert_selects('p[lang] ~ p', ['lang-en-gb', 'lang-en-us', 'lang-fr'])
# Test the selector grouping operator (the comma)
def test_multiple_select(self):
self.assert_selects('x, y', ['xid', 'yid'])
def test_multiple_select_with_no_space(self):
self.assert_selects('x,y', ['xid', 'yid'])
def test_multiple_select_with_more_space(self):
self.assert_selects('x, y', ['xid', 'yid'])
def test_multiple_select_duplicated(self):
self.assert_selects('x, x', ['xid'])
def test_multiple_select_sibling(self):
self.assert_selects('x, y ~ p[lang=fr]', ['xid', 'lang-fr'])
def test_multiple_select_tag_and_direct_descendant(self):
self.assert_selects('x, y > z', ['xid', 'zidb'])
def test_multiple_select_direct_descendant_and_tags(self):
self.assert_selects('div > x, y, z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac'])
def test_multiple_select_indirect_descendant(self):
self.assert_selects('div x,y, z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac'])
def test_invalid_multiple_select(self):
with pytest.raises(SelectorSyntaxError):
self.soup.select(',x, y')
with pytest.raises(SelectorSyntaxError):
self.soup.select('x,,y')
def test_multiple_select_attrs(self):
self.assert_selects('p[lang=en], p[lang=en-gb]', ['lang-en', 'lang-en-gb'])
def test_multiple_select_ids(self):
self.assert_selects('x, y > z[id=zida], z[id=zidab], z[id=zidb]', ['xid', 'zidb', 'zidab'])
def test_multiple_select_nested(self):
self.assert_selects('body > div > x, y > z', ['xid', 'zidb'])
def test_select_duplicate_elements(self):
# When markup contains duplicate elements, a multiple select
# will find all of them.
markup = '<div class="c1"/><div class="c2"/><div class="c1"/>'
soup = BeautifulSoup(markup, 'html.parser')
selected = soup.select(".c1, .c2")
assert 3 == len(selected)
# Verify that find_all finds the same elements, though because
# of an implementation detail it finds them in a different
# order.
for element in soup.find_all(class_=['c1', 'c2']):
assert element in selected
def test_closest(self):
inner = self.soup.find("div", id="inner")
closest = inner.css.closest("div[id=main]")
assert closest == self.soup.find("div", id="main")
def test_match(self):
inner = self.soup.find("div", id="inner")
main = self.soup.find("div", id="main")
assert inner.css.match("div[id=main]") == False
assert main.css.match("div[id=main]") == True
def test_iselect(self):
gen = self.soup.css.iselect("h2")
assert isinstance(gen, types.GeneratorType)
[header2, header3] = gen
assert header2['id'] == 'header2'
assert header3['id'] == 'header3'
def test_filter(self):
inner = self.soup.find("div", id="inner")
results = inner.css.filter("h2")
assert len(inner.css.filter("h2")) == 2
results = inner.css.filter("h2[id=header3]")
assert isinstance(results, ResultSet)
[result] = results
assert result['id'] == 'header3'
def test_escape(self):
m = self.soup.css.escape
assert m(".foo#bar") == '\\.foo\\#bar'
assert m("()[]{}") == '\\(\\)\\[\\]\\{\\}'
assert m(".foo") == self.soup.css.escape(".foo")


@@ -80,20 +80,20 @@ class TestFormatter(SoupTest):
@pytest.mark.parametrize(
"indent,expect",
[
(None, '<a>\n<b>\ntext\n</b>\n</a>'),
(-1, '<a>\n<b>\ntext\n</b>\n</a>'),
(0, '<a>\n<b>\ntext\n</b>\n</a>'),
("", '<a>\n<b>\ntext\n</b>\n</a>'),
(None, '<a>\n<b>\ntext\n</b>\n</a>\n'),
(-1, '<a>\n<b>\ntext\n</b>\n</a>\n'),
(0, '<a>\n<b>\ntext\n</b>\n</a>\n'),
("", '<a>\n<b>\ntext\n</b>\n</a>\n'),
(1, '<a>\n <b>\n text\n </b>\n</a>'),
(2, '<a>\n <b>\n text\n </b>\n</a>'),
(1, '<a>\n <b>\n text\n </b>\n</a>\n'),
(2, '<a>\n <b>\n text\n </b>\n</a>\n'),
("\t", '<a>\n\t<b>\n\t\ttext\n\t</b>\n</a>'),
('abc', '<a>\nabc<b>\nabcabctext\nabc</b>\n</a>'),
("\t", '<a>\n\t<b>\n\t\ttext\n\t</b>\n</a>\n'),
('abc', '<a>\nabc<b>\nabcabctext\nabc</b>\n</a>\n'),
# Some invalid inputs -- the default behavior is used.
(object(), '<a>\n <b>\n text\n </b>\n</a>'),
(b'bytes', '<a>\n <b>\n text\n </b>\n</a>'),
(object(), '<a>\n <b>\n text\n </b>\n</a>\n'),
(b'bytes', '<a>\n <b>\n text\n </b>\n</a>\n'),
]
)
def test_indent(self, indent, expect):


@@ -0,0 +1,91 @@
"""This file contains test cases reported by third parties using
fuzzing tools, primarily from Google's oss-fuzz project. Some of these
represent real problems with Beautiful Soup, but many are problems in
libraries that Beautiful Soup depends on, and many of the test cases
represent different ways of triggering the same problem.
Grouping these test cases together makes it easy to see which test
cases represent the same problem, and puts the test cases in close
proximity to code that can trigger the problems.
"""
import os
import pytest
from bs4 import (
BeautifulSoup,
ParserRejectedMarkup,
)
class TestFuzz(object):
# Test case markup files from fuzzers are given this extension so
# they can be included in builds.
TESTCASE_SUFFIX = ".testcase"
# This class of error has been fixed by catching a less helpful
# exception from html.parser and raising ParserRejectedMarkup
# instead.
@pytest.mark.parametrize(
"filename", [
"clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912",
]
)
def test_rejected_markup(self, filename):
markup = self.__markup(filename)
with pytest.raises(ParserRejectedMarkup):
BeautifulSoup(markup, 'html.parser')
# This class of error has to do with very deeply nested documents
# which overflow the Python call stack when the tree is converted
# to a string. This is an issue with Beautiful Soup which was fixed
# as part of [bug=1471755].
@pytest.mark.parametrize(
"filename", [
"clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440",
"clusterfuzz-testcase-minimized-bs4_fuzzer-5167584867909632",
"clusterfuzz-testcase-minimized-bs4_fuzzer-6124268085182464",
"clusterfuzz-testcase-minimized-bs4_fuzzer-6450958476902400",
]
)
def test_deeply_nested_document(self, filename):
# Parsing the document and encoding it back to a string is
# sufficient to demonstrate that the overflow problem has
# been fixed.
markup = self.__markup(filename)
BeautifulSoup(markup, 'html.parser').encode()
# This class of error represents problems with html5lib's parser,
# not Beautiful Soup. I use
# https://github.com/html5lib/html5lib-python/issues/568 to notify
# the html5lib developers of these issues.
@pytest.mark.skip("html5lib problems")
@pytest.mark.parametrize(
"filename", [
# b"""ÿ<!DOCTyPEV PUBLIC'''Ð'"""
"clusterfuzz-testcase-minimized-bs4_fuzzer-4818336571064320",
# b')<a><math><TR><a><mI><a><p><a>'
"clusterfuzz-testcase-minimized-bs4_fuzzer-4999465949331456",
# b'-<math><sElect><mi><sElect><sElect>'
"clusterfuzz-testcase-minimized-bs4_fuzzer-5843991618256896",
# b'ñ<table><svg><html>'
"clusterfuzz-testcase-minimized-bs4_fuzzer-6241471367348224",
# <TABLE>, some ^@ characters, some <math> tags.
"clusterfuzz-testcase-minimized-bs4_fuzzer-6600557255327744",
# Nested table
"crash-0d306a50c8ed8bcd0785b67000fcd5dea1d33f08"
]
)
def test_html5lib_parse_errors(self, filename):
markup = self.__markup(filename)
print(BeautifulSoup(markup, 'html5lib').encode())
def __markup(self, filename):
if not filename.endswith(self.TESTCASE_SUFFIX):
filename += self.TESTCASE_SUFFIX
this_dir = os.path.split(__file__)[0]
path = os.path.join(this_dir, 'fuzz', filename)
return open(path, 'rb').read()


@@ -3,9 +3,11 @@ trees."""
from pdb import set_trace
import pickle
import pytest
import warnings
from bs4.builder import (
HTMLParserTreeBuilder,
ParserRejectedMarkup,
XMLParsedAsHTMLWarning,
)
from bs4.builder._htmlparser import BeautifulSoupHTMLParser
@@ -15,6 +17,28 @@ class TestHTMLParserTreeBuilder(SoupTest, HTMLTreeBuilderSmokeTest):
default_builder = HTMLParserTreeBuilder
def test_rejected_input(self):
# Python's html.parser will occasionally reject markup,
# especially when there is a problem with the initial DOCTYPE
# declaration. Different versions of Python sound the alarm in
# different ways, but Beautiful Soup consistently raises
# errors as ParserRejectedMarkup exceptions.
bad_markup = [
# https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=28873
# https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/519e5b4269a01185a0d5e76295251921da2f0700
# https://github.com/python/cpython/issues/81928
b'\n<![\xff\xfe\xfe\xcd\x00',
#https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/de32aa55785be29bbc72a1a8e06b00611fb3d9f8
# https://github.com/python/cpython/issues/78661
#
b'<![n\x00',
b"<![UNKNOWN[]]>",
]
for markup in bad_markup:
with pytest.raises(ParserRejectedMarkup):
soup = self.soup(markup)
def test_namespaced_system_doctype(self):
# html.parser can't handle namespaced doctypes, so skip this one.
pass


@@ -189,13 +189,15 @@ class TestLXMLXMLTreeBuilder(SoupTest, XMLTreeBuilderSmokeTest):
assert soup.find('prefix:tag3').name == 'tag3'
assert soup.subtag.find('prefix:tag3').name == 'tag3'
def test_pickle_removes_builder(self):
# The lxml TreeBuilder is not picklable, so it won't be
# preserved in a pickle/unpickle operation.
def test_pickle_restores_builder(self):
# The lxml TreeBuilder is not picklable, so when unpickling
# a document created with it, a new TreeBuilder of the
# appropriate class is created.
soup = self.soup("<a>some markup</a>")
assert isinstance(soup.builder, self.default_builder)
pickled = pickle.dumps(soup)
unpickled = pickle.loads(pickled)
assert "some markup" == unpickled.a.string
assert unpickled.builder is None
assert unpickled.builder != soup.builder
assert isinstance(unpickled.builder, self.default_builder)


@@ -2,20 +2,18 @@
import copy
import pickle
import pytest
import sys
from bs4 import BeautifulSoup
from bs4.element import (
Comment,
ResultSet,
SoupStrainer,
)
from . import (
SoupTest,
SOUP_SIEVE_PRESENT,
)
if SOUP_SIEVE_PRESENT:
from soupsieve import SelectorSyntaxError
class TestEncoding(SoupTest):
"""Test the ability to encode objects into strings."""
@@ -51,10 +49,21 @@ class TestEncoding(SoupTest):
assert "\N{SNOWMAN}".encode("utf8") == soup.b.encode_contents(
encoding="utf8"
)
def test_encode_deeply_nested_document(self):
# This test verifies that encoding a string doesn't involve
# any recursive function calls. If it did, this test would
# overflow the Python interpreter stack.
limit = sys.getrecursionlimit() + 1
markup = "<span>" * limit
soup = self.soup(markup)
encoded = soup.encode()
assert limit == encoded.count(b"<span>")
def test_deprecated_renderContents(self):
html = "<b>\N{SNOWMAN}</b>"
soup = self.soup(html)
soup.renderContents()
assert "\N{SNOWMAN}".encode("utf8") == soup.b.renderContents()
def test_repr(self):
@@ -159,7 +168,31 @@ class TestFormatters(SoupTest):
soup = self.soup("<div> foo <pre> \tbar\n \n </pre> baz <textarea> eee\nfff\t</textarea></div>")
# Everything outside the <pre> tag is reformatted, but everything
# inside is left alone.
assert '<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>' == soup.div.prettify()
assert '<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>\n' == soup.div.prettify()
def test_prettify_handles_nested_string_literal_tags(self):
# Most of this markup is inside a <pre> tag, so prettify()
# only does three things to it:
# 1. Add a newline and a space between the <div> and the <pre>
# 2. Add a newline after the </pre>
# 3. Add a newline at the end.
#
# The contents of the <pre> tag are left completely alone. In
# particular, we don't start adding whitespace again once we
# encounter the first </pre> tag, because we know it's not
# the one that put us into string literal mode.
markup = """<div><pre><code>some
<script><pre>code</pre></script> for you
</code></pre></div>"""
expect = """<div>
<pre><code>some
<script><pre>code</pre></script> for you
</code></pre>
</div>
"""
soup = self.soup(markup)
assert expect == soup.div.prettify()
def test_prettify_accepts_formatter_function(self):
soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')
@@ -216,429 +249,6 @@ class TestFormatters(SoupTest):
assert soup.contents[0].name == 'pre'
@pytest.mark.skipif(not SOUP_SIEVE_PRESENT, reason="Soup Sieve not installed")
class TestCSSSelectors(SoupTest):
"""Test basic CSS selector functionality.
This functionality is implemented in soupsieve, which has a much
more comprehensive test suite, so this is basically an extra check
that soupsieve works as expected.
"""
HTML = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>The title</title>
<link rel="stylesheet" href="blah.css" type="text/css" id="l1">
</head>
<body>
<custom-dashed-tag class="dashed" id="dash1">Hello there.</custom-dashed-tag>
<div id="main" class="fancy">
<div id="inner">
<h1 id="header1">An H1</h1>
<p>Some text</p>
<p class="onep" id="p1">Some more text</p>
<h2 id="header2">An H2</h2>
<p class="class1 class2 class3" id="pmulti">Another</p>
<a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
<h2 id="header3">Another H2</h2>
<a id="me" href="http://simonwillison.net/" rel="me">me</a>
<span class="s1">
<a href="#" id="s1a1">span1a1</a>
<a href="#" id="s1a2">span1a2 <span id="s1a2s1">test</span></a>
<span class="span2">
<a href="#" id="s2a1">span2a1</a>
</span>
<span class="span3"></span>
<custom-dashed-tag class="dashed" id="dash2"/>
<div data-tag="dashedvalue" id="data1"/>
</span>
</div>
<x id="xid">
<z id="zida"/>
<z id="zidab"/>
<z id="zidac"/>
</x>
<y id="yid">
<z id="zidb"/>
</y>
<p lang="en" id="lang-en">English</p>
<p lang="en-gb" id="lang-en-gb">English UK</p>
<p lang="en-us" id="lang-en-us">English US</p>
<p lang="fr" id="lang-fr">French</p>
</div>
<div id="footer">
</div>
"""
def setup_method(self):
self.soup = BeautifulSoup(self.HTML, 'html.parser')
def assert_selects(self, selector, expected_ids, **kwargs):
el_ids = [el['id'] for el in self.soup.select(selector, **kwargs)]
el_ids.sort()
expected_ids.sort()
assert expected_ids == el_ids, "Selector %s, expected [%s], got [%s]" % (
selector, ', '.join(expected_ids), ', '.join(el_ids)
)
assertSelect = assert_selects
def assert_select_multiple(self, *tests):
for selector, expected_ids in tests:
self.assert_selects(selector, expected_ids)
def test_one_tag_one(self):
els = self.soup.select('title')
assert len(els) == 1
assert els[0].name == 'title'
assert els[0].contents == ['The title']
def test_one_tag_many(self):
els = self.soup.select('div')
assert len(els) == 4
for div in els:
assert div.name == 'div'
el = self.soup.select_one('div')
assert 'main' == el['id']
def test_select_one_returns_none_if_no_match(self):
match = self.soup.select_one('nonexistenttag')
assert None == match
def test_tag_in_tag_one(self):
els = self.soup.select('div div')
self.assert_selects('div div', ['inner', 'data1'])
def test_tag_in_tag_many(self):
for selector in ('html div', 'html body div', 'body div'):
self.assert_selects(selector, ['data1', 'main', 'inner', 'footer'])
class TestPersistence(SoupTest):
"Testing features like pickle and deepcopy."
@@ -668,12 +278,24 @@ class TestPersistence(SoupTest):
loaded = pickle.loads(dumped)
assert loaded.__class__ == BeautifulSoup
assert loaded.decode() == self.tree.decode()
def test_deepcopy_identity(self):
# Making a deepcopy of a tree yields an identical tree.
copied = copy.deepcopy(self.tree)
assert copied.decode() == self.tree.decode()
def test_copy_deeply_nested_document(self):
# This test verifies that copy and deepcopy don't involve any
# recursive function calls. If they did, this test would
# overflow the Python interpreter stack.
limit = sys.getrecursionlimit() + 1
markup = "<span>" * limit
soup = self.soup(markup)
copied = copy.copy(soup)
copied = copy.deepcopy(soup)
def test_copy_preserves_encoding(self):
soup = BeautifulSoup(b'<p>&nbsp;</p>', 'html.parser')
encoding = soup.original_encoding


@@ -24,6 +24,7 @@ from bs4.builder import (
from bs4.element import (
Comment,
SoupStrainer,
PYTHON_SPECIFIC_ENCODINGS,
Tag,
NavigableString,
)
@@ -210,6 +211,47 @@ class TestConstructor(SoupTest):
assert [] == soup.string_container_stack
class TestOutput(SoupTest):
@pytest.mark.parametrize(
"eventual_encoding,actual_encoding", [
("utf-8", "utf-8"),
("utf-16", "utf-16"),
]
)
def test_decode_xml_declaration(self, eventual_encoding, actual_encoding):
# Most of the time, calling decode() on an XML document will
# give you a document declaration that mentions the encoding
# you intend to use when encoding the document as a
# bytestring.
soup = self.soup("<tag></tag>")
soup.is_xml = True
assert (f'<?xml version="1.0" encoding="{actual_encoding}"?>\n<tag></tag>'
== soup.decode(eventual_encoding=eventual_encoding))
@pytest.mark.parametrize(
"eventual_encoding", [x for x in PYTHON_SPECIFIC_ENCODINGS] + [None]
)
def test_decode_xml_declaration_with_missing_or_python_internal_eventual_encoding(self, eventual_encoding):
# But if you pass a Python internal encoding into decode(), or
# omit the eventual_encoding altogether, the document
# declaration won't mention any particular encoding.
soup = BeautifulSoup("<tag></tag>", "html.parser")
soup.is_xml = True
assert (f'<?xml version="1.0"?>\n<tag></tag>'
== soup.decode(eventual_encoding=eventual_encoding))
def test(self):
# BeautifulSoup subclasses Tag and extends the decode() method.
# Make sure the other Tag methods which call decode() call
# it correctly.
soup = self.soup("<tag></tag>")
assert b"<tag></tag>" == soup.encode(encoding="utf-8")
assert b"<tag></tag>" == soup.encode_contents(encoding="utf-8")
assert "<tag></tag>" == soup.decode_contents()
assert "<tag>\n</tag>\n" == soup.prettify()
class TestWarnings(SoupTest):
# Note that some of the tests in this class create BeautifulSoup
# objects directly rather than using self.soup(). That's


@@ -6,7 +6,7 @@ __title__ = "packaging"
__summary__ = "Core utilities for Python packages"
__uri__ = "https://github.com/pypa/packaging"
__version__ = "23.0"
__version__ = "23.1"
__author__ = "Donald Stufft and individual contributors"
__email__ = "donald@stufft.io"


@@ -14,6 +14,8 @@ EF_ARM_ABI_VER5 = 0x05000000
EF_ARM_ABI_FLOAT_HARD = 0x00000400
# `os.PathLike` not a generic type until Python 3.9, so sticking with `str`
# as the type for `path` until then.
@contextlib.contextmanager
def _parse_elf(path: str) -> Generator[Optional[ELFFile], None, None]:
try:


@@ -163,7 +163,11 @@ def _parse_extras(tokenizer: Tokenizer) -> List[str]:
if not tokenizer.check("LEFT_BRACKET", peek=True):
return []
with tokenizer.enclosing_tokens("LEFT_BRACKET", "RIGHT_BRACKET"):
with tokenizer.enclosing_tokens(
"LEFT_BRACKET",
"RIGHT_BRACKET",
around="extras",
):
tokenizer.consume("WS")
extras = _parse_extras_list(tokenizer)
tokenizer.consume("WS")
@@ -203,7 +207,11 @@ def _parse_specifier(tokenizer: Tokenizer) -> str:
specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
| WS? version_many WS?
"""
with tokenizer.enclosing_tokens("LEFT_PARENTHESIS", "RIGHT_PARENTHESIS"):
with tokenizer.enclosing_tokens(
"LEFT_PARENTHESIS",
"RIGHT_PARENTHESIS",
around="version specifier",
):
tokenizer.consume("WS")
parsed_specifiers = _parse_version_many(tokenizer)
tokenizer.consume("WS")
@@ -217,7 +225,20 @@ def _parse_version_many(tokenizer: Tokenizer) -> str:
"""
parsed_specifiers = ""
while tokenizer.check("SPECIFIER"):
span_start = tokenizer.position
parsed_specifiers += tokenizer.read().text
if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
tokenizer.raise_syntax_error(
".* suffix can only be used with `==` or `!=` operators",
span_start=span_start,
span_end=tokenizer.position + 1,
)
if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
tokenizer.raise_syntax_error(
"Local version label can only be used with `==` or `!=` operators",
span_start=span_start,
span_end=tokenizer.position,
)
tokenizer.consume("WS")
if not tokenizer.check("COMMA"):
break
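The two new peek checks reject a ".*" prefix suffix or a local version label when they trail a specifier whose operator cannot support them. A minimal sketch of the resulting errors (assuming packaging 23.1 behaviour, where Requirement re-raises ParserSyntaxError as InvalidRequirement, as shown in the requirements.py hunk below):

from packaging.requirements import InvalidRequirement, Requirement

for bad in ("pkg>=1.0.*", "pkg>=1.0+local"):
    try:
        Requirement(bad)  # both trailing forms are only legal with == or !=
    except InvalidRequirement as exc:
        print(exc)  # message names the operator restriction that was violated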
@@ -254,7 +275,11 @@ def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
tokenizer.consume("WS")
if tokenizer.check("LEFT_PARENTHESIS", peek=True):
with tokenizer.enclosing_tokens("LEFT_PARENTHESIS", "RIGHT_PARENTHESIS"):
with tokenizer.enclosing_tokens(
"LEFT_PARENTHESIS",
"RIGHT_PARENTHESIS",
around="marker expression",
):
tokenizer.consume("WS")
marker: MarkerAtom = _parse_marker(tokenizer)
tokenizer.consume("WS")


@@ -78,6 +78,8 @@ DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = {
"AT": r"\@",
"URL": r"[^ \t]+",
"IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
"VERSION_PREFIX_TRAIL": r"\.\*",
"VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
"WS": r"[ \t]+",
"END": r"$",
}
@@ -167,21 +169,23 @@ class Tokenizer:
)
@contextlib.contextmanager
def enclosing_tokens(self, open_token: str, close_token: str) -> Iterator[bool]:
def enclosing_tokens(
self, open_token: str, close_token: str, *, around: str
) -> Iterator[None]:
if self.check(open_token):
open_position = self.position
self.read()
else:
open_position = None
yield open_position is not None
yield
if open_position is None:
return
if not self.check(close_token):
self.raise_syntax_error(
f"Expected closing {close_token}",
f"Expected matching {close_token} for {open_token}, after {around}",
span_start=open_position,
)
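Threading the new `around` argument through gives the unmatched-bracket error some context about what was being parsed. A hedged sketch of the improved message (token names per this diff; the exact caret formatting may vary):

from packaging.requirements import InvalidRequirement, Requirement

try:
    Requirement("pkg[extra")  # the opening bracket is never closed
except InvalidRequirement as exc:
    # message now reads along the lines of:
    #   Expected matching RIGHT_BRACKET for LEFT_BRACKET, after extras
    print(exc)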

View file

@@ -8,7 +8,14 @@ import platform
import sys
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from ._parser import MarkerAtom, MarkerList, Op, Value, Variable, parse_marker
from ._parser import (
MarkerAtom,
MarkerList,
Op,
Value,
Variable,
parse_marker as _parse_marker,
)
from ._tokenizer import ParserSyntaxError
from .specifiers import InvalidSpecifier, Specifier
from .utils import canonicalize_name
@@ -189,7 +196,7 @@ class Marker:
# packaging.requirements.Requirement. If any additional logic is
# added here, make sure to mirror/adapt Requirement.
try:
self._markers = _normalize_extra_values(parse_marker(marker))
self._markers = _normalize_extra_values(_parse_marker(marker))
# The attribute `_markers` can be described in terms of a recursive type:
# MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]]
#

lib/packaging/metadata.py (new file, 408 lines)

@@ -0,0 +1,408 @@
import email.feedparser
import email.header
import email.message
import email.parser
import email.policy
import sys
import typing
from typing import Dict, List, Optional, Tuple, Union, cast
if sys.version_info >= (3, 8): # pragma: no cover
from typing import TypedDict
else: # pragma: no cover
if typing.TYPE_CHECKING:
from typing_extensions import TypedDict
else:
try:
from typing_extensions import TypedDict
except ImportError:
class TypedDict:
def __init_subclass__(*_args, **_kwargs):
pass
# The RawMetadata class attempts to make as few assumptions about the underlying
# serialization formats as possible. The idea is that as long as a serialization
# format offers some very basic primitives in *some* way then we can support
# serializing to and from that format.
class RawMetadata(TypedDict, total=False):
"""A dictionary of raw core metadata.
Each field in core metadata maps to a key of this dictionary (when data is
provided). The key is lower-case and underscores are used instead of dashes
compared to the equivalent core metadata field. Any core metadata field that
can be specified multiple times or can hold multiple values in a single
field has a key with a plural name.
Core metadata fields that can be specified multiple times are stored as a
list or dict depending on which is appropriate for the field. Any fields
which hold multiple values in a single field are stored as a list.
"""
# Metadata 1.0 - PEP 241
metadata_version: str
name: str
version: str
platforms: List[str]
summary: str
description: str
keywords: List[str]
home_page: str
author: str
author_email: str
license: str
# Metadata 1.1 - PEP 314
supported_platforms: List[str]
download_url: str
classifiers: List[str]
requires: List[str]
provides: List[str]
obsoletes: List[str]
# Metadata 1.2 - PEP 345
maintainer: str
maintainer_email: str
requires_dist: List[str]
provides_dist: List[str]
obsoletes_dist: List[str]
requires_python: str
requires_external: List[str]
project_urls: Dict[str, str]
# Metadata 2.0
# PEP 426 attempted to completely revamp the metadata format
# but got stuck without ever being able to build consensus on
# it and ultimately ended up withdrawn.
#
# However, a number of tools had started emitting METADATA with
# `2.0` Metadata-Version, so for historical reasons, this version
# was skipped.
# Metadata 2.1 - PEP 566
description_content_type: str
provides_extra: List[str]
# Metadata 2.2 - PEP 643
dynamic: List[str]
# Metadata 2.3 - PEP 685
# No new fields were added in PEP 685, just some edge cases were
# tightened up to provide better interoperability.
_STRING_FIELDS = {
"author",
"author_email",
"description",
"description_content_type",
"download_url",
"home_page",
"license",
"maintainer",
"maintainer_email",
"metadata_version",
"name",
"requires_python",
"summary",
"version",
}
_LIST_STRING_FIELDS = {
"classifiers",
"dynamic",
"obsoletes",
"obsoletes_dist",
"platforms",
"provides",
"provides_dist",
"provides_extra",
"requires",
"requires_dist",
"requires_external",
"supported_platforms",
}
def _parse_keywords(data: str) -> List[str]:
"""Split a string of comma-separate keyboards into a list of keywords."""
return [k.strip() for k in data.split(",")]
def _parse_project_urls(data: List[str]) -> Dict[str, str]:
"""Parse a list of label/URL string pairings separated by a comma."""
urls = {}
for pair in data:
# Our logic is slightly tricky here as we want to try and do
# *something* reasonable with malformed data.
#
# The main thing that we have to worry about is data that does
# not have a ',' at all to split the label from the value. There
# isn't a singular right answer here, and we will fail validation
# later on (if the caller is validating) so it doesn't *really*
# matter, but since the missing value has to be an empty str
# and our return value is dict[str, str], if we let the key
# be the missing value, then they'd have multiple '' values that
# overwrite each other in an accumulating dict.
#
# The other potential issue is that it's possible to have the
# same label multiple times in the metadata, with no solid "right"
# answer with what to do in that case. As such, we'll do the only
# thing we can, which is treat the field as unparseable and add it
# to our list of unparsed fields.
parts = [p.strip() for p in pair.split(",", 1)]
parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items
# TODO: The spec doesn't say anything about if the keys should be
# considered case sensitive or not... logically they should
# be case-preserving and case-insensitive, but doing that
# would open up more cases where we might have duplicate
# entries.
label, url = parts
if label in urls:
# The label already exists in our set of urls, so this field
# is unparseable, and we can just add the whole thing to our
# unparseable data and stop processing it.
raise KeyError("duplicate labels in project urls")
urls[label] = url
return urls
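An illustrative sketch of the behaviour described in the comments above (calling the private helper directly, purely for demonstration):

assert _parse_project_urls(["Homepage, https://example.com/"]) == {
    "Homepage": "https://example.com/"
}
# A pair with no comma keeps the whole string as the label, with an empty URL:
assert _parse_project_urls(["Homepage"]) == {"Homepage": ""}
# Duplicate labels raise KeyError; parse_email() catches it and shunts the
# whole Project-URL field into its `unparsed` dict:
try:
    _parse_project_urls(["Docs, https://a.example/", "Docs, https://b.example/"])
except KeyError:
    pass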
def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str:
"""Get the body of the message."""
# If our source is a str, then our caller has managed encodings for us,
# and we don't need to deal with it.
if isinstance(source, str):
payload: str = msg.get_payload()
return payload
# If our source is a bytes, then we're managing the encoding and we need
# to deal with it.
else:
bpayload: bytes = msg.get_payload(decode=True)
try:
return bpayload.decode("utf8", "strict")
except UnicodeDecodeError:
raise ValueError("payload in an invalid encoding")
# The various parse_FORMAT functions here are intended to be as lenient as
# possible in their parsing, while still returning a correctly typed
# RawMetadata.
#
# To aid in this, we also generally want to do as little touching of the
# data as possible, except where there are possibly some historic holdovers
# that make valid data awkward to work with.
#
# While this is a lower level, intermediate format than our ``Metadata``
# class, some light touch ups can make a massive difference in usability.
# Map METADATA fields to RawMetadata.
_EMAIL_TO_RAW_MAPPING = {
"author": "author",
"author-email": "author_email",
"classifier": "classifiers",
"description": "description",
"description-content-type": "description_content_type",
"download-url": "download_url",
"dynamic": "dynamic",
"home-page": "home_page",
"keywords": "keywords",
"license": "license",
"maintainer": "maintainer",
"maintainer-email": "maintainer_email",
"metadata-version": "metadata_version",
"name": "name",
"obsoletes": "obsoletes",
"obsoletes-dist": "obsoletes_dist",
"platform": "platforms",
"project-url": "project_urls",
"provides": "provides",
"provides-dist": "provides_dist",
"provides-extra": "provides_extra",
"requires": "requires",
"requires-dist": "requires_dist",
"requires-external": "requires_external",
"requires-python": "requires_python",
"summary": "summary",
"supported-platform": "supported_platforms",
"version": "version",
}
def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]:
"""Parse a distribution's metadata.
This function returns a two-item tuple of dicts. The first dict is of
recognized fields from the core metadata specification. Fields that can be
parsed and translated into Python's built-in types are converted
appropriately. All other fields are left as-is. Fields that are allowed to
appear multiple times are stored as lists.
The second dict contains all other fields from the metadata. This includes
any unrecognized fields. It also includes any fields which are expected to
be parsed into a built-in type but were not formatted appropriately. Finally,
any fields that are expected to appear only once but are repeated are
included in this dict.
"""
raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {}
unparsed: Dict[str, List[str]] = {}
if isinstance(data, str):
parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
else:
parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)
# We have to wrap parsed.keys() in a set, because in the case of multiple
# values for a key (a list), the key will appear multiple times in the
# list of keys, but we're avoiding that by using get_all().
for name in frozenset(parsed.keys()):
# Header names in RFC are case insensitive, so we'll normalize to all
# lower case to make comparisons easier.
name = name.lower()
# We use get_all() here, even for fields that aren't multiple use,
# because otherwise someone could have e.g. two Name fields, and we
# would just silently ignore it rather than doing something about it.
headers = parsed.get_all(name)
# The way the email module works when parsing bytes is that it
# unconditionally decodes the bytes as ascii using the surrogateescape
# handler. When you pull that data back out (such as with get_all() ),
# it looks to see if the str has any surrogate escapes, and if it does
# it wraps it in a Header object instead of returning the string.
#
# As such, we'll look for those Header objects, and fix up the encoding.
value = []
# Flag if we have run into any issues processing the headers, thus
# signalling that the data belongs in 'unparsed'.
valid_encoding = True
for h in headers:
# It's unclear if this can return more types than just a Header or
# a str, so we'll just assert here to make sure.
assert isinstance(h, (email.header.Header, str))
# If it's a header object, we need to do our little dance to get
# the real data out of it. In cases where there is invalid data
# we're going to end up with mojibake, but there's no obvious, good
# way around that without reimplementing parts of the Header object
# ourselves.
#
# That should be fine since, if mojibake happens, this key is
# going into the unparsed dict anyways.
if isinstance(h, email.header.Header):
# The Header object stores its data as chunks, and each chunk
# can be independently encoded, so we'll need to check each
# of them.
chunks: List[Tuple[bytes, Optional[str]]] = []
for bin, encoding in email.header.decode_header(h):
try:
bin.decode("utf8", "strict")
except UnicodeDecodeError:
# Enable mojibake.
encoding = "latin1"
valid_encoding = False
else:
encoding = "utf8"
chunks.append((bin, encoding))
# Turn our chunks back into a Header object, then let that
# Header object do the right thing to turn them into a
# string for us.
value.append(str(email.header.make_header(chunks)))
# This is already a string, so just add it.
else:
value.append(h)
# We've processed all of our values to get them into a list of str,
# but we may have mojibake data, in which case this is an unparsed
# field.
if not valid_encoding:
unparsed[name] = value
continue
raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
if raw_name is None:
# This is a bit of a weird situation: we've encountered a key that
# we don't recognize, so we don't know whether it's meant
# to be a list or not.
#
# Since we can't really tell one way or another, we'll just leave it
# as a list, even though it may be a single item list, because that's
# what makes the most sense for email headers.
unparsed[name] = value
continue
# If this is one of our string fields, then we'll check to see if our
# value is a list of a single item. If it is then we'll assume that
# it was emitted as a single string, and unwrap the str from inside
# the list.
#
# If it's any other kind of data, then we haven't the faintest clue
# what we should parse it as, and we have to just add it to our list
# of unparsed stuff.
if raw_name in _STRING_FIELDS and len(value) == 1:
raw[raw_name] = value[0]
# If this is one of our list of string fields, then we can just assign
# the value, since email *only* has strings, and our get_all() call
# above ensures that this is a list.
elif raw_name in _LIST_STRING_FIELDS:
raw[raw_name] = value
# Special Case: Keywords
# The keywords field is implemented in the metadata spec as a str,
# but it conceptually is a list of strings, and is serialized using
# ", ".join(keywords), so we'll do some light data massaging to turn
# this into what it logically is.
elif raw_name == "keywords" and len(value) == 1:
raw[raw_name] = _parse_keywords(value[0])
# Special Case: Project-URL
# The project urls is implemented in the metadata spec as a list of
# specially-formatted strings that represent a key and a value, which
# is fundamentally a mapping, however the email format doesn't support
# mappings in a sane way, so it was crammed into a list of strings
# instead.
#
# We will do a little light data massaging to turn this into a map as
# it logically should be.
elif raw_name == "project_urls":
try:
raw[raw_name] = _parse_project_urls(value)
except KeyError:
unparsed[name] = value
# Nothing that we've done has managed to parse this, so we'll just
# throw it in our unparsed data and move on.
else:
unparsed[name] = value
# We need to support getting the Description from the message payload in
# addition to getting it from the headers. This does mean, though, there
# is the possibility of it being set both ways, in which case we put both
# in 'unparsed' since we don't know which is right.
try:
payload = _get_payload(parsed, data)
except ValueError:
unparsed.setdefault("description", []).append(
parsed.get_payload(decode=isinstance(data, bytes))
)
else:
if payload:
# Check to see if we've already got a description; if so, then both
# it and this body move to unparsed.
if "description" in raw:
description_header = cast(str, raw.pop("description"))
unparsed.setdefault("description", []).extend(
[description_header, payload]
)
elif "description" in unparsed:
unparsed["description"].append(payload)
else:
raw["description"] = payload
# We need to cast our `raw` to a RawMetadata, because a TypedDict only supports
# literal key names, whereas we compute our key names on purpose; the way this
# function is implemented guarantees that `raw` only ever contains valid key
# names.
return cast(RawMetadata, raw), unparsed
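A hedged usage sketch of the two-dict contract described in the docstring above (field names per the mappings in this file):

from packaging.metadata import parse_email

raw, unparsed = parse_email(
    "Metadata-Version: 2.1\n"
    "Name: sample\n"
    "Keywords: one, two\n"
    "\n"
    "Body text becomes the description.\n"
)
assert raw["name"] == "sample"
assert raw["keywords"] == ["one", "two"]   # special-cased into a list
assert raw["description"].strip() == "Body text becomes the description."
assert unparsed == {}                      # nothing malformed in this input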


@@ -5,7 +5,7 @@
import urllib.parse
from typing import Any, List, Optional, Set
from ._parser import parse_requirement
from ._parser import parse_requirement as _parse_requirement
from ._tokenizer import ParserSyntaxError
from .markers import Marker, _normalize_extra_values
from .specifiers import SpecifierSet
@@ -32,7 +32,7 @@ class Requirement:
def __init__(self, requirement_string: str) -> None:
try:
parsed = parse_requirement(requirement_string)
parsed = _parse_requirement(requirement_string)
except ParserSyntaxError as e:
raise InvalidRequirement(str(e)) from e


@@ -252,7 +252,8 @@ class Specifier(BaseSpecifier):
# Store whether or not this Specifier should accept prereleases
self._prereleases = prereleases
@property
# https://github.com/python/mypy/pull/13475#pullrequestreview-1079784515
@property # type: ignore[override]
def prereleases(self) -> bool:
# If there is an explicit prereleases set for this, then we'll just
# blindly use that.
@@ -398,7 +399,9 @@ class Specifier(BaseSpecifier):
# We need special logic to handle prefix matching
if spec.endswith(".*"):
# In the case of prefix matching we want to ignore local segment.
normalized_prospective = canonicalize_version(prospective.public)
normalized_prospective = canonicalize_version(
prospective.public, strip_trailing_zero=False
)
# Get the normalized version string ignoring the trailing .*
normalized_spec = canonicalize_version(spec[:-2], strip_trailing_zero=False)
# Split the spec out by dots, and pretend that there is an implicit
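Keeping the trailing zeros on the prospective version keeps the two sides of the prefix comparison aligned: stripping them from the candidate could desynchronize it from the spec prefix, which is always normalized with strip_trailing_zero=False. A small sketch of the behaviour this change is aiming at (my illustration, not a test from the diff):

from packaging.specifiers import Specifier

spec = Specifier("==2.0.*")
assert spec.contains("2.0.0")        # candidate keeps its trailing zero
assert spec.contains("2.0.0.post1")  # extra segments still prefix-match
assert not spec.contains("2.1.0")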


@@ -111,7 +111,7 @@ def parse_tag(tag: str) -> FrozenSet[Tag]:
def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]:
value = sysconfig.get_config_var(name)
value: Union[int, str, None] = sysconfig.get_config_var(name)
if value is None and warn:
logger.debug(
"Config variable '%s' is unset, Python ABI tag may be incorrect", name
@@ -120,7 +120,7 @@ def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]:
def _normalize_string(string: str) -> str:
return string.replace(".", "_").replace("-", "_")
return string.replace(".", "_").replace("-", "_").replace(" ", "_")
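The extra space replacement mirrors the existing dot and dash handling, so a platform string containing spaces still yields a well-formed tag component. A tiny example (hypothetical input; real values come from sysconfig platform strings):

assert _normalize_string("macosx 10.9 x86_64") == "macosx_10_9_x86_64"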
def _abi3_applies(python_version: PythonVersion) -> bool:


@@ -10,7 +10,7 @@
import collections
import itertools
import re
from typing import Callable, Optional, SupportsInt, Tuple, Union
from typing import Any, Callable, Optional, SupportsInt, Tuple, Union
from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType
@@ -63,7 +63,7 @@ class InvalidVersion(ValueError):
class _BaseVersion:
_key: CmpKey
_key: Tuple[Any, ...]
def __hash__(self) -> int:
return hash(self._key)
@@ -179,6 +179,7 @@ class Version(_BaseVersion):
"""
_regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
_key: CmpKey
def __init__(self, version: str) -> None:
"""Initialize a Version object.


@@ -22,8 +22,8 @@ from pytz.tzfile import build_tzinfo
# The IANA (nee Olson) database is updated several times a year.
OLSON_VERSION = '2022g'
VERSION = '2022.7.1' # pip compatible version number.
OLSON_VERSION = '2023c'
VERSION = '2023.3' # pip compatible version number.
__version__ = VERSION
OLSEN_VERSION = OLSON_VERSION # Old releases had this misspelling
@@ -1311,7 +1311,6 @@ common_timezones = \
'America/Whitehorse',
'America/Winnipeg',
'America/Yakutat',
'America/Yellowknife',
'Antarctica/Casey',
'Antarctica/Davis',
'Antarctica/DumontDUrville',

Binary files not shown (10 files changed).


@@ -238,7 +238,7 @@ SY Syria
SZ Eswatini (Swaziland)
TC Turks & Caicos Is
TD Chad
TF French Southern Territories
TF French S. Terr.
TG Togo
TH Thailand
TJ Tajikistan


@@ -72,11 +72,11 @@ Leap 2016 Dec 31 23:59:60 + S
# Any additional leap seconds will come after this.
# This Expires line is commented out for now,
# so that pre-2020a zic implementations do not reject this file.
#Expires 2023 Jun 28 00:00:00
#Expires 2023 Dec 28 00:00:00
# POSIX timestamps for the data in this file:
#updated 1467936000 (2016-07-08 00:00:00 UTC)
#expires 1687910400 (2023-06-28 00:00:00 UTC)
#expires 1703721600 (2023-12-28 00:00:00 UTC)
# Updated through IERS Bulletin C64
# File expires on: 28 June 2023
# Updated through IERS Bulletin C65
# File expires on: 28 December 2023


@@ -75,6 +75,8 @@ R K 2014 o - May 15 24 1 S
R K 2014 o - Jun 26 24 0 -
R K 2014 o - Jul 31 24 1 S
R K 2014 o - S lastTh 24 0 -
R K 2023 ma - Ap lastF 0 1 S
R K 2023 ma - O lastTh 24 0 -
Z Africa/Cairo 2:5:9 - LMT 1900 O
2 K EE%sT
Z Africa/Bissau -1:2:20 - LMT 1912 Ja 1 1u
@@ -172,7 +174,7 @@ R M 2021 o - May 16 2 0 -
R M 2022 o - Mar 27 3 -1 -
R M 2022 o - May 8 2 0 -
R M 2023 o - Mar 19 3 -1 -
R M 2023 o - Ap 30 2 0 -
R M 2023 o - Ap 23 2 0 -
R M 2024 o - Mar 10 3 -1 -
R M 2024 o - Ap 14 2 0 -
R M 2025 o - F 23 3 -1 -
@@ -188,7 +190,7 @@ R M 2029 o - F 18 2 0 -
R M 2029 o - D 30 3 -1 -
R M 2030 o - F 10 2 0 -
R M 2030 o - D 22 3 -1 -
R M 2031 o - F 2 2 0 -
R M 2031 o - Ja 26 2 0 -
R M 2031 o - D 14 3 -1 -
R M 2032 o - Ja 18 2 0 -
R M 2032 o - N 28 3 -1 -
@@ -204,7 +206,7 @@ R M 2036 o - N 23 2 0 -
R M 2037 o - O 4 3 -1 -
R M 2037 o - N 15 2 0 -
R M 2038 o - S 26 3 -1 -
R M 2038 o - N 7 2 0 -
R M 2038 o - O 31 2 0 -
R M 2039 o - S 18 3 -1 -
R M 2039 o - O 23 2 0 -
R M 2040 o - S 2 3 -1 -
@@ -220,7 +222,7 @@ R M 2044 o - Au 28 2 0 -
R M 2045 o - Jul 9 3 -1 -
R M 2045 o - Au 20 2 0 -
R M 2046 o - Jul 1 3 -1 -
R M 2046 o - Au 12 2 0 -
R M 2046 o - Au 5 2 0 -
R M 2047 o - Jun 23 3 -1 -
R M 2047 o - Jul 28 2 0 -
R M 2048 o - Jun 7 3 -1 -
@@ -236,7 +238,7 @@ R M 2052 o - Jun 2 2 0 -
R M 2053 o - Ap 13 3 -1 -
R M 2053 o - May 25 2 0 -
R M 2054 o - Ap 5 3 -1 -
R M 2054 o - May 17 2 0 -
R M 2054 o - May 10 2 0 -
R M 2055 o - Mar 28 3 -1 -
R M 2055 o - May 2 2 0 -
R M 2056 o - Mar 12 3 -1 -
@@ -252,7 +254,7 @@ R M 2060 o - Mar 7 2 0 -
R M 2061 o - Ja 16 3 -1 -
R M 2061 o - F 27 2 0 -
R M 2062 o - Ja 8 3 -1 -
R M 2062 o - F 19 2 0 -
R M 2062 o - F 12 2 0 -
R M 2062 o - D 31 3 -1 -
R M 2063 o - F 4 2 0 -
R M 2063 o - D 16 3 -1 -
@@ -268,7 +270,7 @@ R M 2067 o - D 11 2 0 -
R M 2068 o - O 21 3 -1 -
R M 2068 o - D 2 2 0 -
R M 2069 o - O 13 3 -1 -
R M 2069 o - N 24 2 0 -
R M 2069 o - N 17 2 0 -
R M 2070 o - O 5 3 -1 -
R M 2070 o - N 9 2 0 -
R M 2071 o - S 20 3 -1 -
@@ -284,7 +286,7 @@ R M 2075 o - S 15 2 0 -
R M 2076 o - Jul 26 3 -1 -
R M 2076 o - S 6 2 0 -
R M 2077 o - Jul 18 3 -1 -
R M 2077 o - Au 29 2 0 -
R M 2077 o - Au 22 2 0 -
R M 2078 o - Jul 10 3 -1 -
R M 2078 o - Au 14 2 0 -
R M 2079 o - Jun 25 3 -1 -
@@ -294,13 +296,13 @@ R M 2080 o - Jul 21 2 0 -
R M 2081 o - Jun 1 3 -1 -
R M 2081 o - Jul 13 2 0 -
R M 2082 o - May 24 3 -1 -
R M 2082 o - Jul 5 2 0 -
R M 2082 o - Jun 28 2 0 -
R M 2083 o - May 16 3 -1 -
R M 2083 o - Jun 20 2 0 -
R M 2084 o - Ap 30 3 -1 -
R M 2084 o - Jun 11 2 0 -
R M 2085 o - Ap 22 3 -1 -
R M 2085 o - Jun 3 2 0 -
R M 2085 o - May 27 2 0 -
R M 2086 o - Ap 14 3 -1 -
R M 2086 o - May 19 2 0 -
R M 2087 o - Mar 30 3 -1 -
@ -997,8 +999,86 @@ R P 2020 2021 - Mar Sa<=30 0 1 S
R P 2020 o - O 24 1 0 -
R P 2021 o - O 29 1 0 -
R P 2022 o - Mar 27 0 1 S
R P 2022 ma - O Sa<=30 2 0 -
R P 2023 ma - Mar Sa<=30 2 1 S
R P 2022 2035 - O Sa<=30 2 0 -
R P 2023 o - Ap 29 2 1 S
R P 2024 o - Ap 13 2 1 S
R P 2025 o - Ap 5 2 1 S
R P 2026 2054 - Mar Sa<=30 2 1 S
R P 2036 o - O 18 2 0 -
R P 2037 o - O 10 2 0 -
R P 2038 o - S 25 2 0 -
R P 2039 o - S 17 2 0 -
R P 2039 o - O 22 2 1 S
R P 2039 2067 - O Sa<=30 2 0 -
R P 2040 o - S 1 2 0 -
R P 2040 o - O 13 2 1 S
R P 2041 o - Au 24 2 0 -
R P 2041 o - S 28 2 1 S
R P 2042 o - Au 16 2 0 -
R P 2042 o - S 20 2 1 S
R P 2043 o - Au 1 2 0 -
R P 2043 o - S 12 2 1 S
R P 2044 o - Jul 23 2 0 -
R P 2044 o - Au 27 2 1 S
R P 2045 o - Jul 15 2 0 -
R P 2045 o - Au 19 2 1 S
R P 2046 o - Jun 30 2 0 -
R P 2046 o - Au 11 2 1 S
R P 2047 o - Jun 22 2 0 -
R P 2047 o - Jul 27 2 1 S
R P 2048 o - Jun 6 2 0 -
R P 2048 o - Jul 18 2 1 S
R P 2049 o - May 29 2 0 -
R P 2049 o - Jul 3 2 1 S
R P 2050 o - May 21 2 0 -
R P 2050 o - Jun 25 2 1 S
R P 2051 o - May 6 2 0 -
R P 2051 o - Jun 17 2 1 S
R P 2052 o - Ap 27 2 0 -
R P 2052 o - Jun 1 2 1 S
R P 2053 o - Ap 12 2 0 -
R P 2053 o - May 24 2 1 S
R P 2054 o - Ap 4 2 0 -
R P 2054 o - May 16 2 1 S
R P 2055 o - May 1 2 1 S
R P 2056 o - Ap 22 2 1 S
R P 2057 o - Ap 7 2 1 S
R P 2058 ma - Mar Sa<=30 2 1 S
R P 2068 o - O 20 2 0 -
R P 2069 o - O 12 2 0 -
R P 2070 o - O 4 2 0 -
R P 2071 o - S 19 2 0 -
R P 2072 o - S 10 2 0 -
R P 2072 o - O 15 2 1 S
R P 2073 o - S 2 2 0 -
R P 2073 o - O 7 2 1 S
R P 2074 o - Au 18 2 0 -
R P 2074 o - S 29 2 1 S
R P 2075 o - Au 10 2 0 -
R P 2075 o - S 14 2 1 S
R P 2075 ma - O Sa<=30 2 0 -
R P 2076 o - Jul 25 2 0 -
R P 2076 o - S 5 2 1 S
R P 2077 o - Jul 17 2 0 -
R P 2077 o - Au 28 2 1 S
R P 2078 o - Jul 9 2 0 -
R P 2078 o - Au 13 2 1 S
R P 2079 o - Jun 24 2 0 -
R P 2079 o - Au 5 2 1 S
R P 2080 o - Jun 15 2 0 -
R P 2080 o - Jul 20 2 1 S
R P 2081 o - Jun 7 2 0 -
R P 2081 o - Jul 12 2 1 S
R P 2082 o - May 23 2 0 -
R P 2082 o - Jul 4 2 1 S
R P 2083 o - May 15 2 0 -
R P 2083 o - Jun 19 2 1 S
R P 2084 o - Ap 29 2 0 -
R P 2084 o - Jun 10 2 1 S
R P 2085 o - Ap 21 2 0 -
R P 2085 o - Jun 2 2 1 S
R P 2086 o - Ap 13 2 0 -
R P 2086 o - May 18 2 1 S
Z Asia/Gaza 2:17:52 - LMT 1900 O
2 Z EET/EEST 1948 May 15
2 K EE%sT 1967 Jun 5
@ -1754,8 +1834,8 @@ Z America/Scoresbysund -1:27:52 - LMT 1916 Jul 28
-1 E -01/+00
Z America/Nuuk -3:26:56 - LMT 1916 Jul 28
-3 - -03 1980 Ap 6 2
-3 E -03/-02 2023 Mar 25 22
-2 - -02
-3 E -03/-02 2023 O 29 1u
-2 E -02/-01
Z America/Thule -4:35:8 - LMT 1916 Jul 28
-4 Th A%sT
Z Europe/Tallinn 1:39 - LMT 1880
@ -2175,13 +2255,13 @@ Z Europe/Volgograd 2:57:40 - LMT 1920 Ja 3
3 - +03 1930 Jun 21
4 - +04 1961 N 11
4 R +04/+05 1988 Mar 27 2s
3 R +03/+04 1991 Mar 31 2s
3 R MSK/MSD 1991 Mar 31 2s
4 - +04 1992 Mar 29 2s
3 R +03/+04 2011 Mar 27 2s
4 - +04 2014 O 26 2s
3 - +03 2018 O 28 2s
3 R MSK/MSD 2011 Mar 27 2s
4 - MSK 2014 O 26 2s
3 - MSK 2018 O 28 2s
4 - +04 2020 D 27 2s
3 - +03
3 - MSK
Z Europe/Saratov 3:4:18 - LMT 1919 Jul 1 0u
3 - +03 1930 Jun 21
4 R +04/+05 1988 Mar 27 2s
@ -2194,11 +2274,11 @@ Z Europe/Saratov 3:4:18 - LMT 1919 Jul 1 0u
Z Europe/Kirov 3:18:48 - LMT 1919 Jul 1 0u
3 - +03 1930 Jun 21
4 R +04/+05 1989 Mar 26 2s
3 R +03/+04 1991 Mar 31 2s
3 R MSK/MSD 1991 Mar 31 2s
4 - +04 1992 Mar 29 2s
3 R +03/+04 2011 Mar 27 2s
4 - +04 2014 O 26 2s
3 - +03
3 R MSK/MSD 2011 Mar 27 2s
4 - MSK 2014 O 26 2s
3 - MSK
Z Europe/Samara 3:20:20 - LMT 1919 Jul 1 0u
3 - +03 1930 Jun 21
4 - +04 1935 Ja 27
@ -3070,9 +3150,6 @@ Z America/Cambridge_Bay 0 - -00 1920
-5 - EST 2000 N 5
-6 - CST 2001 Ap 1 3
-7 C M%sT
Z America/Yellowknife 0 - -00 1935
-7 Y M%sT 1980
-7 C M%sT
Z America/Inuvik 0 - -00 1953
-8 Y P%sT 1979 Ap lastSu 2
-7 Y M%sT 1980
@ -4171,6 +4248,7 @@ L America/Argentina/Cordoba America/Rosario
L America/Tijuana America/Santa_Isabel
L America/Denver America/Shiprock
L America/Toronto America/Thunder_Bay
L America/Edmonton America/Yellowknife
L Pacific/Auckland Antarctica/South_Pole
L Asia/Shanghai Asia/Chongqing
L Asia/Shanghai Asia/Harbin
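Net effect of the hunks above: America/Yellowknife is no longer compiled as a first-class zone and instead resolves through a new backward link to America/Edmonton, and pytz drops it from common_timezones (first hunk of this update). A quick sketch, assuming pytz 2023.3 as vendored here:

import pytz

tz = pytz.timezone('America/Yellowknife')              # still resolves via the backward link
print('America/Yellowknife' in pytz.common_timezones)  # False after this update
print('America/Edmonton' in pytz.common_timezones)     # True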

View file

@ -121,9 +121,8 @@ CA +744144-0944945 America/Resolute Central - NU (Resolute)
CA +624900-0920459 America/Rankin_Inlet Central - NU (central)
CA +5024-10439 America/Regina CST - SK (most areas)
CA +5017-10750 America/Swift_Current CST - SK (midwest)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); SK (W)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); NT (E); SK (W)
CA +690650-1050310 America/Cambridge_Bay Mountain - NU (west)
CA +6227-11421 America/Yellowknife Mountain - NT (central)
CA +682059-1334300 America/Inuvik Mountain - NT (west)
CA +4906-11631 America/Creston MST - BC (Creston)
CA +5546-12014 America/Dawson_Creek MST - BC (Dawson Cr, Ft St John)
@ -139,7 +138,7 @@ CG -0416+01517 Africa/Brazzaville
CH +4723+00832 Europe/Zurich
CI +0519-00402 Africa/Abidjan
CK -2114-15946 Pacific/Rarotonga
CL -3327-07040 America/Santiago Chile (most areas)
CL -3327-07040 America/Santiago most of Chile
CL -5309-07055 America/Punta_Arenas Region of Magallanes
CL -2709-10926 Pacific/Easter Easter Island
CM +0403+00942 Africa/Douala
@ -151,10 +150,10 @@ CU +2308-08222 America/Havana
CV +1455-02331 Atlantic/Cape_Verde
CW +1211-06900 America/Curacao
CX -1025+10543 Indian/Christmas
CY +3510+03322 Asia/Nicosia Cyprus (most areas)
CY +3510+03322 Asia/Nicosia most of Cyprus
CY +3507+03357 Asia/Famagusta Northern Cyprus
CZ +5005+01426 Europe/Prague
DE +5230+01322 Europe/Berlin Germany (most areas)
DE +5230+01322 Europe/Berlin most of Germany
DE +4742+00841 Europe/Busingen Busingen
DJ +1136+04309 Africa/Djibouti
DK +5540+01235 Europe/Copenhagen
@ -187,7 +186,7 @@ GF +0456-05220 America/Cayenne
GG +492717-0023210 Europe/Guernsey
GH +0533-00013 Africa/Accra
GI +3608-00521 Europe/Gibraltar
GL +6411-05144 America/Nuuk Greenland (most areas)
GL +6411-05144 America/Nuuk most of Greenland
GL +7646-01840 America/Danmarkshavn National Park (east coast)
GL +7029-02158 America/Scoresbysund Scoresbysund/Ittoqqortoormiit
GL +7634-06847 America/Thule Thule/Pituffik
@ -235,7 +234,7 @@ KP +3901+12545 Asia/Pyongyang
KR +3733+12658 Asia/Seoul
KW +2920+04759 Asia/Kuwait
KY +1918-08123 America/Cayman
KZ +4315+07657 Asia/Almaty Kazakhstan (most areas)
KZ +4315+07657 Asia/Almaty most of Kazakhstan
KZ +4448+06528 Asia/Qyzylorda Qyzylorda/Kyzylorda/Kzyl-Orda
KZ +5312+06337 Asia/Qostanay Qostanay/Kostanay/Kustanay
KZ +5017+05710 Asia/Aqtobe Aqtobe/Aktobe
@ -259,12 +258,12 @@ MD +4700+02850 Europe/Chisinau
ME +4226+01916 Europe/Podgorica
MF +1804-06305 America/Marigot
MG -1855+04731 Indian/Antananarivo
MH +0709+17112 Pacific/Majuro Marshall Islands (most areas)
MH +0709+17112 Pacific/Majuro most of Marshall Islands
MH +0905+16720 Pacific/Kwajalein Kwajalein
MK +4159+02126 Europe/Skopje
ML +1239-00800 Africa/Bamako
MM +1647+09610 Asia/Yangon
MN +4755+10653 Asia/Ulaanbaatar Mongolia (most areas)
MN +4755+10653 Asia/Ulaanbaatar most of Mongolia
MN +4801+09139 Asia/Hovd Bayan-Olgiy, Govi-Altai, Hovd, Uvs, Zavkhan
MN +4804+11430 Asia/Choibalsan Dornod, Sukhbaatar
MO +221150+1133230 Asia/Macau
@ -302,7 +301,7 @@ NO +5955+01045 Europe/Oslo
NP +2743+08519 Asia/Kathmandu
NR -0031+16655 Pacific/Nauru
NU -1901-16955 Pacific/Niue
NZ -3652+17446 Pacific/Auckland New Zealand (most areas)
NZ -3652+17446 Pacific/Auckland most of New Zealand
NZ -4357-17633 Pacific/Chatham Chatham Islands
OM +2336+05835 Asia/Muscat
PA +0858-07932 America/Panama
@ -310,7 +309,7 @@ PE -1203-07703 America/Lima
PF -1732-14934 Pacific/Tahiti Society Islands
PF -0900-13930 Pacific/Marquesas Marquesas Islands
PF -2308-13457 Pacific/Gambier Gambier Islands
PG -0930+14710 Pacific/Port_Moresby Papua New Guinea (most areas)
PG -0930+14710 Pacific/Port_Moresby most of Papua New Guinea
PG -0613+15534 Pacific/Bougainville Bougainville
PH +1435+12100 Asia/Manila
PK +2452+06703 Asia/Karachi
@ -356,7 +355,7 @@ RU +4310+13156 Asia/Vladivostok MSK+07 - Amur River
RU +643337+1431336 Asia/Ust-Nera MSK+07 - Oymyakonsky
RU +5934+15048 Asia/Magadan MSK+08 - Magadan
RU +4658+14242 Asia/Sakhalin MSK+08 - Sakhalin Island
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); North Kuril Is
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); N Kuril Is
RU +5301+15839 Asia/Kamchatka MSK+09 - Kamchatka
RU +6445+17729 Asia/Anadyr MSK+09 - Bering Sea
RW -0157+03004 Africa/Kigali
@ -397,7 +396,7 @@ TT +1039-06131 America/Port_of_Spain
TV -0831+17913 Pacific/Funafuti
TW +2503+12130 Asia/Taipei
TZ -0648+03917 Africa/Dar_es_Salaam
UA +5026+03031 Europe/Kyiv Ukraine (most areas)
UA +5026+03031 Europe/Kyiv most of Ukraine
UG +0019+03225 Africa/Kampala
UM +2813-17722 Pacific/Midway Midway Islands
UM +1917+16637 Pacific/Wake Wake Island
@ -420,7 +419,7 @@ US +465042-1012439 America/North_Dakota/New_Salem Central - ND (Morton rural)
US +471551-1014640 America/North_Dakota/Beulah Central - ND (Mercer)
US +394421-1045903 America/Denver Mountain (most areas)
US +433649-1161209 America/Boise Mountain - ID (south); OR (east)
US +332654-1120424 America/Phoenix MST - Arizona (except Navajo)
US +332654-1120424 America/Phoenix MST - AZ (except Navajo)
US +340308-1181434 America/Los_Angeles Pacific
US +611305-1495401 America/Anchorage Alaska (most areas)
US +581807-1342511 America/Juneau Alaska - Juneau area
@ -428,7 +427,7 @@ US +571035-1351807 America/Sitka Alaska - Sitka area
US +550737-1313435 America/Metlakatla Alaska - Annette Island
US +593249-1394338 America/Yakutat Alaska - Yakutat
US +643004-1652423 America/Nome Alaska (west)
US +515248-1763929 America/Adak Aleutian Islands
US +515248-1763929 America/Adak Alaska - western Aleutians
US +211825-1575130 Pacific/Honolulu Hawaii
UY -345433-0561245 America/Montevideo
UZ +3940+06648 Asia/Samarkand Uzbekistan (west)

View file

@ -18,7 +18,10 @@
# Please see the theory.html file for how these names are chosen.
# If multiple timezones overlap a country, each has a row in the
# table, with each column 1 containing the country code.
# 4. Comments; present if and only if a country has multiple timezones.
# 4. Comments; present if and only if countries have multiple timezones,
# and useful only for those countries. For example, the comments
# for the row with countries CH,DE,LI and name Europe/Zurich
# are useful only for DE, since CH and LI have no other timezones.
#
# If a timezone covers multiple countries, the most-populous city is used,
# and that country is listed first in column 1; any other countries
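Since the four columns described above are tab-separated, a hypothetical reader for the file might look like this (read_zone1970 and its path argument are illustrative, not part of the distribution):

def read_zone1970(path='zone1970.tab'):
    """Yield (country_codes, coordinates, tz_name, comment) tuples."""
    with open(path, encoding='utf-8') as fh:
        for line in fh:
            if line.startswith('#') or not line.strip():
                continue
            parts = line.rstrip('\n').split('\t')
            codes, coords, tz = parts[0].split(','), parts[1], parts[2]
            comment = parts[3] if len(parts) > 3 else None  # only for multi-zone countries
            yield codes, coords, tz, comment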
@ -34,7 +37,7 @@
#country-
#codes coordinates TZ comments
AD +4230+00131 Europe/Andorra
AE,OM,RE,SC,TF +2518+05518 Asia/Dubai UAE, Oman, Réunion, Seychelles, Crozet, Scattered Is
AE,OM,RE,SC,TF +2518+05518 Asia/Dubai Crozet, Scattered Is
AF +3431+06912 Asia/Kabul
AL +4120+01950 Europe/Tirane
AM +4011+04430 Asia/Yerevan
@ -45,7 +48,7 @@ AQ -6448-06406 Antarctica/Palmer Palmer
AQ -6734-06808 Antarctica/Rothera Rothera
AQ -720041+0023206 Antarctica/Troll Troll
AR -3436-05827 America/Argentina/Buenos_Aires Buenos Aires (BA, CF)
AR -3124-06411 America/Argentina/Cordoba Argentina (most areas: CB, CC, CN, ER, FM, MN, SE, SF)
AR -3124-06411 America/Argentina/Cordoba most areas: CB, CC, CN, ER, FM, MN, SE, SF
AR -2447-06525 America/Argentina/Salta Salta (SA, LP, NQ, RN)
AR -2411-06518 America/Argentina/Jujuy Jujuy (JY)
AR -2649-06513 America/Argentina/Tucuman Tucumán (TM)
@ -56,7 +59,7 @@ AR -3253-06849 America/Argentina/Mendoza Mendoza (MZ)
AR -3319-06621 America/Argentina/San_Luis San Luis (SL)
AR -5138-06913 America/Argentina/Rio_Gallegos Santa Cruz (SC)
AR -5448-06818 America/Argentina/Ushuaia Tierra del Fuego (TF)
AS,UM -1416-17042 Pacific/Pago_Pago Samoa, Midway
AS,UM -1416-17042 Pacific/Pago_Pago Midway
AT +4813+01620 Europe/Vienna
AU -3133+15905 Australia/Lord_Howe Lord Howe Island
AU -5430+15857 Antarctica/Macquarie Macquarie Island
@ -101,26 +104,25 @@ CA +4439-06336 America/Halifax Atlantic - NS (most areas); PE
CA +4612-05957 America/Glace_Bay Atlantic - NS (Cape Breton)
CA +4606-06447 America/Moncton Atlantic - New Brunswick
CA +5320-06025 America/Goose_Bay Atlantic - Labrador (most areas)
CA,BS +4339-07923 America/Toronto Eastern - ON, QC (most areas), Bahamas
CA,BS +4339-07923 America/Toronto Eastern - ON, QC (most areas)
CA +6344-06828 America/Iqaluit Eastern - NU (most areas)
CA +4953-09709 America/Winnipeg Central - ON (west); Manitoba
CA +744144-0944945 America/Resolute Central - NU (Resolute)
CA +624900-0920459 America/Rankin_Inlet Central - NU (central)
CA +5024-10439 America/Regina CST - SK (most areas)
CA +5017-10750 America/Swift_Current CST - SK (midwest)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); SK (W)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); NT (E); SK (W)
CA +690650-1050310 America/Cambridge_Bay Mountain - NU (west)
CA +6227-11421 America/Yellowknife Mountain - NT (central)
CA +682059-1334300 America/Inuvik Mountain - NT (west)
CA +5546-12014 America/Dawson_Creek MST - BC (Dawson Cr, Ft St John)
CA +5848-12242 America/Fort_Nelson MST - BC (Ft Nelson)
CA +6043-13503 America/Whitehorse MST - Yukon (east)
CA +6404-13925 America/Dawson MST - Yukon (west)
CA +4916-12307 America/Vancouver Pacific - BC (most areas)
CH,DE,LI +4723+00832 Europe/Zurich Swiss time
CH,DE,LI +4723+00832 Europe/Zurich Büsingen
CI,BF,GH,GM,GN,IS,ML,MR,SH,SL,SN,TG +0519-00402 Africa/Abidjan
CK -2114-15946 Pacific/Rarotonga
CL -3327-07040 America/Santiago Chile (most areas)
CL -3327-07040 America/Santiago most of Chile
CL -5309-07055 America/Punta_Arenas Region of Magallanes
CL -2709-10926 Pacific/Easter Easter Island
CN +3114+12128 Asia/Shanghai Beijing Time
@ -129,10 +131,10 @@ CO +0436-07405 America/Bogota
CR +0956-08405 America/Costa_Rica
CU +2308-08222 America/Havana
CV +1455-02331 Atlantic/Cape_Verde
CY +3510+03322 Asia/Nicosia Cyprus (most areas)
CY +3510+03322 Asia/Nicosia most of Cyprus
CY +3507+03357 Asia/Famagusta Northern Cyprus
CZ,SK +5005+01426 Europe/Prague
DE,DK,NO,SE,SJ +5230+01322 Europe/Berlin Germany (most areas), Scandinavia
DE,DK,NO,SE,SJ +5230+01322 Europe/Berlin most of Germany
DO +1828-06954 America/Santo_Domingo
DZ +3647+00303 Africa/Algiers
EC -0210-07950 America/Guayaquil Ecuador (mainland)
@ -153,7 +155,7 @@ GB,GG,IM,JE +513030-0000731 Europe/London
GE +4143+04449 Asia/Tbilisi
GF +0456-05220 America/Cayenne
GI +3608-00521 Europe/Gibraltar
GL +6411-05144 America/Nuuk Greenland (most areas)
GL +6411-05144 America/Nuuk most of Greenland
GL +7646-01840 America/Danmarkshavn National Park (east coast)
GL +7029-02158 America/Scoresbysund Scoresbysund/Ittoqqortoormiit
GL +7634-06847 America/Thule Thule/Pituffik
@ -183,12 +185,12 @@ JO +3157+03556 Asia/Amman
JP +353916+1394441 Asia/Tokyo
KE,DJ,ER,ET,KM,MG,SO,TZ,UG,YT -0117+03649 Africa/Nairobi
KG +4254+07436 Asia/Bishkek
KI,MH,TV,UM,WF +0125+17300 Pacific/Tarawa Gilberts, Marshalls, Tuvalu, Wallis & Futuna, Wake
KI,MH,TV,UM,WF +0125+17300 Pacific/Tarawa Gilberts, Marshalls, Wake
KI -0247-17143 Pacific/Kanton Phoenix Islands
KI +0152-15720 Pacific/Kiritimati Line Islands
KP +3901+12545 Asia/Pyongyang
KR +3733+12658 Asia/Seoul
KZ +4315+07657 Asia/Almaty Kazakhstan (most areas)
KZ +4315+07657 Asia/Almaty most of Kazakhstan
KZ +4448+06528 Asia/Qyzylorda Qyzylorda/Kyzylorda/Kzyl-Orda
KZ +5312+06337 Asia/Qostanay Qostanay/Kostanay/Kustanay
KZ +5017+05710 Asia/Aqtobe Aqtöbe/Aktobe
@ -205,14 +207,14 @@ MA +3339-00735 Africa/Casablanca
MD +4700+02850 Europe/Chisinau
MH +0905+16720 Pacific/Kwajalein Kwajalein
MM,CC +1647+09610 Asia/Yangon
MN +4755+10653 Asia/Ulaanbaatar Mongolia (most areas)
MN +4755+10653 Asia/Ulaanbaatar most of Mongolia
MN +4801+09139 Asia/Hovd Bayan-Ölgii, Govi-Altai, Hovd, Uvs, Zavkhan
MN +4804+11430 Asia/Choibalsan Dornod, Sükhbaatar
MO +221150+1133230 Asia/Macau
MQ +1436-06105 America/Martinique
MT +3554+01431 Europe/Malta
MU -2010+05730 Indian/Mauritius
MV,TF +0410+07330 Indian/Maldives Maldives, Kerguelen, St Paul I, Amsterdam I
MV,TF +0410+07330 Indian/Maldives Kerguelen, St Paul I, Amsterdam I
MX +1924-09909 America/Mexico_City Central Mexico
MX +2105-08646 America/Cancun Quintana Roo
MX +2058-08937 America/Merida Campeche, Yucatán
@ -225,7 +227,7 @@ MX +2313-10625 America/Mazatlan Baja California Sur, Nayarit (most areas), Sinal
MX +2048-10515 America/Bahia_Banderas Bahía de Banderas
MX +2904-11058 America/Hermosillo Sonora
MX +3232-11701 America/Tijuana Baja California
MY,BN +0133+11020 Asia/Kuching Sabah, Sarawak, Brunei
MY,BN +0133+11020 Asia/Kuching Sabah, Sarawak
MZ,BI,BW,CD,MW,RW,ZM,ZW -2558+03235 Africa/Maputo Central Africa Time
NA -2234+01706 Africa/Windhoek
NC -2216+16627 Pacific/Noumea
@ -237,7 +239,7 @@ NR -0031+16655 Pacific/Nauru
NU -1901-16955 Pacific/Niue
NZ,AQ -3652+17446 Pacific/Auckland New Zealand time
NZ -4357-17633 Pacific/Chatham Chatham Islands
PA,CA,KY +0858-07932 America/Panama EST - Panama, Cayman, ON (Atikokan), NU (Coral H)
PA,CA,KY +0858-07932 America/Panama EST - ON (Atikokan), NU (Coral H)
PE -1203-07703 America/Lima
PF -1732-14934 Pacific/Tahiti Society Islands
PF -0900-13930 Pacific/Marquesas Marquesas Islands
@ -285,13 +287,13 @@ RU +4310+13156 Asia/Vladivostok MSK+07 - Amur River
RU +643337+1431336 Asia/Ust-Nera MSK+07 - Oymyakonsky
RU +5934+15048 Asia/Magadan MSK+08 - Magadan
RU +4658+14242 Asia/Sakhalin MSK+08 - Sakhalin Island
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); North Kuril Is
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); N Kuril Is
RU +5301+15839 Asia/Kamchatka MSK+09 - Kamchatka
RU +6445+17729 Asia/Anadyr MSK+09 - Bering Sea
SA,AQ,KW,YE +2438+04643 Asia/Riyadh Arabia, Syowa
SB,FM -0932+16012 Pacific/Guadalcanal Solomons, Pohnpei
SA,AQ,KW,YE +2438+04643 Asia/Riyadh Syowa
SB,FM -0932+16012 Pacific/Guadalcanal Pohnpei
SD +1536+03232 Africa/Khartoum
SG,MY +0117+10351 Asia/Singapore Singapore, peninsular Malaysia
SG,MY +0117+10351 Asia/Singapore peninsular Malaysia
SR +0550-05510 America/Paramaribo
SS +0451+03137 Africa/Juba
ST +0020+00644 Africa/Sao_Tome
@ -299,7 +301,7 @@ SV +1342-08912 America/El_Salvador
SY +3330+03618 Asia/Damascus
TC +2128-07108 America/Grand_Turk
TD +1207+01503 Africa/Ndjamena
TH,CX,KH,LA,VN +1345+10031 Asia/Bangkok Indochina (most areas)
TH,CX,KH,LA,VN +1345+10031 Asia/Bangkok north Vietnam
TJ +3835+06848 Asia/Dushanbe
TK -0922-17114 Pacific/Fakaofo
TL -0833+12535 Asia/Dili
@ -308,7 +310,7 @@ TN +3648+01011 Africa/Tunis
TO -210800-1751200 Pacific/Tongatapu
TR +4101+02858 Europe/Istanbul
TW +2503+12130 Asia/Taipei
UA +5026+03031 Europe/Kyiv Ukraine (most areas)
UA +5026+03031 Europe/Kyiv most of Ukraine
US +404251-0740023 America/New_York Eastern (most areas)
US +421953-0830245 America/Detroit Eastern - MI (most areas)
US +381515-0854534 America/Kentucky/Louisville Eastern - KY (Louisville area)
@ -328,7 +330,7 @@ US +465042-1012439 America/North_Dakota/New_Salem Central - ND (Morton rural)
US +471551-1014640 America/North_Dakota/Beulah Central - ND (Mercer)
US +394421-1045903 America/Denver Mountain (most areas)
US +433649-1161209 America/Boise Mountain - ID (south); OR (east)
US,CA +332654-1120424 America/Phoenix MST - Arizona (except Navajo), Creston BC
US,CA +332654-1120424 America/Phoenix MST - AZ (most areas), Creston BC
US +340308-1181434 America/Los_Angeles Pacific
US +611305-1495401 America/Anchorage Alaska (most areas)
US +581807-1342511 America/Juneau Alaska - Juneau area
@ -336,13 +338,13 @@ US +571035-1351807 America/Sitka Alaska - Sitka area
US +550737-1313435 America/Metlakatla Alaska - Annette Island
US +593249-1394338 America/Yakutat Alaska - Yakutat
US +643004-1652423 America/Nome Alaska (west)
US +515248-1763929 America/Adak Aleutian Islands
US,UM +211825-1575130 Pacific/Honolulu Hawaii
US +515248-1763929 America/Adak Alaska - western Aleutians
US +211825-1575130 Pacific/Honolulu Hawaii
UY -345433-0561245 America/Montevideo
UZ +3940+06648 Asia/Samarkand Uzbekistan (west)
UZ +4120+06918 Asia/Tashkent Uzbekistan (east)
VE +1030-06656 America/Caracas
VN +1045+10640 Asia/Ho_Chi_Minh Vietnam (south)
VN +1045+10640 Asia/Ho_Chi_Minh south Vietnam
VU -1740+16825 Pacific/Efate
WS -1350-17144 Pacific/Apia
ZA,LS,SZ -2615+02800 Africa/Johannesburg

View file

@ -118,7 +118,7 @@ Serializing multiple objects to JSON lines (newline-delimited JSON)::
"""
from __future__ import absolute_import
__version__ = '3.18.3'
__version__ = '3.19.1'
__all__ = [
'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
@ -149,28 +149,10 @@ def _import_c_make_encoder():
except ImportError:
return None
_default_encoder = JSONEncoder(
skipkeys=False,
ensure_ascii=True,
check_circular=True,
allow_nan=True,
indent=None,
separators=None,
encoding='utf-8',
default=None,
use_decimal=True,
namedtuple_as_object=True,
tuple_as_array=True,
iterable_as_array=False,
bigint_as_string=False,
item_sort_key=None,
for_json=False,
ignore_nan=False,
int_as_string_bitcount=None,
)
_default_encoder = JSONEncoder()
def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None,
allow_nan=False, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None,
@ -187,10 +169,10 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
contain non-ASCII characters, so long as they do not need to be escaped
by JSON. When it is true, all non-ASCII characters are escaped.
If *allow_nan* is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
in strict compliance of the original JSON specification, instead of using
the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See
If *allow_nan* is true (default: ``False``), then out of range ``float``
values (``nan``, ``inf``, ``-inf``) will be serialized to
their JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``)
instead of raising a ValueError. See
*ignore_nan* for ECMA-262 compliant behavior.
If *indent* is a string, then JSON array elements and object members
@ -258,7 +240,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
"""
# cached encoder
if (not skipkeys and ensure_ascii and
check_circular and allow_nan and
check_circular and not allow_nan and
cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and use_decimal
and namedtuple_as_object and tuple_as_array and not iterable_as_array
@ -292,7 +274,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None,
allow_nan=False, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None,
@ -312,10 +294,11 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
for container types will be skipped and a circular reference will
result in an ``OverflowError`` (or worse).
If ``allow_nan`` is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
strict compliance of the JSON specification, instead of using the
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
If *allow_nan* is true (default: ``False``), then out of range ``float``
values (``nan``, ``inf``, ``-inf``) will be serialized to
their JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``)
instead of raising a ValueError. See
*ignore_nan* for ECMA-262 compliant behavior.
If ``indent`` is a string, then JSON array elements and object members
will be pretty-printed with a newline followed by that string repeated
@ -383,7 +366,7 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
"""
# cached encoder
if (not skipkeys and ensure_ascii and
check_circular and allow_nan and
check_circular and not allow_nan and
cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and use_decimal
and namedtuple_as_object and tuple_as_array and not iterable_as_array
@ -412,14 +395,12 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
**kw).encode(obj)
_default_decoder = JSONDecoder(encoding=None, object_hook=None,
object_pairs_hook=None)
_default_decoder = JSONDecoder()
def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None,
use_decimal=False, namedtuple_as_object=True, tuple_as_array=True,
**kw):
use_decimal=False, allow_nan=False, **kw):
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
a JSON document as `str` or `bytes`) to a Python object.
@ -442,23 +423,27 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
takes priority.
*parse_float*, if specified, will be called with the string of every
JSON float to be decoded. By default, this is equivalent to
JSON float to be decoded. By default, this is equivalent to
``float(num_str)``. This can be used to use another datatype or parser
for JSON floats (e.g. :class:`decimal.Decimal`).
*parse_int*, if specified, will be called with the string of every
JSON int to be decoded. By default, this is equivalent to
JSON int to be decoded. By default, this is equivalent to
``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`).
*parse_constant*, if specified, will be called with one of the
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
can be used to raise an exception if invalid JSON numbers are
encountered.
*allow_nan*, if True (default false), will allow the parser to
accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``
and enable the use of the deprecated *parse_constant*.
If *use_decimal* is true (default: ``False``) then it implies
parse_float=decimal.Decimal for parity with ``dump``.
*parse_constant*, if specified, will be
called with one of the following strings: ``'-Infinity'``,
``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
as it is rare to parse non-compliant JSON containing these values.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
of subclassing whenever possible.
@ -468,12 +453,12 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
encoding=encoding, cls=cls, object_hook=object_hook,
parse_float=parse_float, parse_int=parse_int,
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
use_decimal=use_decimal, **kw)
use_decimal=use_decimal, allow_nan=allow_nan, **kw)
def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None,
use_decimal=False, **kw):
use_decimal=False, allow_nan=False, **kw):
"""Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
document) to a Python object.
@ -505,14 +490,18 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`).
*parse_constant*, if specified, will be called with one of the
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
can be used to raise an exception if invalid JSON numbers are
encountered.
*allow_nan*, if True (default false), will allow the parser to
accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``
and enable the use of the deprecated *parse_constant*.
If *use_decimal* is true (default: ``False``) then it implies
parse_float=decimal.Decimal for parity with ``dump``.
*parse_constant*, if specified, will be
called with one of the following strings: ``'-Infinity'``,
``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
as it is rare to parse non-compliant JSON containing these values.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
of subclassing whenever possible.
@ -521,7 +510,7 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
if (cls is None and encoding is None and object_hook is None and
parse_int is None and parse_float is None and
parse_constant is None and object_pairs_hook is None
and not use_decimal and not kw):
and not use_decimal and not allow_nan and not kw):
return _default_decoder.decode(s)
if cls is None:
cls = JSONDecoder
@ -539,6 +528,8 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
if parse_float is not None:
raise TypeError("use_decimal=True implies parse_float=Decimal")
kw['parse_float'] = Decimal
if allow_nan:
kw['allow_nan'] = True
return cls(encoding=encoding, **kw).decode(s)
@ -560,22 +551,9 @@ def _toggle_speedups(enabled):
scan.make_scanner = scan.py_make_scanner
dec.make_scanner = scan.make_scanner
global _default_decoder
_default_decoder = JSONDecoder(
encoding=None,
object_hook=None,
object_pairs_hook=None,
)
_default_decoder = JSONDecoder()
global _default_encoder
_default_encoder = JSONEncoder(
skipkeys=False,
ensure_ascii=True,
check_circular=True,
allow_nan=True,
indent=None,
separators=None,
encoding='utf-8',
default=None,
)
_default_encoder = JSONEncoder()
def simple_first(kv):
"""Helper function to pass to item_sort_key to sort simple

View file

@ -46,9 +46,35 @@ BACKSLASH = {
DEFAULT_ENCODING = "utf-8"
if hasattr(sys, 'get_int_max_str_digits'):
bounded_int = int
else:
def bounded_int(s, INT_MAX_STR_DIGITS=4300):
"""Backport of the integer string length conversion limitation
https://docs.python.org/3/library/stdtypes.html#int-max-str-digits
"""
if len(s) > INT_MAX_STR_DIGITS:
raise ValueError("Exceeds the limit (%s) for integer string conversion: value has %s digits" % (INT_MAX_STR_DIGITS, len(s)))
return int(s)
def scan_four_digit_hex(s, end, _m=re.compile(r'^[0-9a-fA-F]{4}$').match):
"""Scan a four digit hex number from s[end:end + 4]
"""
msg = "Invalid \\uXXXX escape sequence"
esc = s[end:end + 4]
if not _m(esc):
raise JSONDecodeError(msg, s, end - 2)
try:
return int(esc, 16), end + 4
except ValueError:
raise JSONDecodeError(msg, s, end - 2)
def py_scanstring(s, end, encoding=None, strict=True,
_b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join,
_PY3=PY3, _maxunicode=sys.maxunicode):
_PY3=PY3, _maxunicode=sys.maxunicode,
_scan_four_digit_hex=scan_four_digit_hex):
"""Scan the string s for a JSON string. End is the index of the
character in s after the quote that started the JSON string.
Unescapes all valid JSON string escape sequences and raises ValueError
@ -67,6 +93,7 @@ def py_scanstring(s, end, encoding=None, strict=True,
if chunk is None:
raise JSONDecodeError(
"Unterminated string starting at", s, begin)
prev_end = end
end = chunk.end()
content, terminator = chunk.groups()
# Content contains zero or more unescaped string characters
@ -81,7 +108,7 @@ def py_scanstring(s, end, encoding=None, strict=True,
elif terminator != '\\':
if strict:
msg = "Invalid control character %r at"
raise JSONDecodeError(msg, s, end)
raise JSONDecodeError(msg, s, prev_end)
else:
_append(terminator)
continue
@ -100,35 +127,18 @@ def py_scanstring(s, end, encoding=None, strict=True,
end += 1
else:
# Unicode escape sequence
msg = "Invalid \\uXXXX escape sequence"
esc = s[end + 1:end + 5]
escX = esc[1:2]
if len(esc) != 4 or escX == 'x' or escX == 'X':
raise JSONDecodeError(msg, s, end - 1)
try:
uni = int(esc, 16)
except ValueError:
raise JSONDecodeError(msg, s, end - 1)
if uni < 0 or uni > _maxunicode:
raise JSONDecodeError(msg, s, end - 1)
end += 5
uni, end = _scan_four_digit_hex(s, end + 1)
# Check for surrogate pair on UCS-4 systems
# Note that this will join high/low surrogate pairs
# but will also pass unpaired surrogates through
if (_maxunicode > 65535 and
uni & 0xfc00 == 0xd800 and
s[end:end + 2] == '\\u'):
esc2 = s[end + 2:end + 6]
escX = esc2[1:2]
if len(esc2) == 4 and not (escX == 'x' or escX == 'X'):
try:
uni2 = int(esc2, 16)
except ValueError:
raise JSONDecodeError(msg, s, end)
if uni2 & 0xfc00 == 0xdc00:
uni = 0x10000 + (((uni - 0xd800) << 10) |
(uni2 - 0xdc00))
end += 6
uni2, end2 = _scan_four_digit_hex(s, end + 2)
if uni2 & 0xfc00 == 0xdc00:
uni = 0x10000 + (((uni - 0xd800) << 10) |
(uni2 - 0xdc00))
end = end2
char = unichr(uni)
# Append the unescaped character
_append(char)
@ -169,7 +179,7 @@ def JSONObject(state, encoding, strict, scan_once, object_hook,
return pairs, end + 1
elif nextchar != '"':
raise JSONDecodeError(
"Expecting property name enclosed in double quotes",
"Expecting property name enclosed in double quotes or '}'",
s, end)
end += 1
while True:
@ -296,14 +306,15 @@ class JSONDecoder(object):
| null | None |
+---------------+-------------------+
It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
When allow_nan=True, it also understands
``NaN``, ``Infinity``, and ``-Infinity`` as
their corresponding ``float`` values, which is outside the JSON spec.
"""
def __init__(self, encoding=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, strict=True,
object_pairs_hook=None):
object_pairs_hook=None, allow_nan=False):
"""
*encoding* determines the encoding used to interpret any
:class:`str` objects decoded by this instance (``'utf-8'`` by
@ -336,10 +347,13 @@ class JSONDecoder(object):
``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`).
*parse_constant*, if specified, will be called with one of the
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
can be used to raise an exception if invalid JSON numbers are
encountered.
*allow_nan*, if True (default false), will allow the parser to
accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``.
*parse_constant*, if specified, will be
called with one of the following strings: ``'-Infinity'``,
``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
as it is rare to parse non-compliant JSON containing these values.
*strict* controls the parser's behavior when it encounters an
invalid control character in a string. The default setting of
@ -353,8 +367,8 @@ class JSONDecoder(object):
self.object_hook = object_hook
self.object_pairs_hook = object_pairs_hook
self.parse_float = parse_float or float
self.parse_int = parse_int or int
self.parse_constant = parse_constant or _CONSTANTS.__getitem__
self.parse_int = parse_int or bounded_int
self.parse_constant = parse_constant or (allow_nan and _CONSTANTS.__getitem__ or None)
self.strict = strict
self.parse_object = JSONObject
self.parse_array = JSONArray
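The ``bounded_int`` backport and ``scan_four_digit_hex`` above harden the pure-Python decoder against quadratic-time integer parsing and malformed escapes. A short sketch of the effect, assuming the backport's default 4300-digit limit:

import simplejson as json

assert json.loads('9' * 4300) == int('9' * 4300)  # exactly at the limit: still parsed
try:
    json.loads('9' * 4301)                        # ValueError: over the digit limit
except ValueError:
    pass
try:
    json.loads('"\\u12G4"')                       # JSONDecodeError: bad \uXXXX escape
except json.JSONDecodeError:
    pass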

View file

@ -5,7 +5,7 @@ import re
from operator import itemgetter
# Do not import Decimal directly to avoid reload issues
import decimal
from .compat import unichr, binary_type, text_type, string_types, integer_types, PY3
from .compat import binary_type, text_type, string_types, integer_types, PY3
def _import_speedups():
try:
from . import _speedups
@ -140,7 +140,7 @@ class JSONEncoder(object):
key_separator = ': '
def __init__(self, skipkeys=False, ensure_ascii=True,
check_circular=True, allow_nan=True, sort_keys=False,
check_circular=True, allow_nan=False, sort_keys=False,
indent=None, separators=None, encoding='utf-8', default=None,
use_decimal=True, namedtuple_as_object=True,
tuple_as_array=True, bigint_as_string=False,
@ -161,10 +161,11 @@ class JSONEncoder(object):
prevent an infinite recursion (which would cause an OverflowError).
Otherwise, no such check takes place.
If allow_nan is true, then NaN, Infinity, and -Infinity will be
encoded as such. This behavior is not JSON specification compliant,
but is consistent with most JavaScript based encoders and decoders.
Otherwise, it will be a ValueError to encode such floats.
If allow_nan is true (default: False), then out of range float
values (nan, inf, -inf) will be serialized to
their JavaScript equivalents (NaN, Infinity, -Infinity)
instead of raising a ValueError. See
ignore_nan for ECMA-262 compliant behavior.
If sort_keys is true, then the output of dictionaries will be
sorted by key; this is useful for regression tests to ensure
@ -294,7 +295,7 @@ class JSONEncoder(object):
# This doesn't pass the iterator directly to ''.join() because the
# exceptions aren't as detailed. The list call should be roughly
# equivalent to the PySequence_Fast that ''.join() would do.
chunks = self.iterencode(o, _one_shot=True)
chunks = self.iterencode(o)
if not isinstance(chunks, (list, tuple)):
chunks = list(chunks)
if self.ensure_ascii:
@ -302,7 +303,7 @@ class JSONEncoder(object):
else:
return u''.join(chunks)
def iterencode(self, o, _one_shot=False):
def iterencode(self, o):
"""Encode the given object and yield each string
representation as available.
@ -356,8 +357,7 @@ class JSONEncoder(object):
key_memo = {}
int_as_string_bitcount = (
53 if self.bigint_as_string else self.int_as_string_bitcount)
if (_one_shot and c_make_encoder is not None
and self.indent is None):
if (c_make_encoder is not None and self.indent is None):
_iterencode = c_make_encoder(
markers, self.default, _encoder, self.indent,
self.key_separator, self.item_separator, self.sort_keys,
@ -370,7 +370,7 @@ class JSONEncoder(object):
_iterencode = _make_iterencode(
markers, self.default, _encoder, self.indent, floatstr,
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, _one_shot, self.use_decimal,
self.skipkeys, self.use_decimal,
self.namedtuple_as_object, self.tuple_as_array,
int_as_string_bitcount,
self.item_sort_key, self.encoding, self.for_json,
@ -398,14 +398,14 @@ class JSONEncoderForHTML(JSONEncoder):
def encode(self, o):
# Override JSONEncoder.encode because it has hacks for
# performance that make things more complicated.
chunks = self.iterencode(o, True)
chunks = self.iterencode(o)
if self.ensure_ascii:
return ''.join(chunks)
else:
return u''.join(chunks)
def iterencode(self, o, _one_shot=False):
chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
def iterencode(self, o):
chunks = super(JSONEncoderForHTML, self).iterencode(o)
for chunk in chunks:
chunk = chunk.replace('&', '\\u0026')
chunk = chunk.replace('<', '\\u003c')
@ -419,7 +419,7 @@ class JSONEncoderForHTML(JSONEncoder):
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
_key_separator, _item_separator, _sort_keys, _skipkeys,
_use_decimal, _namedtuple_as_object, _tuple_as_array,
_int_as_string_bitcount, _item_sort_key,
_encoding,_for_json,
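``JSONEncoder`` mirrors the new ``allow_nan=False`` default, and ``iterencode`` drops the internal ``_one_shot`` flag so the C speedup is used whenever ``indent`` is ``None``. A quick sketch under those assumptions:

import simplejson

enc = simplejson.JSONEncoder(allow_nan=True)
print(enc.encode({'x': float('nan')}))                       # {"x": NaN}
print(''.join(simplejson.JSONEncoder().iterencode([1, 2])))  # [1, 2]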

View file

@ -60,11 +60,11 @@ def py_make_scanner(context):
else:
res = parse_int(integer)
return res, m.end()
elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
elif parse_constant and nextchar == 'N' and string[idx:idx + 3] == 'NaN':
return parse_constant('NaN'), idx + 3
elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
elif parse_constant and nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
return parse_constant('Infinity'), idx + 8
elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
elif parse_constant and nextchar == '-' and string[idx:idx + 9] == '-Infinity':
return parse_constant('-Infinity'), idx + 9
else:
raise JSONDecodeError(errmsg, string, idx)
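With the guards above, the ``NaN``/``Infinity`` branches fire only when a ``parse_constant`` callable is actually configured; otherwise those tokens fall through to the ordinary syntax error. For instance:

import simplejson as json

print(json.loads('[NaN]', allow_nan=True))              # [nan]
print(json.loads('[NaN]', parse_constant=lambda s: s))  # ['NaN']
try:
    json.loads('[NaN]')                                 # JSONDecodeError: Expecting value
except json.JSONDecodeError:
    pass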

View file

@ -2,6 +2,7 @@ from __future__ import absolute_import
import decimal
from unittest import TestCase
import sys
import simplejson as json
from simplejson.compat import StringIO, b, binary_type
from simplejson import OrderedDict
@ -117,3 +118,10 @@ class TestDecode(TestCase):
diff = id(x) - id(y)
self.assertRaises(ValueError, j.scan_once, y, diff)
self.assertRaises(ValueError, j.raw_decode, y, i)
def test_bounded_int(self):
# SJ-PT-23-03, limit quadratic number parsing per Python 3.11
max_str_digits = getattr(sys, 'get_int_max_str_digits', lambda: 4300)()
s = '1' + '0' * (max_str_digits - 1)
self.assertEqual(json.loads(s), int(s))
self.assertRaises(ValueError, json.loads, s + '0')

View file

@ -145,7 +145,7 @@ class TestFail(TestCase):
('["spam', 'Unterminated string starting at', 1),
('["spam"', "Expecting ',' delimiter", 7),
('["spam",', 'Expecting value', 8),
('{', 'Expecting property name enclosed in double quotes', 1),
('{', "Expecting property name enclosed in double quotes or '}'", 1),
('{"', 'Unterminated string starting at', 1),
('{"spam', 'Unterminated string starting at', 1),
('{"spam"', "Expecting ':' delimiter", 7),
@ -156,6 +156,8 @@ class TestFail(TestCase):
('"', 'Unterminated string starting at', 0),
('"spam', 'Unterminated string starting at', 0),
('[,', "Expecting value", 1),
('--', 'Expecting value', 0),
('"\x18d', "Invalid control character %r", 1),
]
for data, msg, idx in test_cases:
try:

View file

@ -7,9 +7,9 @@ from simplejson.decoder import NaN, PosInf, NegInf
class TestFloat(TestCase):
def test_degenerates_allow(self):
for inf in (PosInf, NegInf):
self.assertEqual(json.loads(json.dumps(inf)), inf)
self.assertEqual(json.loads(json.dumps(inf, allow_nan=True), allow_nan=True), inf)
# Python 2.5 doesn't have math.isnan
nan = json.loads(json.dumps(NaN))
nan = json.loads(json.dumps(NaN, allow_nan=True), allow_nan=True)
self.assertTrue((0 + nan) != nan)
def test_degenerates_ignore(self):
@ -19,6 +19,9 @@ class TestFloat(TestCase):
def test_degenerates_deny(self):
for f in (PosInf, NegInf, NaN):
self.assertRaises(ValueError, json.dumps, f, allow_nan=False)
for s in ('Infinity', '-Infinity', 'NaN'):
self.assertRaises(ValueError, json.loads, s, allow_nan=False)
self.assertRaises(ValueError, json.loads, s)
def test_floats(self):
for num in [1617161771.7650001, math.pi, math.pi**100,

View file

@ -132,7 +132,9 @@ class TestScanString(TestCase):
self.assertRaises(ValueError,
scanstring, '\\ud834\\x0123"', 0, None, True)
self.assertRaises(json.JSONDecodeError, scanstring, "\\u-123", 0, None, True)
self.assertRaises(json.JSONDecodeError, scanstring, '\\u-123"', 0, None, True)
# SJ-PT-23-01: Invalid Handling of Broken Unicode Escape Sequences
self.assertRaises(json.JSONDecodeError, scanstring, '\\u EDD"', 0, None, True)
def test_issue3623(self):
self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1,

View file

@ -32,7 +32,7 @@ from . import css_match as cm
from . import css_types as ct
from .util import DEBUG, SelectorSyntaxError # noqa: F401
import bs4 # type: ignore[import]
from typing import Optional, Any, Iterator, Iterable
from typing import Any, Iterator, Iterable
__all__ = (
'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
@ -45,10 +45,10 @@ SoupSieve = cm.SoupSieve
def compile( # noqa: A001
pattern: str,
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> cm.SoupSieve:
"""Compile CSS pattern."""
@ -79,10 +79,10 @@ def purge() -> None:
def closest(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> 'bs4.Tag':
"""Match closest ancestor."""
@ -93,10 +93,10 @@ def closest(
def match(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> bool:
"""Match node."""
@ -107,10 +107,10 @@ def match(
def filter( # noqa: A001
select: str,
iterable: Iterable['bs4.Tag'],
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> list['bs4.Tag']:
"""Filter list of nodes."""
@ -121,10 +121,10 @@ def filter( # noqa: A001
def select_one(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> 'bs4.Tag':
"""Select a single tag."""
@ -135,11 +135,11 @@ def select_one(
def select(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
limit: int = 0,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> list['bs4.Tag']:
"""Select the specified tags."""
@ -150,11 +150,11 @@ def select(
def iselect(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
limit: int = 0,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> Iterator['bs4.Tag']:
"""Iterate the specified tags."""

View file

@ -193,5 +193,5 @@ def parse_version(ver: str) -> Version:
return Version(major, minor, micro, release, pre, post, dev)
__version_info__ = Version(2, 4, 0, "final")
__version_info__ = Version(2, 4, 1, "final")
__version__ = __version_info__._get_canonical()

View file

@ -6,7 +6,7 @@ import re
from . import css_types as ct
import unicodedata
import bs4 # type: ignore[import]
from typing import Iterator, Iterable, Any, Optional, Callable, Sequence, cast # noqa: F401
from typing import Iterator, Iterable, Any, Callable, Sequence, cast # noqa: F401
# Empty tag pattern (whitespace okay)
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
@ -171,7 +171,7 @@ class _DocumentNav:
def get_children(
self,
el: bs4.Tag,
start: Optional[int] = None,
start: int | None = None,
reverse: bool = False,
tags: bool = True,
no_iframe: bool = False
@ -239,22 +239,22 @@ class _DocumentNav:
return parent
@staticmethod
def get_tag_name(el: bs4.Tag) -> Optional[str]:
def get_tag_name(el: bs4.Tag) -> str | None:
"""Get tag."""
return cast(Optional[str], el.name)
return cast('str | None', el.name)
@staticmethod
def get_prefix_name(el: bs4.Tag) -> Optional[str]:
def get_prefix_name(el: bs4.Tag) -> str | None:
"""Get prefix."""
return cast(Optional[str], el.prefix)
return cast('str | None', el.prefix)
@staticmethod
def get_uri(el: bs4.Tag) -> Optional[str]:
def get_uri(el: bs4.Tag) -> str | None:
"""Get namespace `URI`."""
return cast(Optional[str], el.namespace)
return cast('str | None', el.namespace)
@classmethod
def get_next(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
@ -287,7 +287,7 @@ class _DocumentNav:
return bool(ns and ns == NS_XHTML)
@staticmethod
def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[Optional[str], Optional[str]]:
def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[str | None, str | None]:
"""Return namespace and attribute name without the prefix."""
return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
@ -330,8 +330,8 @@ class _DocumentNav:
cls,
el: bs4.Tag,
name: str,
default: Optional[str | Sequence[str]] = None
) -> Optional[str | Sequence[str]]:
default: str | Sequence[str] | None = None
) -> str | Sequence[str] | None:
"""Get attribute by name."""
value = default
@ -348,7 +348,7 @@ class _DocumentNav:
return value
@classmethod
def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, Optional[str | Sequence[str]]]]:
def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, str | Sequence[str] | None]]:
"""Iterate attributes."""
for k, v in el.attrs.items():
@ -424,10 +424,10 @@ class Inputs:
return 0 <= minutes <= 59
@classmethod
def parse_value(cls, itype: str, value: Optional[str]) -> Optional[tuple[float, ...]]:
def parse_value(cls, itype: str, value: str | None) -> tuple[float, ...] | None:
"""Parse the input value."""
parsed = None # type: Optional[tuple[float, ...]]
parsed = None # type: tuple[float, ...] | None
if value is None:
return value
if itype == "date":
@ -486,7 +486,7 @@ class CSSMatch(_DocumentNav):
self,
selectors: ct.SelectorList,
scope: bs4.Tag,
namespaces: Optional[ct.Namespaces],
namespaces: ct.Namespaces | None,
flags: int
) -> None:
"""Initialize."""
@ -545,19 +545,19 @@ class CSSMatch(_DocumentNav):
return self.get_tag_ns(el) == NS_XHTML
def get_tag(self, el: bs4.Tag) -> Optional[str]:
def get_tag(self, el: bs4.Tag) -> str | None:
"""Get tag."""
name = self.get_tag_name(el)
return util.lower(name) if name is not None and not self.is_xml else name
def get_prefix(self, el: bs4.Tag) -> Optional[str]:
def get_prefix(self, el: bs4.Tag) -> str | None:
"""Get prefix."""
prefix = self.get_prefix_name(el)
return util.lower(prefix) if prefix is not None and not self.is_xml else prefix
def find_bidi(self, el: bs4.Tag) -> Optional[int]:
def find_bidi(self, el: bs4.Tag) -> int | None:
"""Get directionality from element text."""
for node in self.get_children(el, tags=False):
@ -653,8 +653,8 @@ class CSSMatch(_DocumentNav):
self,
el: bs4.Tag,
attr: str,
prefix: Optional[str]
) -> Optional[str | Sequence[str]]:
prefix: str | None
) -> str | Sequence[str] | None:
"""Match attribute name and return value if it exists."""
value = None
@ -751,7 +751,7 @@ class CSSMatch(_DocumentNav):
name not in (self.get_tag(el), '*')
)
def match_tag(self, el: bs4.Tag, tag: Optional[ct.SelectorTag]) -> bool:
def match_tag(self, el: bs4.Tag, tag: ct.SelectorTag | None) -> bool:
"""Match the tag."""
match = True
@ -1030,7 +1030,7 @@ class CSSMatch(_DocumentNav):
"""Match element if it contains text."""
match = True
content = None # type: Optional[str | Sequence[str]]
content = None # type: str | Sequence[str] | None
for contain_list in contains:
if content is None:
if contain_list.own:
@ -1099,7 +1099,7 @@ class CSSMatch(_DocumentNav):
match = False
name = cast(str, self.get_attribute_by_name(el, 'name'))
def get_parent_form(el: bs4.Tag) -> Optional[bs4.Tag]:
def get_parent_form(el: bs4.Tag) -> bs4.Tag | None:
"""Find this input's form."""
form = None
parent = self.get_parent(el, no_iframe=True)
@ -1478,7 +1478,7 @@ class CSSMatch(_DocumentNav):
if lim < 1:
break
def closest(self) -> Optional[bs4.Tag]:
def closest(self) -> bs4.Tag | None:
"""Match closest ancestor."""
current = self.tag
@ -1506,7 +1506,7 @@ class SoupSieve(ct.Immutable):
pattern: str
selectors: ct.SelectorList
namespaces: Optional[ct.Namespaces]
namespaces: ct.Namespaces | None
custom: dict[str, str]
flags: int
@ -1516,8 +1516,8 @@ class SoupSieve(ct.Immutable):
self,
pattern: str,
selectors: ct.SelectorList,
namespaces: Optional[ct.Namespaces],
custom: Optional[ct.CustomSelectors],
namespaces: ct.Namespaces | None,
custom: ct.CustomSelectors | None,
flags: int
):
"""Initialize."""

View file

@ -7,7 +7,7 @@ from . import css_match as cm
from . import css_types as ct
from .util import SelectorSyntaxError
import warnings
from typing import Optional, Match, Any, Iterator, cast
from typing import Match, Any, Iterator, cast
UNICODE_REPLACEMENT_CHAR = 0xFFFD
@ -113,7 +113,7 @@ VALUE = r'''
'''.format(nl=NEWLINE, ident=IDENTIFIER)
# Attribute value comparison. `!=` is handled special as it is non-standard.
ATTR = r'''
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}*(?P<case>[is]))?)?{ws}*\]
'''.format(ws=WSC, value=VALUE)
# Selector patterns
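The change from ``{ws}+`` to ``{ws}*`` above makes the whitespace before the case-sensitivity flag optional, so a selector like ``[href="home"i]`` should now parse alongside ``[href="home" i]``. A sketch, assuming the regex behaves as written:

from bs4 import BeautifulSoup
import soupsieve as sv

soup = BeautifulSoup('<a href="HOME">x</a>', 'html.parser')
print(sv.select('[href="home" i]', soup))  # case-insensitive match
print(sv.select('[href="home"i]', soup))   # no space before the flag: now accepted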
@ -207,8 +207,8 @@ _MAXCACHE = 500
@lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile(
pattern: str,
namespaces: Optional[ct.Namespaces],
custom: Optional[ct.CustomSelectors],
namespaces: ct.Namespaces | None,
custom: ct.CustomSelectors | None,
flags: int
) -> cm.SoupSieve:
"""Cached CSS compile."""
@ -233,7 +233,7 @@ def _purge_cache() -> None:
_cached_css_compile.cache_clear()
def process_custom(custom: Optional[ct.CustomSelectors]) -> dict[str, str | ct.SelectorList]:
def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.SelectorList]:
"""Process custom."""
custom_selectors = {}
@ -317,7 +317,7 @@ class SelectorPattern:
return self.name
def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
"""Match the selector."""
return self.re_pattern.match(selector, index)
@ -336,7 +336,7 @@ class SpecialPseudoPattern(SelectorPattern):
for pseudo in p[1]:
self.patterns[pseudo] = pattern
self.matched_name = None # type: Optional[SelectorPattern]
self.matched_name = None # type: SelectorPattern | None
self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)
def get_name(self) -> str:
@ -344,7 +344,7 @@ class SpecialPseudoPattern(SelectorPattern):
return '' if self.matched_name is None else self.matched_name.get_name()
def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
"""Match the selector."""
pseudo = None
@ -372,14 +372,14 @@ class _Selector:
def __init__(self, **kwargs: Any) -> None:
"""Initialize."""
self.tag = kwargs.get('tag', None) # type: Optional[ct.SelectorTag]
self.tag = kwargs.get('tag', None) # type: ct.SelectorTag | None
self.ids = kwargs.get('ids', []) # type: list[str]
self.classes = kwargs.get('classes', []) # type: list[str]
self.attributes = kwargs.get('attributes', []) # type: list[ct.SelectorAttribute]
self.nth = kwargs.get('nth', []) # type: list[ct.SelectorNth]
self.selectors = kwargs.get('selectors', []) # type: list[ct.SelectorList]
self.relations = kwargs.get('relations', []) # type: list[_Selector]
self.rel_type = kwargs.get('rel_type', None) # type: Optional[str]
self.rel_type = kwargs.get('rel_type', None) # type: str | None
self.contains = kwargs.get('contains', []) # type: list[ct.SelectorContains]
self.lang = kwargs.get('lang', []) # type: list[ct.SelectorLang]
self.flags = kwargs.get('flags', 0) # type: int
@ -462,7 +462,7 @@ class CSSParser:
def __init__(
self,
selector: str,
custom: Optional[dict[str, str | ct.SelectorList]] = None,
custom: dict[str, str | ct.SelectorList] | None = None,
flags: int = 0
) -> None:
"""Initialize."""

View file

@ -2,7 +2,7 @@
from __future__ import annotations
import copyreg
from .pretty import pretty
from typing import Any, Iterator, Hashable, Optional, Pattern, Iterable, Mapping
from typing import Any, Iterator, Hashable, Pattern, Iterable, Mapping
__all__ = (
'Selector',
@ -189,28 +189,28 @@ class Selector(Immutable):
'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
)
tag: Optional[SelectorTag]
tag: SelectorTag | None
ids: tuple[str, ...]
classes: tuple[str, ...]
attributes: tuple[SelectorAttribute, ...]
nth: tuple[SelectorNth, ...]
selectors: tuple[SelectorList, ...]
relation: SelectorList
rel_type: Optional[str]
rel_type: str | None
contains: tuple[SelectorContains, ...]
lang: tuple[SelectorLang, ...]
flags: int
def __init__(
self,
tag: Optional[SelectorTag],
tag: SelectorTag | None,
ids: tuple[str, ...],
classes: tuple[str, ...],
attributes: tuple[SelectorAttribute, ...],
nth: tuple[SelectorNth, ...],
selectors: tuple[SelectorList, ...],
relation: SelectorList,
rel_type: Optional[str],
rel_type: str | None,
contains: tuple[SelectorContains, ...],
lang: tuple[SelectorLang, ...],
flags: int
@ -247,9 +247,9 @@ class SelectorTag(Immutable):
__slots__ = ("name", "prefix", "_hash")
name: str
prefix: Optional[str]
prefix: str | None
def __init__(self, name: str, prefix: Optional[str]) -> None:
def __init__(self, name: str, prefix: str | None) -> None:
"""Initialize."""
super().__init__(name=name, prefix=prefix)
@ -262,15 +262,15 @@ class SelectorAttribute(Immutable):
attribute: str
prefix: str
pattern: Optional[Pattern[str]]
xml_type_pattern: Optional[Pattern[str]]
pattern: Pattern[str] | None
xml_type_pattern: Pattern[str] | None
def __init__(
self,
attribute: str,
prefix: str,
pattern: Optional[Pattern[str]],
xml_type_pattern: Optional[Pattern[str]]
pattern: Pattern[str] | None,
xml_type_pattern: Pattern[str] | None
) -> None:
"""Initialize."""
@ -360,7 +360,7 @@ class SelectorList(Immutable):
def __init__(
self,
selectors: Optional[Iterable[Selector | SelectorNull]] = None,
selectors: Iterable[Selector | SelectorNull] | None = None,
is_not: bool = False,
is_html: bool = False
) -> None:

View file

@ -3,7 +3,7 @@ from __future__ import annotations
from functools import wraps, lru_cache
import warnings
import re
from typing import Callable, Any, Optional
from typing import Callable, Any
DEBUG = 0x00001
@ -27,7 +27,7 @@ def lower(string: str) -> str:
class SelectorSyntaxError(Exception):
"""Syntax error in a CSS selector."""
def __init__(self, msg: str, pattern: Optional[str] = None, index: Optional[int] = None) -> None:
def __init__(self, msg: str, pattern: str | None = None, index: int | None = None) -> None:
"""Initialize."""
self.line = None
@ -84,7 +84,7 @@ def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
col = 1
text = [] # type: list[str]
line = 1
offset = None # type: Optional[int]
offset = None # type: int | None
# Split pattern by newline and handle the text before the newline
for m in RE_PATTERN_LINE_SPLIT.finditer(pattern):
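
The soupsieve hunks above all make the same change: PEP 604 union syntax replaces typing.Optional. A minimal sketch of the equivalence (my example, not soupsieve code) — the `X | None` spelling needs Python 3.10+, or `from __future__ import annotations` on earlier versions for annotation-only use:

from __future__ import annotations

from typing import Optional

def old_style(pattern: Optional[str] = None, index: Optional[int] = None) -> None:
    """Pre-migration spelling using typing.Optional."""

def new_style(pattern: str | None = None, index: int | None = None) -> None:
    """Post-migration spelling using a PEP 604 union."""
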

View file

@ -1,6 +1,6 @@
# IANA versions like 2020a are not valid PEP 440 identifiers; the recommended
# way to translate the version is to use YYYY.n where `n` is a 0-based index.
__version__ = "2022.7"
__version__ = "2023.3"
# This exposes the original IANA version number.
IANA_VERSION = "2022g"
IANA_VERSION = "2023c"
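
The version pairs visible in this hunk (2022.7 for 2022g, 2023.3 for 2023c) imply the release letter maps to a 1-based alphabet position in practice, despite the "0-based" wording in the header comment. A hedged sketch of that translation (illustrative helper, not part of tzdata):

def pep440_from_iana(iana_version: str) -> str:
    # "2023c" -> "2023.3": the year stays, and the release letter
    # becomes its 1-based position in the alphabet (a=1, b=2, ...).
    year, letter = iana_version[:4], iana_version[4]
    return "{}.{}".format(year, ord(letter) - ord("a") + 1)

assert pep440_from_iana("2022g") == "2022.7"
assert pep440_from_iana("2023c") == "2023.3"
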

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -238,7 +238,7 @@ SY Syria
SZ Eswatini (Swaziland)
TC Turks & Caicos Is
TD Chad
TF French Southern Territories
TF French S. Terr.
TG Togo
TH Thailand
TJ Tajikistan

View file

@ -72,11 +72,11 @@ Leap 2016 Dec 31 23:59:60 + S
# Any additional leap seconds will come after this.
# This Expires line is commented out for now,
# so that pre-2020a zic implementations do not reject this file.
#Expires 2023 Jun 28 00:00:00
#Expires 2023 Dec 28 00:00:00
# POSIX timestamps for the data in this file:
#updated 1467936000 (2016-07-08 00:00:00 UTC)
#expires 1687910400 (2023-06-28 00:00:00 UTC)
#expires 1703721600 (2023-12-28 00:00:00 UTC)
# Updated through IERS Bulletin C64
# File expires on: 28 June 2023
# Updated through IERS Bulletin C65
# File expires on: 28 December 2023
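
The POSIX timestamps in this hunk can be spot-checked with the standard library; a quick sketch:

from datetime import datetime, timezone

# Spot-check of the updated #expires value above.
print(datetime.fromtimestamp(1703721600, tz=timezone.utc))
# -> 2023-12-28 00:00:00+00:00
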

View file

@ -1,4 +1,4 @@
# version 2022g
# version 2023c
# This zic input file is in the public domain.
R d 1916 o - Jun 14 23s 1 S
R d 1916 1919 - O Su>=1 23s 0 -
@ -75,6 +75,8 @@ R K 2014 o - May 15 24 1 S
R K 2014 o - Jun 26 24 0 -
R K 2014 o - Jul 31 24 1 S
R K 2014 o - S lastTh 24 0 -
R K 2023 ma - Ap lastF 0 1 S
R K 2023 ma - O lastTh 24 0 -
Z Africa/Cairo 2:5:9 - LMT 1900 O
2 K EE%sT
Z Africa/Bissau -1:2:20 - LMT 1912 Ja 1 1u
@ -172,7 +174,7 @@ R M 2021 o - May 16 2 0 -
R M 2022 o - Mar 27 3 -1 -
R M 2022 o - May 8 2 0 -
R M 2023 o - Mar 19 3 -1 -
R M 2023 o - Ap 30 2 0 -
R M 2023 o - Ap 23 2 0 -
R M 2024 o - Mar 10 3 -1 -
R M 2024 o - Ap 14 2 0 -
R M 2025 o - F 23 3 -1 -
@ -188,7 +190,7 @@ R M 2029 o - F 18 2 0 -
R M 2029 o - D 30 3 -1 -
R M 2030 o - F 10 2 0 -
R M 2030 o - D 22 3 -1 -
R M 2031 o - F 2 2 0 -
R M 2031 o - Ja 26 2 0 -
R M 2031 o - D 14 3 -1 -
R M 2032 o - Ja 18 2 0 -
R M 2032 o - N 28 3 -1 -
@ -204,7 +206,7 @@ R M 2036 o - N 23 2 0 -
R M 2037 o - O 4 3 -1 -
R M 2037 o - N 15 2 0 -
R M 2038 o - S 26 3 -1 -
R M 2038 o - N 7 2 0 -
R M 2038 o - O 31 2 0 -
R M 2039 o - S 18 3 -1 -
R M 2039 o - O 23 2 0 -
R M 2040 o - S 2 3 -1 -
@ -220,7 +222,7 @@ R M 2044 o - Au 28 2 0 -
R M 2045 o - Jul 9 3 -1 -
R M 2045 o - Au 20 2 0 -
R M 2046 o - Jul 1 3 -1 -
R M 2046 o - Au 12 2 0 -
R M 2046 o - Au 5 2 0 -
R M 2047 o - Jun 23 3 -1 -
R M 2047 o - Jul 28 2 0 -
R M 2048 o - Jun 7 3 -1 -
@ -236,7 +238,7 @@ R M 2052 o - Jun 2 2 0 -
R M 2053 o - Ap 13 3 -1 -
R M 2053 o - May 25 2 0 -
R M 2054 o - Ap 5 3 -1 -
R M 2054 o - May 17 2 0 -
R M 2054 o - May 10 2 0 -
R M 2055 o - Mar 28 3 -1 -
R M 2055 o - May 2 2 0 -
R M 2056 o - Mar 12 3 -1 -
@ -252,7 +254,7 @@ R M 2060 o - Mar 7 2 0 -
R M 2061 o - Ja 16 3 -1 -
R M 2061 o - F 27 2 0 -
R M 2062 o - Ja 8 3 -1 -
R M 2062 o - F 19 2 0 -
R M 2062 o - F 12 2 0 -
R M 2062 o - D 31 3 -1 -
R M 2063 o - F 4 2 0 -
R M 2063 o - D 16 3 -1 -
@ -268,7 +270,7 @@ R M 2067 o - D 11 2 0 -
R M 2068 o - O 21 3 -1 -
R M 2068 o - D 2 2 0 -
R M 2069 o - O 13 3 -1 -
R M 2069 o - N 24 2 0 -
R M 2069 o - N 17 2 0 -
R M 2070 o - O 5 3 -1 -
R M 2070 o - N 9 2 0 -
R M 2071 o - S 20 3 -1 -
@ -284,7 +286,7 @@ R M 2075 o - S 15 2 0 -
R M 2076 o - Jul 26 3 -1 -
R M 2076 o - S 6 2 0 -
R M 2077 o - Jul 18 3 -1 -
R M 2077 o - Au 29 2 0 -
R M 2077 o - Au 22 2 0 -
R M 2078 o - Jul 10 3 -1 -
R M 2078 o - Au 14 2 0 -
R M 2079 o - Jun 25 3 -1 -
@ -294,13 +296,13 @@ R M 2080 o - Jul 21 2 0 -
R M 2081 o - Jun 1 3 -1 -
R M 2081 o - Jul 13 2 0 -
R M 2082 o - May 24 3 -1 -
R M 2082 o - Jul 5 2 0 -
R M 2082 o - Jun 28 2 0 -
R M 2083 o - May 16 3 -1 -
R M 2083 o - Jun 20 2 0 -
R M 2084 o - Ap 30 3 -1 -
R M 2084 o - Jun 11 2 0 -
R M 2085 o - Ap 22 3 -1 -
R M 2085 o - Jun 3 2 0 -
R M 2085 o - May 27 2 0 -
R M 2086 o - Ap 14 3 -1 -
R M 2086 o - May 19 2 0 -
R M 2087 o - Mar 30 3 -1 -
@ -997,8 +999,86 @@ R P 2020 2021 - Mar Sa<=30 0 1 S
R P 2020 o - O 24 1 0 -
R P 2021 o - O 29 1 0 -
R P 2022 o - Mar 27 0 1 S
R P 2022 ma - O Sa<=30 2 0 -
R P 2023 ma - Mar Sa<=30 2 1 S
R P 2022 2035 - O Sa<=30 2 0 -
R P 2023 o - Ap 29 2 1 S
R P 2024 o - Ap 13 2 1 S
R P 2025 o - Ap 5 2 1 S
R P 2026 2054 - Mar Sa<=30 2 1 S
R P 2036 o - O 18 2 0 -
R P 2037 o - O 10 2 0 -
R P 2038 o - S 25 2 0 -
R P 2039 o - S 17 2 0 -
R P 2039 o - O 22 2 1 S
R P 2039 2067 - O Sa<=30 2 0 -
R P 2040 o - S 1 2 0 -
R P 2040 o - O 13 2 1 S
R P 2041 o - Au 24 2 0 -
R P 2041 o - S 28 2 1 S
R P 2042 o - Au 16 2 0 -
R P 2042 o - S 20 2 1 S
R P 2043 o - Au 1 2 0 -
R P 2043 o - S 12 2 1 S
R P 2044 o - Jul 23 2 0 -
R P 2044 o - Au 27 2 1 S
R P 2045 o - Jul 15 2 0 -
R P 2045 o - Au 19 2 1 S
R P 2046 o - Jun 30 2 0 -
R P 2046 o - Au 11 2 1 S
R P 2047 o - Jun 22 2 0 -
R P 2047 o - Jul 27 2 1 S
R P 2048 o - Jun 6 2 0 -
R P 2048 o - Jul 18 2 1 S
R P 2049 o - May 29 2 0 -
R P 2049 o - Jul 3 2 1 S
R P 2050 o - May 21 2 0 -
R P 2050 o - Jun 25 2 1 S
R P 2051 o - May 6 2 0 -
R P 2051 o - Jun 17 2 1 S
R P 2052 o - Ap 27 2 0 -
R P 2052 o - Jun 1 2 1 S
R P 2053 o - Ap 12 2 0 -
R P 2053 o - May 24 2 1 S
R P 2054 o - Ap 4 2 0 -
R P 2054 o - May 16 2 1 S
R P 2055 o - May 1 2 1 S
R P 2056 o - Ap 22 2 1 S
R P 2057 o - Ap 7 2 1 S
R P 2058 ma - Mar Sa<=30 2 1 S
R P 2068 o - O 20 2 0 -
R P 2069 o - O 12 2 0 -
R P 2070 o - O 4 2 0 -
R P 2071 o - S 19 2 0 -
R P 2072 o - S 10 2 0 -
R P 2072 o - O 15 2 1 S
R P 2073 o - S 2 2 0 -
R P 2073 o - O 7 2 1 S
R P 2074 o - Au 18 2 0 -
R P 2074 o - S 29 2 1 S
R P 2075 o - Au 10 2 0 -
R P 2075 o - S 14 2 1 S
R P 2075 ma - O Sa<=30 2 0 -
R P 2076 o - Jul 25 2 0 -
R P 2076 o - S 5 2 1 S
R P 2077 o - Jul 17 2 0 -
R P 2077 o - Au 28 2 1 S
R P 2078 o - Jul 9 2 0 -
R P 2078 o - Au 13 2 1 S
R P 2079 o - Jun 24 2 0 -
R P 2079 o - Au 5 2 1 S
R P 2080 o - Jun 15 2 0 -
R P 2080 o - Jul 20 2 1 S
R P 2081 o - Jun 7 2 0 -
R P 2081 o - Jul 12 2 1 S
R P 2082 o - May 23 2 0 -
R P 2082 o - Jul 4 2 1 S
R P 2083 o - May 15 2 0 -
R P 2083 o - Jun 19 2 1 S
R P 2084 o - Ap 29 2 0 -
R P 2084 o - Jun 10 2 1 S
R P 2085 o - Ap 21 2 0 -
R P 2085 o - Jun 2 2 1 S
R P 2086 o - Ap 13 2 0 -
R P 2086 o - May 18 2 1 S
Z Asia/Gaza 2:17:52 - LMT 1900 O
2 Z EET/EEST 1948 May 15
2 K EE%sT 1967 Jun 5
@ -1754,8 +1834,8 @@ Z America/Scoresbysund -1:27:52 - LMT 1916 Jul 28
-1 E -01/+00
Z America/Nuuk -3:26:56 - LMT 1916 Jul 28
-3 - -03 1980 Ap 6 2
-3 E -03/-02 2023 Mar 25 22
-2 - -02
-3 E -03/-02 2023 O 29 1u
-2 E -02/-01
Z America/Thule -4:35:8 - LMT 1916 Jul 28
-4 Th A%sT
Z Europe/Tallinn 1:39 - LMT 1880
@ -2175,13 +2255,13 @@ Z Europe/Volgograd 2:57:40 - LMT 1920 Ja 3
3 - +03 1930 Jun 21
4 - +04 1961 N 11
4 R +04/+05 1988 Mar 27 2s
3 R +03/+04 1991 Mar 31 2s
3 R MSK/MSD 1991 Mar 31 2s
4 - +04 1992 Mar 29 2s
3 R +03/+04 2011 Mar 27 2s
4 - +04 2014 O 26 2s
3 - +03 2018 O 28 2s
3 R MSK/MSD 2011 Mar 27 2s
4 - MSK 2014 O 26 2s
3 - MSK 2018 O 28 2s
4 - +04 2020 D 27 2s
3 - +03
3 - MSK
Z Europe/Saratov 3:4:18 - LMT 1919 Jul 1 0u
3 - +03 1930 Jun 21
4 R +04/+05 1988 Mar 27 2s
@ -2194,11 +2274,11 @@ Z Europe/Saratov 3:4:18 - LMT 1919 Jul 1 0u
Z Europe/Kirov 3:18:48 - LMT 1919 Jul 1 0u
3 - +03 1930 Jun 21
4 R +04/+05 1989 Mar 26 2s
3 R +03/+04 1991 Mar 31 2s
3 R MSK/MSD 1991 Mar 31 2s
4 - +04 1992 Mar 29 2s
3 R +03/+04 2011 Mar 27 2s
4 - +04 2014 O 26 2s
3 - +03
3 R MSK/MSD 2011 Mar 27 2s
4 - MSK 2014 O 26 2s
3 - MSK
Z Europe/Samara 3:20:20 - LMT 1919 Jul 1 0u
3 - +03 1930 Jun 21
4 - +04 1935 Ja 27
@ -3070,9 +3150,6 @@ Z America/Cambridge_Bay 0 - -00 1920
-5 - EST 2000 N 5
-6 - CST 2001 Ap 1 3
-7 C M%sT
Z America/Yellowknife 0 - -00 1935
-7 Y M%sT 1980
-7 C M%sT
Z America/Inuvik 0 - -00 1953
-8 Y P%sT 1979 Ap lastSu 2
-7 Y M%sT 1980
@ -4171,6 +4248,7 @@ L America/Argentina/Cordoba America/Rosario
L America/Tijuana America/Santa_Isabel
L America/Denver America/Shiprock
L America/Toronto America/Thunder_Bay
L America/Edmonton America/Yellowknife
L Pacific/Auckland Antarctica/South_Pole
L Asia/Shanghai Asia/Chongqing
L Asia/Shanghai Asia/Harbin

View file

@ -121,9 +121,8 @@ CA +744144-0944945 America/Resolute Central - NU (Resolute)
CA +624900-0920459 America/Rankin_Inlet Central - NU (central)
CA +5024-10439 America/Regina CST - SK (most areas)
CA +5017-10750 America/Swift_Current CST - SK (midwest)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); SK (W)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); NT (E); SK (W)
CA +690650-1050310 America/Cambridge_Bay Mountain - NU (west)
CA +6227-11421 America/Yellowknife Mountain - NT (central)
CA +682059-1334300 America/Inuvik Mountain - NT (west)
CA +4906-11631 America/Creston MST - BC (Creston)
CA +5546-12014 America/Dawson_Creek MST - BC (Dawson Cr, Ft St John)
@ -139,7 +138,7 @@ CG -0416+01517 Africa/Brazzaville
CH +4723+00832 Europe/Zurich
CI +0519-00402 Africa/Abidjan
CK -2114-15946 Pacific/Rarotonga
CL -3327-07040 America/Santiago Chile (most areas)
CL -3327-07040 America/Santiago most of Chile
CL -5309-07055 America/Punta_Arenas Region of Magallanes
CL -2709-10926 Pacific/Easter Easter Island
CM +0403+00942 Africa/Douala
@ -151,10 +150,10 @@ CU +2308-08222 America/Havana
CV +1455-02331 Atlantic/Cape_Verde
CW +1211-06900 America/Curacao
CX -1025+10543 Indian/Christmas
CY +3510+03322 Asia/Nicosia Cyprus (most areas)
CY +3510+03322 Asia/Nicosia most of Cyprus
CY +3507+03357 Asia/Famagusta Northern Cyprus
CZ +5005+01426 Europe/Prague
DE +5230+01322 Europe/Berlin Germany (most areas)
DE +5230+01322 Europe/Berlin most of Germany
DE +4742+00841 Europe/Busingen Busingen
DJ +1136+04309 Africa/Djibouti
DK +5540+01235 Europe/Copenhagen
@ -187,7 +186,7 @@ GF +0456-05220 America/Cayenne
GG +492717-0023210 Europe/Guernsey
GH +0533-00013 Africa/Accra
GI +3608-00521 Europe/Gibraltar
GL +6411-05144 America/Nuuk Greenland (most areas)
GL +6411-05144 America/Nuuk most of Greenland
GL +7646-01840 America/Danmarkshavn National Park (east coast)
GL +7029-02158 America/Scoresbysund Scoresbysund/Ittoqqortoormiit
GL +7634-06847 America/Thule Thule/Pituffik
@ -235,7 +234,7 @@ KP +3901+12545 Asia/Pyongyang
KR +3733+12658 Asia/Seoul
KW +2920+04759 Asia/Kuwait
KY +1918-08123 America/Cayman
KZ +4315+07657 Asia/Almaty Kazakhstan (most areas)
KZ +4315+07657 Asia/Almaty most of Kazakhstan
KZ +4448+06528 Asia/Qyzylorda Qyzylorda/Kyzylorda/Kzyl-Orda
KZ +5312+06337 Asia/Qostanay Qostanay/Kostanay/Kustanay
KZ +5017+05710 Asia/Aqtobe Aqtobe/Aktobe
@ -259,12 +258,12 @@ MD +4700+02850 Europe/Chisinau
ME +4226+01916 Europe/Podgorica
MF +1804-06305 America/Marigot
MG -1855+04731 Indian/Antananarivo
MH +0709+17112 Pacific/Majuro Marshall Islands (most areas)
MH +0709+17112 Pacific/Majuro most of Marshall Islands
MH +0905+16720 Pacific/Kwajalein Kwajalein
MK +4159+02126 Europe/Skopje
ML +1239-00800 Africa/Bamako
MM +1647+09610 Asia/Yangon
MN +4755+10653 Asia/Ulaanbaatar Mongolia (most areas)
MN +4755+10653 Asia/Ulaanbaatar most of Mongolia
MN +4801+09139 Asia/Hovd Bayan-Olgiy, Govi-Altai, Hovd, Uvs, Zavkhan
MN +4804+11430 Asia/Choibalsan Dornod, Sukhbaatar
MO +221150+1133230 Asia/Macau
@ -302,7 +301,7 @@ NO +5955+01045 Europe/Oslo
NP +2743+08519 Asia/Kathmandu
NR -0031+16655 Pacific/Nauru
NU -1901-16955 Pacific/Niue
NZ -3652+17446 Pacific/Auckland New Zealand (most areas)
NZ -3652+17446 Pacific/Auckland most of New Zealand
NZ -4357-17633 Pacific/Chatham Chatham Islands
OM +2336+05835 Asia/Muscat
PA +0858-07932 America/Panama
@ -310,7 +309,7 @@ PE -1203-07703 America/Lima
PF -1732-14934 Pacific/Tahiti Society Islands
PF -0900-13930 Pacific/Marquesas Marquesas Islands
PF -2308-13457 Pacific/Gambier Gambier Islands
PG -0930+14710 Pacific/Port_Moresby Papua New Guinea (most areas)
PG -0930+14710 Pacific/Port_Moresby most of Papua New Guinea
PG -0613+15534 Pacific/Bougainville Bougainville
PH +1435+12100 Asia/Manila
PK +2452+06703 Asia/Karachi
@ -356,7 +355,7 @@ RU +4310+13156 Asia/Vladivostok MSK+07 - Amur River
RU +643337+1431336 Asia/Ust-Nera MSK+07 - Oymyakonsky
RU +5934+15048 Asia/Magadan MSK+08 - Magadan
RU +4658+14242 Asia/Sakhalin MSK+08 - Sakhalin Island
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); North Kuril Is
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); N Kuril Is
RU +5301+15839 Asia/Kamchatka MSK+09 - Kamchatka
RU +6445+17729 Asia/Anadyr MSK+09 - Bering Sea
RW -0157+03004 Africa/Kigali
@ -397,7 +396,7 @@ TT +1039-06131 America/Port_of_Spain
TV -0831+17913 Pacific/Funafuti
TW +2503+12130 Asia/Taipei
TZ -0648+03917 Africa/Dar_es_Salaam
UA +5026+03031 Europe/Kyiv Ukraine (most areas)
UA +5026+03031 Europe/Kyiv most of Ukraine
UG +0019+03225 Africa/Kampala
UM +2813-17722 Pacific/Midway Midway Islands
UM +1917+16637 Pacific/Wake Wake Island
@ -420,7 +419,7 @@ US +465042-1012439 America/North_Dakota/New_Salem Central - ND (Morton rural)
US +471551-1014640 America/North_Dakota/Beulah Central - ND (Mercer)
US +394421-1045903 America/Denver Mountain (most areas)
US +433649-1161209 America/Boise Mountain - ID (south); OR (east)
US +332654-1120424 America/Phoenix MST - Arizona (except Navajo)
US +332654-1120424 America/Phoenix MST - AZ (except Navajo)
US +340308-1181434 America/Los_Angeles Pacific
US +611305-1495401 America/Anchorage Alaska (most areas)
US +581807-1342511 America/Juneau Alaska - Juneau area
@ -428,7 +427,7 @@ US +571035-1351807 America/Sitka Alaska - Sitka area
US +550737-1313435 America/Metlakatla Alaska - Annette Island
US +593249-1394338 America/Yakutat Alaska - Yakutat
US +643004-1652423 America/Nome Alaska (west)
US +515248-1763929 America/Adak Aleutian Islands
US +515248-1763929 America/Adak Alaska - western Aleutians
US +211825-1575130 Pacific/Honolulu Hawaii
UY -345433-0561245 America/Montevideo
UZ +3940+06648 Asia/Samarkand Uzbekistan (west)

View file

@ -18,7 +18,10 @@
# Please see the theory.html file for how these names are chosen.
# If multiple timezones overlap a country, each has a row in the
# table, with each column 1 containing the country code.
# 4. Comments; present if and only if a country has multiple timezones.
# 4. Comments; present if and only if countries have multiple timezones,
# and useful only for those countries. For example, the comments
# for the row with countries CH,DE,LI and name Europe/Zurich
# are useful only for DE, since CH and LI have no other timezones.
#
# If a timezone covers multiple countries, the most-populous city is used,
# and that country is listed first in column 1; any other countries
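
Given the four-column layout this comment describes, a row needs at most three splits to parse; a minimal sketch (sample row taken from this file, helper name is mine):

def parse_zone_row(line: str):
    # Columns: country codes, coordinates, TZ name, optional comment.
    codes, coords, tz, *rest = line.split(maxsplit=3)
    return codes.split(","), coords, tz, (rest[0] if rest else None)

print(parse_zone_row("CH,DE,LI +4723+00832 Europe/Zurich Busingen"))
# -> (['CH', 'DE', 'LI'], '+4723+00832', 'Europe/Zurich', 'Busingen')
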
@ -34,7 +37,7 @@
#country-
#codes coordinates TZ comments
AD +4230+00131 Europe/Andorra
AE,OM,RE,SC,TF +2518+05518 Asia/Dubai UAE, Oman, Réunion, Seychelles, Crozet, Scattered Is
AE,OM,RE,SC,TF +2518+05518 Asia/Dubai Crozet, Scattered Is
AF +3431+06912 Asia/Kabul
AL +4120+01950 Europe/Tirane
AM +4011+04430 Asia/Yerevan
@ -45,7 +48,7 @@ AQ -6448-06406 Antarctica/Palmer Palmer
AQ -6734-06808 Antarctica/Rothera Rothera
AQ -720041+0023206 Antarctica/Troll Troll
AR -3436-05827 America/Argentina/Buenos_Aires Buenos Aires (BA, CF)
AR -3124-06411 America/Argentina/Cordoba Argentina (most areas: CB, CC, CN, ER, FM, MN, SE, SF)
AR -3124-06411 America/Argentina/Cordoba most areas: CB, CC, CN, ER, FM, MN, SE, SF
AR -2447-06525 America/Argentina/Salta Salta (SA, LP, NQ, RN)
AR -2411-06518 America/Argentina/Jujuy Jujuy (JY)
AR -2649-06513 America/Argentina/Tucuman Tucumán (TM)
@ -56,7 +59,7 @@ AR -3253-06849 America/Argentina/Mendoza Mendoza (MZ)
AR -3319-06621 America/Argentina/San_Luis San Luis (SL)
AR -5138-06913 America/Argentina/Rio_Gallegos Santa Cruz (SC)
AR -5448-06818 America/Argentina/Ushuaia Tierra del Fuego (TF)
AS,UM -1416-17042 Pacific/Pago_Pago Samoa, Midway
AS,UM -1416-17042 Pacific/Pago_Pago Midway
AT +4813+01620 Europe/Vienna
AU -3133+15905 Australia/Lord_Howe Lord Howe Island
AU -5430+15857 Antarctica/Macquarie Macquarie Island
@ -101,26 +104,25 @@ CA +4439-06336 America/Halifax Atlantic - NS (most areas); PE
CA +4612-05957 America/Glace_Bay Atlantic - NS (Cape Breton)
CA +4606-06447 America/Moncton Atlantic - New Brunswick
CA +5320-06025 America/Goose_Bay Atlantic - Labrador (most areas)
CA,BS +4339-07923 America/Toronto Eastern - ON, QC (most areas), Bahamas
CA,BS +4339-07923 America/Toronto Eastern - ON, QC (most areas)
CA +6344-06828 America/Iqaluit Eastern - NU (most areas)
CA +4953-09709 America/Winnipeg Central - ON (west); Manitoba
CA +744144-0944945 America/Resolute Central - NU (Resolute)
CA +624900-0920459 America/Rankin_Inlet Central - NU (central)
CA +5024-10439 America/Regina CST - SK (most areas)
CA +5017-10750 America/Swift_Current CST - SK (midwest)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); SK (W)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); NT (E); SK (W)
CA +690650-1050310 America/Cambridge_Bay Mountain - NU (west)
CA +6227-11421 America/Yellowknife Mountain - NT (central)
CA +682059-1334300 America/Inuvik Mountain - NT (west)
CA +5546-12014 America/Dawson_Creek MST - BC (Dawson Cr, Ft St John)
CA +5848-12242 America/Fort_Nelson MST - BC (Ft Nelson)
CA +6043-13503 America/Whitehorse MST - Yukon (east)
CA +6404-13925 America/Dawson MST - Yukon (west)
CA +4916-12307 America/Vancouver Pacific - BC (most areas)
CH,DE,LI +4723+00832 Europe/Zurich Swiss time
CH,DE,LI +4723+00832 Europe/Zurich Büsingen
CI,BF,GH,GM,GN,IS,ML,MR,SH,SL,SN,TG +0519-00402 Africa/Abidjan
CK -2114-15946 Pacific/Rarotonga
CL -3327-07040 America/Santiago Chile (most areas)
CL -3327-07040 America/Santiago most of Chile
CL -5309-07055 America/Punta_Arenas Region of Magallanes
CL -2709-10926 Pacific/Easter Easter Island
CN +3114+12128 Asia/Shanghai Beijing Time
@ -129,10 +131,10 @@ CO +0436-07405 America/Bogota
CR +0956-08405 America/Costa_Rica
CU +2308-08222 America/Havana
CV +1455-02331 Atlantic/Cape_Verde
CY +3510+03322 Asia/Nicosia Cyprus (most areas)
CY +3510+03322 Asia/Nicosia most of Cyprus
CY +3507+03357 Asia/Famagusta Northern Cyprus
CZ,SK +5005+01426 Europe/Prague
DE,DK,NO,SE,SJ +5230+01322 Europe/Berlin Germany (most areas), Scandinavia
DE,DK,NO,SE,SJ +5230+01322 Europe/Berlin most of Germany
DO +1828-06954 America/Santo_Domingo
DZ +3647+00303 Africa/Algiers
EC -0210-07950 America/Guayaquil Ecuador (mainland)
@ -153,7 +155,7 @@ GB,GG,IM,JE +513030-0000731 Europe/London
GE +4143+04449 Asia/Tbilisi
GF +0456-05220 America/Cayenne
GI +3608-00521 Europe/Gibraltar
GL +6411-05144 America/Nuuk Greenland (most areas)
GL +6411-05144 America/Nuuk most of Greenland
GL +7646-01840 America/Danmarkshavn National Park (east coast)
GL +7029-02158 America/Scoresbysund Scoresbysund/Ittoqqortoormiit
GL +7634-06847 America/Thule Thule/Pituffik
@ -183,12 +185,12 @@ JO +3157+03556 Asia/Amman
JP +353916+1394441 Asia/Tokyo
KE,DJ,ER,ET,KM,MG,SO,TZ,UG,YT -0117+03649 Africa/Nairobi
KG +4254+07436 Asia/Bishkek
KI,MH,TV,UM,WF +0125+17300 Pacific/Tarawa Gilberts, Marshalls, Tuvalu, Wallis & Futuna, Wake
KI,MH,TV,UM,WF +0125+17300 Pacific/Tarawa Gilberts, Marshalls, Wake
KI -0247-17143 Pacific/Kanton Phoenix Islands
KI +0152-15720 Pacific/Kiritimati Line Islands
KP +3901+12545 Asia/Pyongyang
KR +3733+12658 Asia/Seoul
KZ +4315+07657 Asia/Almaty Kazakhstan (most areas)
KZ +4315+07657 Asia/Almaty most of Kazakhstan
KZ +4448+06528 Asia/Qyzylorda Qyzylorda/Kyzylorda/Kzyl-Orda
KZ +5312+06337 Asia/Qostanay Qostanay/Kostanay/Kustanay
KZ +5017+05710 Asia/Aqtobe Aqtöbe/Aktobe
@ -205,14 +207,14 @@ MA +3339-00735 Africa/Casablanca
MD +4700+02850 Europe/Chisinau
MH +0905+16720 Pacific/Kwajalein Kwajalein
MM,CC +1647+09610 Asia/Yangon
MN +4755+10653 Asia/Ulaanbaatar Mongolia (most areas)
MN +4755+10653 Asia/Ulaanbaatar most of Mongolia
MN +4801+09139 Asia/Hovd Bayan-Ölgii, Govi-Altai, Hovd, Uvs, Zavkhan
MN +4804+11430 Asia/Choibalsan Dornod, Sükhbaatar
MO +221150+1133230 Asia/Macau
MQ +1436-06105 America/Martinique
MT +3554+01431 Europe/Malta
MU -2010+05730 Indian/Mauritius
MV,TF +0410+07330 Indian/Maldives Maldives, Kerguelen, St Paul I, Amsterdam I
MV,TF +0410+07330 Indian/Maldives Kerguelen, St Paul I, Amsterdam I
MX +1924-09909 America/Mexico_City Central Mexico
MX +2105-08646 America/Cancun Quintana Roo
MX +2058-08937 America/Merida Campeche, Yucatán
@ -225,7 +227,7 @@ MX +2313-10625 America/Mazatlan Baja California Sur, Nayarit (most areas), Sinal
MX +2048-10515 America/Bahia_Banderas Bahía de Banderas
MX +2904-11058 America/Hermosillo Sonora
MX +3232-11701 America/Tijuana Baja California
MY,BN +0133+11020 Asia/Kuching Sabah, Sarawak, Brunei
MY,BN +0133+11020 Asia/Kuching Sabah, Sarawak
MZ,BI,BW,CD,MW,RW,ZM,ZW -2558+03235 Africa/Maputo Central Africa Time
NA -2234+01706 Africa/Windhoek
NC -2216+16627 Pacific/Noumea
@ -237,7 +239,7 @@ NR -0031+16655 Pacific/Nauru
NU -1901-16955 Pacific/Niue
NZ,AQ -3652+17446 Pacific/Auckland New Zealand time
NZ -4357-17633 Pacific/Chatham Chatham Islands
PA,CA,KY +0858-07932 America/Panama EST - Panama, Cayman, ON (Atikokan), NU (Coral H)
PA,CA,KY +0858-07932 America/Panama EST - ON (Atikokan), NU (Coral H)
PE -1203-07703 America/Lima
PF -1732-14934 Pacific/Tahiti Society Islands
PF -0900-13930 Pacific/Marquesas Marquesas Islands
@ -285,13 +287,13 @@ RU +4310+13156 Asia/Vladivostok MSK+07 - Amur River
RU +643337+1431336 Asia/Ust-Nera MSK+07 - Oymyakonsky
RU +5934+15048 Asia/Magadan MSK+08 - Magadan
RU +4658+14242 Asia/Sakhalin MSK+08 - Sakhalin Island
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); North Kuril Is
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); N Kuril Is
RU +5301+15839 Asia/Kamchatka MSK+09 - Kamchatka
RU +6445+17729 Asia/Anadyr MSK+09 - Bering Sea
SA,AQ,KW,YE +2438+04643 Asia/Riyadh Arabia, Syowa
SB,FM -0932+16012 Pacific/Guadalcanal Solomons, Pohnpei
SA,AQ,KW,YE +2438+04643 Asia/Riyadh Syowa
SB,FM -0932+16012 Pacific/Guadalcanal Pohnpei
SD +1536+03232 Africa/Khartoum
SG,MY +0117+10351 Asia/Singapore Singapore, peninsular Malaysia
SG,MY +0117+10351 Asia/Singapore peninsular Malaysia
SR +0550-05510 America/Paramaribo
SS +0451+03137 Africa/Juba
ST +0020+00644 Africa/Sao_Tome
@ -299,7 +301,7 @@ SV +1342-08912 America/El_Salvador
SY +3330+03618 Asia/Damascus
TC +2128-07108 America/Grand_Turk
TD +1207+01503 Africa/Ndjamena
TH,CX,KH,LA,VN +1345+10031 Asia/Bangkok Indochina (most areas)
TH,CX,KH,LA,VN +1345+10031 Asia/Bangkok north Vietnam
TJ +3835+06848 Asia/Dushanbe
TK -0922-17114 Pacific/Fakaofo
TL -0833+12535 Asia/Dili
@ -308,7 +310,7 @@ TN +3648+01011 Africa/Tunis
TO -210800-1751200 Pacific/Tongatapu
TR +4101+02858 Europe/Istanbul
TW +2503+12130 Asia/Taipei
UA +5026+03031 Europe/Kyiv Ukraine (most areas)
UA +5026+03031 Europe/Kyiv most of Ukraine
US +404251-0740023 America/New_York Eastern (most areas)
US +421953-0830245 America/Detroit Eastern - MI (most areas)
US +381515-0854534 America/Kentucky/Louisville Eastern - KY (Louisville area)
@ -328,7 +330,7 @@ US +465042-1012439 America/North_Dakota/New_Salem Central - ND (Morton rural)
US +471551-1014640 America/North_Dakota/Beulah Central - ND (Mercer)
US +394421-1045903 America/Denver Mountain (most areas)
US +433649-1161209 America/Boise Mountain - ID (south); OR (east)
US,CA +332654-1120424 America/Phoenix MST - Arizona (except Navajo), Creston BC
US,CA +332654-1120424 America/Phoenix MST - AZ (most areas), Creston BC
US +340308-1181434 America/Los_Angeles Pacific
US +611305-1495401 America/Anchorage Alaska (most areas)
US +581807-1342511 America/Juneau Alaska - Juneau area
@ -336,13 +338,13 @@ US +571035-1351807 America/Sitka Alaska - Sitka area
US +550737-1313435 America/Metlakatla Alaska - Annette Island
US +593249-1394338 America/Yakutat Alaska - Yakutat
US +643004-1652423 America/Nome Alaska (west)
US +515248-1763929 America/Adak Aleutian Islands
US,UM +211825-1575130 Pacific/Honolulu Hawaii
US +515248-1763929 America/Adak Alaska - western Aleutians
US +211825-1575130 Pacific/Honolulu Hawaii
UY -345433-0561245 America/Montevideo
UZ +3940+06648 Asia/Samarkand Uzbekistan (west)
UZ +4120+06918 Asia/Tashkent Uzbekistan (east)
VE +1030-06656 America/Caracas
VN +1045+10640 Asia/Ho_Chi_Minh Vietnam (south)
VN +1045+10640 Asia/Ho_Chi_Minh south Vietnam
VU -1740+16825 Pacific/Efate
WS -1350-17144 Pacific/Apia
ZA,LS,SZ -2615+02800 Africa/Johannesburg

View file

@ -243,7 +243,6 @@ America/Iqaluit
America/Resolute
America/Rankin_Inlet
America/Cambridge_Bay
America/Yellowknife
America/Inuvik
America/Whitehorse
America/Dawson
@ -561,6 +560,7 @@ America/Rosario
America/Santa_Isabel
America/Shiprock
America/Thunder_Bay
America/Yellowknife
Antarctica/South_Pole
Asia/Chongqing
Asia/Harbin

View file

@ -1,11 +1,11 @@
apscheduler==3.10.1
importlib-metadata==6.0.0
importlib-metadata==6.8.0
importlib-resources==5.12.0
pyinstaller==5.8.0
pyopenssl==23.0.0
pycryptodomex==3.17
pyinstaller==5.13.0
pyopenssl==23.2.0
pycryptodomex==3.18.0
pyobjc-core==9.0.1; platform_system == "Darwin"
pyobjc-framework-Cocoa==9.0.1; platform_system == "Darwin"
pyobjc-core==9.2; platform_system == "Darwin"
pyobjc-framework-Cocoa==9.2; platform_system == "Darwin"
pywin32==305; platform_system == "Windows"
pywin32==306; platform_system == "Windows"

View file

@ -326,70 +326,7 @@ class ActivityProcessor(object):
# Get the last insert row id
last_id = self.db.last_insert_id()
new_session = prev_session = None
watched = False
if session['live']:
# Check if we should group the session, select the last guid from the user
query = "SELECT session_history.id, session_history_metadata.guid, session_history.reference_id " \
"FROM session_history " \
"JOIN session_history_metadata ON session_history.id == session_history_metadata.id " \
"WHERE session_history.user_id = ? ORDER BY session_history.id DESC LIMIT 1 "
args = [session['user_id']]
result = self.db.select(query=query, args=args)
if len(result) > 0:
new_session = {'id': last_id,
'guid': metadata['guid'],
'reference_id': last_id}
prev_session = {'id': result[0]['id'],
'guid': result[0]['guid'],
'reference_id': result[0]['reference_id']}
else:
# Check if we should group the session, select the last two rows from the user
query = "SELECT id, rating_key, view_offset, reference_id FROM session_history " \
"WHERE user_id = ? AND rating_key = ? ORDER BY id DESC LIMIT 2 "
args = [session['user_id'], session['rating_key']]
result = self.db.select(query=query, args=args)
if len(result) > 1:
new_session = {'id': result[0]['id'],
'rating_key': result[0]['rating_key'],
'view_offset': result[0]['view_offset'],
'reference_id': result[0]['reference_id']}
prev_session = {'id': result[1]['id'],
'rating_key': result[1]['rating_key'],
'view_offset': result[1]['view_offset'],
'reference_id': result[1]['reference_id']}
marker_first, marker_final = helpers.get_first_final_marker(metadata['markers'])
watched = helpers.check_watched(
session['media_type'], session['view_offset'], session['duration'],
marker_first, marker_final
)
query = "UPDATE session_history SET reference_id = ? WHERE id = ? "
# If previous session view offset less than watched percent,
# and new session view offset is greater,
# then set the reference_id to the previous row,
# else set the reference_id to the new id
if prev_session is None and new_session is None:
args = [last_id, last_id]
elif watched and prev_session['view_offset'] <= new_session['view_offset'] or \
session['live'] and prev_session['guid'] == new_session['guid']:
args = [prev_session['reference_id'], new_session['id']]
else:
args = [new_session['id'], new_session['id']]
self.db.action(query=query, args=args)
self.group_history(last_id, session, metadata)
# logger.debug("Tautulli ActivityProcessor :: Successfully written history item, last id for session_history is %s"
# % last_id)
@ -546,6 +483,80 @@ class ActivityProcessor(object):
# Return the session row id when the session is successfully written to the database
return session['id']
def group_history(self, last_id, session, metadata=None):
new_session = prev_session = None
prev_watched = None
if session['live']:
# Check if we should group the session, select the last guid from the user
query = "SELECT session_history.id, session_history_metadata.guid, session_history.reference_id " \
"FROM session_history " \
"JOIN session_history_metadata ON session_history.id == session_history_metadata.id " \
"WHERE session_history.id <= ? AND session_history.user_id = ? ORDER BY session_history.id DESC LIMIT 1 "
args = [last_id, session['user_id']]
result = self.db.select(query=query, args=args)
if len(result) > 0:
new_session = {'id': last_id,
'guid': metadata['guid'] if metadata else session['guid'],
'reference_id': last_id}
prev_session = {'id': result[0]['id'],
'guid': result[0]['guid'],
'reference_id': result[0]['reference_id']}
else:
# Check if we should group the session, select the last two rows from the user
query = "SELECT id, rating_key, view_offset, reference_id FROM session_history " \
"WHERE id <= ? AND user_id = ? AND rating_key = ? ORDER BY id DESC LIMIT 2 "
args = [last_id, session['user_id'], session['rating_key']]
result = self.db.select(query=query, args=args)
if len(result) > 1:
new_session = {'id': result[0]['id'],
'rating_key': result[0]['rating_key'],
'view_offset': helpers.cast_to_int(result[0]['view_offset']),
'reference_id': result[0]['reference_id']}
prev_session = {'id': result[1]['id'],
'rating_key': result[1]['rating_key'],
'view_offset': helpers.cast_to_int(result[1]['view_offset']),
'reference_id': result[1]['reference_id']}
if metadata:
marker_first, marker_final = helpers.get_first_final_marker(metadata['markers'])
else:
marker_first = session['marker_credits_first']
marker_final = session['marker_credits_final']
prev_watched = helpers.check_watched(
session['media_type'], prev_session['view_offset'], session['duration'],
marker_first, marker_final
)
query = "UPDATE session_history SET reference_id = ? WHERE id = ? "
# If the previous session was not watched to completion,
# and the new session's view offset is equal or greater,
# then set the reference_id to the previous row,
# else set the reference_id to the new id
if (prev_watched is False and prev_session['view_offset'] <= new_session['view_offset'] or
session['live'] and prev_session['guid'] == new_session['guid']):
if metadata:
logger.debug("Tautulli ActivityProcessor :: Grouping history for sessionKey %s", session['session_key'])
args = [prev_session['reference_id'], new_session['id']]
else:
if metadata:
logger.debug("Tautulli ActivityProcessor :: Not grouping history for sessionKey %s", session['session_key'])
args = [last_id, last_id]
self.db.action(query=query, args=args)
def get_sessions(self, user_id=None, ip_address=None):
query = "SELECT * FROM sessions"
args = []
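
The decision the new group_history method makes reduces to one predicate; a hedged standalone sketch (names are illustrative, not the Tautulli API):

def should_group(prev, new, live, prev_watched):
    # Live sessions group when they carry the same guid; recorded media
    # groups when the earlier play was not yet watched and playback
    # did not move backwards.
    if live:
        return prev["guid"] == new["guid"]
    return prev_watched is False and prev["view_offset"] <= new["view_offset"]
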
@ -695,3 +706,36 @@ class ActivityProcessor(object):
"ORDER BY stopped DESC",
[user_id, machine_id, media_type])
return int(started - last_session.get('stopped', 0) >= plexpy.CONFIG.NOTIFY_CONTINUED_SESSION_THRESHOLD)
def regroup_history(self):
logger.info("Tautulli ActivityProcessor :: Creating database backup...")
if not database.make_backup():
return False
logger.info("Tautulli ActivityProcessor :: Regrouping session history...")
query = (
"SELECT * FROM session_history "
"JOIN session_history_metadata ON session_history.id = session_history_metadata.id"
)
results = self.db.select(query)
count = len(results)
progress = 0
for i, session in enumerate(results, start=1):
if int(i / count * 10) > progress:
progress = int(i / count * 10)
logger.info("Tautulli ActivityProcessor :: Regrouping session history: %d%%", progress * 10)
try:
self.group_history(session['id'], session)
except Exception as e:
logger.error("Tautulli ActivityProcessor :: Error regrouping session history: %s", e)
return False
logger.info("Tautulli ActivityProcessor :: Regrouping session history complete.")
return True
def regroup_history():
ActivityProcessor().regroup_history()
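
The progress logging in regroup_history fires once per 10% step; a minimal standalone sketch of that pattern:

count = 42
progress = 0
for i in range(1, count + 1):
    # int(i / count * 10) crosses each integer exactly once, so every
    # 10% band is logged a single time.
    if int(i / count * 10) > progress:
        progress = int(i / count * 10)
        print("Regrouping session history: %d%%" % (progress * 10))
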

View file

@ -216,6 +216,7 @@ AUDIO_QUALITY_PROFILES = {
AUDIO_QUALITY_PROFILES = OrderedDict(sorted(list(AUDIO_QUALITY_PROFILES.items()), key=lambda k: k[0], reverse=True))
HW_DECODERS = [
'd3d11va',
'dxva2',
'videotoolbox',
'mediacodecndk',

View file

@ -177,6 +177,7 @@ _CONFIG_DEFINITIONS = {
'NOTIFY_RECENTLY_ADDED_UPGRADE': (int, 'Monitoring', 0),
'NOTIFY_REMOTE_ACCESS_THRESHOLD': (int, 'Monitoring', 60),
'NOTIFY_CONCURRENT_BY_IP': (int, 'Monitoring', 0),
'NOTIFY_CONCURRENT_IPV6_CIDR': (str, 'Monitoring', '/64'),
'NOTIFY_CONCURRENT_THRESHOLD': (int, 'Monitoring', 2),
'NOTIFY_NEW_DEVICE_INITIAL_ONLY': (int, 'Monitoring', 1),
'NOTIFY_SERVER_CONNECTION_THRESHOLD': (int, 'Monitoring', 60),
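
The new NOTIFY_CONCURRENT_IPV6_CIDR setting suggests IPv6 addresses are compared by network prefix rather than exact value, so devices sharing a household prefix count as one IP. A hedged sketch of that idea using only the stdlib (helper name is mine, not Tautulli's implementation):

import ipaddress

def normalize_ip(ip: str, ipv6_cidr: str = "/64") -> str:
    # Collapse IPv6 addresses to their network; IPv4 passes through as-is.
    addr = ipaddress.ip_address(ip)
    if addr.version == 6:
        return str(ipaddress.ip_network(ip + ipv6_cidr, strict=False))
    return ip

print(normalize_ip("2001:db8::1"))  # 2001:db8::/64
print(normalize_ip("203.0.113.7"))  # 203.0.113.7
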
@ -536,7 +537,7 @@ class Config(object):
Returns something from the ini unless it is a real property
of the configuration object or is not all caps.
"""
if not re.match(r'[A-Z_]+$', name):
if not re.match(r'[A-Z0-9_]+$', name):
return super(Config, self).__getattr__(name)
else:
return self.check_setting(name)
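
The widened character class matters precisely for keys like the one added above; a quick illustrative check:

import re

name = "NOTIFY_CONCURRENT_IPV6_CIDR"
print(bool(re.match(r"[A-Z_]+$", name)))     # False: the digit 6 breaks the old pattern
print(bool(re.match(r"[A-Z0-9_]+$", name)))  # True
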

Some files were not shown because too many files have changed in this diff.