Mirror of https://github.com/Tautulli/Tautulli.git (synced 2025-08-22 06:13:25 -07:00)

commit 127f2c0321
Merge branch 'nightly' into dependabot/pip/nightly/requests-2.31.0

109 changed files with 2801 additions and 1322 deletions
.github/workflows/issues-stale.yml (vendored, 4 changes)

@@ -10,7 +10,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Stale
-        uses: actions/stale@v7
+        uses: actions/stale@v8
         with:
           stale-issue-message: >
             This issue is stale because it has been open for 30 days with no activity.
@@ -30,7 +30,7 @@ jobs:
           days-before-close: 5

       - name: Invalid Template
-        uses: actions/stale@v7
+        uses: actions/stale@v8
         with:
           stale-issue-message: >
             Invalid issues template.
.github/workflows/publish-docker.yml (vendored, 2 changes)

@@ -95,7 +95,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Get Build Job Status
-        uses: technote-space/workflow-conclusion-action@v3.0
+        uses: technote-space/workflow-conclusion-action@v3

       - name: Combine Job Status
         id: status
.github/workflows/publish-installers.yml (vendored, 8 changes)

@@ -68,7 +68,7 @@ jobs:
           pyinstaller -y ./package/Tautulli-${{ matrix.os }}.spec

       - name: Create Windows Installer
-        uses: joncloud/makensis-action@v3.7
+        uses: joncloud/makensis-action@v4
         if: matrix.os == 'windows'
         with:
           script-file: ./package/Tautulli.nsi
@@ -100,10 +100,10 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Get Build Job Status
-        uses: technote-space/workflow-conclusion-action@v3.0
+        uses: technote-space/workflow-conclusion-action@v3

       - name: Checkout Code
-        uses: actions/checkout@v3.2.0
+        uses: actions/checkout@v3

       - name: Set Release Version
         id: get_version
@@ -168,7 +168,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Get Build Job Status
-        uses: technote-space/workflow-conclusion-action@v3.0
+        uses: technote-space/workflow-conclusion-action@v3

       - name: Combine Job Status
         id: status
.github/workflows/publish-snap.yml (vendored, 2 changes)

@@ -70,7 +70,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Get Build Job Status
-        uses: technote-space/workflow-conclusion-action@v3.0
+        uses: technote-space/workflow-conclusion-action@v3

       - name: Combine Job Status
         id: status
.github/workflows/pull-requests.yml (vendored, 1 change)

@@ -18,7 +18,6 @@ jobs:
         with:
           message: Pull requests must be made to the `nightly` branch. Thanks.
           repo-token: ${{ secrets.GITHUB_TOKEN }}
-          repo-token-user-login: 'github-actions[bot]'

       - name: Fail Workflow
         if: github.base_ref != 'nightly'
CHANGELOG.md (28 changes)

@@ -1,5 +1,33 @@
 # Changelog

+## v2.12.5 (2023-07-13)
+
+* Activity:
+  * New: Added d3d11va to the list of hardware decoders.
+* History:
+  * Fix: Incorrect grouping of play history.
+  * New: Added a button in the settings to regroup play history.
+* Notifications:
+  * Fix: Incorrect concurrent streams notifications by IP address for IPv6 addresses. (#2096) (Thanks @pooley182)
+* UI:
+  * Fix: Occasional UI crashing on Python 3.11.
+  * New: Added multiselect user filters to the History and Graphs pages. (#2090) (Thanks @zdimension)
+* API:
+  * New: Added regroup_history API command.
+  * Change: Updated the graph API commands to accept a comma-separated list of user IDs.
+
+
 ## v2.12.4 (2023-05-23)

 * History:
   * Fix: Set view offset equal to duration if a stream is stopped within the last 10 sec.
 * Other:
   * Fix: Database import may fail for some older databases.
   * Fix: Double-quoted strings for newer versions of SQLite. (#2015, #2057)
 * API:
   * Change: Return the ID for async API calls (export_metadata, notify, notify_newsletter).


 ## v2.12.3 (2023-04-14)

 * Activity:
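The v2.12.5 API change above means a graph command's user ID filter can now carry several IDs at once. Below is a minimal sketch of what such a call might look like, assuming a local Tautulli instance on the default port and using get_plays_by_date as a representative graph command; the host, API key, and user IDs are placeholder assumptions, not values from this commit.

```python
# Hedged sketch: query a graph API command for two specific users.
import requests

params = {
    "apikey": "YOUR_API_KEY",        # placeholder
    "cmd": "get_plays_by_date",      # one of the graph API commands
    "time_range": 30,                # days of history to chart
    "user_id": "123,456",            # comma-separated list of user IDs
}
resp = requests.get("http://localhost:8181/api/v2", params=params)
print(resp.json()["response"]["data"])
```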
data/interfaces/default/css/bootstrap-select.min.css (vendored, new file, 6 changes)
File diff suppressed because one or more lines are too long
@@ -2914,7 +2914,7 @@ a .home-platforms-list-cover-face:hover
     margin-bottom: -20px;
     width: 100%;
     max-width: 1750px;
-    overflow: hidden;
+    display: flow-root;
 }
 .table-card-back td {
     font-size: 12px;
@@ -1,6 +1,7 @@
 <%inherit file="base.html"/>

 <%def name="headIncludes()">
+<link rel="stylesheet" href="${http_root}css/bootstrap-select.min.css">
 <link rel="stylesheet" href="${http_root}css/dataTables.bootstrap.min.css">
 <link rel="stylesheet" href="${http_root}css/tautulli-dataTables.css">
 </%def>
@@ -14,9 +15,7 @@
 <div class="button-bar">
     <div class="btn-group" id="user-selection">
         <label>
-            <select name="graph-user" id="graph-user" class="btn" style="color: inherit;">
-                <option value="">All Users</option>
-                <option disabled>────────────</option>
+            <select name="graph-user" id="graph-user" multiple>
             </select>
         </label>
     </div>
@@ -225,6 +224,7 @@
 </%def>

 <%def name="javascriptIncludes()">
+<script src="${http_root}js/bootstrap-select.min.js"></script>
 <script src="${http_root}js/highcharts.min.js"></script>
 <script src="${http_root}js/jquery.dataTables.min.js"></script>
 <script src="${http_root}js/dataTables.bootstrap.min.js"></script>
@@ -373,14 +373,35 @@
         type: 'get',
         dataType: "json",
         success: function (data) {
-            var select = $('#graph-user');
+            let select = $('#graph-user');
+            let by_id = {};
             data.sort(function(a, b) {
                 return a.friendly_name.localeCompare(b.friendly_name);
             });
             data.forEach(function(item) {
                 select.append('<option value="' + item.user_id + '">' +
                     item.friendly_name + '</option>');
+                by_id[item.user_id] = item.friendly_name;
             });
+            select.selectpicker({
+                countSelectedText: function(sel, total) {
+                    if (sel === 0 || sel === total) {
+                        return 'All users';
+                    } else if (sel > 1) {
+                        return sel + ' users';
+                    } else {
+                        return select.val().map(function(id) {
+                            return by_id[id];
+                        }).join(', ');
+                    }
+                },
+                style: 'btn-dark',
+                actionsBox: true,
+                selectedTextFormat: 'count',
+                noneSelectedText: 'All users'
+            });
+            select.selectpicker('render');
+            select.selectpicker('selectAll');
         }
     });
@@ -602,11 +623,6 @@
         $('#nav-tabs-total').tab('show');
     }

-    // Set initial state
-    if (current_tab === '#tabs-plays') { loadGraphsTab1(current_day_range, yaxis); }
-    if (current_tab === '#tabs-stream') { loadGraphsTab2(current_day_range, yaxis); }
-    if (current_tab === '#tabs-total') { loadGraphsTab3(current_month_range, yaxis); }
-
     // Tab1 opened
     $('#nav-tabs-plays').on('shown.bs.tab', function (e) {
         e.preventDefault();
@@ -652,9 +668,20 @@
         $('.months').text(current_month_range);
     });

+    let graph_user_last_id = undefined;
+
     // User changed
     $('#graph-user').on('change', function() {
-        selected_user_id = $(this).val() || null;
+        let val = $(this).val();
+        if (val.length === 0 || val.length === $(this).children().length) {
+            selected_user_id = null; // if all users are selected, just send an empty list
+        } else {
+            selected_user_id = val.join(",");
+        }
+        if (selected_user_id === graph_user_last_id) {
+            return;
+        }
+        graph_user_last_id = selected_user_id;
         if (current_tab === '#tabs-plays') { loadGraphsTab1(current_day_range, yaxis); }
         if (current_tab === '#tabs-stream') { loadGraphsTab2(current_day_range, yaxis); }
         if (current_tab === '#tabs-total') { loadGraphsTab3(current_month_range, yaxis); }
@@ -1,6 +1,7 @@
 <%inherit file="base.html"/>

 <%def name="headIncludes()">
+<link rel="stylesheet" href="${http_root}css/bootstrap-select.min.css">
 <link rel="stylesheet" href="${http_root}css/dataTables.bootstrap.min.css">
 <link rel="stylesheet" href="${http_root}css/dataTables.colVis.css">
 <link rel="stylesheet" href="${http_root}css/tautulli-dataTables.css">
@@ -31,9 +32,7 @@
         % if _session['user_group'] == 'admin':
         <div class="btn-group" id="user-selection">
             <label>
-                <select name="history-user" id="history-user" class="btn" style="color: inherit;">
-                    <option value="">All Users</option>
-                    <option disabled>────────────</option>
+                <select name="history-user" id="history-user" multiple>
                 </select>
             </label>
         </div>
@@ -121,6 +120,7 @@
 </%def>

 <%def name="javascriptIncludes()">
+<script src="${http_root}js/bootstrap-select.min.js"></script>
 <script src="${http_root}js/jquery.dataTables.min.js"></script>
 <script src="${http_root}js/dataTables.colVis.js"></script>
 <script src="${http_root}js/dataTables.bootstrap.min.js"></script>
@@ -134,17 +134,40 @@
         type: 'GET',
         dataType: 'json',
         success: function (data) {
-            var select = $('#history-user');
+            let select = $('#history-user');
+            let by_id = {};
             data.sort(function (a, b) {
                 return a.friendly_name.localeCompare(b.friendly_name);
             });
             data.forEach(function (item) {
                 select.append('<option value="' + item.user_id + '">' +
                     item.friendly_name + '</option>');
+                by_id[item.user_id] = item.friendly_name;
             });
+            select.selectpicker({
+                countSelectedText: function(sel, total) {
+                    if (sel === 0 || sel === total) {
+                        return 'All users';
+                    } else if (sel > 1) {
+                        return sel + ' users';
+                    } else {
+                        return select.val().map(function(id) {
+                            return by_id[id];
+                        }).join(', ');
+                    }
+                },
+                style: 'btn-dark',
+                actionsBox: true,
+                selectedTextFormat: 'count',
+                noneSelectedText: 'All users'
+            });
+            select.selectpicker('render');
+            select.selectpicker('selectAll');
         }
     });

+    let history_user_last_id = undefined;
+
     function loadHistoryTable(media_type, transcode_decision, selected_user_id) {
         history_table_options.ajax = {
             url: 'get_history',
@@ -187,7 +210,16 @@
     });

     $('#history-user').on('change', function () {
-        selected_user_id = $(this).val() || null;
+        let val = $(this).val();
+        if (val.length === 0 || val.length === $(this).children().length) {
+            selected_user_id = null; // if all users are selected, just send an empty list
+        } else {
+            selected_user_id = val.join(",");
+        }
+        if (selected_user_id === history_user_last_id) {
+            return;
+        }
+        history_user_last_id = selected_user_id;
         history_table.draw();
     });
 }
data/interfaces/default/js/bootstrap-select.min.js (vendored, new file, 9 changes)
File diff suppressed because one or more lines are too long
@@ -132,12 +132,6 @@
     </label>
     <p class="help-block">Change the "<em>Play by day of week</em>" graph to start on Monday. Default is start on Sunday.</p>
 </div>
-<div class="checkbox advanced-setting">
-    <label>
-        <input type="checkbox" id="group_history_tables" name="group_history_tables" value="1" ${config['group_history_tables']}> Group Play History
-    </label>
-    <p class="help-block">Group play history for the same item and user as a single entry when progress is less than the watched percent.</p>
-</div>
 <div class="checkbox advanced-setting">
     <label>
         <input type="checkbox" id="history_table_activity" name="history_table_activity" value="1" ${config['history_table_activity']}> Current Activity in History Tables
@@ -227,6 +221,25 @@
     </div>
     <p class="help-block">Decide whether to use end credits markers to determine the 'watched' state of video items. When markers are not available the selected threshold percentage will be used.</p>
 </div>
+<div class="checkbox advanced-setting">
+    <label>
+        <input type="checkbox" id="group_history_tables" name="group_history_tables" value="1" ${config['group_history_tables']}> Group Play History
+    </label>
+    <p class="help-block">Group play history for the same item and user as a single entry when progress is less than the watched percent.</p>
+</div>
+<div class="form-group advanced-setting">
+    <label>Regroup Play History</label>
+    <p class="help-block">
+        Fix grouping of play history in the database.<br />
+    </p>
+    <div class="row">
+        <div class="col-md-4">
+            <div class="btn-group">
+                <button class="btn btn-form" type="button" id="regroup_history">Regroup</button>
+            </div>
+        </div>
+    </div>
+</div>
 <div class="form-group advanced-setting">
     <label>Flush Temporary Sessions</label>
     <p class="help-block">
@@ -2484,6 +2497,12 @@ $(document).ready(function() {
         confirmAjaxCall(url, msg);
     });

+    $("#regroup_history").click(function () {
+        var msg = 'Are you sure you want to regroup play history in the database?<br /><br /><strong>This may take a long time for large databases.<br />Regrouping will continue in the background.</strong>';
+        var url = 'regroup_history';
+        confirmAjaxCall(url, msg);
+    });
+
     $("#delete_temp_sessions").click(function () {
         var msg = 'Are you sure you want to flush the temporary sessions?<br /><br /><strong>This will reset all currently active sessions.</strong>';
         var url = 'delete_temp_sessions';
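The Regroup button above posts to the regroup_history endpoint through confirmAjaxCall, and the changelog exposes the same operation as an API command. A hedged sketch of calling it directly follows; the host, port, and key are assumptions, and the call returns before the background regrouping finishes.

```python
# Hedged sketch: trigger regroup_history through Tautulli's API.
import requests

resp = requests.get(
    "http://localhost:8181/api/v2",   # assumed host/port
    params={"apikey": "YOUR_API_KEY", "cmd": "regroup_history"},
)
print(resp.json()["response"]["result"])  # e.g. "success"
```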
@@ -15,7 +15,7 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 """

 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.11.2"
+__version__ = "4.12.2"
 __copyright__ = "Copyright (c) 2004-2023 Leonard Richardson"
 # Use of this source code is governed by the MIT license.
 __license__ = "MIT"
@@ -38,11 +38,13 @@ from .builder import (
     builder_registry,
     ParserRejectedMarkup,
     XMLParsedAsHTMLWarning,
+    HTMLParserTreeBuilder
 )
 from .dammit import UnicodeDammit
 from .element import (
     CData,
     Comment,
+    CSS,
     DEFAULT_OUTPUT_ENCODING,
     Declaration,
     Doctype,
@@ -116,7 +118,7 @@ class BeautifulSoup(Tag):
     ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'

     NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"

     def __init__(self, markup="", features=None, builder=None,
                  parse_only=None, from_encoding=None, exclude_encodings=None,
                  element_classes=None, **kwargs):
@@ -348,25 +350,49 @@ class BeautifulSoup(Tag):
         self.markup = None
         self.builder.soup = None

-    def __copy__(self):
-        """Copy a BeautifulSoup object by converting the document to a string and parsing it again."""
-        copy = type(self)(
-            self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
-        )
-
-        # Although we encoded the tree to UTF-8, that may not have
-        # been the encoding of the original markup. Set the copy's
-        # .original_encoding to reflect the original object's
-        # .original_encoding.
-        copy.original_encoding = self.original_encoding
-        return copy
+    def _clone(self):
+        """Create a new BeautifulSoup object with the same TreeBuilder,
+        but not associated with any markup.
+
+        This is the first step of the deepcopy process.
+        """
+        clone = type(self)("", None, self.builder)
+
+        # Keep track of the encoding of the original document,
+        # since we won't be parsing it again.
+        clone.original_encoding = self.original_encoding
+        return clone

     def __getstate__(self):
         # Frequently a tree builder can't be pickled.
         d = dict(self.__dict__)
         if 'builder' in d and d['builder'] is not None and not self.builder.picklable:
-            d['builder'] = None
+            d['builder'] = type(self.builder)
+        # Store the contents as a Unicode string.
+        d['contents'] = []
+        d['markup'] = self.decode()
+
+        # If _most_recent_element is present, it's a Tag object left
+        # over from initial parse. It might not be picklable and we
+        # don't need it.
+        if '_most_recent_element' in d:
+            del d['_most_recent_element']
         return d
+
+    def __setstate__(self, state):
+        # If necessary, restore the TreeBuilder by looking it up.
+        self.__dict__ = state
+        if isinstance(self.builder, type):
+            self.builder = self.builder()
+        elif not self.builder:
+            # We don't know which builder was used to build this
+            # parse tree, so use a default we know is always available.
+            self.builder = HTMLParserTreeBuilder()
+        self.builder.soup = self
+        self.reset()
+        self._feed()
+        return state
@@ -468,6 +494,7 @@ class BeautifulSoup(Tag):
         self.open_tag_counter = Counter()
         self.preserve_whitespace_tag_stack = []
         self.string_container_stack = []
+        self._most_recent_element = None
         self.pushTag(self)

     def new_tag(self, name, namespace=None, nsprefix=None, attrs={},
@@ -749,7 +776,7 @@ class BeautifulSoup(Tag):

     def decode(self, pretty_print=False,
                eventual_encoding=DEFAULT_OUTPUT_ENCODING,
-               formatter="minimal"):
+               formatter="minimal", iterator=None):
         """Returns a string or Unicode representation of the parse tree
         as an HTML or XML document.

@@ -776,7 +803,7 @@ class BeautifulSoup(Tag):
         else:
             indent_level = 0
         return prefix + super(BeautifulSoup, self).decode(
-            indent_level, eventual_encoding, formatter)
+            indent_level, eventual_encoding, formatter, iterator)

 # Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup'
 _s = BeautifulSoup
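The new __getstate__/__setstate__ pair above makes BeautifulSoup objects picklable even when the tree builder itself is not: the builder's class and the decoded markup are stored, and the document is re-parsed on load. A small sketch of the round trip, with invented sample markup:

```python
# Sketch: pickling a BeautifulSoup object round-trips via re-parsing.
import pickle
from bs4 import BeautifulSoup

soup = BeautifulSoup("<p>Hello, <b>world</b></p>", "html.parser")
restored = pickle.loads(pickle.dumps(soup))

assert restored.b.string == "world"  # the tree survives the round trip
assert restored is not soup          # it's a new, re-parsed object
```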
@@ -24,6 +24,7 @@ from bs4.dammit import EntitySubstitution, UnicodeDammit

 from bs4.builder import (
     DetectsXMLParsedAsHTML,
+    ParserRejectedMarkup,
     HTML,
     HTMLTreeBuilder,
     STRICT,
@@ -70,6 +71,22 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):

         self._initialize_xml_detector()

+    def error(self, message):
+        # NOTE: This method is required so long as Python 3.9 is
+        # supported. The corresponding code is removed from HTMLParser
+        # in 3.5, but not removed from ParserBase until 3.10.
+        # https://github.com/python/cpython/issues/76025
+        #
+        # The original implementation turned the error into a warning,
+        # but in every case I discovered, this made HTMLParser
+        # immediately crash with an error message that was less
+        # helpful than the warning. The new implementation makes it
+        # more clear that html.parser just can't parse this
+        # markup. The 3.10 implementation does the same, though it
+        # raises AssertionError rather than calling a method. (We
+        # catch this error and wrap it in a ParserRejectedMarkup.)
+        raise ParserRejectedMarkup(message)
+
     def handle_startendtag(self, name, attrs):
         """Handle an incoming empty-element tag.

@@ -359,6 +376,12 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
         args, kwargs = self.parser_args
         parser = BeautifulSoupHTMLParser(*args, **kwargs)
         parser.soup = self.soup
-        parser.feed(markup)
+        try:
+            parser.feed(markup)
+        except AssertionError as e:
+            # html.parser raises AssertionError in rare cases to
+            # indicate a fatal problem with the markup, especially
+            # when there's an error in the doctype declaration.
+            raise ParserRejectedMarkup(e)
         parser.close()
         parser.already_closed_empty_element = []
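With the try/except above, an AssertionError escaping html.parser now surfaces as bs4's own ParserRejectedMarkup. A hedged sketch of the handling pattern this enables; whether any particular input actually triggers the error depends on html.parser, so the inputs below are only illustrative.

```python
# Sketch: callers feeding untrusted markup can catch one bs4 exception
# type instead of a bare AssertionError from html.parser.
from bs4 import BeautifulSoup
from bs4.builder import ParserRejectedMarkup

def parse_or_none(markup):
    try:
        return BeautifulSoup(markup, "html.parser")
    except ParserRejectedMarkup:
        return None  # fatal problem with the markup, e.g. a bad doctype

print(parse_or_none("<p>ok</p>"))
```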
lib/bs4/css.py (new file, 280 lines)

@@ -0,0 +1,280 @@
+"""Integration code for CSS selectors using Soup Sieve (pypi: soupsieve)."""
+
+import warnings
+try:
+    import soupsieve
+except ImportError as e:
+    soupsieve = None
+    warnings.warn(
+        'The soupsieve package is not installed. CSS selectors cannot be used.'
+    )
+
+
+class CSS(object):
+    """A proxy object against the soupsieve library, to simplify its
+    CSS selector API.
+
+    Acquire this object through the .css attribute on the
+    BeautifulSoup object, or on the Tag you want to use as the
+    starting point for a CSS selector.
+
+    The main advantage of doing this is that the tag to be selected
+    against doesn't need to be explicitly specified in the function
+    calls, since it's already scoped to a tag.
+    """
+
+    def __init__(self, tag, api=soupsieve):
+        """Constructor.
+
+        You don't need to instantiate this class yourself; instead,
+        access the .css attribute on the BeautifulSoup object, or on
+        the Tag you want to use as the starting point for your CSS
+        selector.
+
+        :param tag: All CSS selectors will use this as their starting
+            point.
+
+        :param api: A plug-in replacement for the soupsieve module,
+            designed mainly for use in tests.
+        """
+        if api is None:
+            raise NotImplementedError(
+                "Cannot execute CSS selectors because the soupsieve package is not installed."
+            )
+        self.api = api
+        self.tag = tag
+
+    def escape(self, ident):
+        """Escape a CSS identifier.
+
+        This is a simple wrapper around soupsieve.escape(). See the
+        documentation for that function for more information.
+        """
+        if soupsieve is None:
+            raise NotImplementedError(
+                "Cannot escape CSS identifiers because the soupsieve package is not installed."
+            )
+        return self.api.escape(ident)
+
+    def _ns(self, ns, select):
+        """Normalize a dictionary of namespaces."""
+        if not isinstance(select, self.api.SoupSieve) and ns is None:
+            # If the selector is a precompiled pattern, it already has
+            # a namespace context compiled in, which cannot be
+            # replaced.
+            ns = self.tag._namespaces
+        return ns
+
+    def _rs(self, results):
+        """Normalize a list of results to a ResultSet.
+
+        A ResultSet is more consistent with the rest of Beautiful
+        Soup's API, and ResultSet.__getattr__ has a helpful error
+        message if you try to treat a list of results as a single
+        result (a common mistake).
+        """
+        # Import here to avoid circular import
+        from bs4.element import ResultSet
+        return ResultSet(None, results)
+
+    def compile(self, select, namespaces=None, flags=0, **kwargs):
+        """Pre-compile a selector and return the compiled object.
+
+        :param selector: A CSS selector.
+
+        :param namespaces: A dictionary mapping namespace prefixes
+            used in the CSS selector to namespace URIs. By default,
+            Beautiful Soup will use the prefixes it encountered while
+            parsing the document.
+
+        :param flags: Flags to be passed into Soup Sieve's
+            soupsieve.compile() method.
+
+        :param kwargs: Keyword arguments to be passed into SoupSieve's
+            soupsieve.compile() method.
+
+        :return: A precompiled selector object.
+        :rtype: soupsieve.SoupSieve
+        """
+        return self.api.compile(
+            select, self._ns(namespaces, select), flags, **kwargs
+        )
+
+    def select_one(self, select, namespaces=None, flags=0, **kwargs):
+        """Perform a CSS selection operation on the current Tag and return the
+        first result.
+
+        This uses the Soup Sieve library. For more information, see
+        that library's documentation for the soupsieve.select_one()
+        method.
+
+        :param selector: A CSS selector.
+
+        :param namespaces: A dictionary mapping namespace prefixes
+            used in the CSS selector to namespace URIs. By default,
+            Beautiful Soup will use the prefixes it encountered while
+            parsing the document.
+
+        :param flags: Flags to be passed into Soup Sieve's
+            soupsieve.select_one() method.
+
+        :param kwargs: Keyword arguments to be passed into SoupSieve's
+            soupsieve.select_one() method.
+
+        :return: A Tag, or None if the selector has no match.
+        :rtype: bs4.element.Tag
+        """
+        return self.api.select_one(
+            select, self.tag, self._ns(namespaces, select), flags, **kwargs
+        )
+
+    def select(self, select, namespaces=None, limit=0, flags=0, **kwargs):
+        """Perform a CSS selection operation on the current Tag.
+
+        This uses the Soup Sieve library. For more information, see
+        that library's documentation for the soupsieve.select()
+        method.
+
+        :param selector: A string containing a CSS selector.
+
+        :param namespaces: A dictionary mapping namespace prefixes
+            used in the CSS selector to namespace URIs. By default,
+            Beautiful Soup will pass in the prefixes it encountered while
+            parsing the document.
+
+        :param limit: After finding this number of results, stop looking.
+
+        :param flags: Flags to be passed into Soup Sieve's
+            soupsieve.select() method.
+
+        :param kwargs: Keyword arguments to be passed into SoupSieve's
+            soupsieve.select() method.
+
+        :return: A ResultSet of Tag objects.
+        :rtype: bs4.element.ResultSet
+        """
+        if limit is None:
+            limit = 0
+
+        return self._rs(
+            self.api.select(
+                select, self.tag, self._ns(namespaces, select), limit, flags,
+                **kwargs
+            )
+        )
+
+    def iselect(self, select, namespaces=None, limit=0, flags=0, **kwargs):
+        """Perform a CSS selection operation on the current Tag.
+
+        This uses the Soup Sieve library. For more information, see
+        that library's documentation for the soupsieve.iselect()
+        method. It is the same as select(), but it returns a generator
+        instead of a list.
+
+        :param selector: A string containing a CSS selector.
+
+        :param namespaces: A dictionary mapping namespace prefixes
+            used in the CSS selector to namespace URIs. By default,
+            Beautiful Soup will pass in the prefixes it encountered while
+            parsing the document.
+
+        :param limit: After finding this number of results, stop looking.
+
+        :param flags: Flags to be passed into Soup Sieve's
+            soupsieve.iselect() method.
+
+        :param kwargs: Keyword arguments to be passed into SoupSieve's
+            soupsieve.iselect() method.
+
+        :return: A generator
+        :rtype: types.GeneratorType
+        """
+        return self.api.iselect(
+            select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
+        )
+
+    def closest(self, select, namespaces=None, flags=0, **kwargs):
+        """Find the Tag closest to this one that matches the given selector.
+
+        This uses the Soup Sieve library. For more information, see
+        that library's documentation for the soupsieve.closest()
+        method.
+
+        :param selector: A string containing a CSS selector.
+
+        :param namespaces: A dictionary mapping namespace prefixes
+            used in the CSS selector to namespace URIs. By default,
+            Beautiful Soup will pass in the prefixes it encountered while
+            parsing the document.
+
+        :param flags: Flags to be passed into Soup Sieve's
+            soupsieve.closest() method.
+
+        :param kwargs: Keyword arguments to be passed into SoupSieve's
+            soupsieve.closest() method.
+
+        :return: A Tag, or None if there is no match.
+        :rtype: bs4.Tag
+        """
+        return self.api.closest(
+            select, self.tag, self._ns(namespaces, select), flags, **kwargs
+        )
+
+    def match(self, select, namespaces=None, flags=0, **kwargs):
+        """Check whether this Tag matches the given CSS selector.
+
+        This uses the Soup Sieve library. For more information, see
+        that library's documentation for the soupsieve.match()
+        method.
+
+        :param: a CSS selector.
+
+        :param namespaces: A dictionary mapping namespace prefixes
+            used in the CSS selector to namespace URIs. By default,
+            Beautiful Soup will pass in the prefixes it encountered while
+            parsing the document.
+
+        :param flags: Flags to be passed into Soup Sieve's
+            soupsieve.match() method.
+
+        :param kwargs: Keyword arguments to be passed into SoupSieve's
+            soupsieve.match() method.
+
+        :return: True if this Tag matches the selector; False otherwise.
+        :rtype: bool
+        """
+        return self.api.match(
+            select, self.tag, self._ns(namespaces, select), flags, **kwargs
+        )
+
+    def filter(self, select, namespaces=None, flags=0, **kwargs):
+        """Filter this Tag's direct children based on the given CSS selector.
+
+        This uses the Soup Sieve library. It works the same way as
+        passing this Tag into that library's soupsieve.filter()
+        method. For more information, see the documentation for
+        soupsieve.filter().
+
+        :param namespaces: A dictionary mapping namespace prefixes
+            used in the CSS selector to namespace URIs. By default,
+            Beautiful Soup will pass in the prefixes it encountered while
+            parsing the document.
+
+        :param flags: Flags to be passed into Soup Sieve's
+            soupsieve.filter() method.
+
+        :param kwargs: Keyword arguments to be passed into SoupSieve's
+            soupsieve.filter() method.
+
+        :return: A ResultSet of Tag objects.
+        :rtype: bs4.element.ResultSet
+        """
+        return self._rs(
+            self.api.filter(
+                select, self.tag, self._ns(namespaces, select), flags, **kwargs
+            )
+        )
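A short usage sketch of the proxy described above, assuming soupsieve is installed; the sample markup is invented for illustration.

```python
# Sketch: the .css attribute scopes Soup Sieve calls to a tag.
from bs4 import BeautifulSoup

soup = BeautifulSoup(
    '<div id="main"><p class="a">one</p><p class="b">two</p></div>',
    "html.parser",
)

print(soup.css.select("p"))              # ResultSet of both <p> tags
print(soup.css.select_one("p.b").text)   # "two"

# iselect() is the generator form of select().
for tag in soup.css.iselect("p"):
    print(tag["class"])

# match() checks a tag against a selector; closest() walks up the tree.
p = soup.css.select_one("p.a")
print(p.css.match("p.a"))                # True
print(p.css.closest("div")["id"])        # "main"
```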
@@ -59,21 +59,6 @@ def diagnose(data):

     if hasattr(data, 'read'):
         data = data.read()
-    elif data.startswith("http:") or data.startswith("https:"):
-        print(('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data))
-        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
-        return
-    else:
-        try:
-            if os.path.exists(data):
-                print(('"%s" looks like a filename. Reading data from the file.' % data))
-                with open(data) as fp:
-                    data = fp.read()
-        except ValueError:
-            # This can happen on some platforms when the 'filename' is
-            # too long. Assume it's data and not a filename.
-            pass
-    print("")

     for parser in basic_parsers:
         print(("Trying to parse your markup with %s" % parser))
@ -8,14 +8,8 @@ except ImportError as e:
|
|||
import re
|
||||
import sys
|
||||
import warnings
|
||||
try:
|
||||
import soupsieve
|
||||
except ImportError as e:
|
||||
soupsieve = None
|
||||
warnings.warn(
|
||||
'The soupsieve package is not installed. CSS selectors cannot be used.'
|
||||
)
|
||||
|
||||
from bs4.css import CSS
|
||||
from bs4.formatter import (
|
||||
Formatter,
|
||||
HTMLFormatter,
|
||||
|
@ -69,13 +63,13 @@ PYTHON_SPECIFIC_ENCODINGS = set([
|
|||
"string-escape",
|
||||
"string_escape",
|
||||
])
|
||||
|
||||
|
||||
|
||||
class NamespacedAttribute(str):
|
||||
"""A namespaced string (e.g. 'xml:lang') that remembers the namespace
|
||||
('xml') and the name ('lang') that were used to create it.
|
||||
"""
|
||||
|
||||
|
||||
def __new__(cls, prefix, name=None, namespace=None):
|
||||
if not name:
|
||||
# This is the default namespace. Its name "has no value"
|
||||
|
@ -146,14 +140,19 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
|
|||
return match.group(1) + encoding
|
||||
return self.CHARSET_RE.sub(rewrite, self.original_value)
|
||||
|
||||
|
||||
|
||||
class PageElement(object):
|
||||
"""Contains the navigational information for some part of the page:
|
||||
that is, its current location in the parse tree.
|
||||
|
||||
NavigableString, Tag, etc. are all subclasses of PageElement.
|
||||
"""
|
||||
|
||||
|
||||
# In general, we can't tell just by looking at an element whether
|
||||
# it's contained in an XML document or an HTML document. But for
|
||||
# Tags (q.v.) we can store this information at parse time.
|
||||
known_xml = None
|
||||
|
||||
def setup(self, parent=None, previous_element=None, next_element=None,
|
||||
previous_sibling=None, next_sibling=None):
|
||||
"""Sets up the initial relations between this element and
|
||||
|
@ -163,7 +162,7 @@ class PageElement(object):
|
|||
|
||||
:param previous_element: The element parsed immediately before
|
||||
this one.
|
||||
|
||||
|
||||
:param next_element: The element parsed immediately before
|
||||
this one.
|
||||
|
||||
|
@ -257,11 +256,11 @@ class PageElement(object):
|
|||
default = object()
|
||||
def _all_strings(self, strip=False, types=default):
|
||||
"""Yield all strings of certain classes, possibly stripping them.
|
||||
|
||||
|
||||
This is implemented differently in Tag and NavigableString.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
@property
|
||||
def stripped_strings(self):
|
||||
"""Yield all strings in this PageElement, stripping them first.
|
||||
|
@ -294,11 +293,11 @@ class PageElement(object):
|
|||
strip, types=types)])
|
||||
getText = get_text
|
||||
text = property(get_text)
|
||||
|
||||
|
||||
def replace_with(self, *args):
|
||||
"""Replace this PageElement with one or more PageElements, keeping the
|
||||
"""Replace this PageElement with one or more PageElements, keeping the
|
||||
rest of the tree the same.
|
||||
|
||||
|
||||
:param args: One or more PageElements.
|
||||
:return: `self`, no longer part of the tree.
|
||||
"""
|
||||
|
@ -410,7 +409,7 @@ class PageElement(object):
|
|||
This works the same way as `list.insert`.
|
||||
|
||||
:param position: The numeric position that should be occupied
|
||||
in `self.children` by the new PageElement.
|
||||
in `self.children` by the new PageElement.
|
||||
:param new_child: A PageElement.
|
||||
"""
|
||||
if new_child is None:
|
||||
|
@ -546,7 +545,7 @@ class PageElement(object):
|
|||
"Element has no parent, so 'after' has no meaning.")
|
||||
if any(x is self for x in args):
|
||||
raise ValueError("Can't insert an element after itself.")
|
||||
|
||||
|
||||
offset = 0
|
||||
for successor in args:
|
||||
# Extract first so that the index won't be screwed up if they
|
||||
|
@ -912,7 +911,7 @@ class PageElement(object):
|
|||
:rtype: bool
|
||||
"""
|
||||
return getattr(self, '_decomposed', False) or False
|
||||
|
||||
|
||||
# Old non-property versions of the generators, for backwards
|
||||
# compatibility with BS3.
|
||||
def nextGenerator(self):
|
||||
|
@ -936,16 +935,11 @@ class NavigableString(str, PageElement):
|
|||
|
||||
When Beautiful Soup parses the markup <b>penguin</b>, it will
|
||||
create a NavigableString for the string "penguin".
|
||||
"""
|
||||
"""
|
||||
|
||||
PREFIX = ''
|
||||
SUFFIX = ''
|
||||
|
||||
# We can't tell just by looking at a string whether it's contained
|
||||
# in an XML document or an HTML document.
|
||||
|
||||
known_xml = None
|
||||
|
||||
def __new__(cls, value):
|
||||
"""Create a new NavigableString.
|
||||
|
||||
|
@ -961,12 +955,22 @@ class NavigableString(str, PageElement):
|
|||
u.setup()
|
||||
return u
|
||||
|
||||
def __copy__(self):
|
||||
def __deepcopy__(self, memo, recursive=False):
|
||||
"""A copy of a NavigableString has the same contents and class
|
||||
as the original, but it is not connected to the parse tree.
|
||||
|
||||
:param recursive: This parameter is ignored; it's only defined
|
||||
so that NavigableString.__deepcopy__ implements the same
|
||||
signature as Tag.__deepcopy__.
|
||||
"""
|
||||
return type(self)(self)
|
||||
|
||||
def __copy__(self):
|
||||
"""A copy of a NavigableString can only be a deep copy, because
|
||||
only one PageElement can occupy a given place in a parse tree.
|
||||
"""
|
||||
return self.__deepcopy__({})
|
||||
|
||||
def __getnewargs__(self):
|
||||
return (str(self),)
|
||||
|
||||
|
@ -1059,10 +1063,10 @@ class PreformattedString(NavigableString):
|
|||
as comments (the Comment class) and CDATA blocks (the CData
|
||||
class).
|
||||
"""
|
||||
|
||||
|
||||
PREFIX = ''
|
||||
SUFFIX = ''
|
||||
|
||||
|
||||
def output_ready(self, formatter=None):
|
||||
"""Make this string ready for output by adding any subclass-specific
|
||||
prefix or suffix.
|
||||
|
@ -1144,7 +1148,7 @@ class Stylesheet(NavigableString):
|
|||
"""
|
||||
pass
|
||||
|
||||
|
||||
|
||||
class Script(NavigableString):
|
||||
"""A NavigableString representing an executable script (probably
|
||||
Javascript).
|
||||
|
@ -1250,7 +1254,7 @@ class Tag(PageElement):
|
|||
if ((not builder or builder.store_line_numbers)
|
||||
and (sourceline is not None or sourcepos is not None)):
|
||||
self.sourceline = sourceline
|
||||
self.sourcepos = sourcepos
|
||||
self.sourcepos = sourcepos
|
||||
if attrs is None:
|
||||
attrs = {}
|
||||
elif attrs:
|
||||
|
@ -1308,13 +1312,49 @@ class Tag(PageElement):
|
|||
self.interesting_string_types = builder.string_containers[self.name]
|
||||
else:
|
||||
self.interesting_string_types = self.DEFAULT_INTERESTING_STRING_TYPES
|
||||
|
||||
|
||||
parserClass = _alias("parser_class") # BS3
|
||||
|
||||
def __copy__(self):
|
||||
"""A copy of a Tag is a new Tag, unconnected to the parse tree.
|
||||
def __deepcopy__(self, memo, recursive=True):
|
||||
"""A deepcopy of a Tag is a new Tag, unconnected to the parse tree.
|
||||
Its contents are a copy of the old Tag's contents.
|
||||
"""
|
||||
clone = self._clone()
|
||||
|
||||
if recursive:
|
||||
# Clone this tag's descendants recursively, but without
|
||||
# making any recursive function calls.
|
||||
tag_stack = [clone]
|
||||
for event, element in self._event_stream(self.descendants):
|
||||
if event is Tag.END_ELEMENT_EVENT:
|
||||
# Stop appending incoming Tags to the Tag that was
|
||||
# just closed.
|
||||
tag_stack.pop()
|
||||
else:
|
||||
descendant_clone = element.__deepcopy__(
|
||||
memo, recursive=False
|
||||
)
|
||||
# Add to its parent's .contents
|
||||
tag_stack[-1].append(descendant_clone)
|
||||
|
||||
if event is Tag.START_ELEMENT_EVENT:
|
||||
# Add the Tag itself to the stack so that its
|
||||
# children will be .appended to it.
|
||||
tag_stack.append(descendant_clone)
|
||||
return clone
|
||||
|
||||
def __copy__(self):
|
||||
"""A copy of a Tag must always be a deep copy, because a Tag's
|
||||
children can only have one parent at a time.
|
||||
"""
|
||||
return self.__deepcopy__({})
|
||||
|
||||
def _clone(self):
|
||||
"""Create a new Tag just like this one, but with no
|
||||
contents and unattached to any parse tree.
|
||||
|
||||
This is the first step in the deepcopy process.
|
||||
"""
|
||||
clone = type(self)(
|
||||
None, self.builder, self.name, self.namespace,
|
||||
self.prefix, self.attrs, is_xml=self._is_xml,
|
||||
|
@ -1326,8 +1366,6 @@ class Tag(PageElement):
|
|||
)
|
||||
for attr in ('can_be_empty_element', 'hidden'):
|
||||
setattr(clone, attr, getattr(self, attr))
|
||||
for child in self.contents:
|
||||
clone.append(child.__copy__())
|
||||
return clone
|
||||
|
||||
@property
|
||||
|
@ -1433,7 +1471,7 @@ class Tag(PageElement):
|
|||
i.contents = []
|
||||
i._decomposed = True
|
||||
i = n
|
||||
|
||||
|
||||
def clear(self, decompose=False):
|
||||
"""Wipe out all children of this PageElement by calling extract()
|
||||
on them.
|
||||
|
@ -1521,7 +1559,7 @@ class Tag(PageElement):
|
|||
if not isinstance(value, list):
|
||||
value = [value]
|
||||
return value
|
||||
|
||||
|
||||
def has_attr(self, key):
|
||||
"""Does this PageElement have an attribute with the given name?"""
|
||||
return key in self.attrs
|
||||
|
@ -1608,7 +1646,7 @@ class Tag(PageElement):
|
|||
def __repr__(self, encoding="unicode-escape"):
|
||||
"""Renders this PageElement as a string.
|
||||
|
||||
:param encoding: The encoding to use (Python 2 only).
|
||||
:param encoding: The encoding to use (Python 2 only).
|
||||
TODO: This is now ignored and a warning should be issued
|
||||
if a value is provided.
|
||||
:return: A (Unicode) string.
|
||||
|
@ -1650,106 +1688,212 @@ class Tag(PageElement):
|
|||
|
||||
def decode(self, indent_level=None,
|
||||
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
|
||||
formatter="minimal"):
|
||||
"""Render a Unicode representation of this PageElement and its
|
||||
contents.
|
||||
|
||||
:param indent_level: Each line of the rendering will be
|
||||
indented this many spaces. Used internally in
|
||||
recursive calls while pretty-printing.
|
||||
:param eventual_encoding: The tag is destined to be
|
||||
encoded into this encoding. This method is _not_
|
||||
responsible for performing that encoding. This information
|
||||
is passed in so that it can be substituted in if the
|
||||
document contains a <META> tag that mentions the document's
|
||||
encoding.
|
||||
:param formatter: A Formatter object, or a string naming one of
|
||||
the standard formatters.
|
||||
"""
|
||||
|
||||
formatter="minimal",
|
||||
iterator=None):
|
||||
pieces = []
|
||||
# First off, turn a non-Formatter `formatter` into a Formatter
|
||||
# object. This will stop the lookup from happening over and
|
||||
# over again.
|
||||
if not isinstance(formatter, Formatter):
|
||||
formatter = self.formatter_for_name(formatter)
|
||||
attributes = formatter.attributes(self)
|
||||
attrs = []
|
||||
for key, val in attributes:
|
||||
if val is None:
|
||||
decoded = key
|
||||
|
||||
if indent_level is True:
|
||||
indent_level = 0
|
||||
|
||||
# The currently active tag that put us into string literal
|
||||
# mode. Until this element is closed, children will be treated
|
||||
# as string literals and not pretty-printed. String literal
|
||||
# mode is turned on immediately after this tag begins, and
|
||||
# turned off immediately before it's closed. This means there
|
||||
# will be whitespace before and after the tag itself.
|
||||
string_literal_tag = None
|
||||
|
||||
for event, element in self._event_stream(iterator):
|
||||
if event in (Tag.START_ELEMENT_EVENT, Tag.EMPTY_ELEMENT_EVENT):
|
||||
piece = element._format_tag(
|
||||
eventual_encoding, formatter, opening=True
|
||||
)
|
||||
elif event is Tag.END_ELEMENT_EVENT:
|
||||
piece = element._format_tag(
|
||||
eventual_encoding, formatter, opening=False
|
||||
)
|
||||
if indent_level is not None:
|
||||
indent_level -= 1
|
||||
else:
|
||||
if isinstance(val, list) or isinstance(val, tuple):
|
||||
val = ' '.join(val)
|
||||
elif not isinstance(val, str):
|
||||
val = str(val)
|
||||
elif (
|
||||
isinstance(val, AttributeValueWithCharsetSubstitution)
|
||||
and eventual_encoding is not None
|
||||
):
|
||||
val = val.encode(eventual_encoding)
|
||||
piece = element.output_ready(formatter)
|
||||
|
||||
text = formatter.attribute_value(val)
|
||||
decoded = (
|
||||
str(key) + '='
|
||||
+ formatter.quoted_attribute_value(text))
|
||||
attrs.append(decoded)
|
||||
close = ''
|
||||
closeTag = ''
|
||||
# Now we need to apply the 'prettiness' -- extra
|
||||
# whitespace before and/or after this tag. This can get
|
||||
# complicated because certain tags, like <pre> and
|
||||
# <script>, can't be prettified, since adding whitespace would
|
||||
# change the meaning of the content.
|
||||
|
||||
# The default behavior is to add whitespace before and
|
||||
# after an element when string literal mode is off, and to
|
||||
# leave things as they are when string literal mode is on.
|
||||
if string_literal_tag:
|
||||
indent_before = indent_after = False
|
||||
else:
|
||||
indent_before = indent_after = True
|
||||
|
||||
# The only time the behavior is more complex than that is
|
||||
# when we encounter an opening or closing tag that might
|
||||
# put us into or out of string literal mode.
|
||||
if (event is Tag.START_ELEMENT_EVENT
|
||||
and not string_literal_tag
|
||||
and not element._should_pretty_print()):
|
||||
# We are about to enter string literal mode. Add
|
||||
# whitespace before this tag, but not after. We
|
||||
# will stay in string literal mode until this tag
|
||||
# is closed.
|
||||
indent_before = True
|
||||
indent_after = False
|
||||
string_literal_tag = element
|
||||
elif (event is Tag.END_ELEMENT_EVENT
|
||||
and element is string_literal_tag):
|
||||
# We are about to exit string literal mode by closing
|
||||
# the tag that sent us into that mode. Add whitespace
|
||||
# after this tag, but not before.
|
||||
indent_before = False
|
||||
indent_after = True
|
||||
string_literal_tag = None
|
||||
|
||||
# Now we know whether to add whitespace before and/or
|
||||
# after this element.
|
||||
if indent_level is not None:
|
||||
if (indent_before or indent_after):
|
||||
if isinstance(element, NavigableString):
|
||||
piece = piece.strip()
|
||||
if piece:
|
||||
piece = self._indent_string(
|
||||
piece, indent_level, formatter,
|
||||
indent_before, indent_after
|
||||
)
|
||||
if event == Tag.START_ELEMENT_EVENT:
|
||||
indent_level += 1
|
||||
pieces.append(piece)
|
||||
return "".join(pieces)
|
||||
|
||||
# Names for the different events yielded by _event_stream
|
||||
START_ELEMENT_EVENT = object()
|
||||
END_ELEMENT_EVENT = object()
|
||||
EMPTY_ELEMENT_EVENT = object()
|
||||
STRING_ELEMENT_EVENT = object()
|
||||
|
||||
def _event_stream(self, iterator=None):
|
||||
"""Yield a sequence of events that can be used to reconstruct the DOM
|
||||
for this element.
|
||||
|
||||
This lets us recreate the nested structure of this element
|
||||
(e.g. when formatting it as a string) without using recursive
|
||||
method calls.
|
||||
|
||||
This is similar in concept to the SAX API, but it's a simpler
|
||||
interface designed for internal use. The events are different
|
||||
from SAX and the arguments associated with the events are Tags
|
||||
and other Beautiful Soup objects.
|
||||
|
||||
:param iterator: An alternate iterator to use when traversing
|
||||
the tree.
|
||||
"""
|
||||
tag_stack = []
|
||||
|
||||
iterator = iterator or self.self_and_descendants
|
||||
|
||||
for c in iterator:
|
||||
# If the parent of the element we're about to yield is not
|
||||
# the tag currently on the stack, it means that the tag on
|
||||
# the stack closed before this element appeared.
|
||||
while tag_stack and c.parent != tag_stack[-1]:
|
||||
now_closed_tag = tag_stack.pop()
|
||||
yield Tag.END_ELEMENT_EVENT, now_closed_tag
|
||||
|
||||
if isinstance(c, Tag):
|
||||
if c.is_empty_element:
|
||||
yield Tag.EMPTY_ELEMENT_EVENT, c
|
||||
else:
|
||||
yield Tag.START_ELEMENT_EVENT, c
|
||||
tag_stack.append(c)
|
||||
continue
|
||||
else:
|
||||
yield Tag.STRING_ELEMENT_EVENT, c
|
||||
|
||||
while tag_stack:
|
||||
now_closed_tag = tag_stack.pop()
|
||||
yield Tag.END_ELEMENT_EVENT, now_closed_tag
|
||||
|
||||
def _indent_string(self, s, indent_level, formatter,
|
||||
indent_before, indent_after):
|
||||
"""Add indentation whitespace before and/or after a string.
|
||||
|
||||
:param s: The string to amend with whitespace.
|
||||
:param indent_level: The indentation level; affects how much
|
||||
whitespace goes before the string.
|
||||
:param indent_before: Whether or not to add whitespace
|
||||
before the string.
|
||||
:param indent_after: Whether or not to add whitespace
|
||||
(a newline) after the string.
|
||||
"""
|
||||
space_before = ''
|
||||
if indent_before and indent_level:
|
||||
space_before = (formatter.indent * indent_level)
|
||||
|
||||
space_after = ''
|
||||
if indent_after:
|
||||
space_after = "\n"
|
||||
|
||||
return space_before + s + space_after
|
||||
|
||||
def _format_tag(self, eventual_encoding, formatter, opening):
|
||||
# A tag starts with the < character (see below).
|
||||
|
||||
# Then the / character, if this is a closing tag.
|
||||
closing_slash = ''
|
||||
if not opening:
|
||||
closing_slash = '/'
|
||||
|
||||
# Then an optional namespace prefix.
|
||||
prefix = ''
|
||||
if self.prefix:
|
||||
prefix = self.prefix + ":"
|
||||
|
||||
if self.is_empty_element:
|
||||
close = formatter.void_element_close_prefix or ''
|
||||
else:
|
||||
closeTag = '</%s%s>' % (prefix, self.name)
|
||||
# Then a list of attribute values, if this is an opening tag.
|
||||
attribute_string = ''
|
||||
if opening:
|
||||
attributes = formatter.attributes(self)
|
||||
attrs = []
|
||||
for key, val in attributes:
|
||||
if val is None:
|
||||
decoded = key
|
||||
else:
|
||||
if isinstance(val, list) or isinstance(val, tuple):
|
||||
val = ' '.join(val)
|
||||
elif not isinstance(val, str):
|
||||
val = str(val)
|
||||
elif (
|
||||
isinstance(val, AttributeValueWithCharsetSubstitution)
|
||||
and eventual_encoding is not None
|
||||
):
|
||||
val = val.encode(eventual_encoding)
|
||||
|
||||
pretty_print = self._should_pretty_print(indent_level)
|
||||
space = ''
|
||||
indent_space = ''
|
||||
if indent_level is not None:
|
||||
indent_space = (formatter.indent * (indent_level - 1))
|
||||
if pretty_print:
|
||||
space = indent_space
|
||||
indent_contents = indent_level + 1
|
||||
else:
|
||||
indent_contents = None
|
||||
contents = self.decode_contents(
|
||||
indent_contents, eventual_encoding, formatter
|
||||
)
|
||||
|
||||
if self.hidden:
|
||||
# This is the 'document root' object.
|
||||
s = contents
|
||||
else:
|
||||
s = []
|
||||
attribute_string = ''
|
||||
text = formatter.attribute_value(val)
|
||||
decoded = (
|
||||
str(key) + '='
|
||||
+ formatter.quoted_attribute_value(text))
|
||||
attrs.append(decoded)
|
||||
if attrs:
|
||||
attribute_string = ' ' + ' '.join(attrs)
|
||||
if indent_level is not None:
|
||||
# Even if this particular tag is not pretty-printed,
|
||||
# we should indent up to the start of the tag.
|
||||
s.append(indent_space)
|
||||
s.append('<%s%s%s%s>' % (
|
||||
prefix, self.name, attribute_string, close))
|
||||
if pretty_print:
|
||||
s.append("\n")
|
||||
s.append(contents)
|
||||
if pretty_print and contents and contents[-1] != "\n":
|
||||
s.append("\n")
|
||||
if pretty_print and closeTag:
|
||||
s.append(space)
|
||||
s.append(closeTag)
|
||||
if indent_level is not None and closeTag and self.next_sibling:
|
||||
# Even if this particular tag is not pretty-printed,
|
||||
# we're now done with the tag, and we should add a
|
||||
# newline if appropriate.
|
||||
s.append("\n")
|
||||
s = ''.join(s)
|
||||
return s
|
||||
|
||||
def _should_pretty_print(self, indent_level):
|
||||
# Then an optional closing slash (for a void element in an
|
||||
# XML document).
|
||||
void_element_closing_slash = ''
|
||||
if self.is_empty_element:
|
||||
void_element_closing_slash = formatter.void_element_close_prefix or ''
|
||||
|
||||
# Put it all together.
|
||||
return '<' + closing_slash + prefix + self.name + attribute_string + void_element_closing_slash + '>'
|
||||
|
||||
def _should_pretty_print(self, indent_level=1):
|
||||
"""Should this tag be pretty-printed?
|
||||
|
||||
Most of them should, but some (such as <pre> in HTML
|
||||
|
@ -1770,7 +1914,7 @@ class Tag(PageElement):
|
|||
a Unicode string will be returned.
|
||||
:param formatter: A Formatter object, or a string naming one of
|
||||
the standard formatters.
|
||||
:return: A Unicode string (if encoding==None) or a bytestring
|
||||
:return: A Unicode string (if encoding==None) or a bytestring
|
||||
(otherwise).
|
||||
"""
|
||||
if encoding is None:
|
||||
|
@ -1800,33 +1944,9 @@ class Tag(PageElement):
|
|||
the standard Formatters.
|
||||
|
||||
"""
|
||||
# First off, turn a string formatter into a Formatter object. This
|
||||
# will stop the lookup from happening over and over again.
|
||||
if not isinstance(formatter, Formatter):
|
||||
formatter = self.formatter_for_name(formatter)
|
||||
return self.decode(indent_level, eventual_encoding, formatter,
|
||||
iterator=self.descendants)
|
||||
|
||||
pretty_print = (indent_level is not None)
|
||||
s = []
|
||||
for c in self:
|
||||
text = None
|
||||
if isinstance(c, NavigableString):
|
||||
text = c.output_ready(formatter)
|
||||
elif isinstance(c, Tag):
|
||||
s.append(c.decode(indent_level, eventual_encoding,
|
||||
formatter))
|
||||
preserve_whitespace = (
|
||||
self.preserve_whitespace_tags and self.name in self.preserve_whitespace_tags
|
||||
)
|
||||
if text and indent_level and not preserve_whitespace:
|
||||
text = text.strip()
|
||||
if text:
|
||||
if pretty_print and not preserve_whitespace:
|
||||
s.append(formatter.indent * (indent_level - 1))
|
||||
s.append(text)
|
||||
if pretty_print and not preserve_whitespace:
|
||||
s.append("\n")
|
||||
return ''.join(s)
|
||||
|
||||
def encode_contents(
|
||||
self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING,
|
||||
formatter="minimal"):
|
||||
|
@ -1922,6 +2042,18 @@ class Tag(PageElement):
|
|||
# return iter() to make the purpose of the method clear
|
||||
return iter(self.contents) # XXX This seems to be untested.
|
||||
|
||||
@property
|
||||
def self_and_descendants(self):
|
||||
"""Iterate over this PageElement and its children in a
|
||||
breadth-first sequence.
|
||||
|
||||
:yield: A sequence of PageElements.
|
||||
"""
|
||||
if not self.hidden:
|
||||
yield self
|
||||
for i in self.descendants:
|
||||
yield i
|
||||
|
||||
@property
|
||||
def descendants(self):
|
||||
"""Iterate over all children of this PageElement in a
|
||||
|
@ -1948,16 +2080,13 @@ class Tag(PageElement):
|
|||
Beautiful Soup will use the prefixes it encountered while
|
||||
parsing the document.
|
||||
|
||||
:param kwargs: Keyword arguments to be passed into SoupSieve's
|
||||
:param kwargs: Keyword arguments to be passed into Soup Sieve's
|
||||
soupsieve.select() method.
|
||||
|
||||
:return: A Tag.
|
||||
:rtype: bs4.element.Tag
|
||||
"""
|
||||
value = self.select(selector, namespaces, 1, **kwargs)
|
||||
if value:
|
||||
return value[0]
|
||||
return None
|
||||
return self.css.select_one(selector, namespaces, **kwargs)
|
||||
|
||||
def select(self, selector, namespaces=None, limit=None, **kwargs):
|
||||
"""Perform a CSS selection operation on the current element.
|
||||
|
@ -1973,27 +2102,18 @@ class Tag(PageElement):

        :param limit: After finding this number of results, stop looking.

        :param kwargs: Keyword arguments to be passed into SoupSieve's
           soupsieve.select() method.

        :return: A ResultSet of Tags.
        :rtype: bs4.element.ResultSet
        """
        if namespaces is None:
            namespaces = self._namespaces

        if limit is None:
            limit = 0
        if soupsieve is None:
            raise NotImplementedError(
                "Cannot execute CSS selectors because the soupsieve package is not installed."
            )

        results = soupsieve.select(selector, self, namespaces, limit, **kwargs)
        return self.css.select(selector, namespaces, limit, **kwargs)

        # We do this because it's more consistent and because
        # ResultSet.__getattr__ has a helpful error message.
        return ResultSet(None, results)

    @property
    def css(self):
        """Return an interface to the CSS selector API."""
        return CSS(self)

    # Old names for backwards compatibility
    def childGenerator(self):
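With select() and select_one() now delegating to the new css property, the CSS API can also be reached directly. A short sketch (soupsieve must be installed):

    from bs4 import BeautifulSoup

    soup = BeautifulSoup('<div id="a"><p class="x">one</p><p>two</p></div>', "html.parser")
    # The old entry points still work...
    assert soup.select_one("p.x").string == "one"
    # ...and the same selectors are available through the CSS interface object.
    assert [p.string for p in soup.css.select("div p")] == ["one", "two"]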
@ -2038,7 +2158,7 @@ class SoupStrainer(object):
        :param attrs: A dictionary of filters on attribute values.
        :param string: A filter for a NavigableString with specific text.
        :kwargs: A dictionary of filters on attribute values.
        """
        """
        if string is None and 'text' in kwargs:
            string = kwargs.pop('text')
            warnings.warn(
@ -2137,7 +2257,7 @@ class SoupStrainer(object):
        # looking at a tag with a different name.
        if markup and not markup.prefix and self.name != markup.name:
            return False

        call_function_with_tag_data = (
            isinstance(self.name, Callable)
            and not isinstance(markup_name, Tag))
@ -2223,7 +2343,7 @@ class SoupStrainer(object):
            if self._matches(' '.join(markup), match_against):
                return True
            return False

        if match_against is True:
            # True matches any non-None value.
            return markup is not None
@ -2267,11 +2387,11 @@ class SoupStrainer(object):
                return True
            else:
                return False

        # Beyond this point we might need to run the test twice: once against
        # the tag's name and once against its prefixed name.
        match = False

        if not match and isinstance(match_against, str):
            # Exact string match
            match = markup == match_against
@ -97,7 +97,7 @@ class Formatter(EntitySubstitution):
        else:
            indent = ' '
        self.indent = indent

    def substitute(self, ns):
        """Process a string that needs to undergo entity substitution.
        This may be a string encountered in an attribute value or as
@ -297,37 +297,11 @@ class TreeBuilderSmokeTest(object):
            markup, multi_valued_attributes=multi_valued_attributes
        )
        assert soup.a['class'] == ['a', 'b', 'c']

    def test_fuzzed_input(self):
        # This test centralizes in one place the various fuzz tests
        # for Beautiful Soup created by the oss-fuzz project.

        # These strings superficially resemble markup, but they
        # generally can't be parsed into anything. The best we can
        # hope for is that parsing these strings won't crash the
        # parser.
        #
        # n.b. This markup is commented out because these fuzz tests
        # _do_ crash the parser. However the crashes are due to bugs
        # in html.parser, not Beautiful Soup -- otherwise I'd fix the
        # bugs!

        bad_markup = [
            # https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=28873
            # https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/519e5b4269a01185a0d5e76295251921da2f0700
            # https://bugs.python.org/issue37747
            #
            #b'\n<![\xff\xfe\xfe\xcd\x00',

            #https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/de32aa55785be29bbc72a1a8e06b00611fb3d9f8
            # https://bugs.python.org/issue34480
            #
            #b'<![n\x00'
        ]
        for markup in bad_markup:
            with warnings.catch_warnings(record=False):
                soup = self.soup(markup)

    def test_invalid_doctype(self):
        markup = '<![if word]>content<![endif]>'
        markup = '<!DOCTYPE html]ff>'
        soup = self.soup(markup)

class HTMLTreeBuilderSmokeTest(TreeBuilderSmokeTest):
@ -577,8 +551,8 @@ Hello, world!
        """Whitespace must be preserved in <pre> and <textarea> tags,
        even if that would mean not prettifying the markup.
        """
        pre_markup = "<pre>a   z</pre>"
        textarea_markup = "<textarea> woo\nwoo  </textarea>"
        pre_markup = "<pre>a   z</pre>\n"
        textarea_markup = "<textarea> woo\nwoo  </textarea>\n"
        self.assert_soup(pre_markup)
        self.assert_soup(textarea_markup)
@ -589,7 +563,7 @@ Hello, world!
        assert soup.textarea.prettify() == textarea_markup

        soup = self.soup("<textarea></textarea>")
        assert soup.textarea.prettify() == "<textarea></textarea>"
        assert soup.textarea.prettify() == "<textarea></textarea>\n"

    def test_nested_inline_elements(self):
        """Inline elements can be nested indefinitely."""
@ -0,0 +1 @@
˙<!DOCTyPEV PUBLIC'''Đ'

@ -0,0 +1 @@
)<a><math><TR><a><mI><a><p><a>

Binary file not shown.

@ -0,0 +1,2 @@

<![

@ -0,0 +1 @@
-<math><sElect><mi><sElect><sElect>

Binary file not shown.
File diff suppressed because one or more lines are too long

@ -0,0 +1 @@
ñ<table><svg><html>

Binary file not shown.

487 lib/bs4/tests/test_css.py (new file)
@ -0,0 +1,487 @@
import pytest
import types
from unittest.mock import MagicMock

from bs4 import (
    CSS,
    BeautifulSoup,
    ResultSet,
)

from . import (
    SoupTest,
    SOUP_SIEVE_PRESENT,
)

if SOUP_SIEVE_PRESENT:
    from soupsieve import SelectorSyntaxError


@pytest.mark.skipif(not SOUP_SIEVE_PRESENT, reason="Soup Sieve not installed")
class TestCSSSelectors(SoupTest):
    """Test basic CSS selector functionality.

    This functionality is implemented in soupsieve, which has a much
    more comprehensive test suite, so this is basically an extra check
    that soupsieve works as expected.
    """

    HTML = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>The title</title>
<link rel="stylesheet" href="blah.css" type="text/css" id="l1">
</head>
<body>
<custom-dashed-tag class="dashed" id="dash1">Hello there.</custom-dashed-tag>
<div id="main" class="fancy">
<div id="inner">
<h1 id="header1">An H1</h1>
<p>Some text</p>
<p class="onep" id="p1">Some more text</p>
<h2 id="header2">An H2</h2>
<p class="class1 class2 class3" id="pmulti">Another</p>
<a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
<h2 id="header3">Another H2</h2>
<a id="me" href="http://simonwillison.net/" rel="me">me</a>
<span class="s1">
<a href="#" id="s1a1">span1a1</a>
<a href="#" id="s1a2">span1a2 <span id="s1a2s1">test</span></a>
<span class="span2">
<a href="#" id="s2a1">span2a1</a>
</span>
<span class="span3"></span>
<custom-dashed-tag class="dashed" id="dash2"/>
<div data-tag="dashedvalue" id="data1"/>
</span>
</div>
<x id="xid">
<z id="zida"/>
<z id="zidab"/>
<z id="zidac"/>
</x>
<y id="yid">
<z id="zidb"/>
</y>
<p lang="en" id="lang-en">English</p>
<p lang="en-gb" id="lang-en-gb">English UK</p>
<p lang="en-us" id="lang-en-us">English US</p>
<p lang="fr" id="lang-fr">French</p>
</div>

<div id="footer">
</div>
"""

    def setup_method(self):
        self.soup = BeautifulSoup(self.HTML, 'html.parser')

    def assert_selects(self, selector, expected_ids, **kwargs):
        results = self.soup.select(selector, **kwargs)
        assert isinstance(results, ResultSet)
        el_ids = [el['id'] for el in results]
        el_ids.sort()
        expected_ids.sort()
        assert expected_ids == el_ids, "Selector %s, expected [%s], got [%s]" % (
            selector, ', '.join(expected_ids), ', '.join(el_ids)
        )

    assertSelect = assert_selects

    def assert_select_multiple(self, *tests):
        for selector, expected_ids in tests:
            self.assert_selects(selector, expected_ids)

    def test_precompiled(self):
        sel = self.soup.css.compile('div')

        els = self.soup.select(sel)
        assert len(els) == 4
        for div in els:
            assert div.name == 'div'

        el = self.soup.select_one(sel)
        assert 'main' == el['id']

    def test_one_tag_one(self):
        els = self.soup.select('title')
        assert len(els) == 1
        assert els[0].name == 'title'
        assert els[0].contents == ['The title']

    def test_one_tag_many(self):
        els = self.soup.select('div')
        assert len(els) == 4
        for div in els:
            assert div.name == 'div'

        el = self.soup.select_one('div')
        assert 'main' == el['id']

    def test_select_one_returns_none_if_no_match(self):
        match = self.soup.select_one('nonexistenttag')
        assert None == match

    def test_tag_in_tag_one(self):
        els = self.soup.select('div div')
        self.assert_selects('div div', ['inner', 'data1'])

    def test_tag_in_tag_many(self):
        for selector in ('html div', 'html body div', 'body div'):
            self.assert_selects(selector, ['data1', 'main', 'inner', 'footer'])

    def test_limit(self):
        self.assert_selects('html div', ['main'], limit=1)
        self.assert_selects('html body div', ['inner', 'main'], limit=2)
        self.assert_selects('body div', ['data1', 'main', 'inner', 'footer'],
                            limit=10)

    def test_tag_no_match(self):
        assert len(self.soup.select('del')) == 0

    def test_invalid_tag(self):
        with pytest.raises(SelectorSyntaxError):
            self.soup.select('tag%t')

    def test_select_dashed_tag_ids(self):
        self.assert_selects('custom-dashed-tag', ['dash1', 'dash2'])

    def test_select_dashed_by_id(self):
        dashed = self.soup.select('custom-dashed-tag[id=\"dash2\"]')
        assert dashed[0].name == 'custom-dashed-tag'
        assert dashed[0]['id'] == 'dash2'

    def test_dashed_tag_text(self):
        assert self.soup.select('body > custom-dashed-tag')[0].text == 'Hello there.'

    def test_select_dashed_matches_find_all(self):
        assert self.soup.select('custom-dashed-tag') == self.soup.find_all('custom-dashed-tag')

    def test_header_tags(self):
        self.assert_select_multiple(
            ('h1', ['header1']),
            ('h2', ['header2', 'header3']),
        )

    def test_class_one(self):
        for selector in ('.onep', 'p.onep', 'html p.onep'):
            els = self.soup.select(selector)
            assert len(els) == 1
            assert els[0].name == 'p'
            assert els[0]['class'] == ['onep']

    def test_class_mismatched_tag(self):
        els = self.soup.select('div.onep')
        assert len(els) == 0

    def test_one_id(self):
        for selector in ('div#inner', '#inner', 'div div#inner'):
            self.assert_selects(selector, ['inner'])

    def test_bad_id(self):
        els = self.soup.select('#doesnotexist')
        assert len(els) == 0

    def test_items_in_id(self):
        els = self.soup.select('div#inner p')
        assert len(els) == 3
        for el in els:
            assert el.name == 'p'
        assert els[1]['class'] == ['onep']
        assert not els[0].has_attr('class')

    def test_a_bunch_of_emptys(self):
        for selector in ('div#main del', 'div#main div.oops', 'div div#main'):
            assert len(self.soup.select(selector)) == 0

    def test_multi_class_support(self):
        for selector in ('.class1', 'p.class1', '.class2', 'p.class2',
                         '.class3', 'p.class3', 'html p.class2', 'div#inner .class2'):
            self.assert_selects(selector, ['pmulti'])

    def test_multi_class_selection(self):
        for selector in ('.class1.class3', '.class3.class2',
                         '.class1.class2.class3'):
            self.assert_selects(selector, ['pmulti'])

    def test_child_selector(self):
        self.assert_selects('.s1 > a', ['s1a1', 's1a2'])
        self.assert_selects('.s1 > a span', ['s1a2s1'])

    def test_child_selector_id(self):
        self.assert_selects('.s1 > a#s1a2 span', ['s1a2s1'])

    def test_attribute_equals(self):
        self.assert_select_multiple(
            ('p[class="onep"]', ['p1']),
            ('p[id="p1"]', ['p1']),
            ('[class="onep"]', ['p1']),
            ('[id="p1"]', ['p1']),
            ('link[rel="stylesheet"]', ['l1']),
            ('link[type="text/css"]', ['l1']),
            ('link[href="blah.css"]', ['l1']),
            ('link[href="no-blah.css"]', []),
            ('[rel="stylesheet"]', ['l1']),
            ('[type="text/css"]', ['l1']),
            ('[href="blah.css"]', ['l1']),
            ('[href="no-blah.css"]', []),
            ('p[href="no-blah.css"]', []),
            ('[href="no-blah.css"]', []),
        )

    def test_attribute_tilde(self):
        self.assert_select_multiple(
            ('p[class~="class1"]', ['pmulti']),
            ('p[class~="class2"]', ['pmulti']),
            ('p[class~="class3"]', ['pmulti']),
            ('[class~="class1"]', ['pmulti']),
            ('[class~="class2"]', ['pmulti']),
            ('[class~="class3"]', ['pmulti']),
            ('a[rel~="friend"]', ['bob']),
            ('a[rel~="met"]', ['bob']),
            ('[rel~="friend"]', ['bob']),
            ('[rel~="met"]', ['bob']),
        )

    def test_attribute_startswith(self):
        self.assert_select_multiple(
            ('[rel^="style"]', ['l1']),
            ('link[rel^="style"]', ['l1']),
            ('notlink[rel^="notstyle"]', []),
            ('[rel^="notstyle"]', []),
            ('link[rel^="notstyle"]', []),
            ('link[href^="bla"]', ['l1']),
            ('a[href^="http://"]', ['bob', 'me']),
            ('[href^="http://"]', ['bob', 'me']),
            ('[id^="p"]', ['pmulti', 'p1']),
            ('[id^="m"]', ['me', 'main']),
            ('div[id^="m"]', ['main']),
            ('a[id^="m"]', ['me']),
            ('div[data-tag^="dashed"]', ['data1'])
        )

    def test_attribute_endswith(self):
        self.assert_select_multiple(
            ('[href$=".css"]', ['l1']),
            ('link[href$=".css"]', ['l1']),
            ('link[id$="1"]', ['l1']),
            ('[id$="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1', 'dash1']),
            ('div[id$="1"]', ['data1']),
            ('[id$="noending"]', []),
        )

    def test_attribute_contains(self):
        self.assert_select_multiple(
            # From test_attribute_startswith
            ('[rel*="style"]', ['l1']),
            ('link[rel*="style"]', ['l1']),
            ('notlink[rel*="notstyle"]', []),
            ('[rel*="notstyle"]', []),
            ('link[rel*="notstyle"]', []),
            ('link[href*="bla"]', ['l1']),
            ('[href*="http://"]', ['bob', 'me']),
            ('[id*="p"]', ['pmulti', 'p1']),
            ('div[id*="m"]', ['main']),
            ('a[id*="m"]', ['me']),
            # From test_attribute_endswith
            ('[href*=".css"]', ['l1']),
            ('link[href*=".css"]', ['l1']),
            ('link[id*="1"]', ['l1']),
            ('[id*="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1', 'dash1']),
            ('div[id*="1"]', ['data1']),
            ('[id*="noending"]', []),
            # New for this test
            ('[href*="."]', ['bob', 'me', 'l1']),
            ('a[href*="."]', ['bob', 'me']),
            ('link[href*="."]', ['l1']),
            ('div[id*="n"]', ['main', 'inner']),
            ('div[id*="nn"]', ['inner']),
            ('div[data-tag*="edval"]', ['data1'])
        )

    def test_attribute_exact_or_hypen(self):
        self.assert_select_multiple(
            ('p[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
            ('[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
            ('p[lang|="fr"]', ['lang-fr']),
            ('p[lang|="gb"]', []),
        )

    def test_attribute_exists(self):
        self.assert_select_multiple(
            ('[rel]', ['l1', 'bob', 'me']),
            ('link[rel]', ['l1']),
            ('a[rel]', ['bob', 'me']),
            ('[lang]', ['lang-en', 'lang-en-gb', 'lang-en-us', 'lang-fr']),
            ('p[class]', ['p1', 'pmulti']),
            ('[blah]', []),
            ('p[blah]', []),
            ('div[data-tag]', ['data1'])
        )

    def test_quoted_space_in_selector_name(self):
        html = """<div style="display: wrong">nope</div>
        <div style="display: right">yes</div>
        """
        soup = BeautifulSoup(html, 'html.parser')
        [chosen] = soup.select('div[style="display: right"]')
        assert "yes" == chosen.string

    def test_unsupported_pseudoclass(self):
        with pytest.raises(NotImplementedError):
            self.soup.select("a:no-such-pseudoclass")

        with pytest.raises(SelectorSyntaxError):
            self.soup.select("a:nth-of-type(a)")

    def test_nth_of_type(self):
        # Try to select first paragraph
        els = self.soup.select('div#inner p:nth-of-type(1)')
        assert len(els) == 1
        assert els[0].string == 'Some text'

        # Try to select third paragraph
        els = self.soup.select('div#inner p:nth-of-type(3)')
        assert len(els) == 1
        assert els[0].string == 'Another'

        # Try to select (non-existent!) fourth paragraph
        els = self.soup.select('div#inner p:nth-of-type(4)')
        assert len(els) == 0

        # Zero will select no tags.
        els = self.soup.select('div p:nth-of-type(0)')
        assert len(els) == 0

    def test_nth_of_type_direct_descendant(self):
        els = self.soup.select('div#inner > p:nth-of-type(1)')
        assert len(els) == 1
        assert els[0].string == 'Some text'

    def test_id_child_selector_nth_of_type(self):
        self.assert_selects('#inner > p:nth-of-type(2)', ['p1'])

    def test_select_on_element(self):
        # Other tests operate on the tree; this operates on an element
        # within the tree.
        inner = self.soup.find("div", id="main")
        selected = inner.select("div")
        # The <div id="inner"> tag was selected. The <div id="footer">
        # tag was not.
        self.assert_selects_ids(selected, ['inner', 'data1'])

    def test_overspecified_child_id(self):
        self.assert_selects(".fancy #inner", ['inner'])
        self.assert_selects(".normal #inner", [])

    def test_adjacent_sibling_selector(self):
        self.assert_selects('#p1 + h2', ['header2'])
        self.assert_selects('#p1 + h2 + p', ['pmulti'])
        self.assert_selects('#p1 + #header2 + .class1', ['pmulti'])
        assert [] == self.soup.select('#p1 + p')

    def test_general_sibling_selector(self):
        self.assert_selects('#p1 ~ h2', ['header2', 'header3'])
        self.assert_selects('#p1 ~ #header2', ['header2'])
        self.assert_selects('#p1 ~ h2 + a', ['me'])
        self.assert_selects('#p1 ~ h2 + [rel="me"]', ['me'])
        assert [] == self.soup.select('#inner ~ h2')

    def test_dangling_combinator(self):
        with pytest.raises(SelectorSyntaxError):
            self.soup.select('h1 >')

    def test_sibling_combinator_wont_select_same_tag_twice(self):
        self.assert_selects('p[lang] ~ p', ['lang-en-gb', 'lang-en-us', 'lang-fr'])

    # Test the selector grouping operator (the comma)
    def test_multiple_select(self):
        self.assert_selects('x, y', ['xid', 'yid'])

    def test_multiple_select_with_no_space(self):
        self.assert_selects('x,y', ['xid', 'yid'])

    def test_multiple_select_with_more_space(self):
        self.assert_selects('x,    y', ['xid', 'yid'])

    def test_multiple_select_duplicated(self):
        self.assert_selects('x, x', ['xid'])

    def test_multiple_select_sibling(self):
        self.assert_selects('x, y ~ p[lang=fr]', ['xid', 'lang-fr'])

    def test_multiple_select_tag_and_direct_descendant(self):
        self.assert_selects('x, y > z', ['xid', 'zidb'])

    def test_multiple_select_direct_descendant_and_tags(self):
        self.assert_selects('div > x, y, z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac'])

    def test_multiple_select_indirect_descendant(self):
        self.assert_selects('div x,y,  z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac'])

    def test_invalid_multiple_select(self):
        with pytest.raises(SelectorSyntaxError):
            self.soup.select(',x, y')
        with pytest.raises(SelectorSyntaxError):
            self.soup.select('x,,y')

    def test_multiple_select_attrs(self):
        self.assert_selects('p[lang=en], p[lang=en-gb]', ['lang-en', 'lang-en-gb'])

    def test_multiple_select_ids(self):
        self.assert_selects('x, y > z[id=zida], z[id=zidab], z[id=zidb]', ['xid', 'zidb', 'zidab'])

    def test_multiple_select_nested(self):
        self.assert_selects('body > div > x, y > z', ['xid', 'zidb'])

    def test_select_duplicate_elements(self):
        # When markup contains duplicate elements, a multiple select
        # will find all of them.
        markup = '<div class="c1"/><div class="c2"/><div class="c1"/>'
        soup = BeautifulSoup(markup, 'html.parser')
        selected = soup.select(".c1, .c2")
        assert 3 == len(selected)

        # Verify that find_all finds the same elements, though because
        # of an implementation detail it finds them in a different
        # order.
        for element in soup.find_all(class_=['c1', 'c2']):
            assert element in selected

    def test_closest(self):
        inner = self.soup.find("div", id="inner")
        closest = inner.css.closest("div[id=main]")
        assert closest == self.soup.find("div", id="main")

    def test_match(self):
        inner = self.soup.find("div", id="inner")
        main = self.soup.find("div", id="main")
        assert inner.css.match("div[id=main]") == False
        assert main.css.match("div[id=main]") == True

    def test_iselect(self):
        gen = self.soup.css.iselect("h2")
        assert isinstance(gen, types.GeneratorType)
        [header2, header3] = gen
        assert header2['id'] == 'header2'
        assert header3['id'] == 'header3'

    def test_filter(self):
        inner = self.soup.find("div", id="inner")
        results = inner.css.filter("h2")
        assert len(inner.css.filter("h2")) == 2

        results = inner.css.filter("h2[id=header3]")
        assert isinstance(results, ResultSet)
        [result] = results
        assert result['id'] == 'header3'

    def test_escape(self):
        m = self.soup.css.escape
        assert m(".foo#bar") == '\\.foo\\#bar'
        assert m("()[]{}") == '\\(\\)\\[\\]\\{\\}'
        assert m(".foo") == self.soup.css.escape(".foo")
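Beyond select(), the tests above exercise the richer CSS interface (compile, iselect, closest, filter, escape). A small sketch of the generator-based variant, assuming soupsieve is installed:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup("<div><h2 id='a'>A</h2><h2 id='b'>B</h2></div>", "html.parser")
    # iselect() returns a lazy generator instead of materializing a ResultSet.
    for h2 in soup.css.iselect("h2"):
        print(h2["id"])          # a, then b
    # A precompiled selector can be reused across select() calls.
    sel = soup.css.compile("h2")
    assert len(soup.select(sel)) == 2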
@ -80,20 +80,20 @@ class TestFormatter(SoupTest):
    @pytest.mark.parametrize(
        "indent,expect",
        [
            (None, '<a>\n<b>\ntext\n</b>\n</a>'),
            (-1, '<a>\n<b>\ntext\n</b>\n</a>'),
            (0, '<a>\n<b>\ntext\n</b>\n</a>'),
            ("", '<a>\n<b>\ntext\n</b>\n</a>'),
            (None, '<a>\n<b>\ntext\n</b>\n</a>\n'),
            (-1, '<a>\n<b>\ntext\n</b>\n</a>\n'),
            (0, '<a>\n<b>\ntext\n</b>\n</a>\n'),
            ("", '<a>\n<b>\ntext\n</b>\n</a>\n'),

            (1, '<a>\n <b>\n  text\n </b>\n</a>'),
            (2, '<a>\n  <b>\n    text\n  </b>\n</a>'),
            (1, '<a>\n <b>\n  text\n </b>\n</a>\n'),
            (2, '<a>\n  <b>\n    text\n  </b>\n</a>\n'),

            ("\t", '<a>\n\t<b>\n\t\ttext\n\t</b>\n</a>'),
            ('abc', '<a>\nabc<b>\nabcabctext\nabc</b>\n</a>'),
            ("\t", '<a>\n\t<b>\n\t\ttext\n\t</b>\n</a>\n'),
            ('abc', '<a>\nabc<b>\nabcabctext\nabc</b>\n</a>\n'),

            # Some invalid inputs -- the default behavior is used.
            (object(), '<a>\n <b>\n  text\n </b>\n</a>'),
            (b'bytes', '<a>\n <b>\n  text\n </b>\n</a>'),
            (object(), '<a>\n <b>\n  text\n </b>\n</a>\n'),
            (b'bytes', '<a>\n <b>\n  text\n </b>\n</a>\n'),
        ]
    )
    def test_indent(self, indent, expect):
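Every expectation above gains a trailing \n because prettify() in this bs4 update ends its output with a newline. A quick sketch of the observable change:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup("<a><b>text</b></a>", "html.parser")
    # As of the bundled bs4 4.12, prettify() ends with a newline.
    assert soup.a.prettify() == "<a>\n <b>\n  text\n </b>\n</a>\n"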
91 lib/bs4/tests/test_fuzz.py (new file)

@ -0,0 +1,91 @@
"""This file contains test cases reported by third parties using
fuzzing tools, primarily from Google's oss-fuzz project. Some of these
represent real problems with Beautiful Soup, but many are problems in
libraries that Beautiful Soup depends on, and many of the test cases
represent different ways of triggering the same problem.

Grouping these test cases together makes it easy to see which test
cases represent the same problem, and puts the test cases in close
proximity to code that can trigger the problems.
"""
import os
import pytest
from bs4 import (
    BeautifulSoup,
    ParserRejectedMarkup,
)

class TestFuzz(object):

    # Test case markup files from fuzzers are given this extension so
    # they can be included in builds.
    TESTCASE_SUFFIX = ".testcase"

    # This class of error has been fixed by catching a less helpful
    # exception from html.parser and raising ParserRejectedMarkup
    # instead.
    @pytest.mark.parametrize(
        "filename", [
            "clusterfuzz-testcase-minimized-bs4_fuzzer-5703933063462912",
        ]
    )
    def test_rejected_markup(self, filename):
        markup = self.__markup(filename)
        with pytest.raises(ParserRejectedMarkup):
            BeautifulSoup(markup, 'html.parser')

    # This class of error has to do with very deeply nested documents
    # which overflow the Python call stack when the tree is converted
    # to a string. This is an issue with Beautiful Soup which was fixed
    # as part of [bug=1471755].
    @pytest.mark.parametrize(
        "filename", [
            "clusterfuzz-testcase-minimized-bs4_fuzzer-5984173902397440",
            "clusterfuzz-testcase-minimized-bs4_fuzzer-5167584867909632",
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6124268085182464",
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6450958476902400",
        ]
    )
    def test_deeply_nested_document(self, filename):
        # Parsing the document and encoding it back to a string is
        # sufficient to demonstrate that the overflow problem has
        # been fixed.
        markup = self.__markup(filename)
        BeautifulSoup(markup, 'html.parser').encode()

    # This class of error represents problems with html5lib's parser,
    # not Beautiful Soup. I use
    # https://github.com/html5lib/html5lib-python/issues/568 to notify
    # the html5lib developers of these issues.
    @pytest.mark.skip("html5lib problems")
    @pytest.mark.parametrize(
        "filename", [
            # b"""ÿ<!DOCTyPEV PUBLIC'''Ð'"""
            "clusterfuzz-testcase-minimized-bs4_fuzzer-4818336571064320",

            # b')<a><math><TR><a><mI><a><p><a>'
            "clusterfuzz-testcase-minimized-bs4_fuzzer-4999465949331456",

            # b'-<math><sElect><mi><sElect><sElect>'
            "clusterfuzz-testcase-minimized-bs4_fuzzer-5843991618256896",

            # b'ñ<table><svg><html>'
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6241471367348224",

            # <TABLE>, some ^@ characters, some <math> tags.
            "clusterfuzz-testcase-minimized-bs4_fuzzer-6600557255327744",

            # Nested table
            "crash-0d306a50c8ed8bcd0785b67000fcd5dea1d33f08"
        ]
    )
    def test_html5lib_parse_errors(self, filename):
        markup = self.__markup(filename)
        print(BeautifulSoup(markup, 'html5lib').encode())

    def __markup(self, filename):
        if not filename.endswith(self.TESTCASE_SUFFIX):
            filename += self.TESTCASE_SUFFIX
        this_dir = os.path.split(__file__)[0]
        path = os.path.join(this_dir, 'fuzz', filename)
        return open(path, 'rb').read()
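The deeply-nested-document cases above can be reproduced without the fuzz corpus; a minimal sketch:

    import sys
    from bs4 import BeautifulSoup

    # Deeper than the interpreter's recursion limit; before the fix,
    # encoding this tree back to bytes would overflow the call stack.
    limit = sys.getrecursionlimit() + 1
    encoded = BeautifulSoup("<span>" * limit, "html.parser").encode()
    assert encoded.count(b"<span>") == limit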
@ -3,9 +3,11 @@ trees."""

from pdb import set_trace
import pickle
import pytest
import warnings
from bs4.builder import (
    HTMLParserTreeBuilder,
    ParserRejectedMarkup,
    XMLParsedAsHTMLWarning,
)
from bs4.builder._htmlparser import BeautifulSoupHTMLParser
@ -15,6 +17,28 @@ class TestHTMLParserTreeBuilder(SoupTest, HTMLTreeBuilderSmokeTest):

    default_builder = HTMLParserTreeBuilder

    def test_rejected_input(self):
        # Python's html.parser will occasionally reject markup,
        # especially when there is a problem with the initial DOCTYPE
        # declaration. Different versions of Python sound the alarm in
        # different ways, but Beautiful Soup consistently raises
        # errors as ParserRejectedMarkup exceptions.
        bad_markup = [
            # https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=28873
            # https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/519e5b4269a01185a0d5e76295251921da2f0700
            # https://github.com/python/cpython/issues/81928
            b'\n<![\xff\xfe\xfe\xcd\x00',

            #https://github.com/guidovranken/python-library-fuzzers/blob/master/corp-html/de32aa55785be29bbc72a1a8e06b00611fb3d9f8
            # https://github.com/python/cpython/issues/78661
            #
            b'<![n\x00',
            b"<![UNKNOWN[]]>",
        ]
        for markup in bad_markup:
            with pytest.raises(ParserRejectedMarkup):
                soup = self.soup(markup)

    def test_namespaced_system_doctype(self):
        # html.parser can't handle namespaced doctypes, so skip this one.
        pass
@ -189,13 +189,15 @@ class TestLXMLXMLTreeBuilder(SoupTest, XMLTreeBuilderSmokeTest):
        assert soup.find('prefix:tag3').name == 'tag3'
        assert soup.subtag.find('prefix:tag3').name == 'tag3'

    def test_pickle_removes_builder(self):
        # The lxml TreeBuilder is not picklable, so it won't be
        # preserved in a pickle/unpickle operation.

    def test_pickle_restores_builder(self):
        # The lxml TreeBuilder is not picklable, so when unpickling
        # a document created with it, a new TreeBuilder of the
        # appropriate class is created.
        soup = self.soup("<a>some markup</a>")
        assert isinstance(soup.builder, self.default_builder)
        pickled = pickle.dumps(soup)
        unpickled = pickle.loads(pickled)

        assert "some markup" == unpickled.a.string
        assert unpickled.builder is None
        assert unpickled.builder != soup.builder
        assert isinstance(unpickled.builder, self.default_builder)
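The behavior change above (a fresh builder on unpickle, rather than builder=None) can be seen with any parser; a small sketch:

    import pickle
    from bs4 import BeautifulSoup

    soup = BeautifulSoup("<a>some markup</a>", "html.parser")
    restored = pickle.loads(pickle.dumps(soup))
    assert restored.a.string == "some markup"
    # The unpickled document gets its own, freshly constructed TreeBuilder.
    assert restored.builder is not soup.builder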
@ -2,20 +2,18 @@
import copy
import pickle
import pytest
import sys

from bs4 import BeautifulSoup
from bs4.element import (
    Comment,
    ResultSet,
    SoupStrainer,
)
from . import (
    SoupTest,
    SOUP_SIEVE_PRESENT,
)

if SOUP_SIEVE_PRESENT:
    from soupsieve import SelectorSyntaxError

class TestEncoding(SoupTest):
    """Test the ability to encode objects into strings."""
@ -51,10 +49,21 @@ class TestEncoding(SoupTest):
        assert "\N{SNOWMAN}".encode("utf8") == soup.b.encode_contents(
            encoding="utf8"
        )

    def test_encode_deeply_nested_document(self):
        # This test verifies that encoding a string doesn't involve
        # any recursive function calls. If it did, this test would
        # overflow the Python interpreter stack.
        limit = sys.getrecursionlimit() + 1
        markup = "<span>" * limit
        soup = self.soup(markup)
        encoded = soup.encode()
        assert limit == encoded.count(b"<span>")

    def test_deprecated_renderContents(self):
        html = "<b>\N{SNOWMAN}</b>"
        soup = self.soup(html)
        soup.renderContents()
        assert "\N{SNOWMAN}".encode("utf8") == soup.b.renderContents()

    def test_repr(self):
@ -159,7 +168,31 @@ class TestFormatters(SoupTest):
        soup = self.soup("<div> foo <pre>  \tbar\n  \n  </pre> baz <textarea> eee\nfff\t</textarea></div>")
        # Everything outside the <pre> tag is reformatted, but everything
        # inside is left alone.
        assert '<div>\n foo\n <pre>  \tbar\n  \n  </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>' == soup.div.prettify()
        assert '<div>\n foo\n <pre>  \tbar\n  \n  </pre>\n baz\n <textarea> eee\nfff\t</textarea>\n</div>\n' == soup.div.prettify()

    def test_prettify_handles_nested_string_literal_tags(self):
        # Most of this markup is inside a <pre> tag, so prettify()
        # only does three things to it:
        # 1. Add a newline and a space between the <div> and the <pre>
        # 2. Add a newline after the </pre>
        # 3. Add a newline at the end.
        #
        # The contents of the <pre> tag are left completely alone. In
        # particular, we don't start adding whitespace again once we
        # encounter the first </pre> tag, because we know it's not
        # the one that put us into string literal mode.
        markup = """<div><pre><code>some
<script><pre>code</pre></script> for you
</code></pre></div>"""

        expect = """<div>
 <pre><code>some
<script><pre>code</pre></script> for you
</code></pre>
</div>
"""
        soup = self.soup(markup)
        assert expect == soup.div.prettify()

    def test_prettify_accepts_formatter_function(self):
        soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')
@ -216,429 +249,6 @@ class TestFormatters(SoupTest):
        assert soup.contents[0].name == 'pre'

(removed here: the old TestCSSSelectors class, a verbatim copy of the class added above in lib/bs4/tests/test_css.py, minus the new test_precompiled, test_closest, test_match, test_iselect, test_filter and test_escape methods)

class TestPersistence(SoupTest):
    "Testing features like pickle and deepcopy."
@ -668,12 +278,24 @@ class TestPersistence(SoupTest):
        loaded = pickle.loads(dumped)
        assert loaded.__class__ == BeautifulSoup
        assert loaded.decode() == self.tree.decode()

    def test_deepcopy_identity(self):
        # Making a deepcopy of a tree yields an identical tree.
        copied = copy.deepcopy(self.tree)
        assert copied.decode() == self.tree.decode()

    def test_copy_deeply_nested_document(self):
        # This test verifies that copy and deepcopy don't involve any
        # recursive function calls. If they did, this test would
        # overflow the Python interpreter stack.
        limit = sys.getrecursionlimit() + 1
        markup = "<span>" * limit

        soup = self.soup(markup)

        copied = copy.copy(soup)
        copied = copy.deepcopy(soup)

    def test_copy_preserves_encoding(self):
        soup = BeautifulSoup(b'<p>&nbsp;</p>', 'html.parser')
        encoding = soup.original_encoding
@ -24,6 +24,7 @@ from bs4.builder import (
from bs4.element import (
    Comment,
    SoupStrainer,
    PYTHON_SPECIFIC_ENCODINGS,
    Tag,
    NavigableString,
)
@ -210,6 +211,47 @@ class TestConstructor(SoupTest):
        assert [] == soup.string_container_stack


class TestOutput(SoupTest):

    @pytest.mark.parametrize(
        "eventual_encoding,actual_encoding", [
            ("utf-8", "utf-8"),
            ("utf-16", "utf-16"),
        ]
    )
    def test_decode_xml_declaration(self, eventual_encoding, actual_encoding):
        # Most of the time, calling decode() on an XML document will
        # give you a document declaration that mentions the encoding
        # you intend to use when encoding the document as a
        # bytestring.
        soup = self.soup("<tag></tag>")
        soup.is_xml = True
        assert (f'<?xml version="1.0" encoding="{actual_encoding}"?>\n<tag></tag>'
                == soup.decode(eventual_encoding=eventual_encoding))

    @pytest.mark.parametrize(
        "eventual_encoding", [x for x in PYTHON_SPECIFIC_ENCODINGS] + [None]
    )
    def test_decode_xml_declaration_with_missing_or_python_internal_eventual_encoding(self, eventual_encoding):
        # But if you pass a Python internal encoding into decode(), or
        # omit the eventual_encoding altogether, the document
        # declaration won't mention any particular encoding.
        soup = BeautifulSoup("<tag></tag>", "html.parser")
        soup.is_xml = True
        assert (f'<?xml version="1.0"?>\n<tag></tag>'
                == soup.decode(eventual_encoding=eventual_encoding))

    def test(self):
        # BeautifulSoup subclasses Tag and extends the decode() method.
        # Make sure the other Tag methods which call decode() call
        # it correctly.
        soup = self.soup("<tag></tag>")
        assert b"<tag></tag>" == soup.encode(encoding="utf-8")
        assert b"<tag></tag>" == soup.encode_contents(encoding="utf-8")
        assert "<tag></tag>" == soup.decode_contents()
        assert "<tag>\n</tag>\n" == soup.prettify()


class TestWarnings(SoupTest):
    # Note that some of the tests in this class create BeautifulSoup
    # objects directly rather than using self.soup(). That's
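The XML-declaration behavior tested above is easy to see interactively; a short sketch:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup("<tag></tag>", "html.parser")
    soup.is_xml = True
    # The declaration advertises the encoding you say you will encode to.
    print(soup.decode(eventual_encoding="utf-16"))
    # <?xml version="1.0" encoding="utf-16"?>
    # <tag></tag>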
@ -6,7 +6,7 @@ __title__ = "packaging"
__summary__ = "Core utilities for Python packages"
__uri__ = "https://github.com/pypa/packaging"

__version__ = "23.0"
__version__ = "23.1"

__author__ = "Donald Stufft and individual contributors"
__email__ = "donald@stufft.io"
@ -14,6 +14,8 @@ EF_ARM_ABI_VER5 = 0x05000000
EF_ARM_ABI_FLOAT_HARD = 0x00000400


# `os.PathLike` not a generic type until Python 3.9, so sticking with `str`
# as the type for `path` until then.
@contextlib.contextmanager
def _parse_elf(path: str) -> Generator[Optional[ELFFile], None, None]:
    try:
@ -163,7 +163,11 @@ def _parse_extras(tokenizer: Tokenizer) -> List[str]:
    if not tokenizer.check("LEFT_BRACKET", peek=True):
        return []

    with tokenizer.enclosing_tokens("LEFT_BRACKET", "RIGHT_BRACKET"):
    with tokenizer.enclosing_tokens(
        "LEFT_BRACKET",
        "RIGHT_BRACKET",
        around="extras",
    ):
        tokenizer.consume("WS")
        extras = _parse_extras_list(tokenizer)
        tokenizer.consume("WS")
@ -203,7 +207,11 @@ def _parse_specifier(tokenizer: Tokenizer) -> str:
    specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
              | WS? version_many WS?
    """
    with tokenizer.enclosing_tokens("LEFT_PARENTHESIS", "RIGHT_PARENTHESIS"):
    with tokenizer.enclosing_tokens(
        "LEFT_PARENTHESIS",
        "RIGHT_PARENTHESIS",
        around="version specifier",
    ):
        tokenizer.consume("WS")
        parsed_specifiers = _parse_version_many(tokenizer)
        tokenizer.consume("WS")
@ -217,7 +225,20 @@ def _parse_version_many(tokenizer: Tokenizer) -> str:
    """
    parsed_specifiers = ""
    while tokenizer.check("SPECIFIER"):
        span_start = tokenizer.position
        parsed_specifiers += tokenizer.read().text
        if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                ".* suffix can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position + 1,
            )
        if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                "Local version label can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position,
            )
        tokenizer.consume("WS")
        if not tokenizer.check("COMMA"):
            break
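These new checks surface as parse errors when a prefix match or local version label is attached to the wrong operator. A hedged sketch through the public API (packaging 23.1; the package name is made up for illustration):

    from packaging.requirements import InvalidRequirement, Requirement

    # A `.*` prefix match is only meaningful with == or !=, so this is rejected.
    try:
        Requirement("example-pkg >= 1.0.*")
    except InvalidRequirement as exc:
        print(exc)  # mentions: .* suffix can only be used with `==` or `!=` operators

    # The equivalent == form parses fine.
    assert str(Requirement("example-pkg == 1.0.*").specifier) == "==1.0.*"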
@ -254,7 +275,11 @@ def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:

    tokenizer.consume("WS")
    if tokenizer.check("LEFT_PARENTHESIS", peek=True):
        with tokenizer.enclosing_tokens("LEFT_PARENTHESIS", "RIGHT_PARENTHESIS"):
        with tokenizer.enclosing_tokens(
            "LEFT_PARENTHESIS",
            "RIGHT_PARENTHESIS",
            around="marker expression",
        ):
            tokenizer.consume("WS")
            marker: MarkerAtom = _parse_marker(tokenizer)
            tokenizer.consume("WS")
@ -78,6 +78,8 @@ DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = {
    "AT": r"\@",
    "URL": r"[^ \t]+",
    "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
    "VERSION_PREFIX_TRAIL": r"\.\*",
    "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
    "WS": r"[ \t]+",
    "END": r"$",
}
@ -167,21 +169,23 @@ class Tokenizer:
        )

    @contextlib.contextmanager
    def enclosing_tokens(self, open_token: str, close_token: str) -> Iterator[bool]:
    def enclosing_tokens(
        self, open_token: str, close_token: str, *, around: str
    ) -> Iterator[None]:
        if self.check(open_token):
            open_position = self.position
            self.read()
        else:
            open_position = None

        yield open_position is not None
        yield

        if open_position is None:
            return

        if not self.check(close_token):
            self.raise_syntax_error(
                f"Expected closing {close_token}",
                f"Expected matching {close_token} for {open_token}, after {around}",
                span_start=open_position,
            )
|
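
A short sketch of the clearer message the new around= argument enables; the wording follows the f-string above, surfaced through the public Requirement API:

    from packaging.requirements import InvalidRequirement, Requirement

    try:
        Requirement("name[extra")  # closing bracket missing
    except InvalidRequirement as exc:
        # "Expected matching RIGHT_BRACKET for LEFT_BRACKET, after extras"
        print(exc)
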
@ -8,7 +8,14 @@ import platform
import sys
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

from ._parser import MarkerAtom, MarkerList, Op, Value, Variable, parse_marker
from ._parser import (
MarkerAtom,
MarkerList,
Op,
Value,
Variable,
parse_marker as _parse_marker,
)
from ._tokenizer import ParserSyntaxError
from .specifiers import InvalidSpecifier, Specifier
from .utils import canonicalize_name
@ -189,7 +196,7 @@ class Marker:
# packaging.requirements.Requirement. If any additional logic is
# added here, make sure to mirror/adapt Requirement.
try:
self._markers = _normalize_extra_values(parse_marker(marker))
self._markers = _normalize_extra_values(_parse_marker(marker))
# The attribute `_markers` can be described in terms of a recursive type:
# MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]]
#
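
Usage sketch; behavior is unchanged by the aliased import, only the module namespace is cleaner:

    from packaging.markers import Marker

    marker = Marker('python_version >= "3.8" and sys_platform == "linux"')
    print(marker.evaluate())  # True on Linux with Python 3.8+, else False
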
408
lib/packaging/metadata.py
Normal file
@ -0,0 +1,408 @@
import email.feedparser
import email.header
import email.message
import email.parser
import email.policy
import sys
import typing
from typing import Dict, List, Optional, Tuple, Union, cast

if sys.version_info >= (3, 8):  # pragma: no cover
from typing import TypedDict
else:  # pragma: no cover
if typing.TYPE_CHECKING:
from typing_extensions import TypedDict
else:
try:
from typing_extensions import TypedDict
except ImportError:

class TypedDict:
def __init_subclass__(*_args, **_kwargs):
pass


# The RawMetadata class attempts to make as few assumptions about the underlying
# serialization formats as possible. The idea is that as long as a serialization
# format offers some very basic primitives in *some* way then we can support
# serializing to and from that format.
class RawMetadata(TypedDict, total=False):
"""A dictionary of raw core metadata.

Each field in core metadata maps to a key of this dictionary (when data is
provided). The key is lower-case and underscores are used instead of dashes
compared to the equivalent core metadata field. Any core metadata field that
can be specified multiple times or can hold multiple values in a single
field has a key with a plural name.

Core metadata fields that can be specified multiple times are stored as a
list or dict depending on which is appropriate for the field. Any fields
which hold multiple values in a single field are stored as a list.

"""

# Metadata 1.0 - PEP 241
metadata_version: str
name: str
version: str
platforms: List[str]
summary: str
description: str
keywords: List[str]
home_page: str
author: str
author_email: str
license: str

# Metadata 1.1 - PEP 314
supported_platforms: List[str]
download_url: str
classifiers: List[str]
requires: List[str]
provides: List[str]
obsoletes: List[str]

# Metadata 1.2 - PEP 345
maintainer: str
maintainer_email: str
requires_dist: List[str]
provides_dist: List[str]
obsoletes_dist: List[str]
requires_python: str
requires_external: List[str]
project_urls: Dict[str, str]

# Metadata 2.0
# PEP 426 attempted to completely revamp the metadata format
# but got stuck without ever being able to build consensus on
# it and ultimately ended up withdrawn.
#
# However, a number of tools had started emitting METADATA with
# `2.0` Metadata-Version, so for historical reasons, this version
# was skipped.

# Metadata 2.1 - PEP 566
description_content_type: str
provides_extra: List[str]

# Metadata 2.2 - PEP 643
dynamic: List[str]

# Metadata 2.3 - PEP 685
# No new fields were added in PEP 685, just some edge cases were
# tightened up to provide better interoperability.


_STRING_FIELDS = {
"author",
"author_email",
"description",
"description_content_type",
"download_url",
"home_page",
"license",
"maintainer",
"maintainer_email",
"metadata_version",
"name",
"requires_python",
"summary",
"version",
}

_LIST_STRING_FIELDS = {
"classifiers",
"dynamic",
"obsoletes",
"obsoletes_dist",
"platforms",
"provides",
"provides_dist",
"provides_extra",
"requires",
"requires_dist",
"requires_external",
"supported_platforms",
}


def _parse_keywords(data: str) -> List[str]:
"""Split a string of comma-separated keywords into a list of keywords."""
return [k.strip() for k in data.split(",")]
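
Behavior sketch of the helper above, inlined here since it is a private function:

    def _parse_keywords(data):
        # Split on commas and strip surrounding whitespace from each keyword.
        return [k.strip() for k in data.split(",")]

    print(_parse_keywords("http, web , framework"))  # ['http', 'web', 'framework']
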


def _parse_project_urls(data: List[str]) -> Dict[str, str]:
"""Parse a list of label/URL string pairings separated by a comma."""
urls = {}
for pair in data:
# Our logic is slightly tricky here as we want to try and do
# *something* reasonable with malformed data.
#
# The main thing that we have to worry about is data that does
# not have a ',' at all to split the label from the value. There
# isn't a singular right answer here, and we will fail validation
# later on (if the caller is validating) so it doesn't *really*
# matter, but since the missing value has to be an empty str
# and our return value is dict[str, str], if we let the key
# be the missing value, then they'd have multiple '' values that
# overwrite each other in an accumulating dict.
#
# The other potential issue is that it's possible to have the
# same label multiple times in the metadata, with no solid "right"
# answer for what to do in that case. As such, we'll do the only
# thing we can, which is treat the field as unparseable and add it
# to our list of unparsed fields.
parts = [p.strip() for p in pair.split(",", 1)]
parts.extend([""] * (max(0, 2 - len(parts))))  # Ensure 2 items

# TODO: The spec doesn't say anything about if the keys should be
#       considered case sensitive or not... logically they should
#       be case-preserving and case-insensitive, but doing that
#       would open up more cases where we might have duplicate
#       entries.
label, url = parts
if label in urls:
# The label already exists in our set of urls, so this field
# is unparseable, and we can just add the whole thing to our
# unparseable data and stop processing it.
raise KeyError("duplicate labels in project urls")
urls[label] = url

return urls
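
Behavior sketch of the parsing rules above: a pair with no comma yields an empty URL, while a duplicated label raises KeyError so the caller can mark the whole field unparsed:

    pairs = ["Home, https://example.org", "Docs"]
    urls = {}
    for pair in pairs:
        parts = [p.strip() for p in pair.split(",", 1)]
        parts.extend([""] * (max(0, 2 - len(parts))))  # pad to (label, url)
        label, url = parts
        urls[label] = url
    print(urls)  # {'Home': 'https://example.org', 'Docs': ''}
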


def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str:
"""Get the body of the message."""
# If our source is a str, then our caller has managed encodings for us,
# and we don't need to deal with it.
if isinstance(source, str):
payload: str = msg.get_payload()
return payload
# If our source is a bytes, then we're managing the encoding and we need
# to deal with it.
else:
bpayload: bytes = msg.get_payload(decode=True)
try:
return bpayload.decode("utf8", "strict")
except UnicodeDecodeError:
raise ValueError("payload in an invalid encoding")


# The various parse_FORMAT functions here are intended to be as lenient as
# possible in their parsing, while still returning a correctly typed
# RawMetadata.
#
# To aid in this, we also generally want to do as little touching of the
# data as possible, except where there are possibly some historic holdovers
# that make valid data awkward to work with.
#
# While this is a lower level, intermediate format than our ``Metadata``
# class, some light touch ups can make a massive difference in usability.

# Map METADATA fields to RawMetadata.
_EMAIL_TO_RAW_MAPPING = {
"author": "author",
"author-email": "author_email",
"classifier": "classifiers",
"description": "description",
"description-content-type": "description_content_type",
"download-url": "download_url",
"dynamic": "dynamic",
"home-page": "home_page",
"keywords": "keywords",
"license": "license",
"maintainer": "maintainer",
"maintainer-email": "maintainer_email",
"metadata-version": "metadata_version",
"name": "name",
"obsoletes": "obsoletes",
"obsoletes-dist": "obsoletes_dist",
"platform": "platforms",
"project-url": "project_urls",
"provides": "provides",
"provides-dist": "provides_dist",
"provides-extra": "provides_extra",
"requires": "requires",
"requires-dist": "requires_dist",
"requires-external": "requires_external",
"requires-python": "requires_python",
"summary": "summary",
"supported-platform": "supported_platforms",
"version": "version",
}


def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]:
"""Parse a distribution's metadata.

This function returns a two-item tuple of dicts. The first dict is of
recognized fields from the core metadata specification. Fields that can be
parsed and translated into Python's built-in types are converted
appropriately. All other fields are left as-is. Fields that are allowed to
appear multiple times are stored as lists.

The second dict contains all other fields from the metadata. This includes
any unrecognized fields. It also includes any fields which are expected to
be parsed into a built-in type but were not formatted appropriately. Finally,
any fields that are expected to appear only once but are repeated are
included in this dict.

"""
raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {}
unparsed: Dict[str, List[str]] = {}

if isinstance(data, str):
parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data)
else:
parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data)

# We have to wrap parsed.keys() in a set, because in the case of multiple
# values for a key (a list), the key will appear multiple times in the
# list of keys, but we're avoiding that by using get_all().
for name in frozenset(parsed.keys()):
# Header names in RFC are case insensitive, so we'll normalize to all
# lower case to make comparisons easier.
name = name.lower()

# We use get_all() here, even for fields that aren't multiple use,
# because otherwise someone could have e.g. two Name fields, and we
# would just silently ignore it rather than doing something about it.
headers = parsed.get_all(name)

# The way the email module works when parsing bytes is that it
# unconditionally decodes the bytes as ascii using the surrogateescape
# handler. When you pull that data back out (such as with get_all()),
# it looks to see if the str has any surrogate escapes, and if it does
# it wraps it in a Header object instead of returning the string.
#
# As such, we'll look for those Header objects, and fix up the encoding.
value = []
# Flag if we have run into any issues processing the headers, thus
# signalling that the data belongs in 'unparsed'.
valid_encoding = True
for h in headers:
# It's unclear if this can return more types than just a Header or
# a str, so we'll just assert here to make sure.
assert isinstance(h, (email.header.Header, str))

# If it's a header object, we need to do our little dance to get
# the real data out of it. In cases where there is invalid data
# we're going to end up with mojibake, but there's no obvious, good
# way around that without reimplementing parts of the Header object
# ourselves.
#
# That should be fine since, if mojibake happens, this key is
# going into the unparsed dict anyways.
if isinstance(h, email.header.Header):
# The Header object stores its data as chunks, and each chunk
# can be independently encoded, so we'll need to check each
# of them.
chunks: List[Tuple[bytes, Optional[str]]] = []
for bin, encoding in email.header.decode_header(h):
try:
bin.decode("utf8", "strict")
except UnicodeDecodeError:
# Enable mojibake.
encoding = "latin1"
valid_encoding = False
else:
encoding = "utf8"
chunks.append((bin, encoding))

# Turn our chunks back into a Header object, then let that
# Header object do the right thing to turn them into a
# string for us.
value.append(str(email.header.make_header(chunks)))
# This is already a string, so just add it.
else:
value.append(h)

# We've processed all of our values to get them into a list of str,
# but we may have mojibake data, in which case this is an unparsed
# field.
if not valid_encoding:
unparsed[name] = value
continue

raw_name = _EMAIL_TO_RAW_MAPPING.get(name)
if raw_name is None:
# This is a bit of a weird situation, we've encountered a key that
# we don't know what it means, so we don't know whether it's meant
# to be a list or not.
#
# Since we can't really tell one way or another, we'll just leave it
# as a list, even though it may be a single item list, because that's
# what makes the most sense for email headers.
unparsed[name] = value
continue

# If this is one of our string fields, then we'll check to see if our
# value is a list of a single item. If it is then we'll assume that
# it was emitted as a single string, and unwrap the str from inside
# the list.
#
# If it's any other kind of data, then we haven't the faintest clue
# what we should parse it as, and we have to just add it to our list
# of unparsed stuff.
if raw_name in _STRING_FIELDS and len(value) == 1:
raw[raw_name] = value[0]
# If this is one of our list of string fields, then we can just assign
# the value, since email *only* has strings, and our get_all() call
# above ensures that this is a list.
elif raw_name in _LIST_STRING_FIELDS:
raw[raw_name] = value
# Special Case: Keywords
# The keywords field is implemented in the metadata spec as a str,
# but it conceptually is a list of strings, and is serialized using
# ", ".join(keywords), so we'll do some light data massaging to turn
# this into what it logically is.
elif raw_name == "keywords" and len(value) == 1:
raw[raw_name] = _parse_keywords(value[0])
# Special Case: Project-URL
# The project urls is implemented in the metadata spec as a list of
# specially-formatted strings that represent a key and a value, which
# is fundamentally a mapping, however the email format doesn't support
# mappings in a sane way, so it was crammed into a list of strings
# instead.
#
# We will do a little light data massaging to turn this into a map as
# it logically should be.
elif raw_name == "project_urls":
try:
raw[raw_name] = _parse_project_urls(value)
except KeyError:
unparsed[name] = value
# Nothing that we've done has managed to parse this, so it'll just
# throw it in our unparseable data and move on.
else:
unparsed[name] = value

# We need to support getting the Description from the message payload in
# addition to getting it from the headers. This does mean, though, there
# is the possibility of it being set both ways, in which case we put both
# in 'unparsed' since we don't know which is right.
try:
payload = _get_payload(parsed, data)
except ValueError:
unparsed.setdefault("description", []).append(
parsed.get_payload(decode=isinstance(data, bytes))
)
else:
if payload:
# Check to see if we've already got a description, if so then both
# it, and this body move to unparseable.
if "description" in raw:
description_header = cast(str, raw.pop("description"))
unparsed.setdefault("description", []).extend(
[description_header, payload]
)
elif "description" in unparsed:
unparsed["description"].append(payload)
else:
raw["description"] = payload

# We need to cast our `raw` to a metadata, because a TypedDict only supports
# literal key names, but we're computing our key names on purpose; the
# way this function is implemented, our `TypedDict` can only have valid key
# names.
return cast(RawMetadata, raw), unparsed
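
Usage sketch of the new module as shipped in recent packaging releases:

    from packaging.metadata import parse_email

    raw, unparsed = parse_email(
        "Metadata-Version: 2.1\n"
        "Name: sampleproject\n"
        "Version: 3.0.0\n"
        "Keywords: sample, setuptools\n"
        "\n"
        "A demo long description.\n"
    )
    print(raw["name"], raw["keywords"])  # sampleproject ['sample', 'setuptools']
    print(raw["description"])            # the body becomes the description
    print(unparsed)                      # {}
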
@ -5,7 +5,7 @@
import urllib.parse
from typing import Any, List, Optional, Set

from ._parser import parse_requirement
from ._parser import parse_requirement as _parse_requirement
from ._tokenizer import ParserSyntaxError
from .markers import Marker, _normalize_extra_values
from .specifiers import SpecifierSet
@ -32,7 +32,7 @@ class Requirement:

def __init__(self, requirement_string: str) -> None:
try:
parsed = parse_requirement(requirement_string)
parsed = _parse_requirement(requirement_string)
except ParserSyntaxError as e:
raise InvalidRequirement(str(e)) from e
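
Usage sketch; parsing behavior is identical, only the import alias changed:

    from packaging.requirements import Requirement

    req = Requirement('requests[security] >= 2.31.0; python_version >= "3.7"')
    print(req.name, sorted(req.extras))  # requests ['security']
    print(req.specifier, req.marker)     # >=2.31.0 python_version >= "3.7"
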
@ -252,7 +252,8 @@ class Specifier(BaseSpecifier):
# Store whether or not this Specifier should accept prereleases
self._prereleases = prereleases

@property
# https://github.com/python/mypy/pull/13475#pullrequestreview-1079784515
@property  # type: ignore[override]
def prereleases(self) -> bool:
# If there is an explicit prereleases set for this, then we'll just
# blindly use that.
@ -398,7 +399,9 @@ class Specifier(BaseSpecifier):
# We need special logic to handle prefix matching
if spec.endswith(".*"):
# In the case of prefix matching we want to ignore local segment.
normalized_prospective = canonicalize_version(prospective.public)
normalized_prospective = canonicalize_version(
prospective.public, strip_trailing_zero=False
)
# Get the normalized version string ignoring the trailing .*
normalized_spec = canonicalize_version(spec[:-2], strip_trailing_zero=False)
# Split the spec out by dots, and pretend that there is an implicit
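
A minimal sketch of the prefix-matching path this strip_trailing_zero=False change affects:

    from packaging.specifiers import Specifier

    spec = Specifier("==2.0.*")
    print(spec.contains("2.0.1"))  # True: prefix matches, local segment ignored
    print(spec.contains("2.1.0"))  # False
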
@ -111,7 +111,7 @@ def parse_tag(tag: str) -> FrozenSet[Tag]:


def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]:
value = sysconfig.get_config_var(name)
value: Union[int, str, None] = sysconfig.get_config_var(name)
if value is None and warn:
logger.debug(
"Config variable '%s' is unset, Python ABI tag may be incorrect", name
@ -120,7 +120,7 @@ def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]:


def _normalize_string(string: str) -> str:
return string.replace(".", "_").replace("-", "_")
return string.replace(".", "_").replace("-", "_").replace(" ", "_")


def _abi3_applies(python_version: PythonVersion) -> bool:
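
Behavior sketch of the normalization change, inlined since the helper is private; spaces in platform strings now normalize like dots and dashes:

    def _normalize_string(string: str) -> str:
        return string.replace(".", "_").replace("-", "_").replace(" ", "_")

    print(_normalize_string("manylinux2014-x86 64"))  # manylinux2014_x86_64
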
@ -10,7 +10,7 @@
import collections
import itertools
import re
from typing import Callable, Optional, SupportsInt, Tuple, Union
from typing import Any, Callable, Optional, SupportsInt, Tuple, Union

from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType

@ -63,7 +63,7 @@ class InvalidVersion(ValueError):


class _BaseVersion:
_key: CmpKey
_key: Tuple[Any, ...]

def __hash__(self) -> int:
return hash(self._key)
@ -179,6 +179,7 @@ class Version(_BaseVersion):
"""

_regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
_key: CmpKey

def __init__(self, version: str) -> None:
"""Initialize a Version object.
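
Usage sketch; comparisons still flow through the _key tuples that the loosened base-class annotation above describes:

    from packaging.version import Version

    assert Version("2023.3") > Version("2022.7.1")
    assert Version("2.0") == Version("2.0.0")  # trailing zeros compare equal
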
@ -22,8 +22,8 @@ from pytz.tzfile import build_tzinfo


# The IANA (nee Olson) database is updated several times a year.
OLSON_VERSION = '2022g'
VERSION = '2022.7.1'  # pip compatible version number.
OLSON_VERSION = '2023c'
VERSION = '2023.3'  # pip compatible version number.
__version__ = VERSION

OLSEN_VERSION = OLSON_VERSION  # Old releases had this misspelling
@ -1311,7 +1311,6 @@ common_timezones = \
'America/Whitehorse',
'America/Winnipeg',
'America/Yakutat',
'America/Yellowknife',
'Antarctica/Casey',
'Antarctica/Davis',
'Antarctica/DumontDUrville',
Binary file not shown.
@ -238,7 +238,7 @@ SY Syria
SZ Eswatini (Swaziland)
TC Turks & Caicos Is
TD Chad
TF French Southern Territories
TF French S. Terr.
TG Togo
TH Thailand
TJ Tajikistan
@ -72,11 +72,11 @@ Leap 2016 Dec 31 23:59:60 + S
# Any additional leap seconds will come after this.
# This Expires line is commented out for now,
# so that pre-2020a zic implementations do not reject this file.
#Expires 2023 Jun 28 00:00:00
#Expires 2023 Dec 28 00:00:00

# POSIX timestamps for the data in this file:
#updated 1467936000 (2016-07-08 00:00:00 UTC)
#expires 1687910400 (2023-06-28 00:00:00 UTC)
#expires 1703721600 (2023-12-28 00:00:00 UTC)

# Updated through IERS Bulletin C64
# File expires on: 28 June 2023
# Updated through IERS Bulletin C65
# File expires on: 28 December 2023
@ -75,6 +75,8 @@ R K 2014 o - May 15 24 1 S
R K 2014 o - Jun 26 24 0 -
R K 2014 o - Jul 31 24 1 S
R K 2014 o - S lastTh 24 0 -
R K 2023 ma - Ap lastF 0 1 S
R K 2023 ma - O lastTh 24 0 -
Z Africa/Cairo 2:5:9 - LMT 1900 O
2 K EE%sT
Z Africa/Bissau -1:2:20 - LMT 1912 Ja 1 1u
@ -172,7 +174,7 @@ R M 2021 o - May 16 2 0 -
R M 2022 o - Mar 27 3 -1 -
R M 2022 o - May 8 2 0 -
R M 2023 o - Mar 19 3 -1 -
R M 2023 o - Ap 30 2 0 -
R M 2023 o - Ap 23 2 0 -
R M 2024 o - Mar 10 3 -1 -
R M 2024 o - Ap 14 2 0 -
R M 2025 o - F 23 3 -1 -
@ -188,7 +190,7 @@ R M 2029 o - F 18 2 0 -
R M 2029 o - D 30 3 -1 -
R M 2030 o - F 10 2 0 -
R M 2030 o - D 22 3 -1 -
R M 2031 o - F 2 2 0 -
R M 2031 o - Ja 26 2 0 -
R M 2031 o - D 14 3 -1 -
R M 2032 o - Ja 18 2 0 -
R M 2032 o - N 28 3 -1 -
@ -204,7 +206,7 @@ R M 2036 o - N 23 2 0 -
R M 2037 o - O 4 3 -1 -
R M 2037 o - N 15 2 0 -
R M 2038 o - S 26 3 -1 -
R M 2038 o - N 7 2 0 -
R M 2038 o - O 31 2 0 -
R M 2039 o - S 18 3 -1 -
R M 2039 o - O 23 2 0 -
R M 2040 o - S 2 3 -1 -
@ -220,7 +222,7 @@ R M 2044 o - Au 28 2 0 -
R M 2045 o - Jul 9 3 -1 -
R M 2045 o - Au 20 2 0 -
R M 2046 o - Jul 1 3 -1 -
R M 2046 o - Au 12 2 0 -
R M 2046 o - Au 5 2 0 -
R M 2047 o - Jun 23 3 -1 -
R M 2047 o - Jul 28 2 0 -
R M 2048 o - Jun 7 3 -1 -
@ -236,7 +238,7 @@ R M 2052 o - Jun 2 2 0 -
R M 2053 o - Ap 13 3 -1 -
R M 2053 o - May 25 2 0 -
R M 2054 o - Ap 5 3 -1 -
R M 2054 o - May 17 2 0 -
R M 2054 o - May 10 2 0 -
R M 2055 o - Mar 28 3 -1 -
R M 2055 o - May 2 2 0 -
R M 2056 o - Mar 12 3 -1 -
@ -252,7 +254,7 @@ R M 2060 o - Mar 7 2 0 -
R M 2061 o - Ja 16 3 -1 -
R M 2061 o - F 27 2 0 -
R M 2062 o - Ja 8 3 -1 -
R M 2062 o - F 19 2 0 -
R M 2062 o - F 12 2 0 -
R M 2062 o - D 31 3 -1 -
R M 2063 o - F 4 2 0 -
R M 2063 o - D 16 3 -1 -
@ -268,7 +270,7 @@ R M 2067 o - D 11 2 0 -
R M 2068 o - O 21 3 -1 -
R M 2068 o - D 2 2 0 -
R M 2069 o - O 13 3 -1 -
R M 2069 o - N 24 2 0 -
R M 2069 o - N 17 2 0 -
R M 2070 o - O 5 3 -1 -
R M 2070 o - N 9 2 0 -
R M 2071 o - S 20 3 -1 -
@ -284,7 +286,7 @@ R M 2075 o - S 15 2 0 -
R M 2076 o - Jul 26 3 -1 -
R M 2076 o - S 6 2 0 -
R M 2077 o - Jul 18 3 -1 -
R M 2077 o - Au 29 2 0 -
R M 2077 o - Au 22 2 0 -
R M 2078 o - Jul 10 3 -1 -
R M 2078 o - Au 14 2 0 -
R M 2079 o - Jun 25 3 -1 -
@ -294,13 +296,13 @@ R M 2080 o - Jul 21 2 0 -
R M 2081 o - Jun 1 3 -1 -
R M 2081 o - Jul 13 2 0 -
R M 2082 o - May 24 3 -1 -
R M 2082 o - Jul 5 2 0 -
R M 2082 o - Jun 28 2 0 -
R M 2083 o - May 16 3 -1 -
R M 2083 o - Jun 20 2 0 -
R M 2084 o - Ap 30 3 -1 -
R M 2084 o - Jun 11 2 0 -
R M 2085 o - Ap 22 3 -1 -
R M 2085 o - Jun 3 2 0 -
R M 2085 o - May 27 2 0 -
R M 2086 o - Ap 14 3 -1 -
R M 2086 o - May 19 2 0 -
R M 2087 o - Mar 30 3 -1 -
|
|||
R P 2020 o - O 24 1 0 -
|
||||
R P 2021 o - O 29 1 0 -
|
||||
R P 2022 o - Mar 27 0 1 S
|
||||
R P 2022 ma - O Sa<=30 2 0 -
|
||||
R P 2023 ma - Mar Sa<=30 2 1 S
|
||||
R P 2022 2035 - O Sa<=30 2 0 -
|
||||
R P 2023 o - Ap 29 2 1 S
|
||||
R P 2024 o - Ap 13 2 1 S
|
||||
R P 2025 o - Ap 5 2 1 S
|
||||
R P 2026 2054 - Mar Sa<=30 2 1 S
|
||||
R P 2036 o - O 18 2 0 -
|
||||
R P 2037 o - O 10 2 0 -
|
||||
R P 2038 o - S 25 2 0 -
|
||||
R P 2039 o - S 17 2 0 -
|
||||
R P 2039 o - O 22 2 1 S
|
||||
R P 2039 2067 - O Sa<=30 2 0 -
|
||||
R P 2040 o - S 1 2 0 -
|
||||
R P 2040 o - O 13 2 1 S
|
||||
R P 2041 o - Au 24 2 0 -
|
||||
R P 2041 o - S 28 2 1 S
|
||||
R P 2042 o - Au 16 2 0 -
|
||||
R P 2042 o - S 20 2 1 S
|
||||
R P 2043 o - Au 1 2 0 -
|
||||
R P 2043 o - S 12 2 1 S
|
||||
R P 2044 o - Jul 23 2 0 -
|
||||
R P 2044 o - Au 27 2 1 S
|
||||
R P 2045 o - Jul 15 2 0 -
|
||||
R P 2045 o - Au 19 2 1 S
|
||||
R P 2046 o - Jun 30 2 0 -
|
||||
R P 2046 o - Au 11 2 1 S
|
||||
R P 2047 o - Jun 22 2 0 -
|
||||
R P 2047 o - Jul 27 2 1 S
|
||||
R P 2048 o - Jun 6 2 0 -
|
||||
R P 2048 o - Jul 18 2 1 S
|
||||
R P 2049 o - May 29 2 0 -
|
||||
R P 2049 o - Jul 3 2 1 S
|
||||
R P 2050 o - May 21 2 0 -
|
||||
R P 2050 o - Jun 25 2 1 S
|
||||
R P 2051 o - May 6 2 0 -
|
||||
R P 2051 o - Jun 17 2 1 S
|
||||
R P 2052 o - Ap 27 2 0 -
|
||||
R P 2052 o - Jun 1 2 1 S
|
||||
R P 2053 o - Ap 12 2 0 -
|
||||
R P 2053 o - May 24 2 1 S
|
||||
R P 2054 o - Ap 4 2 0 -
|
||||
R P 2054 o - May 16 2 1 S
|
||||
R P 2055 o - May 1 2 1 S
|
||||
R P 2056 o - Ap 22 2 1 S
|
||||
R P 2057 o - Ap 7 2 1 S
|
||||
R P 2058 ma - Mar Sa<=30 2 1 S
|
||||
R P 2068 o - O 20 2 0 -
|
||||
R P 2069 o - O 12 2 0 -
|
||||
R P 2070 o - O 4 2 0 -
|
||||
R P 2071 o - S 19 2 0 -
|
||||
R P 2072 o - S 10 2 0 -
|
||||
R P 2072 o - O 15 2 1 S
|
||||
R P 2073 o - S 2 2 0 -
|
||||
R P 2073 o - O 7 2 1 S
|
||||
R P 2074 o - Au 18 2 0 -
|
||||
R P 2074 o - S 29 2 1 S
|
||||
R P 2075 o - Au 10 2 0 -
|
||||
R P 2075 o - S 14 2 1 S
|
||||
R P 2075 ma - O Sa<=30 2 0 -
|
||||
R P 2076 o - Jul 25 2 0 -
|
||||
R P 2076 o - S 5 2 1 S
|
||||
R P 2077 o - Jul 17 2 0 -
|
||||
R P 2077 o - Au 28 2 1 S
|
||||
R P 2078 o - Jul 9 2 0 -
|
||||
R P 2078 o - Au 13 2 1 S
|
||||
R P 2079 o - Jun 24 2 0 -
|
||||
R P 2079 o - Au 5 2 1 S
|
||||
R P 2080 o - Jun 15 2 0 -
|
||||
R P 2080 o - Jul 20 2 1 S
|
||||
R P 2081 o - Jun 7 2 0 -
|
||||
R P 2081 o - Jul 12 2 1 S
|
||||
R P 2082 o - May 23 2 0 -
|
||||
R P 2082 o - Jul 4 2 1 S
|
||||
R P 2083 o - May 15 2 0 -
|
||||
R P 2083 o - Jun 19 2 1 S
|
||||
R P 2084 o - Ap 29 2 0 -
|
||||
R P 2084 o - Jun 10 2 1 S
|
||||
R P 2085 o - Ap 21 2 0 -
|
||||
R P 2085 o - Jun 2 2 1 S
|
||||
R P 2086 o - Ap 13 2 0 -
|
||||
R P 2086 o - May 18 2 1 S
|
||||
Z Asia/Gaza 2:17:52 - LMT 1900 O
|
||||
2 Z EET/EEST 1948 May 15
|
||||
2 K EE%sT 1967 Jun 5
|
||||
|
@ -1754,8 +1834,8 @@ Z America/Scoresbysund -1:27:52 - LMT 1916 Jul 28
|
|||
-1 E -01/+00
|
||||
Z America/Nuuk -3:26:56 - LMT 1916 Jul 28
|
||||
-3 - -03 1980 Ap 6 2
|
||||
-3 E -03/-02 2023 Mar 25 22
|
||||
-2 - -02
|
||||
-3 E -03/-02 2023 O 29 1u
|
||||
-2 E -02/-01
|
||||
Z America/Thule -4:35:8 - LMT 1916 Jul 28
|
||||
-4 Th A%sT
|
||||
Z Europe/Tallinn 1:39 - LMT 1880
|
||||
|
@ -2175,13 +2255,13 @@ Z Europe/Volgograd 2:57:40 - LMT 1920 Ja 3
3 - +03 1930 Jun 21
4 - +04 1961 N 11
4 R +04/+05 1988 Mar 27 2s
3 R +03/+04 1991 Mar 31 2s
3 R MSK/MSD 1991 Mar 31 2s
4 - +04 1992 Mar 29 2s
3 R +03/+04 2011 Mar 27 2s
4 - +04 2014 O 26 2s
3 - +03 2018 O 28 2s
3 R MSK/MSD 2011 Mar 27 2s
4 - MSK 2014 O 26 2s
3 - MSK 2018 O 28 2s
4 - +04 2020 D 27 2s
3 - +03
3 - MSK
Z Europe/Saratov 3:4:18 - LMT 1919 Jul 1 0u
3 - +03 1930 Jun 21
4 R +04/+05 1988 Mar 27 2s
@ -2194,11 +2274,11 @@ Z Europe/Saratov 3:4:18 - LMT 1919 Jul 1 0u
Z Europe/Kirov 3:18:48 - LMT 1919 Jul 1 0u
3 - +03 1930 Jun 21
4 R +04/+05 1989 Mar 26 2s
3 R +03/+04 1991 Mar 31 2s
3 R MSK/MSD 1991 Mar 31 2s
4 - +04 1992 Mar 29 2s
3 R +03/+04 2011 Mar 27 2s
4 - +04 2014 O 26 2s
3 - +03
3 R MSK/MSD 2011 Mar 27 2s
4 - MSK 2014 O 26 2s
3 - MSK
Z Europe/Samara 3:20:20 - LMT 1919 Jul 1 0u
3 - +03 1930 Jun 21
4 - +04 1935 Ja 27
@ -3070,9 +3150,6 @@ Z America/Cambridge_Bay 0 - -00 1920
-5 - EST 2000 N 5
-6 - CST 2001 Ap 1 3
-7 C M%sT
Z America/Yellowknife 0 - -00 1935
-7 Y M%sT 1980
-7 C M%sT
Z America/Inuvik 0 - -00 1953
-8 Y P%sT 1979 Ap lastSu 2
-7 Y M%sT 1980
@ -4171,6 +4248,7 @@ L America/Argentina/Cordoba America/Rosario
L America/Tijuana America/Santa_Isabel
L America/Denver America/Shiprock
L America/Toronto America/Thunder_Bay
L America/Edmonton America/Yellowknife
L Pacific/Auckland Antarctica/South_Pole
L Asia/Shanghai Asia/Chongqing
L Asia/Shanghai Asia/Harbin
@ -121,9 +121,8 @@ CA +744144-0944945 America/Resolute Central - NU (Resolute)
CA +624900-0920459 America/Rankin_Inlet Central - NU (central)
CA +5024-10439 America/Regina CST - SK (most areas)
CA +5017-10750 America/Swift_Current CST - SK (midwest)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); SK (W)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); NT (E); SK (W)
CA +690650-1050310 America/Cambridge_Bay Mountain - NU (west)
CA +6227-11421 America/Yellowknife Mountain - NT (central)
CA +682059-1334300 America/Inuvik Mountain - NT (west)
CA +4906-11631 America/Creston MST - BC (Creston)
CA +5546-12014 America/Dawson_Creek MST - BC (Dawson Cr, Ft St John)
@ -139,7 +138,7 @@ CG -0416+01517 Africa/Brazzaville
CH +4723+00832 Europe/Zurich
CI +0519-00402 Africa/Abidjan
CK -2114-15946 Pacific/Rarotonga
CL -3327-07040 America/Santiago Chile (most areas)
CL -3327-07040 America/Santiago most of Chile
CL -5309-07055 America/Punta_Arenas Region of Magallanes
CL -2709-10926 Pacific/Easter Easter Island
CM +0403+00942 Africa/Douala
@ -151,10 +150,10 @@ CU +2308-08222 America/Havana
CV +1455-02331 Atlantic/Cape_Verde
CW +1211-06900 America/Curacao
CX -1025+10543 Indian/Christmas
CY +3510+03322 Asia/Nicosia Cyprus (most areas)
CY +3510+03322 Asia/Nicosia most of Cyprus
CY +3507+03357 Asia/Famagusta Northern Cyprus
CZ +5005+01426 Europe/Prague
DE +5230+01322 Europe/Berlin Germany (most areas)
DE +5230+01322 Europe/Berlin most of Germany
DE +4742+00841 Europe/Busingen Busingen
DJ +1136+04309 Africa/Djibouti
DK +5540+01235 Europe/Copenhagen
@ -187,7 +186,7 @@ GF +0456-05220 America/Cayenne
GG +492717-0023210 Europe/Guernsey
GH +0533-00013 Africa/Accra
GI +3608-00521 Europe/Gibraltar
GL +6411-05144 America/Nuuk Greenland (most areas)
GL +6411-05144 America/Nuuk most of Greenland
GL +7646-01840 America/Danmarkshavn National Park (east coast)
GL +7029-02158 America/Scoresbysund Scoresbysund/Ittoqqortoormiit
GL +7634-06847 America/Thule Thule/Pituffik
@ -235,7 +234,7 @@ KP +3901+12545 Asia/Pyongyang
KR +3733+12658 Asia/Seoul
KW +2920+04759 Asia/Kuwait
KY +1918-08123 America/Cayman
KZ +4315+07657 Asia/Almaty Kazakhstan (most areas)
KZ +4315+07657 Asia/Almaty most of Kazakhstan
KZ +4448+06528 Asia/Qyzylorda Qyzylorda/Kyzylorda/Kzyl-Orda
KZ +5312+06337 Asia/Qostanay Qostanay/Kostanay/Kustanay
KZ +5017+05710 Asia/Aqtobe Aqtobe/Aktobe
@ -259,12 +258,12 @@ MD +4700+02850 Europe/Chisinau
ME +4226+01916 Europe/Podgorica
MF +1804-06305 America/Marigot
MG -1855+04731 Indian/Antananarivo
MH +0709+17112 Pacific/Majuro Marshall Islands (most areas)
MH +0709+17112 Pacific/Majuro most of Marshall Islands
MH +0905+16720 Pacific/Kwajalein Kwajalein
MK +4159+02126 Europe/Skopje
ML +1239-00800 Africa/Bamako
MM +1647+09610 Asia/Yangon
MN +4755+10653 Asia/Ulaanbaatar Mongolia (most areas)
MN +4755+10653 Asia/Ulaanbaatar most of Mongolia
MN +4801+09139 Asia/Hovd Bayan-Olgiy, Govi-Altai, Hovd, Uvs, Zavkhan
MN +4804+11430 Asia/Choibalsan Dornod, Sukhbaatar
MO +221150+1133230 Asia/Macau
@ -302,7 +301,7 @@ NO +5955+01045 Europe/Oslo
NP +2743+08519 Asia/Kathmandu
NR -0031+16655 Pacific/Nauru
NU -1901-16955 Pacific/Niue
NZ -3652+17446 Pacific/Auckland New Zealand (most areas)
NZ -3652+17446 Pacific/Auckland most of New Zealand
NZ -4357-17633 Pacific/Chatham Chatham Islands
OM +2336+05835 Asia/Muscat
PA +0858-07932 America/Panama
@ -310,7 +309,7 @@ PE -1203-07703 America/Lima
PF -1732-14934 Pacific/Tahiti Society Islands
PF -0900-13930 Pacific/Marquesas Marquesas Islands
PF -2308-13457 Pacific/Gambier Gambier Islands
PG -0930+14710 Pacific/Port_Moresby Papua New Guinea (most areas)
PG -0930+14710 Pacific/Port_Moresby most of Papua New Guinea
PG -0613+15534 Pacific/Bougainville Bougainville
PH +1435+12100 Asia/Manila
PK +2452+06703 Asia/Karachi
@ -356,7 +355,7 @@ RU +4310+13156 Asia/Vladivostok MSK+07 - Amur River
RU +643337+1431336 Asia/Ust-Nera MSK+07 - Oymyakonsky
RU +5934+15048 Asia/Magadan MSK+08 - Magadan
RU +4658+14242 Asia/Sakhalin MSK+08 - Sakhalin Island
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); North Kuril Is
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); N Kuril Is
RU +5301+15839 Asia/Kamchatka MSK+09 - Kamchatka
RU +6445+17729 Asia/Anadyr MSK+09 - Bering Sea
RW -0157+03004 Africa/Kigali
@ -397,7 +396,7 @@ TT +1039-06131 America/Port_of_Spain
TV -0831+17913 Pacific/Funafuti
TW +2503+12130 Asia/Taipei
TZ -0648+03917 Africa/Dar_es_Salaam
UA +5026+03031 Europe/Kyiv Ukraine (most areas)
UA +5026+03031 Europe/Kyiv most of Ukraine
UG +0019+03225 Africa/Kampala
UM +2813-17722 Pacific/Midway Midway Islands
UM +1917+16637 Pacific/Wake Wake Island
@ -420,7 +419,7 @@ US +465042-1012439 America/North_Dakota/New_Salem Central - ND (Morton rural)
US +471551-1014640 America/North_Dakota/Beulah Central - ND (Mercer)
US +394421-1045903 America/Denver Mountain (most areas)
US +433649-1161209 America/Boise Mountain - ID (south); OR (east)
US +332654-1120424 America/Phoenix MST - Arizona (except Navajo)
US +332654-1120424 America/Phoenix MST - AZ (except Navajo)
US +340308-1181434 America/Los_Angeles Pacific
US +611305-1495401 America/Anchorage Alaska (most areas)
US +581807-1342511 America/Juneau Alaska - Juneau area
@ -428,7 +427,7 @@ US +571035-1351807 America/Sitka Alaska - Sitka area
US +550737-1313435 America/Metlakatla Alaska - Annette Island
US +593249-1394338 America/Yakutat Alaska - Yakutat
US +643004-1652423 America/Nome Alaska (west)
US +515248-1763929 America/Adak Aleutian Islands
US +515248-1763929 America/Adak Alaska - western Aleutians
US +211825-1575130 Pacific/Honolulu Hawaii
UY -345433-0561245 America/Montevideo
UZ +3940+06648 Asia/Samarkand Uzbekistan (west)
@ -18,7 +18,10 @@
# Please see the theory.html file for how these names are chosen.
# If multiple timezones overlap a country, each has a row in the
# table, with each column 1 containing the country code.
# 4. Comments; present if and only if a country has multiple timezones.
# 4. Comments; present if and only if countries have multiple timezones,
# and useful only for those countries. For example, the comments
# for the row with countries CH,DE,LI and name Europe/Zurich
# are useful only for DE, since CH and LI have no other timezones.
#
# If a timezone covers multiple countries, the most-populous city is used,
# and that country is listed first in column 1; any other countries
@ -34,7 +37,7 @@
#country-
#codes coordinates TZ comments
AD +4230+00131 Europe/Andorra
AE,OM,RE,SC,TF +2518+05518 Asia/Dubai UAE, Oman, Réunion, Seychelles, Crozet, Scattered Is
AE,OM,RE,SC,TF +2518+05518 Asia/Dubai Crozet, Scattered Is
AF +3431+06912 Asia/Kabul
AL +4120+01950 Europe/Tirane
AM +4011+04430 Asia/Yerevan
@ -45,7 +48,7 @@ AQ -6448-06406 Antarctica/Palmer Palmer
AQ -6734-06808 Antarctica/Rothera Rothera
AQ -720041+0023206 Antarctica/Troll Troll
AR -3436-05827 America/Argentina/Buenos_Aires Buenos Aires (BA, CF)
AR -3124-06411 America/Argentina/Cordoba Argentina (most areas: CB, CC, CN, ER, FM, MN, SE, SF)
AR -3124-06411 America/Argentina/Cordoba most areas: CB, CC, CN, ER, FM, MN, SE, SF
AR -2447-06525 America/Argentina/Salta Salta (SA, LP, NQ, RN)
AR -2411-06518 America/Argentina/Jujuy Jujuy (JY)
AR -2649-06513 America/Argentina/Tucuman Tucumán (TM)
@ -56,7 +59,7 @@ AR -3253-06849 America/Argentina/Mendoza Mendoza (MZ)
AR -3319-06621 America/Argentina/San_Luis San Luis (SL)
AR -5138-06913 America/Argentina/Rio_Gallegos Santa Cruz (SC)
AR -5448-06818 America/Argentina/Ushuaia Tierra del Fuego (TF)
AS,UM -1416-17042 Pacific/Pago_Pago Samoa, Midway
AS,UM -1416-17042 Pacific/Pago_Pago Midway
AT +4813+01620 Europe/Vienna
AU -3133+15905 Australia/Lord_Howe Lord Howe Island
AU -5430+15857 Antarctica/Macquarie Macquarie Island
@ -101,26 +104,25 @@ CA +4439-06336 America/Halifax Atlantic - NS (most areas); PE
CA +4612-05957 America/Glace_Bay Atlantic - NS (Cape Breton)
CA +4606-06447 America/Moncton Atlantic - New Brunswick
CA +5320-06025 America/Goose_Bay Atlantic - Labrador (most areas)
CA,BS +4339-07923 America/Toronto Eastern - ON, QC (most areas), Bahamas
CA,BS +4339-07923 America/Toronto Eastern - ON, QC (most areas)
CA +6344-06828 America/Iqaluit Eastern - NU (most areas)
CA +4953-09709 America/Winnipeg Central - ON (west); Manitoba
CA +744144-0944945 America/Resolute Central - NU (Resolute)
CA +624900-0920459 America/Rankin_Inlet Central - NU (central)
CA +5024-10439 America/Regina CST - SK (most areas)
CA +5017-10750 America/Swift_Current CST - SK (midwest)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); SK (W)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); NT (E); SK (W)
CA +690650-1050310 America/Cambridge_Bay Mountain - NU (west)
CA +6227-11421 America/Yellowknife Mountain - NT (central)
CA +682059-1334300 America/Inuvik Mountain - NT (west)
CA +5546-12014 America/Dawson_Creek MST - BC (Dawson Cr, Ft St John)
CA +5848-12242 America/Fort_Nelson MST - BC (Ft Nelson)
CA +6043-13503 America/Whitehorse MST - Yukon (east)
CA +6404-13925 America/Dawson MST - Yukon (west)
CA +4916-12307 America/Vancouver Pacific - BC (most areas)
CH,DE,LI +4723+00832 Europe/Zurich Swiss time
CH,DE,LI +4723+00832 Europe/Zurich Büsingen
CI,BF,GH,GM,GN,IS,ML,MR,SH,SL,SN,TG +0519-00402 Africa/Abidjan
CK -2114-15946 Pacific/Rarotonga
CL -3327-07040 America/Santiago Chile (most areas)
CL -3327-07040 America/Santiago most of Chile
CL -5309-07055 America/Punta_Arenas Region of Magallanes
CL -2709-10926 Pacific/Easter Easter Island
CN +3114+12128 Asia/Shanghai Beijing Time
@ -129,10 +131,10 @@ CO +0436-07405 America/Bogota
CR +0956-08405 America/Costa_Rica
CU +2308-08222 America/Havana
CV +1455-02331 Atlantic/Cape_Verde
CY +3510+03322 Asia/Nicosia Cyprus (most areas)
CY +3510+03322 Asia/Nicosia most of Cyprus
CY +3507+03357 Asia/Famagusta Northern Cyprus
CZ,SK +5005+01426 Europe/Prague
DE,DK,NO,SE,SJ +5230+01322 Europe/Berlin Germany (most areas), Scandinavia
DE,DK,NO,SE,SJ +5230+01322 Europe/Berlin most of Germany
DO +1828-06954 America/Santo_Domingo
DZ +3647+00303 Africa/Algiers
EC -0210-07950 America/Guayaquil Ecuador (mainland)
@ -153,7 +155,7 @@ GB,GG,IM,JE +513030-0000731 Europe/London
GE +4143+04449 Asia/Tbilisi
GF +0456-05220 America/Cayenne
GI +3608-00521 Europe/Gibraltar
GL +6411-05144 America/Nuuk Greenland (most areas)
GL +6411-05144 America/Nuuk most of Greenland
GL +7646-01840 America/Danmarkshavn National Park (east coast)
GL +7029-02158 America/Scoresbysund Scoresbysund/Ittoqqortoormiit
GL +7634-06847 America/Thule Thule/Pituffik
@ -183,12 +185,12 @@ JO +3157+03556 Asia/Amman
JP +353916+1394441 Asia/Tokyo
KE,DJ,ER,ET,KM,MG,SO,TZ,UG,YT -0117+03649 Africa/Nairobi
KG +4254+07436 Asia/Bishkek
KI,MH,TV,UM,WF +0125+17300 Pacific/Tarawa Gilberts, Marshalls, Tuvalu, Wallis & Futuna, Wake
KI,MH,TV,UM,WF +0125+17300 Pacific/Tarawa Gilberts, Marshalls, Wake
KI -0247-17143 Pacific/Kanton Phoenix Islands
KI +0152-15720 Pacific/Kiritimati Line Islands
KP +3901+12545 Asia/Pyongyang
KR +3733+12658 Asia/Seoul
KZ +4315+07657 Asia/Almaty Kazakhstan (most areas)
KZ +4315+07657 Asia/Almaty most of Kazakhstan
KZ +4448+06528 Asia/Qyzylorda Qyzylorda/Kyzylorda/Kzyl-Orda
KZ +5312+06337 Asia/Qostanay Qostanay/Kostanay/Kustanay
KZ +5017+05710 Asia/Aqtobe Aqtöbe/Aktobe
@ -205,14 +207,14 @@ MA +3339-00735 Africa/Casablanca
MD +4700+02850 Europe/Chisinau
MH +0905+16720 Pacific/Kwajalein Kwajalein
MM,CC +1647+09610 Asia/Yangon
MN +4755+10653 Asia/Ulaanbaatar Mongolia (most areas)
MN +4755+10653 Asia/Ulaanbaatar most of Mongolia
MN +4801+09139 Asia/Hovd Bayan-Ölgii, Govi-Altai, Hovd, Uvs, Zavkhan
MN +4804+11430 Asia/Choibalsan Dornod, Sükhbaatar
MO +221150+1133230 Asia/Macau
MQ +1436-06105 America/Martinique
MT +3554+01431 Europe/Malta
MU -2010+05730 Indian/Mauritius
MV,TF +0410+07330 Indian/Maldives Maldives, Kerguelen, St Paul I, Amsterdam I
MV,TF +0410+07330 Indian/Maldives Kerguelen, St Paul I, Amsterdam I
MX +1924-09909 America/Mexico_City Central Mexico
MX +2105-08646 America/Cancun Quintana Roo
MX +2058-08937 America/Merida Campeche, Yucatán
@ -225,7 +227,7 @@ MX +2313-10625 America/Mazatlan Baja California Sur, Nayarit (most areas), Sinal
MX +2048-10515 America/Bahia_Banderas Bahía de Banderas
MX +2904-11058 America/Hermosillo Sonora
MX +3232-11701 America/Tijuana Baja California
MY,BN +0133+11020 Asia/Kuching Sabah, Sarawak, Brunei
MY,BN +0133+11020 Asia/Kuching Sabah, Sarawak
MZ,BI,BW,CD,MW,RW,ZM,ZW -2558+03235 Africa/Maputo Central Africa Time
NA -2234+01706 Africa/Windhoek
NC -2216+16627 Pacific/Noumea
@ -237,7 +239,7 @@ NR -0031+16655 Pacific/Nauru
NU -1901-16955 Pacific/Niue
NZ,AQ -3652+17446 Pacific/Auckland New Zealand time
NZ -4357-17633 Pacific/Chatham Chatham Islands
PA,CA,KY +0858-07932 America/Panama EST - Panama, Cayman, ON (Atikokan), NU (Coral H)
PA,CA,KY +0858-07932 America/Panama EST - ON (Atikokan), NU (Coral H)
PE -1203-07703 America/Lima
PF -1732-14934 Pacific/Tahiti Society Islands
PF -0900-13930 Pacific/Marquesas Marquesas Islands
@ -285,13 +287,13 @@ RU +4310+13156 Asia/Vladivostok MSK+07 - Amur River
RU +643337+1431336 Asia/Ust-Nera MSK+07 - Oymyakonsky
RU +5934+15048 Asia/Magadan MSK+08 - Magadan
RU +4658+14242 Asia/Sakhalin MSK+08 - Sakhalin Island
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); North Kuril Is
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); N Kuril Is
RU +5301+15839 Asia/Kamchatka MSK+09 - Kamchatka
RU +6445+17729 Asia/Anadyr MSK+09 - Bering Sea
SA,AQ,KW,YE +2438+04643 Asia/Riyadh Arabia, Syowa
SB,FM -0932+16012 Pacific/Guadalcanal Solomons, Pohnpei
SA,AQ,KW,YE +2438+04643 Asia/Riyadh Syowa
SB,FM -0932+16012 Pacific/Guadalcanal Pohnpei
SD +1536+03232 Africa/Khartoum
SG,MY +0117+10351 Asia/Singapore Singapore, peninsular Malaysia
SG,MY +0117+10351 Asia/Singapore peninsular Malaysia
SR +0550-05510 America/Paramaribo
SS +0451+03137 Africa/Juba
ST +0020+00644 Africa/Sao_Tome
@ -299,7 +301,7 @@ SV +1342-08912 America/El_Salvador
SY +3330+03618 Asia/Damascus
TC +2128-07108 America/Grand_Turk
TD +1207+01503 Africa/Ndjamena
TH,CX,KH,LA,VN +1345+10031 Asia/Bangkok Indochina (most areas)
TH,CX,KH,LA,VN +1345+10031 Asia/Bangkok north Vietnam
TJ +3835+06848 Asia/Dushanbe
TK -0922-17114 Pacific/Fakaofo
TL -0833+12535 Asia/Dili
@ -308,7 +310,7 @@ TN +3648+01011 Africa/Tunis
TO -210800-1751200 Pacific/Tongatapu
TR +4101+02858 Europe/Istanbul
TW +2503+12130 Asia/Taipei
UA +5026+03031 Europe/Kyiv Ukraine (most areas)
UA +5026+03031 Europe/Kyiv most of Ukraine
US +404251-0740023 America/New_York Eastern (most areas)
US +421953-0830245 America/Detroit Eastern - MI (most areas)
US +381515-0854534 America/Kentucky/Louisville Eastern - KY (Louisville area)
@ -328,7 +330,7 @@ US +465042-1012439 America/North_Dakota/New_Salem Central - ND (Morton rural)
US +471551-1014640 America/North_Dakota/Beulah Central - ND (Mercer)
US +394421-1045903 America/Denver Mountain (most areas)
US +433649-1161209 America/Boise Mountain - ID (south); OR (east)
US,CA +332654-1120424 America/Phoenix MST - Arizona (except Navajo), Creston BC
US,CA +332654-1120424 America/Phoenix MST - AZ (most areas), Creston BC
US +340308-1181434 America/Los_Angeles Pacific
US +611305-1495401 America/Anchorage Alaska (most areas)
US +581807-1342511 America/Juneau Alaska - Juneau area
@ -336,13 +338,13 @@ US +571035-1351807 America/Sitka Alaska - Sitka area
US +550737-1313435 America/Metlakatla Alaska - Annette Island
US +593249-1394338 America/Yakutat Alaska - Yakutat
US +643004-1652423 America/Nome Alaska (west)
US +515248-1763929 America/Adak Aleutian Islands
US,UM +211825-1575130 Pacific/Honolulu Hawaii
US +515248-1763929 America/Adak Alaska - western Aleutians
US +211825-1575130 Pacific/Honolulu Hawaii
UY -345433-0561245 America/Montevideo
UZ +3940+06648 Asia/Samarkand Uzbekistan (west)
UZ +4120+06918 Asia/Tashkent Uzbekistan (east)
VE +1030-06656 America/Caracas
VN +1045+10640 Asia/Ho_Chi_Minh Vietnam (south)
VN +1045+10640 Asia/Ho_Chi_Minh south Vietnam
VU -1740+16825 Pacific/Efate
WS -1350-17144 Pacific/Apia
ZA,LS,SZ -2615+02800 Africa/Johannesburg
@ -118,7 +118,7 @@ Serializing multiple objects to JSON lines (newline-delimited JSON)::

"""
from __future__ import absolute_import
__version__ = '3.18.3'
__version__ = '3.19.1'
__all__ = [
'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
@ -149,28 +149,10 @@ def _import_c_make_encoder():
except ImportError:
return None

_default_encoder = JSONEncoder(
skipkeys=False,
ensure_ascii=True,
check_circular=True,
allow_nan=True,
indent=None,
separators=None,
encoding='utf-8',
default=None,
use_decimal=True,
namedtuple_as_object=True,
tuple_as_array=True,
iterable_as_array=False,
bigint_as_string=False,
item_sort_key=None,
for_json=False,
ignore_nan=False,
int_as_string_bitcount=None,
)
_default_encoder = JSONEncoder()

def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None,
allow_nan=False, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None,
@ -187,10 +169,10 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
contain non-ASCII characters, so long as they do not need to be escaped
by JSON. When it is true, all non-ASCII characters are escaped.

If *allow_nan* is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
in strict compliance of the original JSON specification, instead of using
the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See
If *allow_nan* is true (default: ``False``), then out of range ``float``
values (``nan``, ``inf``, ``-inf``) will be serialized to
their JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``)
instead of raising a ValueError. See
*ignore_nan* for ECMA-262 compliant behavior.

If *indent* is a string, then JSON array elements and object members
@ -258,7 +240,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
"""
# cached encoder
if (not skipkeys and ensure_ascii and
check_circular and allow_nan and
check_circular and not allow_nan and
cls is None and indent is None and separators is None and
encoding == 'utf-8' and default is None and use_decimal
and namedtuple_as_object and tuple_as_array and not iterable_as_array
@ -292,7 +274,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,


def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None,
allow_nan=False, cls=None, indent=None, separators=None,
encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True,
bigint_as_string=False, sort_keys=False, item_sort_key=None,
@ -312,10 +294,11 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
for container types will be skipped and a circular reference will
result in an ``OverflowError`` (or worse).

If ``allow_nan`` is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
strict compliance of the JSON specification, instead of using the
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
If *allow_nan* is true (default: ``False``), then out of range ``float``
values (``nan``, ``inf``, ``-inf``) will be serialized to
their JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``)
instead of raising a ValueError. See
*ignore_nan* for ECMA-262 compliant behavior.

If ``indent`` is a string, then JSON array elements and object members
will be pretty-printed with a newline followed by that string repeated
|
|||
"""
|
||||
# cached encoder
|
||||
if (not skipkeys and ensure_ascii and
|
||||
check_circular and allow_nan and
|
||||
check_circular and not allow_nan and
|
||||
cls is None and indent is None and separators is None and
|
||||
encoding == 'utf-8' and default is None and use_decimal
|
||||
and namedtuple_as_object and tuple_as_array and not iterable_as_array
|
||||
|
@ -412,14 +395,12 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
|
|||
**kw).encode(obj)
|
||||
|
||||
|
||||
_default_decoder = JSONDecoder(encoding=None, object_hook=None,
|
||||
object_pairs_hook=None)
|
||||
_default_decoder = JSONDecoder()
|
||||
|
||||
|
||||
def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
|
||||
parse_int=None, parse_constant=None, object_pairs_hook=None,
|
||||
use_decimal=False, namedtuple_as_object=True, tuple_as_array=True,
|
||||
**kw):
|
||||
use_decimal=False, allow_nan=False, **kw):
|
||||
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
|
||||
a JSON document as `str` or `bytes`) to a Python object.
|
||||
|
||||
|
@ -442,23 +423,27 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
|
|||
takes priority.
|
||||
|
||||
*parse_float*, if specified, will be called with the string of every
|
||||
JSON float to be decoded. By default, this is equivalent to
|
||||
JSON float to be decoded. By default, this is equivalent to
|
||||
``float(num_str)``. This can be used to use another datatype or parser
|
||||
for JSON floats (e.g. :class:`decimal.Decimal`).
|
||||
|
||||
*parse_int*, if specified, will be called with the string of every
|
||||
JSON int to be decoded. By default, this is equivalent to
|
||||
JSON int to be decoded. By default, this is equivalent to
|
||||
``int(num_str)``. This can be used to use another datatype or parser
|
||||
for JSON integers (e.g. :class:`float`).
|
||||
|
||||
*parse_constant*, if specified, will be called with one of the
|
||||
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
|
||||
can be used to raise an exception if invalid JSON numbers are
|
||||
encountered.
|
||||
*allow_nan*, if True (default false), will allow the parser to
|
||||
accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``
|
||||
and enable the use of the deprecated *parse_constant*.
|
||||
|
||||
If *use_decimal* is true (default: ``False``) then it implies
|
||||
parse_float=decimal.Decimal for parity with ``dump``.
|
||||
|
||||
*parse_constant*, if specified, will be
|
||||
called with one of the following strings: ``'-Infinity'``,
|
||||
``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
|
||||
as it is rare to parse non-compliant JSON containing these values.
|
||||
|
||||
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
|
||||
kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
|
||||
of subclassing whenever possible.
|
||||
|
@ -468,12 +453,12 @@ def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
|
|||
encoding=encoding, cls=cls, object_hook=object_hook,
|
||||
parse_float=parse_float, parse_int=parse_int,
|
||||
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
|
||||
use_decimal=use_decimal, **kw)
|
||||
use_decimal=use_decimal, allow_nan=allow_nan, **kw)
|
||||
|
||||
|
||||
def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
|
||||
parse_int=None, parse_constant=None, object_pairs_hook=None,
|
||||
use_decimal=False, **kw):
|
||||
use_decimal=False, allow_nan=False, **kw):
|
||||
"""Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
|
||||
document) to a Python object.
|
||||
|
||||
|
@ -505,14 +490,18 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
|
|||
``int(num_str)``. This can be used to use another datatype or parser
|
||||
for JSON integers (e.g. :class:`float`).
|
||||
|
||||
*parse_constant*, if specified, will be called with one of the
|
||||
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
|
||||
can be used to raise an exception if invalid JSON numbers are
|
||||
encountered.
|
||||
*allow_nan*, if True (default false), will allow the parser to
|
||||
accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``
|
||||
and enable the use of the deprecated *parse_constant*.
|
||||
|
||||
If *use_decimal* is true (default: ``False``) then it implies
|
||||
parse_float=decimal.Decimal for parity with ``dump``.
|
||||
|
||||
*parse_constant*, if specified, will be
|
||||
called with one of the following strings: ``'-Infinity'``,
|
||||
``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
|
||||
as it is rare to parse non-compliant JSON containing these values.
|
||||
|
||||
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
|
||||
kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
|
||||
of subclassing whenever possible.
|
||||
|
@ -521,7 +510,7 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
|
|||
if (cls is None and encoding is None and object_hook is None and
|
||||
parse_int is None and parse_float is None and
|
||||
parse_constant is None and object_pairs_hook is None
|
||||
and not use_decimal and not kw):
|
||||
and not use_decimal and not allow_nan and not kw):
|
||||
return _default_decoder.decode(s)
|
||||
if cls is None:
|
||||
cls = JSONDecoder
|
||||
|
@ -539,6 +528,8 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
|
|||
if parse_float is not None:
|
||||
raise TypeError("use_decimal=True implies parse_float=Decimal")
|
||||
kw['parse_float'] = Decimal
|
||||
if allow_nan:
|
||||
kw['allow_nan'] = True
|
||||
return cls(encoding=encoding, **kw).decode(s)
|
||||
|
||||
|
||||
|
@ -560,22 +551,9 @@ def _toggle_speedups(enabled):
|
|||
scan.make_scanner = scan.py_make_scanner
|
||||
dec.make_scanner = scan.make_scanner
|
||||
global _default_decoder
|
||||
_default_decoder = JSONDecoder(
|
||||
encoding=None,
|
||||
object_hook=None,
|
||||
object_pairs_hook=None,
|
||||
)
|
||||
_default_decoder = JSONDecoder()
|
||||
global _default_encoder
|
||||
_default_encoder = JSONEncoder(
|
||||
skipkeys=False,
|
||||
ensure_ascii=True,
|
||||
check_circular=True,
|
||||
allow_nan=True,
|
||||
indent=None,
|
||||
separators=None,
|
||||
encoding='utf-8',
|
||||
default=None,
|
||||
)
|
||||
_default_encoder = JSONEncoder()
|
||||
|
||||
def simple_first(kv):
|
||||
"""Helper function to pass to item_sort_key to sort simple
|
||||
|
|
|
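Taken together, the simplejson changes above flip `allow_nan` from opt-out to opt-in on both the encode and decode paths. A minimal sketch of the resulting behavior (assuming the patched bundle is importable as `simplejson`):

    import simplejson as json

    # Encoding: out-of-range floats now raise by default.
    try:
        json.dumps(float('nan'))
    except ValueError:
        pass  # expected under the new default
    json.dumps(float('nan'), allow_nan=True)   # -> 'NaN'

    # Decoding: the non-standard constants are likewise opt-in.
    try:
        json.loads('NaN')
    except ValueError:
        pass  # JSONDecodeError, a ValueError subclass
    json.loads('NaN', allow_nan=True)          # -> nan

Callers that relied on the old permissive default need to pass allow_nan=True explicitly on both sides.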
@@ -46,9 +46,35 @@ BACKSLASH = {

DEFAULT_ENCODING = "utf-8"

if hasattr(sys, 'get_int_max_str_digits'):
bounded_int = int
else:
def bounded_int(s, INT_MAX_STR_DIGITS=4300):
"""Backport of the integer string length conversion limitation

https://docs.python.org/3/library/stdtypes.html#int-max-str-digits
"""
if len(s) > INT_MAX_STR_DIGITS:
raise ValueError("Exceeds the limit (%s) for integer string conversion: value has %s digits" % (INT_MAX_STR_DIGITS, len(s)))
return int(s)


def scan_four_digit_hex(s, end, _m=re.compile(r'^[0-9a-fA-F]{4}$').match):
"""Scan a four digit hex number from s[end:end + 4]
"""
msg = "Invalid \\uXXXX escape sequence"
esc = s[end:end + 4]
if not _m(esc):
raise JSONDecodeError(msg, s, end - 2)
try:
return int(esc, 16), end + 4
except ValueError:
raise JSONDecodeError(msg, s, end - 2)

def py_scanstring(s, end, encoding=None, strict=True,
_b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join,
_PY3=PY3, _maxunicode=sys.maxunicode):
_PY3=PY3, _maxunicode=sys.maxunicode,
_scan_four_digit_hex=scan_four_digit_hex):
"""Scan the string s for a JSON string. End is the index of the
character in s after the quote that started the JSON string.
Unescapes all valid JSON string escape sequences and raises ValueError
@@ -67,6 +93,7 @@ def py_scanstring(s, end, encoding=None, strict=True,
if chunk is None:
raise JSONDecodeError(
"Unterminated string starting at", s, begin)
prev_end = end
end = chunk.end()
content, terminator = chunk.groups()
# Content contains zero or more unescaped string characters
@@ -81,7 +108,7 @@ def py_scanstring(s, end, encoding=None, strict=True,
elif terminator != '\\':
if strict:
msg = "Invalid control character %r at"
raise JSONDecodeError(msg, s, end)
raise JSONDecodeError(msg, s, prev_end)
else:
_append(terminator)
continue
@@ -100,35 +127,18 @@ def py_scanstring(s, end, encoding=None, strict=True,
end += 1
else:
# Unicode escape sequence
msg = "Invalid \\uXXXX escape sequence"
esc = s[end + 1:end + 5]
escX = esc[1:2]
if len(esc) != 4 or escX == 'x' or escX == 'X':
raise JSONDecodeError(msg, s, end - 1)
try:
uni = int(esc, 16)
except ValueError:
raise JSONDecodeError(msg, s, end - 1)
if uni < 0 or uni > _maxunicode:
raise JSONDecodeError(msg, s, end - 1)
end += 5
uni, end = _scan_four_digit_hex(s, end + 1)
# Check for surrogate pair on UCS-4 systems
# Note that this will join high/low surrogate pairs
# but will also pass unpaired surrogates through
if (_maxunicode > 65535 and
uni & 0xfc00 == 0xd800 and
s[end:end + 2] == '\\u'):
esc2 = s[end + 2:end + 6]
escX = esc2[1:2]
if len(esc2) == 4 and not (escX == 'x' or escX == 'X'):
try:
uni2 = int(esc2, 16)
except ValueError:
raise JSONDecodeError(msg, s, end)
if uni2 & 0xfc00 == 0xdc00:
uni = 0x10000 + (((uni - 0xd800) << 10) |
(uni2 - 0xdc00))
end += 6
uni2, end2 = _scan_four_digit_hex(s, end + 2)
if uni2 & 0xfc00 == 0xdc00:
uni = 0x10000 + (((uni - 0xd800) << 10) |
(uni2 - 0xdc00))
end = end2
char = unichr(uni)
# Append the unescaped character
_append(char)
@@ -169,7 +179,7 @@ def JSONObject(state, encoding, strict, scan_once, object_hook,
return pairs, end + 1
elif nextchar != '"':
raise JSONDecodeError(
"Expecting property name enclosed in double quotes",
"Expecting property name enclosed in double quotes or '}'",
s, end)
end += 1
while True:
@@ -296,14 +306,15 @@ class JSONDecoder(object):
| null | None |
+---------------+-------------------+

It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
When allow_nan=True, it also understands
``NaN``, ``Infinity``, and ``-Infinity`` as
their corresponding ``float`` values, which is outside the JSON spec.

"""

def __init__(self, encoding=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, strict=True,
object_pairs_hook=None):
object_pairs_hook=None, allow_nan=False):
"""
*encoding* determines the encoding used to interpret any
:class:`str` objects decoded by this instance (``'utf-8'`` by
@@ -336,10 +347,13 @@ class JSONDecoder(object):
``int(num_str)``. This can be used to use another datatype or parser
for JSON integers (e.g. :class:`float`).

*parse_constant*, if specified, will be called with one of the
following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
can be used to raise an exception if invalid JSON numbers are
encountered.
*allow_nan*, if True (default false), will allow the parser to
accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``.

*parse_constant*, if specified, will be
called with one of the following strings: ``'-Infinity'``,
``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
as it is rare to parse non-compliant JSON containing these values.

*strict* controls the parser's behavior when it encounters an
invalid control character in a string. The default setting of
@@ -353,8 +367,8 @@ class JSONDecoder(object):
self.object_hook = object_hook
self.object_pairs_hook = object_pairs_hook
self.parse_float = parse_float or float
self.parse_int = parse_int or int
self.parse_constant = parse_constant or _CONSTANTS.__getitem__
self.parse_int = parse_int or bounded_int
self.parse_constant = parse_constant or (allow_nan and _CONSTANTS.__getitem__ or None)
self.strict = strict
self.parse_object = JSONObject
self.parse_array = JSONArray
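The `bounded_int` backport above is the SJ-PT-23-03 hardening: it caps integer-literal length before calling `int()`, avoiding quadratic-time conversion on absurdly long numbers. A short sketch (assuming an interpreter where the default limit of 4300 digits applies):

    import simplejson as json

    json.loads('9' * 4300)        # at the limit: parses normally
    try:
        json.loads('9' * 4301)    # one digit over: rejected
    except ValueError:
        pass

On Python 3.11+, `bounded_int` is plain `int` and the interpreter's own int-max-str-digits limit produces the same ValueError.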
@@ -5,7 +5,7 @@ import re
from operator import itemgetter
# Do not import Decimal directly to avoid reload issues
import decimal
from .compat import unichr, binary_type, text_type, string_types, integer_types, PY3
from .compat import binary_type, text_type, string_types, integer_types, PY3
def _import_speedups():
try:
from . import _speedups
@@ -140,7 +140,7 @@ class JSONEncoder(object):
key_separator = ': '

def __init__(self, skipkeys=False, ensure_ascii=True,
check_circular=True, allow_nan=True, sort_keys=False,
check_circular=True, allow_nan=False, sort_keys=False,
indent=None, separators=None, encoding='utf-8', default=None,
use_decimal=True, namedtuple_as_object=True,
tuple_as_array=True, bigint_as_string=False,
@@ -161,10 +161,11 @@ class JSONEncoder(object):
prevent an infinite recursion (which would cause an OverflowError).
Otherwise, no such check takes place.

If allow_nan is true, then NaN, Infinity, and -Infinity will be
encoded as such. This behavior is not JSON specification compliant,
but is consistent with most JavaScript based encoders and decoders.
Otherwise, it will be a ValueError to encode such floats.
If allow_nan is true (default: False), then out of range float
values (nan, inf, -inf) will be serialized to
their JavaScript equivalents (NaN, Infinity, -Infinity)
instead of raising a ValueError. See
ignore_nan for ECMA-262 compliant behavior.

If sort_keys is true, then the output of dictionaries will be
sorted by key; this is useful for regression tests to ensure
@@ -294,7 +295,7 @@ class JSONEncoder(object):
# This doesn't pass the iterator directly to ''.join() because the
# exceptions aren't as detailed. The list call should be roughly
# equivalent to the PySequence_Fast that ''.join() would do.
chunks = self.iterencode(o, _one_shot=True)
chunks = self.iterencode(o)
if not isinstance(chunks, (list, tuple)):
chunks = list(chunks)
if self.ensure_ascii:
@@ -302,7 +303,7 @@ class JSONEncoder(object):
else:
return u''.join(chunks)

def iterencode(self, o, _one_shot=False):
def iterencode(self, o):
"""Encode the given object and yield each string
representation as available.

@@ -356,8 +357,7 @@ class JSONEncoder(object):
key_memo = {}
int_as_string_bitcount = (
53 if self.bigint_as_string else self.int_as_string_bitcount)
if (_one_shot and c_make_encoder is not None
and self.indent is None):
if (c_make_encoder is not None and self.indent is None):
_iterencode = c_make_encoder(
markers, self.default, _encoder, self.indent,
self.key_separator, self.item_separator, self.sort_keys,
@@ -370,7 +370,7 @@ class JSONEncoder(object):
_iterencode = _make_iterencode(
markers, self.default, _encoder, self.indent, floatstr,
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, _one_shot, self.use_decimal,
self.skipkeys, self.use_decimal,
self.namedtuple_as_object, self.tuple_as_array,
int_as_string_bitcount,
self.item_sort_key, self.encoding, self.for_json,
@@ -398,14 +398,14 @@ class JSONEncoderForHTML(JSONEncoder):
def encode(self, o):
# Override JSONEncoder.encode because it has hacks for
# performance that make things more complicated.
chunks = self.iterencode(o, True)
chunks = self.iterencode(o)
if self.ensure_ascii:
return ''.join(chunks)
else:
return u''.join(chunks)

def iterencode(self, o, _one_shot=False):
chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
def iterencode(self, o):
chunks = super(JSONEncoderForHTML, self).iterencode(o)
for chunk in chunks:
chunk = chunk.replace('&', '\\u0026')
chunk = chunk.replace('<', '\\u003c')
@@ -419,7 +419,7 @@ class JSONEncoderForHTML(JSONEncoder):


def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
_key_separator, _item_separator, _sort_keys, _skipkeys,
_use_decimal, _namedtuple_as_object, _tuple_as_array,
_int_as_string_bitcount, _item_sort_key,
_encoding,_for_json,
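The encoder changes drop the internal `_one_shot` plumbing without changing output; `JSONEncoderForHTML` still escapes characters significant in HTML. A quick sketch of what it emits (hypothetical payload):

    from simplejson import JSONEncoderForHTML

    JSONEncoderForHTML().encode({'k': '<b>&'})
    # -> {"k": "\u003cb\u003e\u0026"}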
@@ -60,11 +60,11 @@ def py_make_scanner(context):
else:
res = parse_int(integer)
return res, m.end()
elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
elif parse_constant and nextchar == 'N' and string[idx:idx + 3] == 'NaN':
return parse_constant('NaN'), idx + 3
elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
elif parse_constant and nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
return parse_constant('Infinity'), idx + 8
elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
elif parse_constant and nextchar == '-' and string[idx:idx + 9] == '-Infinity':
return parse_constant('-Infinity'), idx + 9
else:
raise JSONDecodeError(errmsg, string, idx)
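Gating each constant on `parse_constant` means the scanner never recognizes `NaN`/`Infinity`/`-Infinity` unless a handler is wired in, either via `allow_nan=True` or an explicit callback. A sketch of both paths:

    import simplejson as json

    # An explicit handler still opts in without allow_nan:
    json.loads('[Infinity]', parse_constant=lambda name: name)  # -> ['Infinity']

    # With no handler, the token is just invalid JSON:
    try:
        json.loads('[Infinity]')
    except json.JSONDecodeError:
        pass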
@@ -2,6 +2,7 @@ from __future__ import absolute_import
import decimal
from unittest import TestCase

import sys
import simplejson as json
from simplejson.compat import StringIO, b, binary_type
from simplejson import OrderedDict
@@ -117,3 +118,10 @@ class TestDecode(TestCase):
diff = id(x) - id(y)
self.assertRaises(ValueError, j.scan_once, y, diff)
self.assertRaises(ValueError, j.raw_decode, y, i)

def test_bounded_int(self):
# SJ-PT-23-03, limit quadratic number parsing per Python 3.11
max_str_digits = getattr(sys, 'get_int_max_str_digits', lambda: 4300)()
s = '1' + '0' * (max_str_digits - 1)
self.assertEqual(json.loads(s), int(s))
self.assertRaises(ValueError, json.loads, s + '0')

@@ -145,7 +145,7 @@ class TestFail(TestCase):
('["spam', 'Unterminated string starting at', 1),
('["spam"', "Expecting ',' delimiter", 7),
('["spam",', 'Expecting value', 8),
('{', 'Expecting property name enclosed in double quotes', 1),
('{', "Expecting property name enclosed in double quotes or '}'", 1),
('{"', 'Unterminated string starting at', 1),
('{"spam', 'Unterminated string starting at', 1),
('{"spam"', "Expecting ':' delimiter", 7),
@@ -156,6 +156,8 @@ class TestFail(TestCase):
('"', 'Unterminated string starting at', 0),
('"spam', 'Unterminated string starting at', 0),
('[,', "Expecting value", 1),
('--', 'Expecting value', 0),
('"\x18d', "Invalid control character %r", 1),
]
for data, msg, idx in test_cases:
try:

@@ -7,9 +7,9 @@ from simplejson.decoder import NaN, PosInf, NegInf
class TestFloat(TestCase):
def test_degenerates_allow(self):
for inf in (PosInf, NegInf):
self.assertEqual(json.loads(json.dumps(inf)), inf)
self.assertEqual(json.loads(json.dumps(inf, allow_nan=True), allow_nan=True), inf)
# Python 2.5 doesn't have math.isnan
nan = json.loads(json.dumps(NaN))
nan = json.loads(json.dumps(NaN, allow_nan=True), allow_nan=True)
self.assertTrue((0 + nan) != nan)

def test_degenerates_ignore(self):
@@ -19,6 +19,9 @@ class TestFloat(TestCase):
def test_degenerates_deny(self):
for f in (PosInf, NegInf, NaN):
self.assertRaises(ValueError, json.dumps, f, allow_nan=False)
for s in ('Infinity', '-Infinity', 'NaN'):
self.assertRaises(ValueError, json.loads, s, allow_nan=False)
self.assertRaises(ValueError, json.loads, s)

def test_floats(self):
for num in [1617161771.7650001, math.pi, math.pi**100,

@@ -132,7 +132,9 @@ class TestScanString(TestCase):
self.assertRaises(ValueError,
scanstring, '\\ud834\\x0123"', 0, None, True)

self.assertRaises(json.JSONDecodeError, scanstring, "\\u-123", 0, None, True)
self.assertRaises(json.JSONDecodeError, scanstring, '\\u-123"', 0, None, True)
# SJ-PT-23-01: Invalid Handling of Broken Unicode Escape Sequences
self.assertRaises(json.JSONDecodeError, scanstring, '\\u EDD"', 0, None, True)

def test_issue3623(self):
self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1,
@@ -32,7 +32,7 @@ from . import css_match as cm
from . import css_types as ct
from .util import DEBUG, SelectorSyntaxError  # noqa: F401
import bs4  # type: ignore[import]
from typing import Optional, Any, Iterator, Iterable
from typing import Any, Iterator, Iterable

__all__ = (
'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
@@ -45,10 +45,10 @@ SoupSieve = cm.SoupSieve

def compile(  # noqa: A001
pattern: str,
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> cm.SoupSieve:
"""Compile CSS pattern."""
@@ -79,10 +79,10 @@ def purge() -> None:
def closest(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> 'bs4.Tag':
"""Match closest ancestor."""
@@ -93,10 +93,10 @@ def closest(
def match(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> bool:
"""Match node."""
@@ -107,10 +107,10 @@ def match(
def filter(  # noqa: A001
select: str,
iterable: Iterable['bs4.Tag'],
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> list['bs4.Tag']:
"""Filter list of nodes."""
@@ -121,10 +121,10 @@ def filter(  # noqa: A001
def select_one(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> 'bs4.Tag':
"""Select a single tag."""
@@ -135,11 +135,11 @@ def select_one(
def select(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
limit: int = 0,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> list['bs4.Tag']:
"""Select the specified tags."""
@@ -150,11 +150,11 @@ def select(
def iselect(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[dict[str, str]] = None,
namespaces: dict[str, str] | None = None,
limit: int = 0,
flags: int = 0,
*,
custom: Optional[dict[str, str]] = None,
custom: dict[str, str] | None = None,
**kwargs: Any
) -> Iterator['bs4.Tag']:
"""Iterate the specified tags."""
@@ -193,5 +193,5 @@ def parse_version(ver: str) -> Version:
return Version(major, minor, micro, release, pre, post, dev)


__version_info__ = Version(2, 4, 0, "final")
__version_info__ = Version(2, 4, 1, "final")
__version__ = __version_info__._get_canonical()
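The soupsieve 2.4.1 typing changes are mechanical: every `Optional[X]` becomes the PEP 604 spelling `X | None`. Because these modules use `from __future__ import annotations`, the new syntax is never evaluated at runtime, so it stays compatible with interpreters older than 3.10; only runtime expressions such as `cast()` need the union quoted. A hypothetical sketch of the pattern:

    from __future__ import annotations
    from typing import cast

    # Fine under the future import even on Python < 3.10:
    def first_key(mapping: dict[str, str] | None) -> str | None:
        if mapping is None:
            return None
        # cast() evaluates its first argument, so the union is quoted:
        return cast('str | None', next(iter(mapping), None))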
@@ -6,7 +6,7 @@ import re
from . import css_types as ct
import unicodedata
import bs4  # type: ignore[import]
from typing import Iterator, Iterable, Any, Optional, Callable, Sequence, cast  # noqa: F401
from typing import Iterator, Iterable, Any, Callable, Sequence, cast  # noqa: F401

# Empty tag pattern (whitespace okay)
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
@@ -171,7 +171,7 @@ class _DocumentNav:
def get_children(
self,
el: bs4.Tag,
start: Optional[int] = None,
start: int | None = None,
reverse: bool = False,
tags: bool = True,
no_iframe: bool = False
@@ -239,22 +239,22 @@ class _DocumentNav:
return parent

@staticmethod
def get_tag_name(el: bs4.Tag) -> Optional[str]:
def get_tag_name(el: bs4.Tag) -> str | None:
"""Get tag."""

return cast(Optional[str], el.name)
return cast('str | None', el.name)

@staticmethod
def get_prefix_name(el: bs4.Tag) -> Optional[str]:
def get_prefix_name(el: bs4.Tag) -> str | None:
"""Get prefix."""

return cast(Optional[str], el.prefix)
return cast('str | None', el.prefix)

@staticmethod
def get_uri(el: bs4.Tag) -> Optional[str]:
def get_uri(el: bs4.Tag) -> str | None:
"""Get namespace `URI`."""

return cast(Optional[str], el.namespace)
return cast('str | None', el.namespace)

@classmethod
def get_next(cls, el: bs4.Tag, tags: bool = True) -> bs4.PageElement:
@@ -287,7 +287,7 @@ class _DocumentNav:
return bool(ns and ns == NS_XHTML)

@staticmethod
def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[Optional[str], Optional[str]]:
def split_namespace(el: bs4.Tag, attr_name: str) -> tuple[str | None, str | None]:
"""Return namespace and attribute name without the prefix."""

return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
@@ -330,8 +330,8 @@ class _DocumentNav:
cls,
el: bs4.Tag,
name: str,
default: Optional[str | Sequence[str]] = None
) -> Optional[str | Sequence[str]]:
default: str | Sequence[str] | None = None
) -> str | Sequence[str] | None:
"""Get attribute by name."""

value = default
@@ -348,7 +348,7 @@ class _DocumentNav:
return value

@classmethod
def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, Optional[str | Sequence[str]]]]:
def iter_attributes(cls, el: bs4.Tag) -> Iterator[tuple[str, str | Sequence[str] | None]]:
"""Iterate attributes."""

for k, v in el.attrs.items():
@@ -424,10 +424,10 @@ class Inputs:
return 0 <= minutes <= 59

@classmethod
def parse_value(cls, itype: str, value: Optional[str]) -> Optional[tuple[float, ...]]:
def parse_value(cls, itype: str, value: str | None) -> tuple[float, ...] | None:
"""Parse the input value."""

parsed = None  # type: Optional[tuple[float, ...]]
parsed = None  # type: tuple[float, ...] | None
if value is None:
return value
if itype == "date":
@@ -486,7 +486,7 @@ class CSSMatch(_DocumentNav):
self,
selectors: ct.SelectorList,
scope: bs4.Tag,
namespaces: Optional[ct.Namespaces],
namespaces: ct.Namespaces | None,
flags: int
) -> None:
"""Initialize."""
@@ -545,19 +545,19 @@ class CSSMatch(_DocumentNav):

return self.get_tag_ns(el) == NS_XHTML

def get_tag(self, el: bs4.Tag) -> Optional[str]:
def get_tag(self, el: bs4.Tag) -> str | None:
"""Get tag."""

name = self.get_tag_name(el)
return util.lower(name) if name is not None and not self.is_xml else name

def get_prefix(self, el: bs4.Tag) -> Optional[str]:
def get_prefix(self, el: bs4.Tag) -> str | None:
"""Get prefix."""

prefix = self.get_prefix_name(el)
return util.lower(prefix) if prefix is not None and not self.is_xml else prefix

def find_bidi(self, el: bs4.Tag) -> Optional[int]:
def find_bidi(self, el: bs4.Tag) -> int | None:
"""Get directionality from element text."""

for node in self.get_children(el, tags=False):
@@ -653,8 +653,8 @@ class CSSMatch(_DocumentNav):
self,
el: bs4.Tag,
attr: str,
prefix: Optional[str]
) -> Optional[str | Sequence[str]]:
prefix: str | None
) -> str | Sequence[str] | None:
"""Match attribute name and return value if it exists."""

value = None
@@ -751,7 +751,7 @@ class CSSMatch(_DocumentNav):
name not in (self.get_tag(el), '*')
)

def match_tag(self, el: bs4.Tag, tag: Optional[ct.SelectorTag]) -> bool:
def match_tag(self, el: bs4.Tag, tag: ct.SelectorTag | None) -> bool:
"""Match the tag."""

match = True
@@ -1030,7 +1030,7 @@ class CSSMatch(_DocumentNav):
"""Match element if it contains text."""

match = True
content = None  # type: Optional[str | Sequence[str]]
content = None  # type: str | Sequence[str] | None
for contain_list in contains:
if content is None:
if contain_list.own:
@@ -1099,7 +1099,7 @@ class CSSMatch(_DocumentNav):
match = False
name = cast(str, self.get_attribute_by_name(el, 'name'))

def get_parent_form(el: bs4.Tag) -> Optional[bs4.Tag]:
def get_parent_form(el: bs4.Tag) -> bs4.Tag | None:
"""Find this input's form."""
form = None
parent = self.get_parent(el, no_iframe=True)
@@ -1478,7 +1478,7 @@ class CSSMatch(_DocumentNav):
if lim < 1:
break

def closest(self) -> Optional[bs4.Tag]:
def closest(self) -> bs4.Tag | None:
"""Match closest ancestor."""

current = self.tag
@@ -1506,7 +1506,7 @@ class SoupSieve(ct.Immutable):

pattern: str
selectors: ct.SelectorList
namespaces: Optional[ct.Namespaces]
namespaces: ct.Namespaces | None
custom: dict[str, str]
flags: int

@@ -1516,8 +1516,8 @@ class SoupSieve(ct.Immutable):
self,
pattern: str,
selectors: ct.SelectorList,
namespaces: Optional[ct.Namespaces],
custom: Optional[ct.CustomSelectors],
namespaces: ct.Namespaces | None,
custom: ct.CustomSelectors | None,
flags: int
):
"""Initialize."""
@@ -7,7 +7,7 @@ from . import css_match as cm
from . import css_types as ct
from .util import SelectorSyntaxError
import warnings
from typing import Optional, Match, Any, Iterator, cast
from typing import Match, Any, Iterator, cast

UNICODE_REPLACEMENT_CHAR = 0xFFFD

@@ -113,7 +113,7 @@ VALUE = r'''
'''.format(nl=NEWLINE, ident=IDENTIFIER)
# Attribute value comparison. `!=` is handled special as it is non-standard.
ATTR = r'''
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}+(?P<case>[is]))?)?{ws}*\]
(?:{ws}*(?P<cmp>[!~^|*$]?=){ws}*(?P<value>{value})(?:{ws}*(?P<case>[is]))?)?{ws}*\]
'''.format(ws=WSC, value=VALUE)

# Selector patterns
@@ -207,8 +207,8 @@ _MAXCACHE = 500
@lru_cache(maxsize=_MAXCACHE)
def _cached_css_compile(
pattern: str,
namespaces: Optional[ct.Namespaces],
custom: Optional[ct.CustomSelectors],
namespaces: ct.Namespaces | None,
custom: ct.CustomSelectors | None,
flags: int
) -> cm.SoupSieve:
"""Cached CSS compile."""
@@ -233,7 +233,7 @@ def _purge_cache() -> None:
_cached_css_compile.cache_clear()


def process_custom(custom: Optional[ct.CustomSelectors]) -> dict[str, str | ct.SelectorList]:
def process_custom(custom: ct.CustomSelectors | None) -> dict[str, str | ct.SelectorList]:
"""Process custom."""

custom_selectors = {}
@@ -317,7 +317,7 @@ class SelectorPattern:

return self.name

def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
"""Match the selector."""

return self.re_pattern.match(selector, index)
@@ -336,7 +336,7 @@ class SpecialPseudoPattern(SelectorPattern):
for pseudo in p[1]:
self.patterns[pseudo] = pattern

self.matched_name = None  # type: Optional[SelectorPattern]
self.matched_name = None  # type: SelectorPattern | None
self.re_pseudo_name = re.compile(PAT_PSEUDO_CLASS_SPECIAL, re.I | re.X | re.U)

def get_name(self) -> str:
@@ -344,7 +344,7 @@ class SpecialPseudoPattern(SelectorPattern):

return '' if self.matched_name is None else self.matched_name.get_name()

def match(self, selector: str, index: int, flags: int) -> Optional[Match[str]]:
def match(self, selector: str, index: int, flags: int) -> Match[str] | None:
"""Match the selector."""

pseudo = None
@@ -372,14 +372,14 @@ class _Selector:
def __init__(self, **kwargs: Any) -> None:
"""Initialize."""

self.tag = kwargs.get('tag', None)  # type: Optional[ct.SelectorTag]
self.tag = kwargs.get('tag', None)  # type: ct.SelectorTag | None
self.ids = kwargs.get('ids', [])  # type: list[str]
self.classes = kwargs.get('classes', [])  # type: list[str]
self.attributes = kwargs.get('attributes', [])  # type: list[ct.SelectorAttribute]
self.nth = kwargs.get('nth', [])  # type: list[ct.SelectorNth]
self.selectors = kwargs.get('selectors', [])  # type: list[ct.SelectorList]
self.relations = kwargs.get('relations', [])  # type: list[_Selector]
self.rel_type = kwargs.get('rel_type', None)  # type: Optional[str]
self.rel_type = kwargs.get('rel_type', None)  # type: str | None
self.contains = kwargs.get('contains', [])  # type: list[ct.SelectorContains]
self.lang = kwargs.get('lang', [])  # type: list[ct.SelectorLang]
self.flags = kwargs.get('flags', 0)  # type: int
@@ -462,7 +462,7 @@ class CSSParser:
def __init__(
self,
selector: str,
custom: Optional[dict[str, str | ct.SelectorList]] = None,
custom: dict[str, str | ct.SelectorList] | None = None,
flags: int = 0
) -> None:
"""Initialize."""
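The one behavioral change in this file is the `ATTR` pattern: the whitespace before an attribute selector's case-sensitivity flag goes from required (`{ws}+`) to optional (`{ws}*`), so a flag written flush against a quoted value now parses, as CSS allows. A sketch with hypothetical markup:

    from bs4 import BeautifulSoup
    import soupsieve as sv

    soup = BeautifulSoup('<div id="Test"></div>', 'html.parser')
    sv.select('div[id="test" i]', soup)   # matched before and after
    sv.select('div[id="test"i]', soup)    # only parses with the relaxed pattern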
@@ -2,7 +2,7 @@
from __future__ import annotations
import copyreg
from .pretty import pretty
from typing import Any, Iterator, Hashable, Optional, Pattern, Iterable, Mapping
from typing import Any, Iterator, Hashable, Pattern, Iterable, Mapping

__all__ = (
'Selector',
@@ -189,28 +189,28 @@ class Selector(Immutable):
'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
)

tag: Optional[SelectorTag]
tag: SelectorTag | None
ids: tuple[str, ...]
classes: tuple[str, ...]
attributes: tuple[SelectorAttribute, ...]
nth: tuple[SelectorNth, ...]
selectors: tuple[SelectorList, ...]
relation: SelectorList
rel_type: Optional[str]
rel_type: str | None
contains: tuple[SelectorContains, ...]
lang: tuple[SelectorLang, ...]
flags: int

def __init__(
self,
tag: Optional[SelectorTag],
tag: SelectorTag | None,
ids: tuple[str, ...],
classes: tuple[str, ...],
attributes: tuple[SelectorAttribute, ...],
nth: tuple[SelectorNth, ...],
selectors: tuple[SelectorList, ...],
relation: SelectorList,
rel_type: Optional[str],
rel_type: str | None,
contains: tuple[SelectorContains, ...],
lang: tuple[SelectorLang, ...],
flags: int
@@ -247,9 +247,9 @@ class SelectorTag(Immutable):
__slots__ = ("name", "prefix", "_hash")

name: str
prefix: Optional[str]
prefix: str | None

def __init__(self, name: str, prefix: Optional[str]) -> None:
def __init__(self, name: str, prefix: str | None) -> None:
"""Initialize."""

super().__init__(name=name, prefix=prefix)
@@ -262,15 +262,15 @@ class SelectorAttribute(Immutable):

attribute: str
prefix: str
pattern: Optional[Pattern[str]]
xml_type_pattern: Optional[Pattern[str]]
pattern: Pattern[str] | None
xml_type_pattern: Pattern[str] | None

def __init__(
self,
attribute: str,
prefix: str,
pattern: Optional[Pattern[str]],
xml_type_pattern: Optional[Pattern[str]]
pattern: Pattern[str] | None,
xml_type_pattern: Pattern[str] | None
) -> None:
"""Initialize."""

@@ -360,7 +360,7 @@ class SelectorList(Immutable):

def __init__(
self,
selectors: Optional[Iterable[Selector | SelectorNull]] = None,
selectors: Iterable[Selector | SelectorNull] | None = None,
is_not: bool = False,
is_html: bool = False
) -> None:
@@ -3,7 +3,7 @@ from __future__ import annotations
from functools import wraps, lru_cache
import warnings
import re
from typing import Callable, Any, Optional
from typing import Callable, Any

DEBUG = 0x00001

@@ -27,7 +27,7 @@ def lower(string: str) -> str:
class SelectorSyntaxError(Exception):
"""Syntax error in a CSS selector."""

def __init__(self, msg: str, pattern: Optional[str] = None, index: Optional[int] = None) -> None:
def __init__(self, msg: str, pattern: str | None = None, index: int | None = None) -> None:
"""Initialize."""

self.line = None
@@ -84,7 +84,7 @@ def get_pattern_context(pattern: str, index: int) -> tuple[str, int, int]:
col = 1
text = []  # type: list[str]
line = 1
offset = None  # type: Optional[int]
offset = None  # type: int | None

# Split pattern by newline and handle the text before the newline
for m in RE_PATTERN_LINE_SPLIT.finditer(pattern):
@@ -1,6 +1,6 @@
# IANA versions like 2020a are not valid PEP 440 identifiers; the recommended
# way to translate the version is to use YYYY.n where `n` is a 0-based index.
__version__ = "2022.7"
__version__ = "2023.3"

# This exposes the original IANA version number.
IANA_VERSION = "2022g"
IANA_VERSION = "2023c"
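The bundled tzdata metadata moves from IANA release 2022g to 2023c. A quick check of what actually ships (assuming the package is importable as `tzdata`):

    import tzdata

    print(tzdata.__version__)     # 2023.3
    print(tzdata.IANA_VERSION)    # 2023c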
Binary files not shown (11 compiled time zone files updated).
@@ -238,7 +238,7 @@ SY Syria
SZ Eswatini (Swaziland)
TC Turks & Caicos Is
TD Chad
TF French Southern Territories
TF French S. Terr.
TG Togo
TH Thailand
TJ Tajikistan
@@ -72,11 +72,11 @@ Leap 2016 Dec 31 23:59:60 + S
# Any additional leap seconds will come after this.
# This Expires line is commented out for now,
# so that pre-2020a zic implementations do not reject this file.
#Expires 2023 Jun 28 00:00:00
#Expires 2023 Dec 28 00:00:00

# POSIX timestamps for the data in this file:
#updated 1467936000 (2016-07-08 00:00:00 UTC)
#expires 1687910400 (2023-06-28 00:00:00 UTC)
#expires 1703721600 (2023-12-28 00:00:00 UTC)

# Updated through IERS Bulletin C64
# File expires on: 28 June 2023
# Updated through IERS Bulletin C65
# File expires on: 28 December 2023
@@ -1,4 +1,4 @@
# version 2022g
# version 2023c
# This zic input file is in the public domain.
R d 1916 o - Jun 14 23s 1 S
R d 1916 1919 - O Su>=1 23s 0 -
@@ -75,6 +75,8 @@ R K 2014 o - May 15 24 1 S
R K 2014 o - Jun 26 24 0 -
R K 2014 o - Jul 31 24 1 S
R K 2014 o - S lastTh 24 0 -
R K 2023 ma - Ap lastF 0 1 S
R K 2023 ma - O lastTh 24 0 -
Z Africa/Cairo 2:5:9 - LMT 1900 O
2 K EE%sT
Z Africa/Bissau -1:2:20 - LMT 1912 Ja 1 1u
@@ -172,7 +174,7 @@ R M 2021 o - May 16 2 0 -
R M 2022 o - Mar 27 3 -1 -
R M 2022 o - May 8 2 0 -
R M 2023 o - Mar 19 3 -1 -
R M 2023 o - Ap 30 2 0 -
R M 2023 o - Ap 23 2 0 -
R M 2024 o - Mar 10 3 -1 -
R M 2024 o - Ap 14 2 0 -
R M 2025 o - F 23 3 -1 -
@@ -188,7 +190,7 @@ R M 2029 o - F 18 2 0 -
R M 2029 o - D 30 3 -1 -
R M 2030 o - F 10 2 0 -
R M 2030 o - D 22 3 -1 -
R M 2031 o - F 2 2 0 -
R M 2031 o - Ja 26 2 0 -
R M 2031 o - D 14 3 -1 -
R M 2032 o - Ja 18 2 0 -
R M 2032 o - N 28 3 -1 -
@@ -204,7 +206,7 @@ R M 2036 o - N 23 2 0 -
R M 2037 o - O 4 3 -1 -
R M 2037 o - N 15 2 0 -
R M 2038 o - S 26 3 -1 -
R M 2038 o - N 7 2 0 -
R M 2038 o - O 31 2 0 -
R M 2039 o - S 18 3 -1 -
R M 2039 o - O 23 2 0 -
R M 2040 o - S 2 3 -1 -
@@ -220,7 +222,7 @@ R M 2044 o - Au 28 2 0 -
R M 2045 o - Jul 9 3 -1 -
R M 2045 o - Au 20 2 0 -
R M 2046 o - Jul 1 3 -1 -
R M 2046 o - Au 12 2 0 -
R M 2046 o - Au 5 2 0 -
R M 2047 o - Jun 23 3 -1 -
R M 2047 o - Jul 28 2 0 -
R M 2048 o - Jun 7 3 -1 -
@@ -236,7 +238,7 @@ R M 2052 o - Jun 2 2 0 -
R M 2053 o - Ap 13 3 -1 -
R M 2053 o - May 25 2 0 -
R M 2054 o - Ap 5 3 -1 -
R M 2054 o - May 17 2 0 -
R M 2054 o - May 10 2 0 -
R M 2055 o - Mar 28 3 -1 -
R M 2055 o - May 2 2 0 -
R M 2056 o - Mar 12 3 -1 -
@@ -252,7 +254,7 @@ R M 2060 o - Mar 7 2 0 -
R M 2061 o - Ja 16 3 -1 -
R M 2061 o - F 27 2 0 -
R M 2062 o - Ja 8 3 -1 -
R M 2062 o - F 19 2 0 -
R M 2062 o - F 12 2 0 -
R M 2062 o - D 31 3 -1 -
R M 2063 o - F 4 2 0 -
R M 2063 o - D 16 3 -1 -
@@ -268,7 +270,7 @@ R M 2067 o - D 11 2 0 -
R M 2068 o - O 21 3 -1 -
R M 2068 o - D 2 2 0 -
R M 2069 o - O 13 3 -1 -
R M 2069 o - N 24 2 0 -
R M 2069 o - N 17 2 0 -
R M 2070 o - O 5 3 -1 -
R M 2070 o - N 9 2 0 -
R M 2071 o - S 20 3 -1 -
@@ -284,7 +286,7 @@ R M 2075 o - S 15 2 0 -
R M 2076 o - Jul 26 3 -1 -
R M 2076 o - S 6 2 0 -
R M 2077 o - Jul 18 3 -1 -
R M 2077 o - Au 29 2 0 -
R M 2077 o - Au 22 2 0 -
R M 2078 o - Jul 10 3 -1 -
R M 2078 o - Au 14 2 0 -
R M 2079 o - Jun 25 3 -1 -
@@ -294,13 +296,13 @@ R M 2080 o - Jul 21 2 0 -
R M 2081 o - Jun 1 3 -1 -
R M 2081 o - Jul 13 2 0 -
R M 2082 o - May 24 3 -1 -
R M 2082 o - Jul 5 2 0 -
R M 2082 o - Jun 28 2 0 -
R M 2083 o - May 16 3 -1 -
R M 2083 o - Jun 20 2 0 -
R M 2084 o - Ap 30 3 -1 -
R M 2084 o - Jun 11 2 0 -
R M 2085 o - Ap 22 3 -1 -
R M 2085 o - Jun 3 2 0 -
R M 2085 o - May 27 2 0 -
R M 2086 o - Ap 14 3 -1 -
R M 2086 o - May 19 2 0 -
R M 2087 o - Mar 30 3 -1 -
@@ -997,8 +999,86 @@ R P 2020 2021 - Mar Sa<=30 0 1 S
R P 2020 o - O 24 1 0 -
R P 2021 o - O 29 1 0 -
R P 2022 o - Mar 27 0 1 S
R P 2022 ma - O Sa<=30 2 0 -
R P 2023 ma - Mar Sa<=30 2 1 S
R P 2022 2035 - O Sa<=30 2 0 -
R P 2023 o - Ap 29 2 1 S
R P 2024 o - Ap 13 2 1 S
R P 2025 o - Ap 5 2 1 S
R P 2026 2054 - Mar Sa<=30 2 1 S
R P 2036 o - O 18 2 0 -
R P 2037 o - O 10 2 0 -
R P 2038 o - S 25 2 0 -
R P 2039 o - S 17 2 0 -
R P 2039 o - O 22 2 1 S
R P 2039 2067 - O Sa<=30 2 0 -
R P 2040 o - S 1 2 0 -
R P 2040 o - O 13 2 1 S
R P 2041 o - Au 24 2 0 -
R P 2041 o - S 28 2 1 S
R P 2042 o - Au 16 2 0 -
R P 2042 o - S 20 2 1 S
R P 2043 o - Au 1 2 0 -
R P 2043 o - S 12 2 1 S
R P 2044 o - Jul 23 2 0 -
R P 2044 o - Au 27 2 1 S
R P 2045 o - Jul 15 2 0 -
R P 2045 o - Au 19 2 1 S
R P 2046 o - Jun 30 2 0 -
R P 2046 o - Au 11 2 1 S
R P 2047 o - Jun 22 2 0 -
R P 2047 o - Jul 27 2 1 S
R P 2048 o - Jun 6 2 0 -
R P 2048 o - Jul 18 2 1 S
R P 2049 o - May 29 2 0 -
R P 2049 o - Jul 3 2 1 S
R P 2050 o - May 21 2 0 -
R P 2050 o - Jun 25 2 1 S
R P 2051 o - May 6 2 0 -
R P 2051 o - Jun 17 2 1 S
R P 2052 o - Ap 27 2 0 -
R P 2052 o - Jun 1 2 1 S
R P 2053 o - Ap 12 2 0 -
R P 2053 o - May 24 2 1 S
R P 2054 o - Ap 4 2 0 -
R P 2054 o - May 16 2 1 S
R P 2055 o - May 1 2 1 S
R P 2056 o - Ap 22 2 1 S
R P 2057 o - Ap 7 2 1 S
R P 2058 ma - Mar Sa<=30 2 1 S
R P 2068 o - O 20 2 0 -
R P 2069 o - O 12 2 0 -
R P 2070 o - O 4 2 0 -
R P 2071 o - S 19 2 0 -
R P 2072 o - S 10 2 0 -
R P 2072 o - O 15 2 1 S
R P 2073 o - S 2 2 0 -
R P 2073 o - O 7 2 1 S
R P 2074 o - Au 18 2 0 -
R P 2074 o - S 29 2 1 S
R P 2075 o - Au 10 2 0 -
R P 2075 o - S 14 2 1 S
R P 2075 ma - O Sa<=30 2 0 -
R P 2076 o - Jul 25 2 0 -
R P 2076 o - S 5 2 1 S
R P 2077 o - Jul 17 2 0 -
R P 2077 o - Au 28 2 1 S
R P 2078 o - Jul 9 2 0 -
R P 2078 o - Au 13 2 1 S
R P 2079 o - Jun 24 2 0 -
R P 2079 o - Au 5 2 1 S
R P 2080 o - Jun 15 2 0 -
R P 2080 o - Jul 20 2 1 S
R P 2081 o - Jun 7 2 0 -
R P 2081 o - Jul 12 2 1 S
R P 2082 o - May 23 2 0 -
R P 2082 o - Jul 4 2 1 S
R P 2083 o - May 15 2 0 -
R P 2083 o - Jun 19 2 1 S
R P 2084 o - Ap 29 2 0 -
R P 2084 o - Jun 10 2 1 S
R P 2085 o - Ap 21 2 0 -
R P 2085 o - Jun 2 2 1 S
R P 2086 o - Ap 13 2 0 -
R P 2086 o - May 18 2 1 S
Z Asia/Gaza 2:17:52 - LMT 1900 O
2 Z EET/EEST 1948 May 15
2 K EE%sT 1967 Jun 5
@@ -1754,8 +1834,8 @@ Z America/Scoresbysund -1:27:52 - LMT 1916 Jul 28
-1 E -01/+00
Z America/Nuuk -3:26:56 - LMT 1916 Jul 28
-3 - -03 1980 Ap 6 2
-3 E -03/-02 2023 Mar 25 22
-2 - -02
-3 E -03/-02 2023 O 29 1u
-2 E -02/-01
Z America/Thule -4:35:8 - LMT 1916 Jul 28
-4 Th A%sT
Z Europe/Tallinn 1:39 - LMT 1880
@@ -2175,13 +2255,13 @@ Z Europe/Volgograd 2:57:40 - LMT 1920 Ja 3
3 - +03 1930 Jun 21
4 - +04 1961 N 11
4 R +04/+05 1988 Mar 27 2s
3 R +03/+04 1991 Mar 31 2s
3 R MSK/MSD 1991 Mar 31 2s
4 - +04 1992 Mar 29 2s
3 R +03/+04 2011 Mar 27 2s
4 - +04 2014 O 26 2s
3 - +03 2018 O 28 2s
3 R MSK/MSD 2011 Mar 27 2s
4 - MSK 2014 O 26 2s
3 - MSK 2018 O 28 2s
4 - +04 2020 D 27 2s
3 - +03
3 - MSK
Z Europe/Saratov 3:4:18 - LMT 1919 Jul 1 0u
3 - +03 1930 Jun 21
4 R +04/+05 1988 Mar 27 2s
@@ -2194,11 +2274,11 @@ Z Europe/Saratov 3:4:18 - LMT 1919 Jul 1 0u
Z Europe/Kirov 3:18:48 - LMT 1919 Jul 1 0u
3 - +03 1930 Jun 21
4 R +04/+05 1989 Mar 26 2s
3 R +03/+04 1991 Mar 31 2s
3 R MSK/MSD 1991 Mar 31 2s
4 - +04 1992 Mar 29 2s
3 R +03/+04 2011 Mar 27 2s
4 - +04 2014 O 26 2s
3 - +03
3 R MSK/MSD 2011 Mar 27 2s
4 - MSK 2014 O 26 2s
3 - MSK
Z Europe/Samara 3:20:20 - LMT 1919 Jul 1 0u
3 - +03 1930 Jun 21
4 - +04 1935 Ja 27
@@ -3070,9 +3150,6 @@ Z America/Cambridge_Bay 0 - -00 1920
-5 - EST 2000 N 5
-6 - CST 2001 Ap 1 3
-7 C M%sT
Z America/Yellowknife 0 - -00 1935
-7 Y M%sT 1980
-7 C M%sT
Z America/Inuvik 0 - -00 1953
-8 Y P%sT 1979 Ap lastSu 2
-7 Y M%sT 1980
@@ -4171,6 +4248,7 @@ L America/Argentina/Cordoba America/Rosario
L America/Tijuana America/Santa_Isabel
L America/Denver America/Shiprock
L America/Toronto America/Thunder_Bay
L America/Edmonton America/Yellowknife
L Pacific/Auckland Antarctica/South_Pole
L Asia/Shanghai Asia/Chongqing
L Asia/Shanghai Asia/Harbin
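The data changes above track the IANA 2023a-2023c releases: Egypt resumes DST, Morocco's and Palestine's projected rule tables shift, America/Nuuk moves to a -02 base with EU daylight rules from October 2023, and America/Yellowknife is demoted from a full zone to a link on America/Edmonton. Existing lookups keep working because links resolve transparently; a hedged sketch with the standard-library resolver:

    from zoneinfo import ZoneInfo

    # The link still resolves; it now shares America/Edmonton's data.
    yk = ZoneInfo('America/Yellowknife')
    ed = ZoneInfo('America/Edmonton')
    print(yk.key, ed.key)   # keys are preserved even for links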
@@ -121,9 +121,8 @@ CA +744144-0944945 America/Resolute Central - NU (Resolute)
CA +624900-0920459 America/Rankin_Inlet Central - NU (central)
CA +5024-10439 America/Regina CST - SK (most areas)
CA +5017-10750 America/Swift_Current CST - SK (midwest)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); SK (W)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); NT (E); SK (W)
CA +690650-1050310 America/Cambridge_Bay Mountain - NU (west)
CA +6227-11421 America/Yellowknife Mountain - NT (central)
CA +682059-1334300 America/Inuvik Mountain - NT (west)
CA +4906-11631 America/Creston MST - BC (Creston)
CA +5546-12014 America/Dawson_Creek MST - BC (Dawson Cr, Ft St John)
@@ -139,7 +138,7 @@ CG -0416+01517 Africa/Brazzaville
CH +4723+00832 Europe/Zurich
CI +0519-00402 Africa/Abidjan
CK -2114-15946 Pacific/Rarotonga
CL -3327-07040 America/Santiago Chile (most areas)
CL -3327-07040 America/Santiago most of Chile
CL -5309-07055 America/Punta_Arenas Region of Magallanes
CL -2709-10926 Pacific/Easter Easter Island
CM +0403+00942 Africa/Douala
@@ -151,10 +150,10 @@ CU +2308-08222 America/Havana
CV +1455-02331 Atlantic/Cape_Verde
CW +1211-06900 America/Curacao
CX -1025+10543 Indian/Christmas
CY +3510+03322 Asia/Nicosia Cyprus (most areas)
CY +3510+03322 Asia/Nicosia most of Cyprus
CY +3507+03357 Asia/Famagusta Northern Cyprus
CZ +5005+01426 Europe/Prague
DE +5230+01322 Europe/Berlin Germany (most areas)
DE +5230+01322 Europe/Berlin most of Germany
DE +4742+00841 Europe/Busingen Busingen
DJ +1136+04309 Africa/Djibouti
DK +5540+01235 Europe/Copenhagen
@@ -187,7 +186,7 @@ GF +0456-05220 America/Cayenne
GG +492717-0023210 Europe/Guernsey
GH +0533-00013 Africa/Accra
GI +3608-00521 Europe/Gibraltar
GL +6411-05144 America/Nuuk Greenland (most areas)
GL +6411-05144 America/Nuuk most of Greenland
GL +7646-01840 America/Danmarkshavn National Park (east coast)
GL +7029-02158 America/Scoresbysund Scoresbysund/Ittoqqortoormiit
GL +7634-06847 America/Thule Thule/Pituffik
@@ -235,7 +234,7 @@ KP +3901+12545 Asia/Pyongyang
KR +3733+12658 Asia/Seoul
KW +2920+04759 Asia/Kuwait
KY +1918-08123 America/Cayman
KZ +4315+07657 Asia/Almaty Kazakhstan (most areas)
KZ +4315+07657 Asia/Almaty most of Kazakhstan
KZ +4448+06528 Asia/Qyzylorda Qyzylorda/Kyzylorda/Kzyl-Orda
KZ +5312+06337 Asia/Qostanay Qostanay/Kostanay/Kustanay
KZ +5017+05710 Asia/Aqtobe Aqtobe/Aktobe
@@ -259,12 +258,12 @@ MD +4700+02850 Europe/Chisinau
ME +4226+01916 Europe/Podgorica
MF +1804-06305 America/Marigot
MG -1855+04731 Indian/Antananarivo
MH +0709+17112 Pacific/Majuro Marshall Islands (most areas)
MH +0709+17112 Pacific/Majuro most of Marshall Islands
MH +0905+16720 Pacific/Kwajalein Kwajalein
MK +4159+02126 Europe/Skopje
ML +1239-00800 Africa/Bamako
MM +1647+09610 Asia/Yangon
MN +4755+10653 Asia/Ulaanbaatar Mongolia (most areas)
MN +4755+10653 Asia/Ulaanbaatar most of Mongolia
MN +4801+09139 Asia/Hovd Bayan-Olgiy, Govi-Altai, Hovd, Uvs, Zavkhan
MN +4804+11430 Asia/Choibalsan Dornod, Sukhbaatar
MO +221150+1133230 Asia/Macau
@@ -302,7 +301,7 @@ NO +5955+01045 Europe/Oslo
NP +2743+08519 Asia/Kathmandu
NR -0031+16655 Pacific/Nauru
NU -1901-16955 Pacific/Niue
NZ -3652+17446 Pacific/Auckland New Zealand (most areas)
NZ -3652+17446 Pacific/Auckland most of New Zealand
NZ -4357-17633 Pacific/Chatham Chatham Islands
OM +2336+05835 Asia/Muscat
PA +0858-07932 America/Panama
@@ -310,7 +309,7 @@ PE -1203-07703 America/Lima
PF -1732-14934 Pacific/Tahiti Society Islands
PF -0900-13930 Pacific/Marquesas Marquesas Islands
PF -2308-13457 Pacific/Gambier Gambier Islands
PG -0930+14710 Pacific/Port_Moresby Papua New Guinea (most areas)
PG -0930+14710 Pacific/Port_Moresby most of Papua New Guinea
PG -0613+15534 Pacific/Bougainville Bougainville
PH +1435+12100 Asia/Manila
PK +2452+06703 Asia/Karachi
@@ -356,7 +355,7 @@ RU +4310+13156 Asia/Vladivostok MSK+07 - Amur River
RU +643337+1431336 Asia/Ust-Nera MSK+07 - Oymyakonsky
RU +5934+15048 Asia/Magadan MSK+08 - Magadan
RU +4658+14242 Asia/Sakhalin MSK+08 - Sakhalin Island
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); North Kuril Is
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); N Kuril Is
RU +5301+15839 Asia/Kamchatka MSK+09 - Kamchatka
RU +6445+17729 Asia/Anadyr MSK+09 - Bering Sea
RW -0157+03004 Africa/Kigali
@@ -397,7 +396,7 @@ TT +1039-06131 America/Port_of_Spain
TV -0831+17913 Pacific/Funafuti
TW +2503+12130 Asia/Taipei
TZ -0648+03917 Africa/Dar_es_Salaam
UA +5026+03031 Europe/Kyiv Ukraine (most areas)
UA +5026+03031 Europe/Kyiv most of Ukraine
UG +0019+03225 Africa/Kampala
UM +2813-17722 Pacific/Midway Midway Islands
UM +1917+16637 Pacific/Wake Wake Island
@@ -420,7 +419,7 @@ US +465042-1012439 America/North_Dakota/New_Salem Central - ND (Morton rural)
US +471551-1014640 America/North_Dakota/Beulah Central - ND (Mercer)
US +394421-1045903 America/Denver Mountain (most areas)
US +433649-1161209 America/Boise Mountain - ID (south); OR (east)
US +332654-1120424 America/Phoenix MST - Arizona (except Navajo)
US +332654-1120424 America/Phoenix MST - AZ (except Navajo)
US +340308-1181434 America/Los_Angeles Pacific
US +611305-1495401 America/Anchorage Alaska (most areas)
US +581807-1342511 America/Juneau Alaska - Juneau area
@@ -428,7 +427,7 @@ US +571035-1351807 America/Sitka Alaska - Sitka area
US +550737-1313435 America/Metlakatla Alaska - Annette Island
US +593249-1394338 America/Yakutat Alaska - Yakutat
US +643004-1652423 America/Nome Alaska (west)
US +515248-1763929 America/Adak Aleutian Islands
US +515248-1763929 America/Adak Alaska - western Aleutians
US +211825-1575130 Pacific/Honolulu Hawaii
UY -345433-0561245 America/Montevideo
UZ +3940+06648 Asia/Samarkand Uzbekistan (west)
@@ -18,7 +18,10 @@
# Please see the theory.html file for how these names are chosen.
# If multiple timezones overlap a country, each has a row in the
# table, with each column 1 containing the country code.
# 4. Comments; present if and only if a country has multiple timezones.
# 4. Comments; present if and only if countries have multiple timezones,
# and useful only for those countries. For example, the comments
# for the row with countries CH,DE,LI and name Europe/Zurich
# are useful only for DE, since CH and LI have no other timezones.
#
# If a timezone covers multiple countries, the most-populous city is used,
# and that country is listed first in column 1; any other countries

@@ -34,7 +37,7 @@
#country-
#codes coordinates TZ comments
AD +4230+00131 Europe/Andorra
AE,OM,RE,SC,TF +2518+05518 Asia/Dubai UAE, Oman, Réunion, Seychelles, Crozet, Scattered Is
AE,OM,RE,SC,TF +2518+05518 Asia/Dubai Crozet, Scattered Is
AF +3431+06912 Asia/Kabul
AL +4120+01950 Europe/Tirane
AM +4011+04430 Asia/Yerevan

@@ -45,7 +48,7 @@ AQ -6448-06406 Antarctica/Palmer Palmer
AQ -6734-06808 Antarctica/Rothera Rothera
AQ -720041+0023206 Antarctica/Troll Troll
AR -3436-05827 America/Argentina/Buenos_Aires Buenos Aires (BA, CF)
AR -3124-06411 America/Argentina/Cordoba Argentina (most areas: CB, CC, CN, ER, FM, MN, SE, SF)
AR -3124-06411 America/Argentina/Cordoba most areas: CB, CC, CN, ER, FM, MN, SE, SF
AR -2447-06525 America/Argentina/Salta Salta (SA, LP, NQ, RN)
AR -2411-06518 America/Argentina/Jujuy Jujuy (JY)
AR -2649-06513 America/Argentina/Tucuman Tucumán (TM)

@@ -56,7 +59,7 @@ AR -3253-06849 America/Argentina/Mendoza Mendoza (MZ)
AR -3319-06621 America/Argentina/San_Luis San Luis (SL)
AR -5138-06913 America/Argentina/Rio_Gallegos Santa Cruz (SC)
AR -5448-06818 America/Argentina/Ushuaia Tierra del Fuego (TF)
AS,UM -1416-17042 Pacific/Pago_Pago Samoa, Midway
AS,UM -1416-17042 Pacific/Pago_Pago Midway
AT +4813+01620 Europe/Vienna
AU -3133+15905 Australia/Lord_Howe Lord Howe Island
AU -5430+15857 Antarctica/Macquarie Macquarie Island

@@ -101,26 +104,25 @@ CA +4439-06336 America/Halifax Atlantic - NS (most areas); PE
CA +4612-05957 America/Glace_Bay Atlantic - NS (Cape Breton)
CA +4606-06447 America/Moncton Atlantic - New Brunswick
CA +5320-06025 America/Goose_Bay Atlantic - Labrador (most areas)
CA,BS +4339-07923 America/Toronto Eastern - ON, QC (most areas), Bahamas
CA,BS +4339-07923 America/Toronto Eastern - ON, QC (most areas)
CA +6344-06828 America/Iqaluit Eastern - NU (most areas)
CA +4953-09709 America/Winnipeg Central - ON (west); Manitoba
CA +744144-0944945 America/Resolute Central - NU (Resolute)
CA +624900-0920459 America/Rankin_Inlet Central - NU (central)
CA +5024-10439 America/Regina CST - SK (most areas)
CA +5017-10750 America/Swift_Current CST - SK (midwest)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); SK (W)
CA +5333-11328 America/Edmonton Mountain - AB; BC (E); NT (E); SK (W)
CA +690650-1050310 America/Cambridge_Bay Mountain - NU (west)
CA +6227-11421 America/Yellowknife Mountain - NT (central)
CA +682059-1334300 America/Inuvik Mountain - NT (west)
CA +5546-12014 America/Dawson_Creek MST - BC (Dawson Cr, Ft St John)
CA +5848-12242 America/Fort_Nelson MST - BC (Ft Nelson)
CA +6043-13503 America/Whitehorse MST - Yukon (east)
CA +6404-13925 America/Dawson MST - Yukon (west)
CA +4916-12307 America/Vancouver Pacific - BC (most areas)
CH,DE,LI +4723+00832 Europe/Zurich Swiss time
CH,DE,LI +4723+00832 Europe/Zurich Büsingen
CI,BF,GH,GM,GN,IS,ML,MR,SH,SL,SN,TG +0519-00402 Africa/Abidjan
CK -2114-15946 Pacific/Rarotonga
CL -3327-07040 America/Santiago Chile (most areas)
CL -3327-07040 America/Santiago most of Chile
CL -5309-07055 America/Punta_Arenas Region of Magallanes
CL -2709-10926 Pacific/Easter Easter Island
CN +3114+12128 Asia/Shanghai Beijing Time

@@ -129,10 +131,10 @@ CO +0436-07405 America/Bogota
CR +0956-08405 America/Costa_Rica
CU +2308-08222 America/Havana
CV +1455-02331 Atlantic/Cape_Verde
CY +3510+03322 Asia/Nicosia Cyprus (most areas)
CY +3510+03322 Asia/Nicosia most of Cyprus
CY +3507+03357 Asia/Famagusta Northern Cyprus
CZ,SK +5005+01426 Europe/Prague
DE,DK,NO,SE,SJ +5230+01322 Europe/Berlin Germany (most areas), Scandinavia
DE,DK,NO,SE,SJ +5230+01322 Europe/Berlin most of Germany
DO +1828-06954 America/Santo_Domingo
DZ +3647+00303 Africa/Algiers
EC -0210-07950 America/Guayaquil Ecuador (mainland)

@@ -153,7 +155,7 @@ GB,GG,IM,JE +513030-0000731 Europe/London
GE +4143+04449 Asia/Tbilisi
GF +0456-05220 America/Cayenne
GI +3608-00521 Europe/Gibraltar
GL +6411-05144 America/Nuuk Greenland (most areas)
GL +6411-05144 America/Nuuk most of Greenland
GL +7646-01840 America/Danmarkshavn National Park (east coast)
GL +7029-02158 America/Scoresbysund Scoresbysund/Ittoqqortoormiit
GL +7634-06847 America/Thule Thule/Pituffik

@@ -183,12 +185,12 @@ JO +3157+03556 Asia/Amman
JP +353916+1394441 Asia/Tokyo
KE,DJ,ER,ET,KM,MG,SO,TZ,UG,YT -0117+03649 Africa/Nairobi
KG +4254+07436 Asia/Bishkek
KI,MH,TV,UM,WF +0125+17300 Pacific/Tarawa Gilberts, Marshalls, Tuvalu, Wallis & Futuna, Wake
KI,MH,TV,UM,WF +0125+17300 Pacific/Tarawa Gilberts, Marshalls, Wake
KI -0247-17143 Pacific/Kanton Phoenix Islands
KI +0152-15720 Pacific/Kiritimati Line Islands
KP +3901+12545 Asia/Pyongyang
KR +3733+12658 Asia/Seoul
KZ +4315+07657 Asia/Almaty Kazakhstan (most areas)
KZ +4315+07657 Asia/Almaty most of Kazakhstan
KZ +4448+06528 Asia/Qyzylorda Qyzylorda/Kyzylorda/Kzyl-Orda
KZ +5312+06337 Asia/Qostanay Qostanay/Kostanay/Kustanay
KZ +5017+05710 Asia/Aqtobe Aqtöbe/Aktobe

@@ -205,14 +207,14 @@ MA +3339-00735 Africa/Casablanca
MD +4700+02850 Europe/Chisinau
MH +0905+16720 Pacific/Kwajalein Kwajalein
MM,CC +1647+09610 Asia/Yangon
MN +4755+10653 Asia/Ulaanbaatar Mongolia (most areas)
MN +4755+10653 Asia/Ulaanbaatar most of Mongolia
MN +4801+09139 Asia/Hovd Bayan-Ölgii, Govi-Altai, Hovd, Uvs, Zavkhan
MN +4804+11430 Asia/Choibalsan Dornod, Sükhbaatar
MO +221150+1133230 Asia/Macau
MQ +1436-06105 America/Martinique
MT +3554+01431 Europe/Malta
MU -2010+05730 Indian/Mauritius
MV,TF +0410+07330 Indian/Maldives Maldives, Kerguelen, St Paul I, Amsterdam I
MV,TF +0410+07330 Indian/Maldives Kerguelen, St Paul I, Amsterdam I
MX +1924-09909 America/Mexico_City Central Mexico
MX +2105-08646 America/Cancun Quintana Roo
MX +2058-08937 America/Merida Campeche, Yucatán

@@ -225,7 +227,7 @@ MX +2313-10625 America/Mazatlan Baja California Sur, Nayarit (most areas), Sinaloa
MX +2048-10515 America/Bahia_Banderas Bahía de Banderas
MX +2904-11058 America/Hermosillo Sonora
MX +3232-11701 America/Tijuana Baja California
MY,BN +0133+11020 Asia/Kuching Sabah, Sarawak, Brunei
MY,BN +0133+11020 Asia/Kuching Sabah, Sarawak
MZ,BI,BW,CD,MW,RW,ZM,ZW -2558+03235 Africa/Maputo Central Africa Time
NA -2234+01706 Africa/Windhoek
NC -2216+16627 Pacific/Noumea

@@ -237,7 +239,7 @@ NR -0031+16655 Pacific/Nauru
NU -1901-16955 Pacific/Niue
NZ,AQ -3652+17446 Pacific/Auckland New Zealand time
NZ -4357-17633 Pacific/Chatham Chatham Islands
PA,CA,KY +0858-07932 America/Panama EST - Panama, Cayman, ON (Atikokan), NU (Coral H)
PA,CA,KY +0858-07932 America/Panama EST - ON (Atikokan), NU (Coral H)
PE -1203-07703 America/Lima
PF -1732-14934 Pacific/Tahiti Society Islands
PF -0900-13930 Pacific/Marquesas Marquesas Islands

@@ -285,13 +287,13 @@ RU +4310+13156 Asia/Vladivostok MSK+07 - Amur River
RU +643337+1431336 Asia/Ust-Nera MSK+07 - Oymyakonsky
RU +5934+15048 Asia/Magadan MSK+08 - Magadan
RU +4658+14242 Asia/Sakhalin MSK+08 - Sakhalin Island
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); North Kuril Is
RU +6728+15343 Asia/Srednekolymsk MSK+08 - Sakha (E); N Kuril Is
RU +5301+15839 Asia/Kamchatka MSK+09 - Kamchatka
RU +6445+17729 Asia/Anadyr MSK+09 - Bering Sea
SA,AQ,KW,YE +2438+04643 Asia/Riyadh Arabia, Syowa
SB,FM -0932+16012 Pacific/Guadalcanal Solomons, Pohnpei
SA,AQ,KW,YE +2438+04643 Asia/Riyadh Syowa
SB,FM -0932+16012 Pacific/Guadalcanal Pohnpei
SD +1536+03232 Africa/Khartoum
SG,MY +0117+10351 Asia/Singapore Singapore, peninsular Malaysia
SG,MY +0117+10351 Asia/Singapore peninsular Malaysia
SR +0550-05510 America/Paramaribo
SS +0451+03137 Africa/Juba
ST +0020+00644 Africa/Sao_Tome

@@ -299,7 +301,7 @@ SV +1342-08912 America/El_Salvador
SY +3330+03618 Asia/Damascus
TC +2128-07108 America/Grand_Turk
TD +1207+01503 Africa/Ndjamena
TH,CX,KH,LA,VN +1345+10031 Asia/Bangkok Indochina (most areas)
TH,CX,KH,LA,VN +1345+10031 Asia/Bangkok north Vietnam
TJ +3835+06848 Asia/Dushanbe
TK -0922-17114 Pacific/Fakaofo
TL -0833+12535 Asia/Dili

@@ -308,7 +310,7 @@ TN +3648+01011 Africa/Tunis
TO -210800-1751200 Pacific/Tongatapu
TR +4101+02858 Europe/Istanbul
TW +2503+12130 Asia/Taipei
UA +5026+03031 Europe/Kyiv Ukraine (most areas)
UA +5026+03031 Europe/Kyiv most of Ukraine
US +404251-0740023 America/New_York Eastern (most areas)
US +421953-0830245 America/Detroit Eastern - MI (most areas)
US +381515-0854534 America/Kentucky/Louisville Eastern - KY (Louisville area)

@@ -328,7 +330,7 @@ US +465042-1012439 America/North_Dakota/New_Salem Central - ND (Morton rural)
US +471551-1014640 America/North_Dakota/Beulah Central - ND (Mercer)
US +394421-1045903 America/Denver Mountain (most areas)
US +433649-1161209 America/Boise Mountain - ID (south); OR (east)
US,CA +332654-1120424 America/Phoenix MST - Arizona (except Navajo), Creston BC
US,CA +332654-1120424 America/Phoenix MST - AZ (most areas), Creston BC
US +340308-1181434 America/Los_Angeles Pacific
US +611305-1495401 America/Anchorage Alaska (most areas)
US +581807-1342511 America/Juneau Alaska - Juneau area

@@ -336,13 +338,13 @@ US +571035-1351807 America/Sitka Alaska - Sitka area
US +550737-1313435 America/Metlakatla Alaska - Annette Island
US +593249-1394338 America/Yakutat Alaska - Yakutat
US +643004-1652423 America/Nome Alaska (west)
US +515248-1763929 America/Adak Aleutian Islands
US,UM +211825-1575130 Pacific/Honolulu Hawaii
US +515248-1763929 America/Adak Alaska - western Aleutians
US +211825-1575130 Pacific/Honolulu Hawaii
UY -345433-0561245 America/Montevideo
UZ +3940+06648 Asia/Samarkand Uzbekistan (west)
UZ +4120+06918 Asia/Tashkent Uzbekistan (east)
VE +1030-06656 America/Caracas
VN +1045+10640 Asia/Ho_Chi_Minh Vietnam (south)
VN +1045+10640 Asia/Ho_Chi_Minh south Vietnam
VU -1740+16825 Pacific/Efate
WS -1350-17144 Pacific/Apia
ZA,LS,SZ -2615+02800 Africa/Johannesburg
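The header comments in the hunk above document the zone1970.tab layout: each data row holds a comma-separated list of country codes, an ISO 6709 coordinate pair, the TZ name, and a comment that appears only when a country spans multiple timezones. As a minimal illustration of consuming that layout (not part of this diff; the tab-delimited columns and the file path are assumptions stated here), a reader might look like this:

import csv

def read_zone1970(path='zone1970.tab'):
    """Yield (country_codes, coordinates, tz_name, comment) per data row."""
    with open(path, encoding='utf-8') as f:
        for row in csv.reader(f, delimiter='\t'):  # columns assumed tab-separated
            if not row or row[0].startswith('#'):
                continue  # skip the header comments shown above
            codes = row[0].split(',')  # first country listed is the most-populous
            comment = row[3] if len(row) > 3 else ''  # only multi-zone countries
            yield codes, row[1], row[2], comment

# e.g. every zone listed for Canada:
#   [tz for codes, _, tz, _ in read_zone1970() if 'CA' in codes]
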
@@ -243,7 +243,6 @@ America/Iqaluit
America/Resolute
America/Rankin_Inlet
America/Cambridge_Bay
America/Yellowknife
America/Inuvik
America/Whitehorse
America/Dawson

@@ -561,6 +560,7 @@ America/Rosario
America/Santa_Isabel
America/Shiprock
America/Thunder_Bay
America/Yellowknife
Antarctica/South_Pole
Asia/Chongqing
Asia/Harbin

@@ -1,11 +1,11 @@
apscheduler==3.10.1
importlib-metadata==6.0.0
importlib-metadata==6.8.0
importlib-resources==5.12.0
pyinstaller==5.8.0
pyopenssl==23.0.0
pycryptodomex==3.17
pyinstaller==5.13.0
pyopenssl==23.2.0
pycryptodomex==3.18.0

pyobjc-core==9.0.1; platform_system == "Darwin"
pyobjc-framework-Cocoa==9.0.1; platform_system == "Darwin"
pyobjc-core==9.2; platform_system == "Darwin"
pyobjc-framework-Cocoa==9.2; platform_system == "Darwin"

pywin32==305; platform_system == "Windows"
pywin32==306; platform_system == "Windows"

@@ -326,70 +326,7 @@ class ActivityProcessor(object):

        # Get the last insert row id
        last_id = self.db.last_insert_id()
        new_session = prev_session = None
        watched = False

        if session['live']:
            # Check if we should group the session, select the last guid from the user
            query = "SELECT session_history.id, session_history_metadata.guid, session_history.reference_id " \
                    "FROM session_history " \
                    "JOIN session_history_metadata ON session_history.id == session_history_metadata.id " \
                    "WHERE session_history.user_id = ? ORDER BY session_history.id DESC LIMIT 1 "

            args = [session['user_id']]

            result = self.db.select(query=query, args=args)

            if len(result) > 0:
                new_session = {'id': last_id,
                               'guid': metadata['guid'],
                               'reference_id': last_id}

                prev_session = {'id': result[0]['id'],
                                'guid': result[0]['guid'],
                                'reference_id': result[0]['reference_id']}

        else:
            # Check if we should group the session, select the last two rows from the user
            query = "SELECT id, rating_key, view_offset, reference_id FROM session_history " \
                    "WHERE user_id = ? AND rating_key = ? ORDER BY id DESC LIMIT 2 "

            args = [session['user_id'], session['rating_key']]

            result = self.db.select(query=query, args=args)

            if len(result) > 1:
                new_session = {'id': result[0]['id'],
                               'rating_key': result[0]['rating_key'],
                               'view_offset': result[0]['view_offset'],
                               'reference_id': result[0]['reference_id']}

                prev_session = {'id': result[1]['id'],
                                'rating_key': result[1]['rating_key'],
                                'view_offset': result[1]['view_offset'],
                                'reference_id': result[1]['reference_id']}

        marker_first, marker_final = helpers.get_first_final_marker(metadata['markers'])
        watched = helpers.check_watched(
            session['media_type'], session['view_offset'], session['duration'],
            marker_first, marker_final
        )

        query = "UPDATE session_history SET reference_id = ? WHERE id = ? "

        # If previous session view offset less than watched percent,
        # and new session view offset is greater,
        # then set the reference_id to the previous row,
        # else set the reference_id to the new id
        if prev_session is None and new_session is None:
            args = [last_id, last_id]
        elif watched and prev_session['view_offset'] <= new_session['view_offset'] or \
                session['live'] and prev_session['guid'] == new_session['guid']:
            args = [prev_session['reference_id'], new_session['id']]
        else:
            args = [new_session['id'], new_session['id']]

        self.db.action(query=query, args=args)
        self.group_history(last_id, session, metadata)

        # logger.debug("Tautulli ActivityProcessor :: Successfully written history item, last id for session_history is %s"
        #              % last_id)

@@ -546,6 +483,80 @@ class ActivityProcessor(object):

        # Return the session row id when the session is successfully written to the database
        return session['id']

    def group_history(self, last_id, session, metadata=None):
        new_session = prev_session = None
        prev_watched = None

        if session['live']:
            # Check if we should group the session, select the last guid from the user
            query = "SELECT session_history.id, session_history_metadata.guid, session_history.reference_id " \
                    "FROM session_history " \
                    "JOIN session_history_metadata ON session_history.id == session_history_metadata.id " \
                    "WHERE session_history.id <= ? AND session_history.user_id = ? ORDER BY session_history.id DESC LIMIT 1 "

            args = [last_id, session['user_id']]

            result = self.db.select(query=query, args=args)

            if len(result) > 0:
                new_session = {'id': last_id,
                               'guid': metadata['guid'] if metadata else session['guid'],
                               'reference_id': last_id}

                prev_session = {'id': result[0]['id'],
                                'guid': result[0]['guid'],
                                'reference_id': result[0]['reference_id']}

        else:
            # Check if we should group the session, select the last two rows from the user
            query = "SELECT id, rating_key, view_offset, reference_id FROM session_history " \
                    "WHERE id <= ? AND user_id = ? AND rating_key = ? ORDER BY id DESC LIMIT 2 "

            args = [last_id, session['user_id'], session['rating_key']]

            result = self.db.select(query=query, args=args)

            if len(result) > 1:
                new_session = {'id': result[0]['id'],
                               'rating_key': result[0]['rating_key'],
                               'view_offset': helpers.cast_to_int(result[0]['view_offset']),
                               'reference_id': result[0]['reference_id']}

                prev_session = {'id': result[1]['id'],
                                'rating_key': result[1]['rating_key'],
                                'view_offset': helpers.cast_to_int(result[1]['view_offset']),
                                'reference_id': result[1]['reference_id']}

                if metadata:
                    marker_first, marker_final = helpers.get_first_final_marker(metadata['markers'])
                else:
                    marker_first = session['marker_credits_first']
                    marker_final = session['marker_credits_final']

                prev_watched = helpers.check_watched(
                    session['media_type'], prev_session['view_offset'], session['duration'],
                    marker_first, marker_final
                )

        query = "UPDATE session_history SET reference_id = ? WHERE id = ? "

        # If previous session view offset less than watched threshold,
        # and new session view offset is greater,
        # then set the reference_id to the previous row,
        # else set the reference_id to the new id
        if (prev_watched is False and prev_session['view_offset'] <= new_session['view_offset'] or
                session['live'] and prev_session['guid'] == new_session['guid']):
            if metadata:
                logger.debug("Tautulli ActivityProcessor :: Grouping history for sessionKey %s", session['session_key'])
            args = [prev_session['reference_id'], new_session['id']]

        else:
            if metadata:
                logger.debug("Tautulli ActivityProcessor :: Not grouping history for sessionKey %s", session['session_key'])
            args = [last_id, last_id]

        self.db.action(query=query, args=args)

    def get_sessions(self, user_id=None, ip_address=None):
        query = "SELECT * FROM sessions"
        args = []

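The new group_history method factors the grouping decision out of write_session_history (called above) so the same rule can also be replayed over existing rows by regroup_history below: a play chains onto the previous group when the earlier play of the same item was not yet watched and the later play resumes at or beyond its view offset, or, for live TV, when the guid is unchanged. A standalone sketch of just that decision rule, using hypothetical row values rather than Tautulli's actual call sites:

# Standalone sketch of the reference_id decision above (hypothetical rows).
def pick_reference_id(prev, new, prev_watched, live=False):
    resumed = prev_watched is False and prev['view_offset'] <= new['view_offset']
    same_live_item = live and prev.get('guid') == new.get('guid')
    if resumed or same_live_item:
        return prev['reference_id']  # chain onto the previous group
    return new['id']                 # the row starts its own group

prev = {'id': 10, 'view_offset': 600, 'reference_id': 10}
new = {'id': 11, 'view_offset': 900}
print(pick_reference_id(prev, new, prev_watched=False))  # 10 -> grouped
print(pick_reference_id(prev, new, prev_watched=True))   # 11 -> new group
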
@@ -695,3 +706,36 @@ class ActivityProcessor(object):

                                              "ORDER BY stopped DESC",
                                              [user_id, machine_id, media_type])
        return int(started - last_session.get('stopped', 0) >= plexpy.CONFIG.NOTIFY_CONTINUED_SESSION_THRESHOLD)

    def regroup_history(self):
        logger.info("Tautulli ActivityProcessor :: Creating database backup...")
        if not database.make_backup():
            return False

        logger.info("Tautulli ActivityProcessor :: Regrouping session history...")

        query = (
            "SELECT * FROM session_history "
            "JOIN session_history_metadata ON session_history.id = session_history_metadata.id"
        )
        results = self.db.select(query)
        count = len(results)
        progress = 0

        for i, session in enumerate(results, start=1):
            if int(i / count * 10) > progress:
                progress = int(i / count * 10)
                logger.info("Tautulli ActivityProcessor :: Regrouping session history: %d%%", progress * 10)

            try:
                self.group_history(session['id'], session)
            except Exception as e:
                logger.error("Tautulli ActivityProcessor :: Error regrouping session history: %s", e)
                return False

        logger.info("Tautulli ActivityProcessor :: Regrouping session history complete.")
        return True


def regroup_history():
    ActivityProcessor().regroup_history()

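regroup_history backs up the database, then re-applies group_history to every existing history row, which is what the new settings button in the changelog triggers. Its logging is throttled by decile: int(i / count * 10) maps the loop index onto 0 through 10, and a message is emitted only when that value increases. A toy illustration with a made-up row count:

# Toy illustration of the decile-throttled progress logging above.
count = 42  # made-up number of rows
progress = 0
for i in range(1, count + 1):
    if int(i / count * 10) > progress:
        progress = int(i / count * 10)
        print("Regrouping session history: %d%%" % (progress * 10))
# Prints each of 10%..100% exactly once, however large count is.
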
@@ -216,6 +216,7 @@ AUDIO_QUALITY_PROFILES = {
AUDIO_QUALITY_PROFILES = OrderedDict(sorted(list(AUDIO_QUALITY_PROFILES.items()), key=lambda k: k[0], reverse=True))

HW_DECODERS = [
    'd3d11va',
    'dxva2',
    'videotoolbox',
    'mediacodecndk',

@@ -177,6 +177,7 @@ _CONFIG_DEFINITIONS = {
    'NOTIFY_RECENTLY_ADDED_UPGRADE': (int, 'Monitoring', 0),
    'NOTIFY_REMOTE_ACCESS_THRESHOLD': (int, 'Monitoring', 60),
    'NOTIFY_CONCURRENT_BY_IP': (int, 'Monitoring', 0),
    'NOTIFY_CONCURRENT_IPV6_CIDR': (str, 'Monitoring', '/64'),
    'NOTIFY_CONCURRENT_THRESHOLD': (int, 'Monitoring', 2),
    'NOTIFY_NEW_DEVICE_INITIAL_ONLY': (int, 'Monitoring', 1),
    'NOTIFY_SERVER_CONNECTION_THRESHOLD': (int, 'Monitoring', 60),

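The new NOTIFY_CONCURRENT_IPV6_CIDR setting (default '/64') backs the changelog fix for concurrent-stream notifications by IP address: IPv6 clients commonly rotate addresses within a single /64 network, so addresses in the same prefix should count as one. A minimal sketch of that idea with the standard ipaddress module (illustrative only; not Tautulli's actual notification code path):

# Sketch: collapse IPv6 addresses to their network before counting streams.
import ipaddress

def concurrent_key(ip, ipv6_cidr='/64'):
    addr = ipaddress.ip_address(ip)
    if addr.version == 6:
        return ipaddress.ip_network(ip + ipv6_cidr, strict=False)
    return addr

print(concurrent_key('2001:db8::1') == concurrent_key('2001:db8::2'))
# True: both land in 2001:db8::/64, so they count as one address
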
@@ -536,7 +537,7 @@ class Config(object):
        Returns something from the ini unless it is a real property
        of the configuration object or is not all caps.
        """
        if not re.match(r'[A-Z_]+$', name):
        if not re.match(r'[A-Z0-9_]+$', name):
            return super(Config, self).__getattr__(name)
        else:
            return self.check_setting(name)
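The __getattr__ regex widens from [A-Z_]+$ to [A-Z0-9_]+$ because the new setting name above contains a digit: NOTIFY_CONCURRENT_IPV6_CIDR would not match the old pattern and would fall through to the default __getattr__ instead of check_setting. A quick demonstration:

import re

name = 'NOTIFY_CONCURRENT_IPV6_CIDR'
print(bool(re.match(r'[A-Z_]+$', name)))     # False: the '6' breaks the old pattern
print(bool(re.match(r'[A-Z0-9_]+$', name)))  # True with the new pattern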