An H1
-Some text
-Some more text
-An H2
-Another
-Bob -Another H2
-me - -span1a1 -span1a2 test - -span2a1 - - -English
-English UK
-English US
-French
-diff --git a/.github/workflows/issues-stale.yml b/.github/workflows/issues-stale.yml index 0643cb0a..26b8aa5f 100644 --- a/.github/workflows/issues-stale.yml +++ b/.github/workflows/issues-stale.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Stale - uses: actions/stale@v7 + uses: actions/stale@v8 with: stale-issue-message: > This issue is stale because it has been open for 30 days with no activity. @@ -30,7 +30,7 @@ jobs: days-before-close: 5 - name: Invalid Template - uses: actions/stale@v7 + uses: actions/stale@v8 with: stale-issue-message: > Invalid issues template. diff --git a/.github/workflows/publish-docker.yml b/.github/workflows/publish-docker.yml index 6480575f..6d91bbf6 100644 --- a/.github/workflows/publish-docker.yml +++ b/.github/workflows/publish-docker.yml @@ -95,7 +95,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Get Build Job Status - uses: technote-space/workflow-conclusion-action@v3.0 + uses: technote-space/workflow-conclusion-action@v3 - name: Combine Job Status id: status diff --git a/.github/workflows/publish-installers.yml b/.github/workflows/publish-installers.yml index 49d53233..0b6eec36 100644 --- a/.github/workflows/publish-installers.yml +++ b/.github/workflows/publish-installers.yml @@ -68,7 +68,7 @@ jobs: pyinstaller -y ./package/Tautulli-${{ matrix.os }}.spec - name: Create Windows Installer - uses: joncloud/makensis-action@v3.7 + uses: joncloud/makensis-action@v4 if: matrix.os == 'windows' with: script-file: ./package/Tautulli.nsi @@ -100,10 +100,10 @@ jobs: runs-on: ubuntu-latest steps: - name: Get Build Job Status - uses: technote-space/workflow-conclusion-action@v3.0 + uses: technote-space/workflow-conclusion-action@v3 - name: Checkout Code - uses: actions/checkout@v3.2.0 + uses: actions/checkout@v3 - name: Set Release Version id: get_version @@ -168,7 +168,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Get Build Job Status - uses: technote-space/workflow-conclusion-action@v3.0 + uses: 
technote-space/workflow-conclusion-action@v3 - name: Combine Job Status id: status diff --git a/.github/workflows/publish-snap.yml b/.github/workflows/publish-snap.yml index 9df4d2fd..dd74c3a3 100644 --- a/.github/workflows/publish-snap.yml +++ b/.github/workflows/publish-snap.yml @@ -70,7 +70,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Get Build Job Status - uses: technote-space/workflow-conclusion-action@v3.0 + uses: technote-space/workflow-conclusion-action@v3 - name: Combine Job Status id: status diff --git a/.github/workflows/pull-requests.yml b/.github/workflows/pull-requests.yml index 58cb4ee4..1a24cf24 100644 --- a/.github/workflows/pull-requests.yml +++ b/.github/workflows/pull-requests.yml @@ -18,7 +18,6 @@ jobs: with: message: Pull requests must be made to the `nightly` branch. Thanks. repo-token: ${{ secrets.GITHUB_TOKEN }} - repo-token-user-login: 'github-actions[bot]' - name: Fail Workflow if: github.base_ref != 'nightly' diff --git a/CHANGELOG.md b/CHANGELOG.md index b3c6c4a1..974e69ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,33 @@ # Changelog +## v2.12.5 (2023-07-13) + +* Activity: + * New: Added d3d11va to list of hardware decoders. +* History: + * Fix: Incorrect grouping of play history. + * New: Added button in settings to regroup play history. +* Notifications: + * Fix: Incorrect concurrent streams notifications by IP addresss for IPv6 addresses (#2096) (Thanks @pooley182) +* UI: + * Fix: Occasional UI crashing on Python 3.11. + * New: Added multiselect user filters to History and Graphs pages. (#2090) (Thanks @zdimension) +* API: + * New: Added regroup_history API command. + * Change: Updated graph API commands to accept a comma separated list of user IDs. + + +## v2.12.4 (2023-05-23) + +* History: + * Fix: Set view offset equal to duration if a stream is stopped within the last 10 sec. +* Other: + * Fix: Database import may fail for some older databases. + * Fix: Double-quoted strings for newer versions of SQLite. 
(#2015, #2057) +* API: + * Change: Return the ID for async API calls (export_metadata, notify, notify_newsletter). + + ## v2.12.3 (2023-04-14) * Activity: diff --git a/data/interfaces/default/css/bootstrap-select.min.css b/data/interfaces/default/css/bootstrap-select.min.css new file mode 100644 index 00000000..d22faa63 --- /dev/null +++ b/data/interfaces/default/css/bootstrap-select.min.css @@ -0,0 +1,6 @@ +/*! + * Bootstrap-select v1.13.14 (https://developer.snapappointments.com/bootstrap-select) + * + * Copyright 2012-2020 SnapAppointments, LLC + * Licensed under MIT (https://github.com/snapappointments/bootstrap-select/blob/master/LICENSE) + */@-webkit-keyframes bs-notify-fadeOut{0%{opacity:.9}100%{opacity:0}}@-o-keyframes bs-notify-fadeOut{0%{opacity:.9}100%{opacity:0}}@keyframes bs-notify-fadeOut{0%{opacity:.9}100%{opacity:0}}.bootstrap-select>select.bs-select-hidden,select.bs-select-hidden,select.selectpicker{display:none!important}.bootstrap-select{width:220px\0;vertical-align:middle}.bootstrap-select>.dropdown-toggle{position:relative;width:100%;text-align:right;white-space:nowrap;display:-webkit-inline-box;display:-webkit-inline-flex;display:-ms-inline-flexbox;display:inline-flex;-webkit-box-align:center;-webkit-align-items:center;-ms-flex-align:center;align-items:center;-webkit-box-pack:justify;-webkit-justify-content:space-between;-ms-flex-pack:justify;justify-content:space-between}.bootstrap-select>.dropdown-toggle:after{margin-top:-1px}.bootstrap-select>.dropdown-toggle.bs-placeholder,.bootstrap-select>.dropdown-toggle.bs-placeholder:active,.bootstrap-select>.dropdown-toggle.bs-placeholder:focus,.bootstrap-select>.dropdown-toggle.bs-placeholder:hover{color:#999}.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-danger,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-danger:active,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-danger:focus,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-danger:hover,.bootstrap-select>.dropdown-toggl
e.bs-placeholder.btn-dark,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-dark:active,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-dark:focus,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-dark:hover,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-info,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-info:active,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-info:focus,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-info:hover,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-primary,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-primary:active,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-primary:focus,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-primary:hover,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-secondary,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-secondary:active,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-secondary:focus,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-secondary:hover,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-success,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-success:active,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-success:focus,.bootstrap-select>.dropdown-toggle.bs-placeholder.btn-success:hover{color:rgba(255,255,255,.5)}.bootstrap-select>select{position:absolute!important;bottom:0;left:50%;display:block!important;width:.5px!important;height:100%!important;padding:0!important;opacity:0!important;border:none;z-index:0!important}.bootstrap-select>select.mobile-device{top:0;left:0;display:block!important;width:100%!important;z-index:2!important}.bootstrap-select.is-invalid .dropdown-toggle,.error .bootstrap-select .dropdown-toggle,.has-error .bootstrap-select .dropdown-toggle,.was-validated .bootstrap-select select:invalid+.dropdown-toggle{border-color:#b94a48}.bootstrap-select.is-valid .dropdown-toggle,.was-validated .bootstrap-select 
select:valid+.dropdown-toggle{border-color:#28a745}.bootstrap-select.fit-width{width:auto!important}.bootstrap-select:not([class*=col-]):not([class*=form-control]):not(.input-group-btn){width:220px}.bootstrap-select .dropdown-toggle:focus,.bootstrap-select>select.mobile-device:focus+.dropdown-toggle{outline:thin dotted #333!important;outline:5px auto -webkit-focus-ring-color!important;outline-offset:-2px}.bootstrap-select.form-control{margin-bottom:0;padding:0;border:none;height:auto}:not(.input-group)>.bootstrap-select.form-control:not([class*=col-]){width:100%}.bootstrap-select.form-control.input-group-btn{float:none;z-index:auto}.form-inline .bootstrap-select,.form-inline .bootstrap-select.form-control:not([class*=col-]){width:auto}.bootstrap-select:not(.input-group-btn),.bootstrap-select[class*=col-]{float:none;display:inline-block;margin-left:0}.bootstrap-select.dropdown-menu-right,.bootstrap-select[class*=col-].dropdown-menu-right,.row .bootstrap-select[class*=col-].dropdown-menu-right{float:right}.form-group .bootstrap-select,.form-horizontal .bootstrap-select,.form-inline .bootstrap-select{margin-bottom:0}.form-group-lg .bootstrap-select.form-control,.form-group-sm .bootstrap-select.form-control{padding:0}.form-group-lg .bootstrap-select.form-control .dropdown-toggle,.form-group-sm .bootstrap-select.form-control .dropdown-toggle{height:100%;font-size:inherit;line-height:inherit;border-radius:inherit}.bootstrap-select.form-control-lg .dropdown-toggle,.bootstrap-select.form-control-sm .dropdown-toggle{font-size:inherit;line-height:inherit;border-radius:inherit}.bootstrap-select.form-control-sm .dropdown-toggle{padding:.25rem .5rem}.bootstrap-select.form-control-lg .dropdown-toggle{padding:.5rem 1rem}.form-inline .bootstrap-select 
.form-control{width:100%}.bootstrap-select.disabled,.bootstrap-select>.disabled{cursor:not-allowed}.bootstrap-select.disabled:focus,.bootstrap-select>.disabled:focus{outline:0!important}.bootstrap-select.bs-container{position:absolute;top:0;left:0;height:0!important;padding:0!important}.bootstrap-select.bs-container .dropdown-menu{z-index:1060}.bootstrap-select .dropdown-toggle .filter-option{position:static;top:0;left:0;float:left;height:100%;width:100%;text-align:left;overflow:hidden;-webkit-box-flex:0;-webkit-flex:0 1 auto;-ms-flex:0 1 auto;flex:0 1 auto}.bs3.bootstrap-select .dropdown-toggle .filter-option{padding-right:inherit}.input-group .bs3-has-addon.bootstrap-select .dropdown-toggle .filter-option{position:absolute;padding-top:inherit;padding-bottom:inherit;padding-left:inherit;float:none}.input-group .bs3-has-addon.bootstrap-select .dropdown-toggle .filter-option .filter-option-inner{padding-right:inherit}.bootstrap-select .dropdown-toggle .filter-option-inner-inner{overflow:hidden}.bootstrap-select .dropdown-toggle .filter-expand{width:0!important;float:left;opacity:0!important;overflow:hidden}.bootstrap-select .dropdown-toggle .caret{position:absolute;top:50%;right:12px;margin-top:-2px;vertical-align:middle}.input-group .bootstrap-select.form-control .dropdown-toggle{border-radius:inherit}.bootstrap-select[class*=col-] .dropdown-toggle{width:100%}.bootstrap-select .dropdown-menu{min-width:100%;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.bootstrap-select .dropdown-menu>.inner:focus{outline:0!important}.bootstrap-select .dropdown-menu.inner{position:static;float:none;border:0;padding:0;margin:0;border-radius:0;-webkit-box-shadow:none;box-shadow:none}.bootstrap-select .dropdown-menu li{position:relative}.bootstrap-select .dropdown-menu li.active small{color:rgba(255,255,255,.5)!important}.bootstrap-select .dropdown-menu li.disabled a{cursor:not-allowed}.bootstrap-select .dropdown-menu li 
a{cursor:pointer;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.bootstrap-select .dropdown-menu li a.opt{position:relative;padding-left:2.25em}.bootstrap-select .dropdown-menu li a span.check-mark{display:none}.bootstrap-select .dropdown-menu li a span.text{display:inline-block}.bootstrap-select .dropdown-menu li small{padding-left:.5em}.bootstrap-select .dropdown-menu .notify{position:absolute;bottom:5px;width:96%;margin:0 2%;min-height:26px;padding:3px 5px;background:#f5f5f5;border:1px solid #e3e3e3;-webkit-box-shadow:inset 0 1px 1px rgba(0,0,0,.05);box-shadow:inset 0 1px 1px rgba(0,0,0,.05);pointer-events:none;opacity:.9;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.bootstrap-select .dropdown-menu .notify.fadeOut{-webkit-animation:.3s linear 750ms forwards bs-notify-fadeOut;-o-animation:.3s linear 750ms forwards bs-notify-fadeOut;animation:.3s linear 750ms forwards bs-notify-fadeOut}.bootstrap-select .no-results{padding:3px;background:#f5f5f5;margin:0 5px;white-space:nowrap}.bootstrap-select.fit-width .dropdown-toggle .filter-option{position:static;display:inline;padding:0}.bootstrap-select.fit-width .dropdown-toggle .filter-option-inner,.bootstrap-select.fit-width .dropdown-toggle .filter-option-inner-inner{display:inline}.bootstrap-select.fit-width .dropdown-toggle .bs-caret:before{content:'\00a0'}.bootstrap-select.fit-width .dropdown-toggle .caret{position:static;top:auto;margin-top:-1px}.bootstrap-select.show-tick .dropdown-menu .selected span.check-mark{position:absolute;display:inline-block;right:15px;top:5px}.bootstrap-select.show-tick .dropdown-menu li a span.text{margin-right:34px}.bootstrap-select .bs-ok-default:after{content:'';display:block;width:.5em;height:1em;border-style:solid;border-width:0 .26em .26em 
0;-webkit-transform:rotate(45deg);-ms-transform:rotate(45deg);-o-transform:rotate(45deg);transform:rotate(45deg)}.bootstrap-select.show-menu-arrow.open>.dropdown-toggle,.bootstrap-select.show-menu-arrow.show>.dropdown-toggle{z-index:1061}.bootstrap-select.show-menu-arrow .dropdown-toggle .filter-option:before{content:'';border-left:7px solid transparent;border-right:7px solid transparent;border-bottom:7px solid rgba(204,204,204,.2);position:absolute;bottom:-4px;left:9px;display:none}.bootstrap-select.show-menu-arrow .dropdown-toggle .filter-option:after{content:'';border-left:6px solid transparent;border-right:6px solid transparent;border-bottom:6px solid #fff;position:absolute;bottom:-4px;left:10px;display:none}.bootstrap-select.show-menu-arrow.dropup .dropdown-toggle .filter-option:before{bottom:auto;top:-4px;border-top:7px solid rgba(204,204,204,.2);border-bottom:0}.bootstrap-select.show-menu-arrow.dropup .dropdown-toggle .filter-option:after{bottom:auto;top:-4px;border-top:6px solid #fff;border-bottom:0}.bootstrap-select.show-menu-arrow.pull-right .dropdown-toggle .filter-option:before{right:12px;left:auto}.bootstrap-select.show-menu-arrow.pull-right .dropdown-toggle .filter-option:after{right:13px;left:auto}.bootstrap-select.show-menu-arrow.open>.dropdown-toggle .filter-option:after,.bootstrap-select.show-menu-arrow.open>.dropdown-toggle .filter-option:before,.bootstrap-select.show-menu-arrow.show>.dropdown-toggle .filter-option:after,.bootstrap-select.show-menu-arrow.show>.dropdown-toggle .filter-option:before{display:block}.bs-actionsbox,.bs-donebutton,.bs-searchbox{padding:4px 8px}.bs-actionsbox{width:100%;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.bs-actionsbox .btn-group button{width:50%}.bs-donebutton{float:left;width:100%;-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}.bs-donebutton .btn-group button{width:100%}.bs-searchbox+.bs-actionsbox{padding:0 8px 4px}.bs-searchbox 
.form-control{margin-bottom:0;width:100%;float:none} \ No newline at end of file diff --git a/data/interfaces/default/css/tautulli.css b/data/interfaces/default/css/tautulli.css index 5f1d90a0..e256d2d7 100644 --- a/data/interfaces/default/css/tautulli.css +++ b/data/interfaces/default/css/tautulli.css @@ -2914,7 +2914,7 @@ a .home-platforms-list-cover-face:hover margin-bottom: -20px; width: 100%; max-width: 1750px; - overflow: hidden; + display: flow-root; } .table-card-back td { font-size: 12px; diff --git a/data/interfaces/default/graphs.html b/data/interfaces/default/graphs.html index 3f189112..8435df20 100644 --- a/data/interfaces/default/graphs.html +++ b/data/interfaces/default/graphs.html @@ -1,6 +1,7 @@ <%inherit file="base.html"/> <%def name="headIncludes()"> + %def> @@ -14,9 +15,7 @@
');y[2]&&($=$.replace("{var}",y[2][1Change the "Play by day of week" graph to start on Monday. Default is start on Sunday.
-Group play history for the same item and user as a single entry when progress is less than the watched percent.
-Decide whether to use end credits markers to determine the 'watched' state of video items. When markers are not available the selected threshold percentage will be used.
+Group play history for the same item and user as a single entry when progress is less than the watched percent.
+
+ Fix grouping of play history in the database.
+
@@ -2484,6 +2497,12 @@ $(document).ready(function() {
confirmAjaxCall(url, msg);
});
+ $("#regroup_history").click(function () {
+ var msg = 'Are you sure you want to regroup play history in the database?
This make take a long time for large databases.
Regrouping will continue in the background.';
+ var url = 'regroup_history';
+ confirmAjaxCall(url, msg);
+ });
+
$("#delete_temp_sessions").click(function () {
var msg = 'Are you sure you want to flush the temporary sessions?
This will reset all currently active sessions.';
var url = 'delete_temp_sessions';
diff --git a/lib/bs4/__init__.py b/lib/bs4/__init__.py
index db71cc7c..3d2ab09a 100644
--- a/lib/bs4/__init__.py
+++ b/lib/bs4/__init__.py
@@ -15,7 +15,7 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
"""
__author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.11.2"
+__version__ = "4.12.2"
__copyright__ = "Copyright (c) 2004-2023 Leonard Richardson"
# Use of this source code is governed by the MIT license.
__license__ = "MIT"
@@ -38,11 +38,13 @@ from .builder import (
builder_registry,
ParserRejectedMarkup,
XMLParsedAsHTMLWarning,
+ HTMLParserTreeBuilder
)
from .dammit import UnicodeDammit
from .element import (
CData,
Comment,
+ CSS,
DEFAULT_OUTPUT_ENCODING,
Declaration,
Doctype,
@@ -116,7 +118,7 @@ class BeautifulSoup(Tag):
ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"
-
+
def __init__(self, markup="", features=None, builder=None,
parse_only=None, from_encoding=None, exclude_encodings=None,
element_classes=None, **kwargs):
@@ -348,25 +350,49 @@ class BeautifulSoup(Tag):
self.markup = None
self.builder.soup = None
- def __copy__(self):
- """Copy a BeautifulSoup object by converting the document to a string and parsing it again."""
- copy = type(self)(
- self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
- )
+ def _clone(self):
+ """Create a new BeautifulSoup object with the same TreeBuilder,
+ but not associated with any markup.
- # Although we encoded the tree to UTF-8, that may not have
- # been the encoding of the original markup. Set the copy's
- # .original_encoding to reflect the original object's
- # .original_encoding.
- copy.original_encoding = self.original_encoding
- return copy
+ This is the first step of the deepcopy process.
+ """
+ clone = type(self)("", None, self.builder)
+ # Keep track of the encoding of the original document,
+ # since we won't be parsing it again.
+ clone.original_encoding = self.original_encoding
+ return clone
+
def __getstate__(self):
# Frequently a tree builder can't be pickled.
d = dict(self.__dict__)
if 'builder' in d and d['builder'] is not None and not self.builder.picklable:
- d['builder'] = None
+ d['builder'] = type(self.builder)
+ # Store the contents as a Unicode string.
+ d['contents'] = []
+ d['markup'] = self.decode()
+
+ # If _most_recent_element is present, it's a Tag object left
+ # over from initial parse. It might not be picklable and we
+ # don't need it.
+ if '_most_recent_element' in d:
+ del d['_most_recent_element']
return d
+
+ def __setstate__(self, state):
+ # If necessary, restore the TreeBuilder by looking it up.
+ self.__dict__ = state
+ if isinstance(self.builder, type):
+ self.builder = self.builder()
+ elif not self.builder:
+ # We don't know which builder was used to build this
+ # parse tree, so use a default we know is always available.
+ self.builder = HTMLParserTreeBuilder()
+ self.builder.soup = self
+ self.reset()
+ self._feed()
+ return state
+
@classmethod
def _decode_markup(cls, markup):
@@ -468,6 +494,7 @@ class BeautifulSoup(Tag):
self.open_tag_counter = Counter()
self.preserve_whitespace_tag_stack = []
self.string_container_stack = []
+ self._most_recent_element = None
self.pushTag(self)
def new_tag(self, name, namespace=None, nsprefix=None, attrs={},
@@ -749,7 +776,7 @@ class BeautifulSoup(Tag):
def decode(self, pretty_print=False,
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
- formatter="minimal"):
+ formatter="minimal", iterator=None):
"""Returns a string or Unicode representation of the parse tree
as an HTML or XML document.
@@ -776,7 +803,7 @@ class BeautifulSoup(Tag):
else:
indent_level = 0
return prefix + super(BeautifulSoup, self).decode(
- indent_level, eventual_encoding, formatter)
+ indent_level, eventual_encoding, formatter, iterator)
# Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup'
_s = BeautifulSoup
diff --git a/lib/bs4/builder/_htmlparser.py b/lib/bs4/builder/_htmlparser.py
index e48b6a0e..e065096b 100644
--- a/lib/bs4/builder/_htmlparser.py
+++ b/lib/bs4/builder/_htmlparser.py
@@ -24,6 +24,7 @@ from bs4.dammit import EntitySubstitution, UnicodeDammit
from bs4.builder import (
DetectsXMLParsedAsHTML,
+ ParserRejectedMarkup,
HTML,
HTMLTreeBuilder,
STRICT,
@@ -70,6 +71,22 @@ class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
self._initialize_xml_detector()
+ def error(self, message):
+ # NOTE: This method is required so long as Python 3.9 is
+ # supported. The corresponding code is removed from HTMLParser
+ # in 3.5, but not removed from ParserBase until 3.10.
+ # https://github.com/python/cpython/issues/76025
+ #
+ # The original implementation turned the error into a warning,
+ # but in every case I discovered, this made HTMLParser
+ # immediately crash with an error message that was less
+ # helpful than the warning. The new implementation makes it
+ # more clear that html.parser just can't parse this
+ # markup. The 3.10 implementation does the same, though it
+ # raises AssertionError rather than calling a method. (We
+ # catch this error and wrap it in a ParserRejectedMarkup.)
+ raise ParserRejectedMarkup(message)
+
def handle_startendtag(self, name, attrs):
"""Handle an incoming empty-element tag.
@@ -359,6 +376,12 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
args, kwargs = self.parser_args
parser = BeautifulSoupHTMLParser(*args, **kwargs)
parser.soup = self.soup
- parser.feed(markup)
+ try:
+ parser.feed(markup)
+ except AssertionError as e:
+ # html.parser raises AssertionError in rare cases to
+ # indicate a fatal problem with the markup, especially
+ # when there's an error in the doctype declaration.
+ raise ParserRejectedMarkup(e)
parser.close()
parser.already_closed_empty_element = []
diff --git a/lib/bs4/css.py b/lib/bs4/css.py
new file mode 100644
index 00000000..245ac601
--- /dev/null
+++ b/lib/bs4/css.py
@@ -0,0 +1,280 @@
+"""Integration code for CSS selectors using Soup Sieve (pypi: soupsieve)."""
+
+import warnings
+try:
+ import soupsieve
+except ImportError as e:
+ soupsieve = None
+ warnings.warn(
+ 'The soupsieve package is not installed. CSS selectors cannot be used.'
+ )
+
+
+class CSS(object):
+ """A proxy object against the soupsieve library, to simplify its
+ CSS selector API.
+
+ Acquire this object through the .css attribute on the
+ BeautifulSoup object, or on the Tag you want to use as the
+ starting point for a CSS selector.
+
+ The main advantage of doing this is that the tag to be selected
+ against doesn't need to be explicitly specified in the function
+ calls, since it's already scoped to a tag.
+ """
+
+ def __init__(self, tag, api=soupsieve):
+ """Constructor.
+
+ You don't need to instantiate this class yourself; instead,
+ access the .css attribute on the BeautifulSoup object, or on
+ the Tag you want to use as the starting point for your CSS
+ selector.
+
+ :param tag: All CSS selectors will use this as their starting
+ point.
+
+ :param api: A plug-in replacement for the soupsieve module,
+ designed mainly for use in tests.
+ """
+ if api is None:
+ raise NotImplementedError(
+ "Cannot execute CSS selectors because the soupsieve package is not installed."
+ )
+ self.api = api
+ self.tag = tag
+
+ def escape(self, ident):
+ """Escape a CSS identifier.
+
+ This is a simple wrapper around soupsieve.escape(). See the
+ documentation for that function for more information.
+ """
+ if soupsieve is None:
+ raise NotImplementedError(
+ "Cannot escape CSS identifiers because the soupsieve package is not installed."
+ )
+ return self.api.escape(ident)
+
+ def _ns(self, ns, select):
+ """Normalize a dictionary of namespaces."""
+ if not isinstance(select, self.api.SoupSieve) and ns is None:
+ # If the selector is a precompiled pattern, it already has
+ # a namespace context compiled in, which cannot be
+ # replaced.
+ ns = self.tag._namespaces
+ return ns
+
+ def _rs(self, results):
+ """Normalize a list of results to a ResultSet.
+
+ A ResultSet is more consistent with the rest of Beautiful
+ Soup's API, and ResultSet.__getattr__ has a helpful error
+ message if you try to treat a list of results as a single
+ result (a common mistake).
+ """
+ # Import here to avoid circular import
+ from bs4.element import ResultSet
+ return ResultSet(None, results)
+
+ def compile(self, select, namespaces=None, flags=0, **kwargs):
+ """Pre-compile a selector and return the compiled object.
+
+ :param selector: A CSS selector.
+
+ :param namespaces: A dictionary mapping namespace prefixes
+ used in the CSS selector to namespace URIs. By default,
+ Beautiful Soup will use the prefixes it encountered while
+ parsing the document.
+
+ :param flags: Flags to be passed into Soup Sieve's
+ soupsieve.compile() method.
+
+ :param kwargs: Keyword arguments to be passed into SoupSieve's
+ soupsieve.compile() method.
+
+ :return: A precompiled selector object.
+ :rtype: soupsieve.SoupSieve
+ """
+ return self.api.compile(
+ select, self._ns(namespaces, select), flags, **kwargs
+ )
+
+ def select_one(self, select, namespaces=None, flags=0, **kwargs):
+ """Perform a CSS selection operation on the current Tag and return the
+ first result.
+
+ This uses the Soup Sieve library. For more information, see
+ that library's documentation for the soupsieve.select_one()
+ method.
+
+ :param selector: A CSS selector.
+
+ :param namespaces: A dictionary mapping namespace prefixes
+ used in the CSS selector to namespace URIs. By default,
+ Beautiful Soup will use the prefixes it encountered while
+ parsing the document.
+
+ :param flags: Flags to be passed into Soup Sieve's
+ soupsieve.select_one() method.
+
+ :param kwargs: Keyword arguments to be passed into SoupSieve's
+ soupsieve.select_one() method.
+
+ :return: A Tag, or None if the selector has no match.
+ :rtype: bs4.element.Tag
+
+ """
+ return self.api.select_one(
+ select, self.tag, self._ns(namespaces, select), flags, **kwargs
+ )
+
+ def select(self, select, namespaces=None, limit=0, flags=0, **kwargs):
+ """Perform a CSS selection operation on the current Tag.
+
+ This uses the Soup Sieve library. For more information, see
+ that library's documentation for the soupsieve.select()
+ method.
+
+ :param selector: A string containing a CSS selector.
+
+ :param namespaces: A dictionary mapping namespace prefixes
+ used in the CSS selector to namespace URIs. By default,
+ Beautiful Soup will pass in the prefixes it encountered while
+ parsing the document.
+
+ :param limit: After finding this number of results, stop looking.
+
+ :param flags: Flags to be passed into Soup Sieve's
+ soupsieve.select() method.
+
+ :param kwargs: Keyword arguments to be passed into SoupSieve's
+ soupsieve.select() method.
+
+ :return: A ResultSet of Tag objects.
+ :rtype: bs4.element.ResultSet
+
+ """
+ if limit is None:
+ limit = 0
+
+ return self._rs(
+ self.api.select(
+ select, self.tag, self._ns(namespaces, select), limit, flags,
+ **kwargs
+ )
+ )
+
+ def iselect(self, select, namespaces=None, limit=0, flags=0, **kwargs):
+ """Perform a CSS selection operation on the current Tag.
+
+ This uses the Soup Sieve library. For more information, see
+ that library's documentation for the soupsieve.iselect()
+ method. It is the same as select(), but it returns a generator
+ instead of a list.
+
+ :param selector: A string containing a CSS selector.
+
+ :param namespaces: A dictionary mapping namespace prefixes
+ used in the CSS selector to namespace URIs. By default,
+ Beautiful Soup will pass in the prefixes it encountered while
+ parsing the document.
+
+ :param limit: After finding this number of results, stop looking.
+
+ :param flags: Flags to be passed into Soup Sieve's
+ soupsieve.iselect() method.
+
+ :param kwargs: Keyword arguments to be passed into SoupSieve's
+ soupsieve.iselect() method.
+
+ :return: A generator
+ :rtype: types.GeneratorType
+ """
+ return self.api.iselect(
+ select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
+ )
+
+ def closest(self, select, namespaces=None, flags=0, **kwargs):
+ """Find the Tag closest to this one that matches the given selector.
+
+ This uses the Soup Sieve library. For more information, see
+ that library's documentation for the soupsieve.closest()
+ method.
+
+ :param selector: A string containing a CSS selector.
+
+ :param namespaces: A dictionary mapping namespace prefixes
+ used in the CSS selector to namespace URIs. By default,
+ Beautiful Soup will pass in the prefixes it encountered while
+ parsing the document.
+
+ :param flags: Flags to be passed into Soup Sieve's
+ soupsieve.closest() method.
+
+ :param kwargs: Keyword arguments to be passed into SoupSieve's
+ soupsieve.closest() method.
+
+ :return: A Tag, or None if there is no match.
+ :rtype: bs4.Tag
+
+ """
+ return self.api.closest(
+ select, self.tag, self._ns(namespaces, select), flags, **kwargs
+ )
+
+ def match(self, select, namespaces=None, flags=0, **kwargs):
+ """Check whether this Tag matches the given CSS selector.
+
+ This uses the Soup Sieve library. For more information, see
+ that library's documentation for the soupsieve.match()
+ method.
+
+ :param select: A CSS selector.
+
+ :param namespaces: A dictionary mapping namespace prefixes
+ used in the CSS selector to namespace URIs. By default,
+ Beautiful Soup will pass in the prefixes it encountered while
+ parsing the document.
+
+ :param flags: Flags to be passed into Soup Sieve's
+ soupsieve.match() method.
+
+ :param kwargs: Keyword arguments to be passed into SoupSieve's
+ soupsieve.match() method.
+
+ :return: True if this Tag matches the selector; False otherwise.
+ :rtype: bool
+ """
+ return self.api.match(
+ select, self.tag, self._ns(namespaces, select), flags, **kwargs
+ )
+
+ def filter(self, select, namespaces=None, flags=0, **kwargs):
+ """Filter this Tag's direct children based on the given CSS selector.
+
+ This uses the Soup Sieve library. It works the same way as
+ passing this Tag into that library's soupsieve.filter()
+ method. For more information, see the
+ documentation for soupsieve.filter().
+
+ :param namespaces: A dictionary mapping namespace prefixes
+ used in the CSS selector to namespace URIs. By default,
+ Beautiful Soup will pass in the prefixes it encountered while
+ parsing the document.
+
+ :param flags: Flags to be passed into Soup Sieve's
+ soupsieve.filter() method.
+
+ :param kwargs: Keyword arguments to be passed into SoupSieve's
+ soupsieve.filter() method.
+
+ :return: A ResultSet of Tag objects.
+ :rtype: bs4.element.ResultSet
+
+ """
+ return self._rs(
+ self.api.filter(
+ select, self.tag, self._ns(namespaces, select), flags, **kwargs
+ )
+ )
diff --git a/lib/bs4/diagnose.py b/lib/bs4/diagnose.py
index 3bf583f5..e079772e 100644
--- a/lib/bs4/diagnose.py
+++ b/lib/bs4/diagnose.py
@@ -59,21 +59,6 @@ def diagnose(data):
if hasattr(data, 'read'):
data = data.read()
- elif data.startswith("http:") or data.startswith("https:"):
- print(('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data))
- print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
- return
- else:
- try:
- if os.path.exists(data):
- print(('"%s" looks like a filename. Reading data from the file.' % data))
- with open(data) as fp:
- data = fp.read()
- except ValueError:
- # This can happen on some platforms when the 'filename' is
- # too long. Assume it's data and not a filename.
- pass
- print("")
for parser in basic_parsers:
print(("Trying to parse your markup with %s" % parser))
diff --git a/lib/bs4/element.py b/lib/bs4/element.py
index 583d0e8a..9c73957c 100644
--- a/lib/bs4/element.py
+++ b/lib/bs4/element.py
@@ -8,14 +8,8 @@ except ImportError as e:
import re
import sys
import warnings
-try:
- import soupsieve
-except ImportError as e:
- soupsieve = None
- warnings.warn(
- 'The soupsieve package is not installed. CSS selectors cannot be used.'
- )
+from bs4.css import CSS
from bs4.formatter import (
Formatter,
HTMLFormatter,
@@ -69,13 +63,13 @@ PYTHON_SPECIFIC_ENCODINGS = set([
"string-escape",
"string_escape",
])
-
+
class NamespacedAttribute(str):
"""A namespaced string (e.g. 'xml:lang') that remembers the namespace
('xml') and the name ('lang') that were used to create it.
"""
-
+
def __new__(cls, prefix, name=None, namespace=None):
if not name:
# This is the default namespace. Its name "has no value"
@@ -146,14 +140,19 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
return match.group(1) + encoding
return self.CHARSET_RE.sub(rewrite, self.original_value)
-
+
class PageElement(object):
"""Contains the navigational information for some part of the page:
that is, its current location in the parse tree.
NavigableString, Tag, etc. are all subclasses of PageElement.
"""
-
+
+ # In general, we can't tell just by looking at an element whether
+ # it's contained in an XML document or an HTML document. But for
+ # Tags (q.v.) we can store this information at parse time.
+ known_xml = None
+
def setup(self, parent=None, previous_element=None, next_element=None,
previous_sibling=None, next_sibling=None):
"""Sets up the initial relations between this element and
@@ -163,7 +162,7 @@ class PageElement(object):
:param previous_element: The element parsed immediately before
this one.
-
+
:param next_element: The element parsed immediately before
this one.
@@ -257,11 +256,11 @@ class PageElement(object):
default = object()
def _all_strings(self, strip=False, types=default):
"""Yield all strings of certain classes, possibly stripping them.
-
+
This is implemented differently in Tag and NavigableString.
"""
raise NotImplementedError()
-
+
@property
def stripped_strings(self):
"""Yield all strings in this PageElement, stripping them first.
@@ -294,11 +293,11 @@ class PageElement(object):
strip, types=types)])
getText = get_text
text = property(get_text)
-
+
def replace_with(self, *args):
- """Replace this PageElement with one or more PageElements, keeping the
+ """Replace this PageElement with one or more PageElements, keeping the
rest of the tree the same.
-
+
:param args: One or more PageElements.
:return: `self`, no longer part of the tree.
"""
@@ -410,7 +409,7 @@ class PageElement(object):
This works the same way as `list.insert`.
:param position: The numeric position that should be occupied
- in `self.children` by the new PageElement.
+ in `self.children` by the new PageElement.
:param new_child: A PageElement.
"""
if new_child is None:
@@ -546,7 +545,7 @@ class PageElement(object):
"Element has no parent, so 'after' has no meaning.")
if any(x is self for x in args):
raise ValueError("Can't insert an element after itself.")
-
+
offset = 0
for successor in args:
# Extract first so that the index won't be screwed up if they
@@ -912,7 +911,7 @@ class PageElement(object):
:rtype: bool
"""
return getattr(self, '_decomposed', False) or False
-
+
# Old non-property versions of the generators, for backwards
# compatibility with BS3.
def nextGenerator(self):
@@ -936,16 +935,11 @@ class NavigableString(str, PageElement):
When Beautiful Soup parses the markup <b>penguin</b>, it will
create a NavigableString for the string "penguin".
- """
+ """
PREFIX = ''
SUFFIX = ''
- # We can't tell just by looking at a string whether it's contained
- # in an XML document or an HTML document.
-
- known_xml = None
-
def __new__(cls, value):
"""Create a new NavigableString.
@@ -961,12 +955,22 @@ class NavigableString(str, PageElement):
u.setup()
return u
- def __copy__(self):
+ def __deepcopy__(self, memo, recursive=False):
"""A copy of a NavigableString has the same contents and class
as the original, but it is not connected to the parse tree.
+
+ :param recursive: This parameter is ignored; it's only defined
+ so that NavigableString.__deepcopy__ implements the same
+ signature as Tag.__deepcopy__.
"""
return type(self)(self)
+ def __copy__(self):
+ """A copy of a NavigableString can only be a deep copy, because
+ only one PageElement can occupy a given place in a parse tree.
+ """
+ return self.__deepcopy__({})
+
def __getnewargs__(self):
return (str(self),)
@@ -1059,10 +1063,10 @@ class PreformattedString(NavigableString):
as comments (the Comment class) and CDATA blocks (the CData
class).
"""
-
+
PREFIX = ''
SUFFIX = ''
-
+
def output_ready(self, formatter=None):
"""Make this string ready for output by adding any subclass-specific
prefix or suffix.
@@ -1144,7 +1148,7 @@ class Stylesheet(NavigableString):
"""
pass
-
+
class Script(NavigableString):
"""A NavigableString representing an executable script (probably
Javascript).
@@ -1250,7 +1254,7 @@ class Tag(PageElement):
if ((not builder or builder.store_line_numbers)
and (sourceline is not None or sourcepos is not None)):
self.sourceline = sourceline
- self.sourcepos = sourcepos
+ self.sourcepos = sourcepos
if attrs is None:
attrs = {}
elif attrs:
@@ -1308,13 +1312,49 @@ class Tag(PageElement):
self.interesting_string_types = builder.string_containers[self.name]
else:
self.interesting_string_types = self.DEFAULT_INTERESTING_STRING_TYPES
-
+
parserClass = _alias("parser_class") # BS3
- def __copy__(self):
- """A copy of a Tag is a new Tag, unconnected to the parse tree.
+ def __deepcopy__(self, memo, recursive=True):
+ """A deepcopy of a Tag is a new Tag, unconnected to the parse tree.
Its contents are a copy of the old Tag's contents.
"""
+ clone = self._clone()
+
+ if recursive:
+ # Clone this tag's descendants recursively, but without
+ # making any recursive function calls.
+ tag_stack = [clone]
+ for event, element in self._event_stream(self.descendants):
+ if event is Tag.END_ELEMENT_EVENT:
+ # Stop appending incoming Tags to the Tag that was
+ # just closed.
+ tag_stack.pop()
+ else:
+ descendant_clone = element.__deepcopy__(
+ memo, recursive=False
+ )
+ # Add to its parent's .contents
+ tag_stack[-1].append(descendant_clone)
+
+ if event is Tag.START_ELEMENT_EVENT:
+ # Add the Tag itself to the stack so that its
+ # children will be .appended to it.
+ tag_stack.append(descendant_clone)
+ return clone
+
+ def __copy__(self):
+ """A copy of a Tag must always be a deep copy, because a Tag's
+ children can only have one parent at a time.
+ """
+ return self.__deepcopy__({})
+
+ def _clone(self):
+ """Create a new Tag just like this one, but with no
+ contents and unattached to any parse tree.
+
+ This is the first step in the deepcopy process.
+ """
clone = type(self)(
None, self.builder, self.name, self.namespace,
self.prefix, self.attrs, is_xml=self._is_xml,
@@ -1326,8 +1366,6 @@ class Tag(PageElement):
)
for attr in ('can_be_empty_element', 'hidden'):
setattr(clone, attr, getattr(self, attr))
- for child in self.contents:
- clone.append(child.__copy__())
return clone
@property
@@ -1433,7 +1471,7 @@ class Tag(PageElement):
i.contents = []
i._decomposed = True
i = n
-
+
def clear(self, decompose=False):
"""Wipe out all children of this PageElement by calling extract()
on them.
@@ -1521,7 +1559,7 @@ class Tag(PageElement):
if not isinstance(value, list):
value = [value]
return value
-
+
def has_attr(self, key):
"""Does this PageElement have an attribute with the given name?"""
return key in self.attrs
@@ -1608,7 +1646,7 @@ class Tag(PageElement):
def __repr__(self, encoding="unicode-escape"):
"""Renders this PageElement as a string.
- :param encoding: The encoding to use (Python 2 only).
+ :param encoding: The encoding to use (Python 2 only).
TODO: This is now ignored and a warning should be issued
if a value is provided.
:return: A (Unicode) string.
@@ -1650,106 +1688,212 @@ class Tag(PageElement):
def decode(self, indent_level=None,
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
- formatter="minimal"):
- """Render a Unicode representation of this PageElement and its
- contents.
-
- :param indent_level: Each line of the rendering will be
- indented this many spaces. Used internally in
- recursive calls while pretty-printing.
- :param eventual_encoding: The tag is destined to be
- encoded into this encoding. This method is _not_
- responsible for performing that encoding. This information
- is passed in so that it can be substituted in if the
- document contains a tag that mentions the document's
- encoding.
- :param formatter: A Formatter object, or a string naming one of
- the standard formatters.
- """
-
+ formatter="minimal",
+ iterator=None):
+ pieces = []
# First off, turn a non-Formatter `formatter` into a Formatter
# object. This will stop the lookup from happening over and
# over again.
if not isinstance(formatter, Formatter):
formatter = self.formatter_for_name(formatter)
- attributes = formatter.attributes(self)
- attrs = []
- for key, val in attributes:
- if val is None:
- decoded = key
+
+ if indent_level is True:
+ indent_level = 0
+
+ # The currently active tag that put us into string literal
+ # mode. Until this element is closed, children will be treated
+ # as string literals and not pretty-printed. String literal
+ # mode is turned on immediately after this tag begins, and
+ # turned off immediately before it's closed. This means there
+ # will be whitespace before and after the tag itself.
+ string_literal_tag = None
+
+ for event, element in self._event_stream(iterator):
+ if event in (Tag.START_ELEMENT_EVENT, Tag.EMPTY_ELEMENT_EVENT):
+ piece = element._format_tag(
+ eventual_encoding, formatter, opening=True
+ )
+ elif event is Tag.END_ELEMENT_EVENT:
+ piece = element._format_tag(
+ eventual_encoding, formatter, opening=False
+ )
+ if indent_level is not None:
+ indent_level -= 1
else:
- if isinstance(val, list) or isinstance(val, tuple):
- val = ' '.join(val)
- elif not isinstance(val, str):
- val = str(val)
- elif (
- isinstance(val, AttributeValueWithCharsetSubstitution)
- and eventual_encoding is not None
- ):
- val = val.encode(eventual_encoding)
+ piece = element.output_ready(formatter)
- text = formatter.attribute_value(val)
- decoded = (
- str(key) + '='
- + formatter.quoted_attribute_value(text))
- attrs.append(decoded)
- close = ''
- closeTag = ''
+ # Now we need to apply the 'prettiness' -- extra
+ # whitespace before and/or after this tag. This can get
+ # complicated because certain tags, like
and + # for you +
some
+ for you
+
+Some text
-Some more text
-Another
-Bob -English
-English UK
-English US
-French
-