From 8d11af94f23382263611607e6cf0332243e49fc1 Mon Sep 17 00:00:00 2001 From: Tim Delaney Date: Sat, 11 Feb 2017 16:33:18 +1100 Subject: [PATCH] Use Perl-compatible regexes for RSS rules. Closes #6367. --HG-- branch : magao-dev --- src/base/rss/rssdownloadrule.cpp | 63 ++++++++++++++------------ src/base/rss/rssdownloadrule.h | 6 ++- src/base/utils/string.cpp | 11 +++++ src/base/utils/string.h | 2 + src/gui/rss/automatedrssdownloader.cpp | 47 +++++++++++++------ src/gui/rss/automatedrssdownloader.h | 3 +- 6 files changed, 86 insertions(+), 46 deletions(-) diff --git a/src/base/rss/rssdownloadrule.cpp b/src/base/rss/rssdownloadrule.cpp index eef217f0d..5f897a3d7 100644 --- a/src/base/rss/rssdownloadrule.cpp +++ b/src/base/rss/rssdownloadrule.cpp @@ -28,14 +28,16 @@ * Contact : chris@qbittorrent.org */ -#include #include #include +#include +#include #include #include #include "base/preferences.h" #include "base/utils/fs.h" +#include "base/utils/string.h" #include "rssfeed.h" #include "rssarticle.h" #include "rssdownloadrule.h" @@ -52,23 +54,23 @@ DownloadRule::DownloadRule() bool DownloadRule::matches(const QString &articleTitle, const QString &expression) const { - static QRegExp whitespace("\\s+"); + static QRegularExpression whitespace("\\s+"); if (expression.isEmpty()) { // A regex of the form "expr|" will always match, so do the same for wildcards return true; } else if (m_useRegex) { - QRegExp reg(expression, Qt::CaseInsensitive, QRegExp::RegExp); - return reg.indexIn(articleTitle) > -1; + QRegularExpression reg(expression, QRegularExpression::CaseInsensitiveOption); + return reg.match(articleTitle).hasMatch(); } else { // Only match if every wildcard token (separated by spaces) is present in the article name. // Order of wildcard tokens is unimportant (if order is important, they should have used *). foreach (const QString &wildcard, expression.split(whitespace, QString::SplitBehavior::SkipEmptyParts)) { - QRegExp reg(wildcard, Qt::CaseInsensitive, QRegExp::Wildcard); + QRegularExpression reg(Utils::String::wildcardToRegex(wildcard), QRegularExpression::CaseInsensitiveOption); - if (reg.indexIn(articleTitle) == -1) + if (!reg.match(articleTitle).hasMatch()) return false; } } @@ -124,14 +126,15 @@ bool DownloadRule::matches(const QString &articleTitle) const if (!m_episodeFilter.isEmpty()) { qDebug() << "Checking episode filter:" << m_episodeFilter; - QRegExp f("(^\\d{1,4})x(.*;$)"); - int pos = f.indexIn(m_episodeFilter); + QRegularExpression f("(^\\d{1,4})x(.*;$)"); + QRegularExpressionMatch matcher = f.match(m_episodeFilter); + bool matched = matcher.hasMatch(); - if (pos < 0) + if (!matched) return false; - QString s = f.cap(1); - QStringList eps = f.cap(2).split(";"); + QString s = matcher.captured(1); + QStringList eps = matcher.captured(2).split(";"); int sOurs = s.toInt(); foreach (QString ep, eps) { @@ -145,22 +148,24 @@ bool DownloadRule::matches(const QString &articleTitle) const if (ep.indexOf('-') != -1) { // Range detected QString partialPattern1 = "\\bs0?(\\d{1,4})[ -_\\.]?e(0?\\d{1,4})(?:\\D|\\b)"; QString partialPattern2 = "\\b(\\d{1,4})x(0?\\d{1,4})(?:\\D|\\b)"; - QRegExp reg(partialPattern1, Qt::CaseInsensitive); + QRegularExpression reg(partialPattern1, QRegularExpression::CaseInsensitiveOption); if (ep.endsWith('-')) { // Infinite range int epOurs = ep.left(ep.size() - 1).toInt(); // Extract partial match from article and compare as digits - pos = reg.indexIn(articleTitle); + matcher = reg.match(articleTitle); + matched = matcher.hasMatch(); - if (pos == -1) { - reg = QRegExp(partialPattern2, Qt::CaseInsensitive); - pos = reg.indexIn(articleTitle); + if (!matched) { + reg = QRegularExpression(partialPattern2, QRegularExpression::CaseInsensitiveOption); + matcher = reg.match(articleTitle); + matched = matcher.hasMatch(); } - if (pos != -1) { - int sTheirs = reg.cap(1).toInt(); - int epTheirs = reg.cap(2).toInt(); + if (matched) { + int sTheirs = matcher.captured(1).toInt(); + int epTheirs = matcher.captured(2).toInt(); if (((sTheirs == sOurs) && (epTheirs >= epOurs)) || (sTheirs > sOurs)) { qDebug() << "Matched episode:" << ep; qDebug() << "Matched article:" << articleTitle; @@ -178,16 +183,18 @@ bool DownloadRule::matches(const QString &articleTitle) const int epOursLast = range.last().toInt(); // Extract partial match from article and compare as digits - pos = reg.indexIn(articleTitle); + matcher = reg.match(articleTitle); + matched = matcher.hasMatch(); - if (pos == -1) { - reg = QRegExp(partialPattern2, Qt::CaseInsensitive); - pos = reg.indexIn(articleTitle); + if (!matched) { + reg = QRegularExpression(partialPattern2, QRegularExpression::CaseInsensitiveOption); + matcher = reg.match(articleTitle); + matched = matcher.hasMatch(); } - if (pos != -1) { - int sTheirs = reg.cap(1).toInt(); - int epTheirs = reg.cap(2).toInt(); + if (matched) { + int sTheirs = matcher.captured(1).toInt(); + int epTheirs = matcher.captured(2).toInt(); if ((sTheirs == sOurs) && ((epOursFirst <= epTheirs) && (epOursLast >= epTheirs))) { qDebug() << "Matched episode:" << ep; qDebug() << "Matched article:" << articleTitle; @@ -198,8 +205,8 @@ bool DownloadRule::matches(const QString &articleTitle) const } else { // Single number QString expStr("\\b(?:s0?" + s + "[ -_\\.]?" + "e0?" + ep + "|" + s + "x" + "0?" + ep + ")(?:\\D|\\b)"); - QRegExp reg(expStr, Qt::CaseInsensitive); - if (reg.indexIn(articleTitle) != -1) { + QRegularExpression reg(expStr, QRegularExpression::CaseInsensitiveOption); + if (reg.match(articleTitle).hasMatch()) { qDebug() << "Matched episode:" << ep; qDebug() << "Matched article:" << articleTitle; return true; diff --git a/src/base/rss/rssdownloadrule.h b/src/base/rss/rssdownloadrule.h index ac728f190..6b64943be 100644 --- a/src/base/rss/rssdownloadrule.h +++ b/src/base/rss/rssdownloadrule.h @@ -31,10 +31,12 @@ #ifndef RSSDOWNLOADRULE_H #define RSSDOWNLOADRULE_H -#include +#include #include #include -#include +#include + +class QRegularExpression; namespace Rss { diff --git a/src/base/utils/string.cpp b/src/base/utils/string.cpp index 6c713b10d..8a4a89bba 100644 --- a/src/base/utils/string.cpp +++ b/src/base/utils/string.cpp @@ -38,6 +38,7 @@ #ifdef QBT_USES_QT5 #include #endif +#include #ifdef Q_OS_MAC #include #endif @@ -238,3 +239,13 @@ QString Utils::String::toHtmlEscaped(const QString &str) return rich; #endif } + +// This is marked as internal in QRegExp.cpp, but is exported. The alternative would be to +// copy the code from QRegExp::wc2rx(). +QString qt_regexp_toCanonical(const QString &pattern, QRegExp::PatternSyntax patternSyntax); + +QString Utils::String::wildcardToRegex(const QString &pattern) +{ + return qt_regexp_toCanonical(pattern, QRegExp::Wildcard); +} + diff --git a/src/base/utils/string.h b/src/base/utils/string.h index 52f4863ef..d1ab928ae 100644 --- a/src/base/utils/string.h +++ b/src/base/utils/string.h @@ -51,6 +51,8 @@ namespace Utils bool naturalCompareCaseSensitive(const QString &left, const QString &right); bool naturalCompareCaseInsensitive(const QString &left, const QString &right); + + QString wildcardToRegex(const QString &pattern); } } diff --git a/src/gui/rss/automatedrssdownloader.cpp b/src/gui/rss/automatedrssdownloader.cpp index 86cb3ac6c..f144c5cd8 100644 --- a/src/gui/rss/automatedrssdownloader.cpp +++ b/src/gui/rss/automatedrssdownloader.cpp @@ -28,14 +28,15 @@ * Contact : chris@qbittorrent.org */ -#include -#include -#include -#include #include +#include +#include +#include +#include +#include -#include "base/preferences.h" #include "base/bittorrent/session.h" +#include "base/preferences.h" #include "base/rss/rssdownloadrulelist.h" #include "base/rss/rssmanager.h" #include "base/rss/rssfolder.h" @@ -72,8 +73,8 @@ AutomatedRssDownloader::AutomatedRssDownloader(const QWeakPointer Q_ASSERT(ok); m_ruleList = manager.toStrongRef()->downloadRules(); m_editableRuleList = new Rss::DownloadRuleList; // Read rule list from disk - m_episodeRegex = new QRegExp("^(^\\d{1,4}x(\\d{1,4}(-(\\d{1,4})?)?;){1,}){1,1}", - Qt::CaseInsensitive); + m_episodeRegex = new QRegularExpression("^(^\\d{1,4}x(\\d{1,4}(-(\\d{1,4})?)?;){1,}){1,1}", + QRegularExpression::CaseInsensitiveOption); QString tip = "

" + tr("Matches articles based on episode filter.") + "

" + tr("Example: ") + "1x2;8-15;5;30-;" + tr(" will match 2, 5, 8 through 15, 30 and onward episodes of season one", "example X will match") + "

"; tip += "

" + tr("Episode filter rules: ") + "

  • " + tr("Season number is a mandatory non-zero value") + "
  • " @@ -671,7 +672,7 @@ void AutomatedRssDownloader::updateFieldsToolTips(bool regex) { QString tip; if (regex) { - tip = "

    " + tr("Regex mode: use Perl-like regular expressions") + "

    "; + tip = "

    " + tr("Regex mode: use Perl-compatible regular expressions") + "

    "; } else { tip = "

    " + tr("Wildcard mode: you can use") + "

      " @@ -697,17 +698,23 @@ void AutomatedRssDownloader::updateFieldsToolTips(bool regex) void AutomatedRssDownloader::updateMustLineValidity() { const QString text = ui->lineContains->text(); + bool isRegex = ui->checkRegex->isChecked(); bool valid = true; + QString error; if (!text.isEmpty()) { QStringList tokens; - if (ui->checkRegex->isChecked()) + if (isRegex) tokens << text; else - tokens << text.split("|"); + foreach (const QString &token, text.split("|")) + tokens << Utils::String::wildcardToRegex(token); + foreach (const QString &token, tokens) { - QRegExp reg(token, Qt::CaseInsensitive, ui->checkRegex->isChecked() ? QRegExp::RegExp : QRegExp::Wildcard); + QRegularExpression reg(token, QRegularExpression::CaseInsensitiveOption); if (!reg.isValid()) { + if (isRegex) + error = tr("Position %1: %2").arg(reg.patternErrorOffset()).arg(reg.errorString()); valid = false; break; } @@ -717,27 +724,35 @@ void AutomatedRssDownloader::updateMustLineValidity() if (valid) { ui->lineContains->setStyleSheet(""); ui->lbl_must_stat->setPixmap(QPixmap()); + ui->lbl_must_stat->setToolTip(QLatin1String("")); } else { ui->lineContains->setStyleSheet("QLineEdit { color: #ff0000; }"); ui->lbl_must_stat->setPixmap(GuiIconProvider::instance()->getIcon("task-attention").pixmap(16, 16)); + ui->lbl_must_stat->setToolTip(error); } } void AutomatedRssDownloader::updateMustNotLineValidity() { const QString text = ui->lineNotContains->text(); + bool isRegex = ui->checkRegex->isChecked(); bool valid = true; + QString error; if (!text.isEmpty()) { QStringList tokens; - if (ui->checkRegex->isChecked()) + if (isRegex) tokens << text; else - tokens << text.split("|"); + foreach (const QString &token, text.split("|")) + tokens << Utils::String::wildcardToRegex(token); + foreach (const QString &token, tokens) { - QRegExp reg(token, Qt::CaseInsensitive, ui->checkRegex->isChecked() ? QRegExp::RegExp : QRegExp::Wildcard); + QRegularExpression reg(token, QRegularExpression::CaseInsensitiveOption); if (!reg.isValid()) { + if (isRegex) + error = tr("Position %1: %2").arg(reg.patternErrorOffset()).arg(reg.errorString()); valid = false; break; } @@ -747,17 +762,19 @@ void AutomatedRssDownloader::updateMustNotLineValidity() if (valid) { ui->lineNotContains->setStyleSheet(""); ui->lbl_mustnot_stat->setPixmap(QPixmap()); + ui->lbl_mustnot_stat->setToolTip(QLatin1String("")); } else { ui->lineNotContains->setStyleSheet("QLineEdit { color: #ff0000; }"); ui->lbl_mustnot_stat->setPixmap(GuiIconProvider::instance()->getIcon("task-attention").pixmap(16, 16)); + ui->lbl_mustnot_stat->setToolTip(error); } } void AutomatedRssDownloader::updateEpisodeFilterValidity() { const QString text = ui->lineEFilter->text(); - bool valid = text.isEmpty() || m_episodeRegex->indexIn(text) != -1; + bool valid = text.isEmpty() || m_episodeRegex->match(text).hasMatch(); if (valid) { ui->lineEFilter->setStyleSheet(""); diff --git a/src/gui/rss/automatedrssdownloader.h b/src/gui/rss/automatedrssdownloader.h index 80df928c3..c6b9c0744 100644 --- a/src/gui/rss/automatedrssdownloader.h +++ b/src/gui/rss/automatedrssdownloader.h @@ -57,6 +57,7 @@ namespace Rss QT_BEGIN_NAMESPACE class QListWidgetItem; +class QRegularExpression; QT_END_NAMESPACE class AutomatedRssDownloader: public QDialog @@ -112,7 +113,7 @@ private: QListWidgetItem *m_editedRule; Rss::DownloadRuleList *m_ruleList; Rss::DownloadRuleList *m_editableRuleList; - QRegExp *m_episodeRegex; + QRegularExpression *m_episodeRegex; QShortcut *editHotkey; QShortcut *deleteHotkey; QSet> m_treeListEntries;