From 41d7d672cecf78bd9d2c1f0628566c560974d79f Mon Sep 17 00:00:00 2001 From: Vladimir Golovnev Date: Thu, 26 Jun 2025 08:49:58 +0300 Subject: [PATCH] Optimize parsing of search results PR #22906. --- src/base/search/searchhandler.cpp | 34 ++++++++---------- src/base/search/searchhandler.h | 4 +-- src/base/utils/bytearray.cpp | 36 ++++++++++++++----- src/base/utils/bytearray.h | 6 ++-- test/testutilsbytearray.cpp | 60 +++++++++++++++++++++++-------- 5 files changed, 92 insertions(+), 48 deletions(-) diff --git a/src/base/search/searchhandler.cpp b/src/base/search/searchhandler.cpp index 8479a1a64..d1d9fb4b7 100644 --- a/src/base/search/searchhandler.cpp +++ b/src/base/search/searchhandler.cpp @@ -1,6 +1,6 @@ /* * Bittorrent Client using Qt and libtorrent. - * Copyright (C) 2015-2024 Vladimir Golovnev + * Copyright (C) 2015-2025 Vladimir Golovnev * Copyright (C) 2006 Christophe Dumez * * This program is free software; you can redistribute it and/or @@ -38,6 +38,7 @@ #include "base/global.h" #include "base/path.h" +#include "base/utils/bytearray.h" #include "base/utils/foreignapps.h" #include "base/utils/fs.h" #include "searchpluginmanager.h" @@ -139,28 +140,23 @@ void SearchHandler::processFinished(const int exitcode) // line to SearchResult calling parseSearchResult(). 
void SearchHandler::readSearchOutput() { - QByteArray output = m_searchProcess->readAllStandardOutput(); - output.replace('\r', ""); + const QByteArray output = m_searchResultLineTruncated + m_searchProcess->readAllStandardOutput(); + QList lines = Utils::ByteArray::splitToViews(output, "\n", Qt::KeepEmptyParts); - QList lines = output.split('\n'); - if (!m_searchResultLineTruncated.isEmpty()) - lines.prepend(m_searchResultLineTruncated + lines.takeFirst()); - m_searchResultLineTruncated = lines.takeLast().trimmed(); + m_searchResultLineTruncated = lines.takeLast().trimmed().toByteArray(); QList searchResultList; searchResultList.reserve(lines.size()); - for (const QByteArray &line : asConst(lines)) + for (const QByteArrayView &line : asConst(lines)) { - SearchResult searchResult; - if (parseSearchResult(QString::fromUtf8(line), searchResult)) - searchResultList << searchResult; + if (SearchResult searchResult; parseSearchResult(line, searchResult)) + searchResultList.append(std::move(searchResult)); } if (!searchResultList.isEmpty()) { - for (const SearchResult &result : searchResultList) - m_results.append(result); + m_results.append(searchResultList); emit newSearchResults(searchResultList); } } @@ -174,17 +170,17 @@ void SearchHandler::processFailed() // Parse one line of search results list // Line is in the following form: // file url | file name | file size | nb seeds | nb leechers | Search engine url -bool SearchHandler::parseSearchResult(const QStringView line, SearchResult &searchResult) +bool SearchHandler::parseSearchResult(const QByteArrayView line, SearchResult &searchResult) { - const QList parts = line.split(u'|'); + const QList parts = Utils::ByteArray::splitToViews(line, "|"); const int nbFields = parts.size(); if (nbFields <= PL_ENGINE_URL) return false; // Anything after ENGINE_URL is optional searchResult = SearchResult(); - searchResult.fileUrl = parts.at(PL_DL_LINK).trimmed().toString(); // download URL - searchResult.fileName = 
parts.at(PL_NAME).trimmed().toString(); // Name + searchResult.fileUrl = QString::fromUtf8(parts.at(PL_DL_LINK).trimmed()); // download URL + searchResult.fileName = QString::fromUtf8(parts.at(PL_NAME).trimmed()); // Name searchResult.fileSize = parts.at(PL_SIZE).trimmed().toLongLong(); // Size bool ok = false; @@ -197,11 +193,11 @@ bool SearchHandler::parseSearchResult(const QStringView line, SearchResult &sear if (!ok || (searchResult.nbLeechers < 0)) searchResult.nbLeechers = -1; - searchResult.siteUrl = parts.at(PL_ENGINE_URL).trimmed().toString(); // Search engine site URL + searchResult.siteUrl = QString::fromUtf8(parts.at(PL_ENGINE_URL).trimmed()); // Search engine site URL searchResult.engineName = m_manager->pluginNameBySiteURL(searchResult.siteUrl); // Search engine name if (nbFields > PL_DESC_LINK) - searchResult.descrLink = parts.at(PL_DESC_LINK).trimmed().toString(); // Description Link + searchResult.descrLink = QString::fromUtf8(parts.at(PL_DESC_LINK).trimmed()); // Description Link if (nbFields > PL_PUB_DATE) { diff --git a/src/base/search/searchhandler.h b/src/base/search/searchhandler.h index 40cf58754..fe6fe9d76 100644 --- a/src/base/search/searchhandler.h +++ b/src/base/search/searchhandler.h @@ -1,6 +1,6 @@ /* * Bittorrent Client using Qt and libtorrent. 
- * Copyright (C) 2015-2024 Vladimir Golovnev + * Copyright (C) 2015-2025 Vladimir Golovnev * Copyright (C) 2006 Christophe Dumez * * This program is free software; you can redistribute it and/or @@ -81,7 +81,7 @@ private: void readSearchOutput(); void processFailed(); void processFinished(int exitcode); - bool parseSearchResult(QStringView line, SearchResult &searchResult); + bool parseSearchResult(QByteArrayView line, SearchResult &searchResult); const QString m_pattern; const QString m_category; diff --git a/src/base/utils/bytearray.cpp b/src/base/utils/bytearray.cpp index 81e0a2627..d372f507f 100644 --- a/src/base/utils/bytearray.cpp +++ b/src/base/utils/bytearray.cpp @@ -1,6 +1,6 @@ /* * Bittorrent Client using Qt and libtorrent. - * Copyright (C) 2023 Vladimir Golovnev + * Copyright (C) 2023-2025 Vladimir Golovnev * Copyright (C) 2018 Mike Tzou (Chocobo1) * * This program is free software; you can redistribute it and/or @@ -34,16 +34,43 @@ #include #include -QList Utils::ByteArray::splitToViews(const QByteArrayView in, const QByteArrayView sep) +// Splits `in` at every occurrence of `sep` and returns views into `in` (no bytes are copied). +// `behavior` selects whether empty parts are dropped (Qt::SkipEmptyParts) or kept (Qt::KeepEmptyParts). +QList Utils::ByteArray::splitToViews(const QByteArrayView in, const QByteArrayView sep, const Qt::SplitBehavior behavior) { - if (in.isEmpty()) - return {}; - if (sep.isEmpty()) - return {in}; + if (behavior == Qt::SkipEmptyParts) + { + if (in.isEmpty()) + return {}; + + if (sep.isEmpty()) + return {in}; + } + else + { + // An empty separator has size 0: the reserve() estimate below would divide by zero + // and `head` could never advance past a zero-length match, so handle it up front. + // Result: an empty part, every single byte of `in`, an empty part ({{}, {}} for empty `in`). + if (sep.isEmpty()) + { + QList<QByteArrayView> parts; + parts.reserve(in.size() + 2); + parts.emplaceBack(); + for (qsizetype i = 0; i < in.size(); ++i) + parts += in.sliced(i, 1); + parts.emplaceBack(); + return parts; + } + + if (in.isEmpty()) + return {{}}; + } const QByteArrayMatcher matcher {sep}; QList ret; - ret.reserve(1 + (in.size() / (sep.size() + 1))); + ret.reserve((behavior == Qt::SkipEmptyParts) + ? 
(1 + (in.size() / (sep.size() + 1))) + : (1 + (in.size() / sep.size()))); qsizetype head = 0; while (head < in.size()) { @@ -51,14 +78,16 @@ QList Utils::ByteArray::splitToViews(const QByteArrayView in, co if (end < 0) end = in.size(); - // omit empty parts const QByteArrayView part = in.sliced(head, (end - head)); - if (!part.isEmpty()) + if (!part.isEmpty() || (behavior == Qt::KeepEmptyParts)) ret += part; head = end + sep.size(); } + if ((behavior == Qt::KeepEmptyParts) && (head == in.size())) + ret.emplaceBack(); + return ret; } diff --git a/src/base/utils/bytearray.h b/src/base/utils/bytearray.h index 0f0d38fe7..03f0f06d0 100644 --- a/src/base/utils/bytearray.h +++ b/src/base/utils/bytearray.h @@ -1,6 +1,6 @@ /* * Bittorrent Client using Qt and libtorrent. - * Copyright (C) 2023 Vladimir Golovnev + * Copyright (C) 2023-2025 Vladimir Golovnev * Copyright (C) 2018 Mike Tzou (Chocobo1) * * This program is free software; you can redistribute it and/or @@ -37,8 +37,8 @@ class QByteArrayView; namespace Utils::ByteArray { - // Inspired by QStringView(in).split(sep, Qt::SkipEmptyParts) - QList splitToViews(QByteArrayView in, QByteArrayView sep); + // Inspired by QStringView(in).split(sep, behavior) + QList splitToViews(QByteArrayView in, QByteArrayView sep, Qt::SplitBehavior behavior = Qt::SkipEmptyParts); QByteArray asQByteArray(QByteArrayView view); QByteArray toBase32(const QByteArray &in); diff --git a/test/testutilsbytearray.cpp b/test/testutilsbytearray.cpp index 39cf04a6c..f00d43c09 100644 --- a/test/testutilsbytearray.cpp +++ b/test/testutilsbytearray.cpp @@ -47,7 +47,7 @@ private slots: { using BAViews = QList; - const auto check = [](const QByteArrayView in, const QByteArrayView sep, const BAViews expected) + const auto checkSkipEmptyParts = [](const QByteArrayView in, const QByteArrayView sep, const BAViews expected) { // verify it works QCOMPARE(Utils::ByteArray::splitToViews(in, sep), expected); @@ -56,26 +56,56 @@ private slots: using Latin1Views = 
QList; const Latin1Views reference = QLatin1StringView(in) - .tokenize(QLatin1StringView(sep), Qt::SkipEmptyParts).toContainer(); + .tokenize(QLatin1StringView(sep), Qt::SkipEmptyParts).toContainer(); Latin1Views expectedStrings; for (const auto &string : expected) expectedStrings.append(QLatin1StringView(string)); QCOMPARE(reference, expectedStrings); }; - check({}, {}, {}); - check({}, "/", {}); - check("/", "/", {}); - check("/a", "/", {"a"}); - check("/a/", "/", {"a"}); - check("/a/b", "/", (BAViews {"a", "b"})); - check("/a/b/", "/", (BAViews {"a", "b"})); - check("/a/b", "//", {"/a/b"}); - check("//a/b", "//", {"a/b"}); - check("//a//b", "//", (BAViews {"a", "b"})); - check("//a//b/", "//", (BAViews {"a", "b/"})); - check("//a//b//", "//", (BAViews {"a", "b"})); - check("///a//b//", "//", (BAViews {"/a", "b"})); + checkSkipEmptyParts({}, {}, {}); + checkSkipEmptyParts({}, "/", {}); + checkSkipEmptyParts("/", "/", {}); + checkSkipEmptyParts("/a", "/", {"a"}); + checkSkipEmptyParts("/a/", "/", {"a"}); + checkSkipEmptyParts("/a/b", "/", (BAViews {"a", "b"})); + checkSkipEmptyParts("/a/b/", "/", (BAViews {"a", "b"})); + checkSkipEmptyParts("/a/b", "//", {"/a/b"}); + checkSkipEmptyParts("//a/b", "//", {"a/b"}); + checkSkipEmptyParts("//a//b", "//", (BAViews {"a", "b"})); + checkSkipEmptyParts("//a//b/", "//", (BAViews {"a", "b/"})); + checkSkipEmptyParts("//a//b//", "//", (BAViews {"a", "b"})); + checkSkipEmptyParts("///a//b//", "//", (BAViews {"/a", "b"})); + + const auto checkKeepEmptyParts = [](const QByteArrayView in, const QByteArrayView sep, const BAViews expected) + { + // verify it works + QCOMPARE(Utils::ByteArray::splitToViews(in, sep, Qt::KeepEmptyParts), expected); + + // verify it has the same behavior as `split(Qt::KeepEmptyParts)` + using Latin1Views = QList; + + const Latin1Views reference = QLatin1StringView(in) + .tokenize(QLatin1StringView(sep), Qt::KeepEmptyParts).toContainer(); + Latin1Views expectedStrings; + for (const auto &string : 
expected) + expectedStrings.append(QLatin1StringView(string)); + QCOMPARE(reference, expectedStrings); + }; + + checkKeepEmptyParts({}, {}, {{}, {}}); + checkKeepEmptyParts({}, "/", {{}}); + checkKeepEmptyParts("/", "/", {"", ""}); + checkKeepEmptyParts("/a", "/", {"", "a"}); + checkKeepEmptyParts("/a/", "/", {"", "a", ""}); + checkKeepEmptyParts("/a/b", "/", (BAViews {"", "a", "b"})); + checkKeepEmptyParts("/a/b/", "/", (BAViews {"", "a", "b", ""})); + checkKeepEmptyParts("/a/b", "//", {"/a/b"}); + checkKeepEmptyParts("//a/b", "//", {"", "a/b"}); + checkKeepEmptyParts("//a//b", "//", (BAViews {"", "a", "b"})); + checkKeepEmptyParts("//a//b/", "//", (BAViews {"", "a", "b/"})); + checkKeepEmptyParts("//a//b//", "//", (BAViews {"", "a", "b", ""})); + checkKeepEmptyParts("///a//b//", "//", (BAViews {"", "/a", "b", ""})); } void testAsQByteArray() const