Improve parsing of HTTP headers

Parse HTTP headers using raw byte arrays instead of strings. This allows us to apply different encodings for different parts.

This change is backward compatible and should not affect any existing operation, so a WebAPI version bump is not required.
This commit is contained in:
Hanabishi 2025-08-18 10:12:09 +00:00
commit cf7864369f
5 changed files with 77 additions and 27 deletions

View file

@ -31,12 +31,13 @@
#include "requestparser.h" #include "requestparser.h"
#include <algorithm> #include <algorithm>
#include <optional>
#include <utility> #include <utility>
#include <QByteArrayList>
#include <QByteArrayView> #include <QByteArrayView>
#include <QDebug> #include <QDebug>
#include <QRegularExpression> #include <QRegularExpression>
#include <QStringList>
#include <QUrl> #include <QUrl>
#include <QUrlQuery> #include <QUrlQuery>
@ -59,21 +60,19 @@ namespace
return in; return in;
} }
bool parseHeaderLine(const QStringView line, HeaderMap &out) std::optional<QStringPair> parseHeaderLine(const QByteArrayView line)
{ {
// [rfc7230] 3.2. Header Fields // [rfc7230] 3.2. Header Fields
const int i = line.indexOf(u':'); const int i = line.indexOf(u':');
if (i <= 0) if (i <= 0)
{ {
qWarning() << Q_FUNC_INFO << "invalid http header:" << line; qWarning() << Q_FUNC_INFO << "invalid http header:" << line;
return false; return std::nullopt;
} }
const QString name = line.first(i).trimmed().toString().toLower(); const QString name = QString::fromLatin1(line.first(i).trimmed()).toLower();
const QString value = line.sliced(i + 1).trimmed().toString(); const QString value = QString::fromLatin1(line.sliced(i + 1).trimmed());
out[name] = value; return {{name, value}};
return true;
} }
} }
@ -93,7 +92,7 @@ RequestParser::ParseResult RequestParser::doParse(const QByteArrayView data)
return {ParseStatus::Incomplete, Request(), 0}; return {ParseStatus::Incomplete, Request(), 0};
} }
const QString httpHeaders = QString::fromLatin1(data.constData(), headerEnd); const QByteArrayView httpHeaders = data.first(headerEnd);
if (!parseStartLines(httpHeaders)) if (!parseStartLines(httpHeaders))
{ {
qWarning() << Q_FUNC_INFO << "header parsing error"; qWarning() << Q_FUNC_INFO << "header parsing error";
@ -152,36 +151,40 @@ RequestParser::ParseResult RequestParser::doParse(const QByteArrayView data)
return {ParseStatus::BadMethod, m_request, 0}; return {ParseStatus::BadMethod, m_request, 0};
} }
bool RequestParser::parseStartLines(const QStringView data) bool RequestParser::parseStartLines(const QByteArrayView data)
{ {
// we don't handle malformed request which uses `LF` for newline // we don't handle malformed request which uses `LF` for newline
const QList<QStringView> lines = data.split(QString::fromLatin1(CRLF), Qt::SkipEmptyParts); const QList<QByteArrayView> lines = splitToViews(data, CRLF, Qt::SkipEmptyParts);
// [rfc7230] 3.2.2. Field Order // [rfc7230] 3.2.2. Field Order
QStringList requestLines; QByteArrayList requestLines;
for (const auto &line : lines) for (const auto &line : lines)
{ {
if (line.at(0).isSpace() && !requestLines.isEmpty()) if (QChar::fromLatin1(line.at(0)).isSpace() && !requestLines.isEmpty())
{ {
// continuation of previous line // continuation of previous line
requestLines.last() += line; requestLines.last() += line;
} }
else else
{ {
requestLines += line.toString(); requestLines += line.toByteArray();
} }
} }
if (requestLines.isEmpty()) if (requestLines.isEmpty())
return false; return false;
if (!parseRequestLine(requestLines[0])) if (!parseRequestLine(QString::fromLatin1(requestLines[0])))
return false; return false;
for (auto i = ++(requestLines.begin()); i != requestLines.end(); ++i) for (auto i = ++(requestLines.begin()); i != requestLines.end(); ++i)
{ {
if (!parseHeaderLine(*i, m_request.headers)) const std::optional<QStringPair> header = parseHeaderLine(*i);
if (!header.has_value())
return false; return false;
const auto [name, value] = header.value();
m_request.headers[name] = value;
} }
return true; return true;
@ -310,17 +313,23 @@ bool RequestParser::parseFormData(const QByteArrayView data)
return false; return false;
} }
const QString headers = QString::fromLatin1(data.first(eohPos)); const QByteArrayView headers = data.first(eohPos);
const QByteArrayView payload = viewWithoutEndingWith(data.sliced((eohPos + EOH.size())), CRLF); const QByteArrayView payload = viewWithoutEndingWith(data.sliced((eohPos + EOH.size())), CRLF);
HeaderMap headersMap; HeaderMap headersMap;
const QList<QStringView> headerLines = QStringView(headers).split(QString::fromLatin1(CRLF), Qt::SkipEmptyParts); const QList<QByteArrayView> headerLines = splitToViews(headers, CRLF, Qt::SkipEmptyParts);
for (const auto &line : headerLines) for (const auto &line : headerLines)
{ {
if (line.trimmed().startsWith(HEADER_CONTENT_DISPOSITION, Qt::CaseInsensitive)) const std::optional<QStringPair> header = parseHeaderLine(line);
if (!header.has_value())
return false;
const auto [name, value] = header.value();
if (name == HEADER_CONTENT_DISPOSITION)
{ {
// extract out filename & name // extract out filename & name
const QList<QStringView> directives = line.split(u';', Qt::SkipEmptyParts); const QList<QByteArrayView> directives = splitToViews(line, ";", Qt::SkipEmptyParts);
for (const auto &directive : directives) for (const auto &directive : directives)
{ {
@ -328,15 +337,14 @@ bool RequestParser::parseFormData(const QByteArrayView data)
if (idx < 0) if (idx < 0)
continue; continue;
const QString name = directive.first(idx).trimmed().toString().toLower(); const QString name = QString::fromLatin1(directive.first(idx).trimmed()).toLower();
const QString value = Utils::String::unquote(directive.sliced(idx + 1).trimmed()).toString(); const QString value = QString::fromLatin1(unquote(directive.sliced(idx + 1).trimmed()));
headersMap[name] = value; headersMap[name] = value;
} }
} }
else else
{ {
if (!parseHeaderLine(line, headersMap)) headersMap[name] = value;
return false;
} }
} }

View file

@ -61,7 +61,7 @@ namespace Http
RequestParser() = default; RequestParser() = default;
ParseResult doParse(QByteArrayView data); ParseResult doParse(QByteArrayView data);
bool parseStartLines(QStringView data); bool parseStartLines(QByteArrayView data);
bool parseRequestLine(const QString &line); bool parseRequestLine(const QString &line);
bool parsePostMessage(QByteArrayView data); bool parsePostMessage(QByteArrayView data);

View file

@ -31,7 +31,6 @@
#include <QByteArray> #include <QByteArray>
#include <QByteArrayMatcher> #include <QByteArrayMatcher>
#include <QByteArrayView>
#include <QList> #include <QList>
QList<QByteArrayView> Utils::ByteArray::splitToViews(const QByteArrayView in, const QByteArrayView sep, const Qt::SplitBehavior behavior) QList<QByteArrayView> Utils::ByteArray::splitToViews(const QByteArrayView in, const QByteArrayView sep, const Qt::SplitBehavior behavior)

View file

@ -31,9 +31,9 @@
#include <Qt> #include <Qt>
#include <QtContainerFwd> #include <QtContainerFwd>
#include <QByteArrayView>
class QByteArray; class QByteArray;
class QByteArrayView;
namespace Utils::ByteArray namespace Utils::ByteArray
{ {
@ -42,4 +42,19 @@ namespace Utils::ByteArray
QByteArray asQByteArray(QByteArrayView view); QByteArray asQByteArray(QByteArrayView view);
QByteArray toBase32(const QByteArray &in); QByteArray toBase32(const QByteArray &in);
template <typename T>
T unquote(const T &arr, const QByteArrayView quotes = "\"")
{
if (arr.length() < 2)
return arr;
for (const char quote : quotes)
{
if (arr.startsWith(quote) && arr.endsWith(quote))
return arr.sliced(1, (arr.length() - 2));
}
return arr;
}
} }

View file

@ -28,6 +28,7 @@
*/ */
#include <QByteArray> #include <QByteArray>
#include <QByteArrayView>
#include <QLatin1StringView> #include <QLatin1StringView>
#include <QObject> #include <QObject>
#include <QTest> #include <QTest>
@ -123,6 +124,33 @@ private slots:
QCOMPARE(Utils::ByteArray::toBase32("0000000000"), "GAYDAMBQGAYDAMBQ"); QCOMPARE(Utils::ByteArray::toBase32("0000000000"), "GAYDAMBQGAYDAMBQ");
QCOMPARE(Utils::ByteArray::toBase32("1"), "GE======"); QCOMPARE(Utils::ByteArray::toBase32("1"), "GE======");
} }
// Exercises Utils::ByteArray::unquote() with the same set of assertions for
// each supported argument type.
void testUnquote() const
{
// Generic lambda: T is the type unquote() is instantiated with.
const auto test = []<typename T>()
{
// default quote character (double quote)
QCOMPARE(Utils::ByteArray::unquote<T>({}), {});
QCOMPARE(Utils::ByteArray::unquote<T>("abc"), "abc");
QCOMPARE(Utils::ByteArray::unquote<T>("\"abc\""), "abc");
QCOMPARE(Utils::ByteArray::unquote<T>("\"a b c\""), "a b c");
// a quote on one side only must be left untouched
QCOMPARE(Utils::ByteArray::unquote<T>("\"abc"), "\"abc");
QCOMPARE(Utils::ByteArray::unquote<T>("abc\""), "abc\"");
// surrounding whitespace is not trimmed, so nothing is removed here
QCOMPARE(Utils::ByteArray::unquote<T>(" \"abc\" "), " \"abc\" ");
// only the outermost pair is removed; an inner quote is kept
QCOMPARE(Utils::ByteArray::unquote<T>("\"a\"bc\""), "a\"bc");
// custom quote characters passed as the second argument
QCOMPARE(Utils::ByteArray::unquote<T>("'abc'", "'"), "abc");
QCOMPARE(Utils::ByteArray::unquote<T>("'abc'", "\"'"), "abc");
// nested quotes: a single layer is removed, whatever the order of the candidates
QCOMPARE(Utils::ByteArray::unquote<T>("\"'abc'\"", "\"'"), "'abc'");
QCOMPARE(Utils::ByteArray::unquote<T>("\"'abc'\"", "'\""), "'abc'");
// the enclosing character is not among the candidates, or the two ends differ
QCOMPARE(Utils::ByteArray::unquote<T>("\"'abc'\"", "'"), "\"'abc'\"");
QCOMPARE(Utils::ByteArray::unquote<T>("\"abc'", "'"), "\"abc'");
QCOMPARE(Utils::ByteArray::unquote<T>("'abc\"", "'"), "'abc\"");
// boundary lengths: an empty quoted string and a lone quote character
QCOMPARE(Utils::ByteArray::unquote<T>("\"\""), "");
QCOMPARE(Utils::ByteArray::unquote<T>("\""), "\"");
};
// The lambda has no function parameters, so T must be given explicitly
// through the call operator template.
test.template operator()<QByteArray>();
test.template operator()<QByteArrayView>();
}
}; };
QTEST_APPLESS_MAIN(TestUtilsByteArray) QTEST_APPLESS_MAIN(TestUtilsByteArray)