Make URL normalization closer to common browser behavior

Firefox, Chrome and various HTTP libraries resolve /./ and /../ segments
in URLs, but retain multiple adjacent slashes as is. Qt removes
duplicated slashes, which makes it impossible to access some web
resources that rely on those.

Fixes: QTBUG-71973
Change-Id: Ie18ae6ad3264acb252fcd87a754726a8c546e5ec
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Joni Poikelin 2018-12-11 11:42:34 +02:00
parent bc997b856a
commit eaf4438b35
5 changed files with 106 additions and 23 deletions

View File

@ -2161,9 +2161,10 @@ bool QDir::match(const QString &filter, const QString &fileName)
This method is shared with QUrl, so it doesn't deal with QDir::separator(),
nor does it remove the trailing slash, if any.
*/
Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, bool allowUncPaths,
bool *ok = nullptr)
QString qt_normalizePathSegments(const QString &name, QDirPrivate::PathNormalizations flags, bool *ok)
{
const bool allowUncPaths = QDirPrivate::AllowUncPaths & flags;
const bool isRemote = QDirPrivate::RemotePath & flags;
const int len = name.length();
if (ok)
@ -2185,14 +2186,30 @@ Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, bool all
i -= prefixLength;
// replicate trailing slash (i > 0 checks for emptiness of input string p)
if (i > 0 && p[i] == '/') {
// except for remote paths because there can be /../ or /./ ending
if (i > 0 && p[i] == '/' && !isRemote) {
out[--used] = '/';
--i;
}
auto isDot = [](const ushort *p, int i) {
return i > 1 && p[i - 1] == '.' && p[i - 2] == '/';
};
auto isDotDot = [](const ushort *p, int i) {
return i > 2 && p[i - 1] == '.' && p[i - 2] == '.' && p[i - 3] == '/';
};
while (i >= 0) {
// remove trailing slashes
// copy trailing slashes for remote urls
if (p[i] == '/') {
if (isRemote && !up) {
if (isDot(p, i)) {
i -= 2;
continue;
}
out[--used] = p[i];
}
--i;
continue;
}
@ -2204,10 +2221,17 @@ Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, bool all
}
// detect up dir
if (i >= 1 && p[i] == '.' && p[i-1] == '.'
&& (i == 1 || (i >= 2 && p[i-2] == '/'))) {
if (i >= 1 && p[i] == '.' && p[i-1] == '.' && (i < 2 || p[i - 2] == '/')) {
++up;
i -= 2;
i -= i >= 2 ? 3 : 2;
if (isRemote) {
// moving up should consider empty path segments too (/path//../ -> /path/)
while (i > 0 && up && p[i] == '/') {
--up;
--i;
}
}
continue;
}
@ -2217,7 +2241,27 @@ Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, bool all
// skip or copy
while (i >= 0) {
if (p[i] == '/') { // do not copy slashes
if (p[i] == '/') {
// copy all slashes as is for remote urls if they are not part of /./ or /../
if (isRemote && !up) {
while (i > 0 && p[i] == '/' && !isDotDot(p, i)) {
if (isDot(p, i)) {
i -= 2;
continue;
}
out[--used] = p[i];
--i;
}
// in case of /./, jump over
if (isDot(p, i))
i -= 2;
break;
}
--i;
break;
}
@ -2238,7 +2282,7 @@ Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, bool all
*ok = prefixLength == 0 || up == 0;
// add remaining '..'
while (up) {
while (up && !isRemote) {
if (used != len && out[used] != '/') // is not empty and there isn't already a '/'
out[--used] = '/';
out[--used] = '.';
@ -2284,7 +2328,7 @@ static QString qt_cleanPath(const QString &path, bool *ok)
if (dir_separator != QLatin1Char('/'))
name.replace(dir_separator, QLatin1Char('/'));
QString ret = qt_normalizePathSegments(name, OSSupportsUncPaths, ok);
QString ret = qt_normalizePathSegments(name, OSSupportsUncPaths ? QDirPrivate::AllowUncPaths : QDirPrivate::DefaultNormalization, ok);
// Strip away last slash except for root directories
if (ret.length() > 1 && ret.endsWith(QLatin1Char('/'))) {

View File

@ -59,6 +59,14 @@ QT_BEGIN_NAMESPACE
class QDirPrivate : public QSharedData
{
public:
// Options that select how qt_normalizePathSegments() treats a path.
// The values are bit flags and are combined through PathNormalizations.
enum PathNormalization {
// No special handling; used for local paths when UNC paths are not allowed.
DefaultNormalization = 0x00,
// Read by qt_normalizePathSegments() as its "allowUncPaths" switch;
// qt_cleanPath() passes it when OSSupportsUncPaths is set.
AllowUncPaths = 0x01,
// Passed by QUrlPrivate::appendPath() for URLs that are not local files.
// In this mode adjacent slashes are kept, the trailing slash is not
// replicated up front, and leftover ".." segments are not re-added.
RemotePath = 0x02
};
// Flag-set type taken by qt_normalizePathSegments().
Q_DECLARE_FLAGS(PathNormalizations, PathNormalization)
Q_FLAGS(PathNormalizations)
explicit QDirPrivate(const QString &path, const QStringList &nameFilters_ = QStringList(),
QDir::SortFlags sort_ = QDir::SortFlags(QDir::Name | QDir::IgnoreCase),
QDir::Filters filters_ = QDir::AllEntries);
@ -97,6 +105,10 @@ public:
mutable QFileSystemMetaData metaData;
};
Q_DECLARE_OPERATORS_FOR_FLAGS(QDirPrivate::PathNormalizations)
Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, QDirPrivate::PathNormalizations flags, bool *ok = nullptr);
QT_END_NAMESPACE
#endif

View File

@ -253,7 +253,8 @@
and contains no query or fragment, a local file path is returned.
\value StripTrailingSlash The trailing slash is removed from the path, if one is present.
\value NormalizePathSegments Modifies the path to remove redundant directory separators,
and to resolve "."s and ".."s (as far as possible).
and to resolve "."s and ".."s (as far as possible). For non-local paths, adjacent
slashes are preserved.
Note that the case folding rules in \l{RFC 3491}{Nameprep}, which QUrl
conforms to, require host names to always be converted to lower case,
@ -419,10 +420,9 @@
#endif
#include "private/qipaddress_p.h"
#include "qurlquery.h"
#include "private/qdir_p.h"
QT_BEGIN_NAMESPACE
extern QString qt_normalizePathSegments(const QString &name, bool allowUncPaths,
bool *ok = nullptr); // qdir.cpp
inline static bool isHex(char c)
{
@ -930,7 +930,7 @@ inline void QUrlPrivate::appendPath(QString &appendTo, QUrl::FormattingOptions o
{
QString thePath = path;
if (options & QUrl::NormalizePathSegments) {
thePath = qt_normalizePathSegments(path, false);
thePath = qt_normalizePathSegments(path, isLocalFile() ? QDirPrivate::DefaultNormalization : QDirPrivate::RemotePath);
}
QStringRef thePathRef(&thePath);

View File

@ -62,12 +62,7 @@
#endif
#ifdef QT_BUILD_INTERNAL
QT_BEGIN_NAMESPACE
extern Q_AUTOTEST_EXPORT QString
qt_normalizePathSegments(const QString &path, bool allowUncPaths, bool *ok = nullptr);
QT_END_NAMESPACE
#include "private/qdir_p.h"
#endif
static QByteArray msgDoesNotExist(const QString &name)
@ -1376,7 +1371,7 @@ void tst_QDir::normalizePathSegments()
QFETCH(QString, path);
QFETCH(UncHandling, uncHandling);
QFETCH(QString, expected);
QString cleaned = qt_normalizePathSegments(path, uncHandling == HandleUnc);
QString cleaned = qt_normalizePathSegments(path, uncHandling == HandleUnc ? QDirPrivate::AllowUncPaths : QDirPrivate::DefaultNormalization);
QCOMPARE(cleaned, expected);
if (path == expected)
QVERIFY2(path.isSharedWith(cleaned), "Strings are same but data is not shared");

View File

@ -182,6 +182,8 @@ private slots:
void matches();
void ipv6_zoneId_data();
void ipv6_zoneId();
void normalizeRemotePaths_data();
void normalizeRemotePaths();
private:
void testThreadingHelper();
@ -323,7 +325,7 @@ void tst_QUrl::comparison()
QUrl url3bis = QUrl::fromEncoded("example://a/b/c/%7Bfoo%7D/");
QUrl url3bisNoSlash = QUrl::fromEncoded("example://a/b/c/%7Bfoo%7D");
QUrl url4bis = QUrl::fromEncoded("example://a/.//b/../b/c//%7Bfoo%7D/");
QUrl url4bis = QUrl::fromEncoded("example://a/./b/../b/c/%7Bfoo%7D/");
QCOMPARE(url4bis.adjusted(QUrl::NormalizePathSegments), url3bis);
QCOMPARE(url4bis.adjusted(QUrl::NormalizePathSegments | QUrl::StripTrailingSlash), url3bisNoSlash);
QVERIFY(url3bis.matches(url4bis, QUrl::NormalizePathSegments));
@ -335,7 +337,7 @@ void tst_QUrl::comparison()
QCOMPARE(url4EncodedDots.path(QUrl::FullyDecoded), QString("/.//b/..//b/c/"));
QCOMPARE(QString::fromLatin1(url4EncodedDots.toEncoded()), QString::fromLatin1("example://a/.//b/..%2F/b/c/"));
QCOMPARE(url4EncodedDots.toString(), QString("example://a/.//b/..%2F/b/c/"));
QCOMPARE(url4EncodedDots.adjusted(QUrl::NormalizePathSegments).toString(), QString("example://a/b/..%2F/b/c/"));
QCOMPARE(url4EncodedDots.adjusted(QUrl::NormalizePathSegments).toString(), QString("example://a//b/..%2F/b/c/"));
// 6.2.2.1 Make sure hexadecimal characters in percent encoding are
// treated case-insensitively
@ -4201,6 +4203,36 @@ void tst_QUrl::ipv6_zoneId()
QCOMPARE(url.toString(QUrl::FullyEncoded), "x://[" + encodedHost + "]");
}
// Supplies the data rows for normalizeRemotePaths(): each row pairs an
// http URL with the string expected after QUrl::NormalizePathSegments.
void tst_QUrl::normalizeRemotePaths_data()
{
    QTest::addColumn<QUrl>("url");
    QTest::addColumn<QString>("expected");

    // One entry per row: row tag, input URL, expected normalized URL.
    static const struct {
        const char *tag;
        const char *input;
        const char *normalized;
    } rows[] = {
        // ".." combined with empty path segments
        { "dotdot-slashslash", "http://qt-project.org/some/long/..//path", "http://qt-project.org/some//path" },
        { "slashslash-dotdot", "http://qt-project.org/some//../path", "http://qt-project.org/some/path" },
        { "slashslash-dotdot2", "http://qt-project.org/some//path/../", "http://qt-project.org/some//" },
        // "." segments are dropped, surrounding slashes stay
        { "dot-slash", "http://qt-project.org/some/./path", "http://qt-project.org/some/path" },
        { "slashslash-dot-slashslash", "http://qt-project.org/some//.//path", "http://qt-project.org/some///path" },
        { "dot-slashslash", "http://qt-project.org/some/.//path", "http://qt-project.org/some//path" },
        // runs of slashes are left untouched
        { "multiple-slashes", "http://qt-project.org/some//path", "http://qt-project.org/some//path" },
        { "multiple-slashes4", "http://qt-project.org/some////path", "http://qt-project.org/some////path" },
        { "slashes-at-end", "http://qt-project.org/some//", "http://qt-project.org/some//" },
        // mixtures of "." and "..", including going above the root
        { "dot-dotdot", "http://qt-project.org/path/./../", "http://qt-project.org/" },
        { "slash-dot-slash-dot-slash", "http://qt-project.org/path//.//.//", "http://qt-project.org/path////" },
        { "dotdot", "http://qt-project.org/../", "http://qt-project.org/" },
        { "dotdot-dotdot", "http://qt-project.org/path/../../", "http://qt-project.org/" },
        { "dot-dotdot-tail", "http://qt-project.org/stem/path/./../tail", "http://qt-project.org/stem/tail" },
        { "slash-dotdot-slash-tail", "http://qt-project.org/stem/path//..//tail", "http://qt-project.org/stem/path//tail" },
    };

    for (const auto &row : rows)
        QTest::newRow(row.tag) << QUrl(row.input) << row.normalized;
}
// Data-driven check: for every row from normalizeRemotePaths_data(), the URL
// adjusted with QUrl::NormalizePathSegments must stringify to "expected".
void tst_QUrl::normalizeRemotePaths()
{
// Pull the current row's columns into locals named "url" and "expected".
QFETCH(QUrl, url);
QFETCH(QString, expected);
// Compare on the string form of the adjusted URL.
QCOMPARE(url.adjusted(QUrl::NormalizePathSegments).toString(), expected);
}
QTEST_MAIN(tst_QUrl)
#include "tst_qurl.moc"