Improve conversion between JS RegExp and QRegularExpression

Add support for InvertedGreedinessOption and MultilineOption.

Change-Id: I19dce6e356a7ec406640bb8858885cd576b4aa2f
Reviewed-by: Ulf Hermann <ulf.hermann@qt.io>
This commit is contained in:
Lars Knoll 2020-04-02 09:28:23 +02:00
parent 3156fd64cc
commit e1bc9db851
2 changed files with 110 additions and 44 deletions

View File

@ -84,6 +84,42 @@ void Heap::RegExpObject::init(QV4::RegExp *value)
o->initProperties();
}
// Rewrites \a pattern so that its quantifiers become minimal (lazy) in
// ECMAScript syntax and returns the rewritten pattern.
//
// Every unescaped '?', '+', '*' and '}' found outside a character class
// ([...]) gets a '?' appended. A backslash and the character following it
// are copied verbatim, so escaped metacharacters are never touched.
//
// A '?' that directly follows an unescaped '(' is not a quantifier: it
// introduces a group construct such as (?:...), (?=...) or (?!...).
// Appending another '?' there would produce an invalid pattern, so that
// '?' is copied unchanged.
//
// Known limitations: every '}' outside a character class is assumed to
// close a {m,n} quantifier, and a quantifier that already carries an
// explicit '?' suffix in the input is not treated specially.
static QString minimalPattern(const QString &pattern)
{
    QString ecmaPattern;
    const int len = pattern.length();
    ecmaPattern.reserve(len);
    const QChar *wc = pattern.unicode();
    bool inBracket = false;
    // True when the previously consumed character was an unescaped '('
    // outside a character class.
    bool afterGroupOpen = false;
    int i = 0;
    while (i < len) {
        const QChar c = wc[i++];
        ecmaPattern += c;
        // "(?" starts a group construct; that '?' must not be made lazy.
        const bool isGroupModifier = afterGroupOpen && c.unicode() == '?';
        afterGroupOpen = false;
        switch (c.unicode()) {
        case '?':
        case '+':
        case '*':
        case '}':
            if (!inBracket && !isGroupModifier)
                ecmaPattern += QLatin1Char('?');
            break;
        case '\\':
            // Copy the escaped character as-is so it is not interpreted.
            if (i < len)
                ecmaPattern += wc[i++];
            break;
        case '(':
            afterGroupOpen = !inBracket;
            break;
        case '[':
            inBracket = true;
            break;
        case ']':
            inBracket = false;
            break;
        default:
            break;
        }
    }
    return ecmaPattern;
}
// Converts a QRegExp to a JS RegExp.
// The conversion is not 100% exact since ECMA regexp and QRegExp
// have different semantics/flags, but we try to do our best.
@ -93,40 +129,8 @@ void Heap::RegExpObject::init(const QRegExp &re)
// Convert the pattern to an ECMAScript pattern.
QString pattern = QT_PREPEND_NAMESPACE(qt_regexp_toCanonical)(re.pattern(), re.patternSyntax());
if (re.isMinimal()) {
QString ecmaPattern;
int len = pattern.length();
ecmaPattern.reserve(len);
int i = 0;
const QChar *wc = pattern.unicode();
bool inBracket = false;
while (i < len) {
QChar c = wc[i++];
ecmaPattern += c;
switch (c.unicode()) {
case '?':
case '+':
case '*':
case '}':
if (!inBracket)
ecmaPattern += QLatin1Char('?');
break;
case '\\':
if (i < len)
ecmaPattern += wc[i++];
break;
case '[':
inBracket = true;
break;
case ']':
inBracket = false;
break;
default:
break;
}
}
pattern = ecmaPattern;
}
if (re.isMinimal())
pattern = minimalPattern(pattern);
Scope scope(internalClass->engine);
Scoped<QV4::RegExpObject> o(scope, this);
@ -148,10 +152,16 @@ void Heap::RegExpObject::init(const QRegularExpression &re)
Scope scope(internalClass->engine);
Scoped<QV4::RegExpObject> o(scope, this);
const uint flags = (re.patternOptions() & QRegularExpression::CaseInsensitiveOption)
? CompiledData::RegExp::RegExp_IgnoreCase
: CompiledData::RegExp::RegExp_NoFlags;
o->d()->value.set(scope.engine, QV4::RegExp::create(scope.engine, re.pattern(), flags));
QRegularExpression::PatternOptions options = re.patternOptions();
uint flags = (options & QRegularExpression::CaseInsensitiveOption)
? CompiledData::RegExp::RegExp_IgnoreCase
: CompiledData::RegExp::RegExp_NoFlags;
if (options & QRegularExpression::MultilineOption)
flags |= CompiledData::RegExp::RegExp_Multiline;
QString pattern = re.pattern();
if (options & QRegularExpression::InvertedGreedinessOption)
pattern = minimalPattern(pattern);
o->d()->value.set(scope.engine, QV4::RegExp::create(scope.engine, pattern, flags));
o->initProperties();
}
#endif
@ -178,11 +188,12 @@ QRegExp RegExpObject::toQRegExp() const
// have different semantics/flags, but we try to do our best.
QRegularExpression RegExpObject::toQRegularExpression() const
{
    // Accumulate the QRegularExpression options that correspond to the JS
    // flags set on this RegExp. Only the ignore-case and multiline flags
    // are translated here.
    QRegularExpression::PatternOptions options = QRegularExpression::NoPatternOption;
    if (value()->flags & CompiledData::RegExp::RegExp_IgnoreCase)
        options |= QRegularExpression::CaseInsensitiveOption;
    if (value()->flags & CompiledData::RegExp::RegExp_Multiline)
        options |= QRegularExpression::MultilineOption;
    // The pattern string itself is passed through unchanged.
    return QRegularExpression(*value()->pattern, options);
}
#endif

View File

@ -142,6 +142,8 @@ private slots:
void qRegExpInport();
void qRegularExpressionImport_data();
void qRegularExpressionImport();
void qRegularExpressionExport_data();
void qRegularExpressionExport();
void dateRoundtripJSQtJS();
void dateRoundtripQtJSQt();
void dateConversionJSQt();
@ -3287,7 +3289,6 @@ void tst_QJSEngine::qRegularExpressionImport_data()
{
QTest::addColumn<QRegularExpression>("rx");
QTest::addColumn<QString>("string");
QTest::addColumn<QString>("matched");
QTest::newRow("normal") << QRegularExpression("(test|foo)") << "test _ foo _ test _ Foo";
QTest::newRow("normal2") << QRegularExpression("(Test|Foo)") << "test _ foo _ test _ Foo";
@ -3311,6 +3312,14 @@ void tst_QJSEngine::qRegularExpressionImport_data()
QTest::newRow(".+ minimal") << QRegularExpression("^.+$") << ".+";
QTest::newRow("[.?] minimal") << QRegularExpression("^[.?]$") << ".?";
QTest::newRow("[.+] minimal") << QRegularExpression("^[.+]$") << ".+";
QTest::newRow("aaa inverted greedyness") << QRegularExpression("a{2,5}", QRegularExpression::InvertedGreedinessOption) << "aAaAaaaaaAa";
QTest::newRow("inverted greedyness") << QRegularExpression(".*\\} [*8]", QRegularExpression::InvertedGreedinessOption) << "}?} ?} *";
QTest::newRow(".? inverted greedyness") << QRegularExpression(".?", QRegularExpression::InvertedGreedinessOption) << ".?";
QTest::newRow(".+ inverted greedyness") << QRegularExpression(".+", QRegularExpression::InvertedGreedinessOption) << ".+";
QTest::newRow("[.?] inverted greedyness") << QRegularExpression("[.?]", QRegularExpression::InvertedGreedinessOption) << ".?";
QTest::newRow("[.+] inverted greedyness") << QRegularExpression("[.+]", QRegularExpression::InvertedGreedinessOption) << ".+";
QTest::newRow("two lines") << QRegularExpression("^.*$") << "abc\ndef";
QTest::newRow("multiline") << QRegularExpression("^.*$", QRegularExpression::MultilineOption) << "abc\ndef";
}
void tst_QJSEngine::qRegularExpressionImport()
@ -3333,6 +3342,52 @@ void tst_QJSEngine::qRegularExpressionImport()
QCOMPARE(result.property(i).toString(), match.captured(i));
}
void tst_QJSEngine::qRegularExpressionExport_data()
{
QTest::addColumn<QString>("js");
QTest::addColumn<QRegularExpression>("regularexpression");
QTest::newRow("normal") << "/(test|foo)/" << QRegularExpression("(test|foo)");
QTest::newRow("normal2") << "/(Test|Foo)/" << QRegularExpression("(Test|Foo)");
QTest::newRow("case insensitive") << "/(test|foo)/i" << QRegularExpression("(test|foo)", QRegularExpression::CaseInsensitiveOption);
QTest::newRow("case insensitive2") << "/(Test|Foo)/i" << QRegularExpression("(Test|Foo)", QRegularExpression::CaseInsensitiveOption);
QTest::newRow("b(a*)(b*)") << "/b(a*)(b*)/i" << QRegularExpression("b(a*)(b*)", QRegularExpression::CaseInsensitiveOption);
QTest::newRow("greedy") << "/a*(a*)/i" << QRegularExpression("a*(a*)", QRegularExpression::CaseInsensitiveOption);
QTest::newRow("wildcard") << "/.*\\.txt/" << QRegularExpression(".*\\.txt");
QTest::newRow("wildcard 2") << "/a.b\\.txt/" << QRegularExpression("a.b\\.txt");
QTest::newRow("slash") << "/g\\/.*\\/s/i" << QRegularExpression("g\\/.*\\/s", QRegularExpression::CaseInsensitiveOption);
QTest::newRow("slash2") << "/g \\/ .* \\/ s/i" << QRegularExpression("g \\/ .* \\/ s", QRegularExpression::CaseInsensitiveOption);
QTest::newRow("fixed") << "/a\\*aa\\.a\\(ba\\)\\*a\\\\ba/i" << QRegularExpression("a\\*aa\\.a\\(ba\\)\\*a\\\\ba", QRegularExpression::CaseInsensitiveOption);
QTest::newRow("fixed insensitive") << "/A\\*A/i" << QRegularExpression("A\\*A", QRegularExpression::CaseInsensitiveOption);
QTest::newRow("fixed sensitive") << "/A\\*A/" << QRegularExpression("A\\*A");
QTest::newRow("html") << "/<b>(.*)<\\/b>/" << QRegularExpression("<b>(.*)<\\/b>");
QTest::newRow("html minimal") << "/^<b>(.*)<\\/b>$/" << QRegularExpression("^<b>(.*)<\\/b>$");
QTest::newRow("aaa") << "/a{2,5}/" << QRegularExpression("a{2,5}");
QTest::newRow("aaa minimal") << "/^a{2,5}$/" << QRegularExpression("^a{2,5}$");
QTest::newRow("minimal") << "/^.*\\} [*8]$/" << QRegularExpression("^.*\\} [*8]$");
QTest::newRow(".? minimal") << "/^.?$/" << QRegularExpression("^.?$");
QTest::newRow(".+ minimal") << "/^.+$/" << QRegularExpression("^.+$");
QTest::newRow("[.?] minimal") << "/^[.?]$/" << QRegularExpression("^[.?]$");
QTest::newRow("[.+] minimal") << "/^[.+]$/" << QRegularExpression("^[.+]$");
QTest::newRow("multiline") << "/^.*$/m" << QRegularExpression("^.*$", QRegularExpression::MultilineOption);
}
void tst_QJSEngine::qRegularExpressionExport()
{
QFETCH(QString, js);
QFETCH(QRegularExpression, regularexpression);
QJSEngine eng;
QJSValue rexp;
rexp = eng.evaluate(js);
QCOMPARE(rexp.isRegExp(), true);
QCOMPARE(rexp.isCallable(), false);
QRegularExpression rx = qjsvalue_cast<QRegularExpression>(rexp);
QCOMPARE(rx, regularexpression);
}
// QScriptValue::toDateTime() returns a local time, whereas JS dates
// are always stored as UTC. Qt Script must respect the current time
// zone, and correctly adjust for daylight saving time that may be in