diff --git a/src/corelib/kernel/qtranslator.cpp b/src/corelib/kernel/qtranslator.cpp index 52ca3ffb4ee..4e02f2b589f 100644 --- a/src/corelib/kernel/qtranslator.cpp +++ b/src/corelib/kernel/qtranslator.cpp @@ -629,40 +629,37 @@ static QString find_translation(const QLocale & locale, // see http://www.unicode.org/reports/tr35/#LanguageMatching for inspiration - // For each language_country returned by locale.uiLanguages(), add - // also a lowercase version to the list. Since these languages are - // used to create file names, this is important on case-sensitive - // file systems, where otherwise a file called something like - // "prefix_en_us.qm" won't be found under the "en_US" locale. Note - // that the Qt resource system is always case-sensitive, even on - // Windows (in other words: this codepath is *not* UNIX-only). - QStringList languages = locale.uiLanguages(QLocale::TagSeparator::Underscore); + // For each name returned by locale.uiLanguages(), also try a lowercase + // version. Since these languages are used to create file names, this is + // important on case-sensitive file systems, where otherwise a file called + // something like "prefix_en_us.qm" won't be found under the "en_US" + // locale. Note that the Qt resource system is always case-sensitive, even + // on Windows (in other words: this codepath is *not* UNIX-only). + const QStringList languages = locale.uiLanguages(QLocale::TagSeparator::Underscore); qCDebug(lcTranslator) << "Requested UI languages" << languages; - for (qsizetype i = languages.size() - 1; i >= 0; --i) { - const QString &lang = languages.at(i); - QString lowerLang = lang.toLower(); - if (lang != lowerLang) - languages.insert(i + 1, lowerLang); - } - for (QString localeName : std::as_const(languages)) { - // try the complete locale name first and progressively truncate from - // the end until a matching language tag is found (with or without suffix) - for (;;) { - realname += localeName + suffixOrDotQM; + for (const QString &localeName : languages) { + QString loc = localeName; + // First try this given name, then in lower-case form (if different): + while (true) { + // First, try with suffix: + realname += loc + suffixOrDotQM; if (is_readable_file(realname)) return realname; - realname.truncate(realNameBaseSize + localeName.size()); + // Next, try without: + realname.truncate(realNameBaseSize + loc.size()); if (is_readable_file(realname)) return realname; - + // Reset realname: realname.truncate(realNameBaseSize); - qsizetype rightmost = localeName.lastIndexOf(u'_'); - if (rightmost <= 0) - break; // no truncations anymore, break - localeName.truncate(rightmost); + // Non-trivial while-loop condition: + if (loc != localeName) // loc was the lower-case form, we're done. + break; + loc = std::move(loc).toLower(); // Try lower-case next, + if (loc == localeName) // but only if different. + break; } } diff --git a/src/corelib/text/qlocale.cpp b/src/corelib/text/qlocale.cpp index 7cc0b26cba9..8c5e651ca36 100644 --- a/src/corelib/text/qlocale.cpp +++ b/src/corelib/text/qlocale.cpp @@ -4848,14 +4848,25 @@ QString QLocale::formattedDataSize(qint64 bytes, int precision, DataSizeFormats than dashes, to separate locale tags, pass QLocale::TagSeparator::Underscore as \a separator. - The returned list may contain entries for more than one language. - In particular, this happens for \l{QLocale::system()}{system locale} - when the user has configured the system to accept several languages - for user-interface translations. In such a case, the order of entries - for distinct languages is significant. For example, where a user has - configured a primarily German system to also accept English and Chinese, - in that order of preference, the returned list shall contain some - entries for German, then some for English, and finally some for Chinese. + Returns a list of locale names. This may include multiple languages, + especially for the system locale when multiple UI translation languages are + configured. The order of entries is significant. For example, for the system + locale, it reflects user preferences. + + Prior to Qt 6.9, the list only contained explicitly configured locales and + their equivalents. This led some callers to add truncations (such as from + 'en-Latn-DE' to 'en') as fallbacks. This could sometimes result in + inappropriate choices, especially if these were tried before later entries + that would be more appropriate fallbacks. + + Starting from Qt 6.9, reasonable truncations are included in the returned + list \e after the explicitly specified locales. This change allows for more + accurate fallback options without callers needing to do any truncation. + + Users can explicitly include preferred fallback locales (such as en-US) in + their system configuration to control the order of preference. You are + advised to rely on the order of entries in uiLanguages() rather than using + custom fallback methods. Most likely you do not need to use this function directly, but just pass the QLocale object to the QTranslator::load() function. @@ -4911,24 +4922,45 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const { localeIds.append(d->m_data->id()); } + + // Warning: this processing is quadratic in the length of the final list. + // Hopefully that list isn't too long, though. + QStringList fallbacks; + auto gatherTruncations = [&fallbacks, cut = QLatin1Char(sep)](const QString &name) { + fallbacks.removeAll(name); + for (qsizetype at = name.indexOf(cut); at >= 0; at = name.indexOf(cut, at + 1)) { + Q_ASSERT(at > 1); // First sub-tag length is >= 2 (C is handled separately). + // We find shorter entries before fuller ones; we want the long ones + // earlier, the short ones later, but all before any entries from + // later in the localeIds list (already present in fallbacks because + // of the reverse traversal). However, that can still leave a + // shorter truncation of a long name before some truncations of + // shorter names of which the shorter one is a prefix; that's + // handled in the final appending to uiLanguages; see exclude. + fallbacks.insert(0, name.first(at)); + } + }; + for (qsizetype i = localeIds.size(); i-- > 0; ) { QLocaleId id = localeIds.at(i); qsizetype j; - QByteArray prior; + if (id.language_id == C) { + if (!uiLanguages.contains(u"C"_s)) + uiLanguages.append(u"C"_s); + // Attempt no likely sub-tag amendments to C. + continue; + } + + const auto prior = QString::fromLatin1(id.name(sep)); if (isSystem && i < uiLanguages.size()) { // Adding likely-adjusted forms to system locale's list. - // Name the locale is derived from: - prior = uiLanguages.at(i).toLatin1(); // Insert just after the entry we're supplementing: + Q_ASSERT(uiLanguages.at(i) == prior); j = i + 1; - } else if (id.language_id == C) { - // Attempt no likely sub-tag amendments to C: - uiLanguages.append(QString::fromLatin1(id.name(sep))); - continue; } else { // Plain locale or empty system uiLanguages; just append. - prior = id.name(sep); - uiLanguages.append(QString::fromLatin1(prior)); + if (!uiLanguages.contains(prior)) + uiLanguages.append(prior); j = uiLanguages.size(); } @@ -4936,37 +4968,74 @@ QStringList QLocale::uiLanguages(TagSeparator separator) const const QLocaleId min = max.withLikelySubtagsRemoved(); // Include minimal version (last) unless it's what our locale is derived from: - if (auto name = min.name(sep); name != prior) - uiLanguages.insert(j, QString::fromLatin1(name)); - else if (!isSystem) - --j; // bcp47Name() matches min(): put more specific forms *before* it. + if (auto name = QString::fromLatin1(min.name(sep)); name != prior) { + uiLanguages.insert(j, name); + gatherTruncations(name); + } else if (!isSystem && min == id) { + --j; // Put more specific forms *before* minimal entry. + } if (id.script_id) { // Include scriptless version if likely-equivalent and distinct: id.script_id = 0; if (id != min && id.withLikelySubtagsAdded() == max) { - if (auto name = id.name(sep); name != prior) - uiLanguages.insert(j, QString::fromLatin1(name)); + if (auto name = QString::fromLatin1(id.name(sep)); name != prior) { + uiLanguages.insert(j, name); + gatherTruncations(name); + } } } if (!id.territory_id) { Q_ASSERT(!min.territory_id); Q_ASSERT(!id.script_id); // because we just cleared it. - // Include version with territory if it likely-equivalent and distinct: + // Include version with territory if likely-equivalent and distinct: id.territory_id = max.territory_id; if (id != max && id.withLikelySubtagsAdded() == max) { - if (auto name = id.name(sep); name != prior) - uiLanguages.insert(j, QString::fromLatin1(name)); + if (auto name = QString::fromLatin1(id.name(sep)); name != prior) { + uiLanguages.insert(j, name); + gatherTruncations(name); + } } } + gatherTruncations(prior); // After trimmed forms, before max. // Include version with all likely sub-tags (first) if distinct from the rest: if (max != min && max != id) { - if (auto name = max.name(sep); name != prior) - uiLanguages.insert(j, QString::fromLatin1(name)); + if (auto name = QString::fromLatin1(max.name(sep)); name != prior) { + uiLanguages.insert(j, name); + gatherTruncations(name); + } } } + + auto exclude = [fallbacks, cut = QLatin1Char(sep)](QStringList::const_iterator entry, + const QStringList &uiLanguages) { + // If this entry in fallbacks reappears later, after one of which it is + // a prefix, that's not yet in uiLanguages, leave it for the later entry + // to take care of. + const QString &name = *entry; + // entry < constEnd(), so found < constEnd() + auto found = entry; + // Initial found < constEnd(), so found + 1 is at worst constEnd(), not beyond it. + while ((found = std::find(found + 1, fallbacks.constEnd(), name)) != fallbacks.constEnd()) { + // entry < returned found < constEnd() + // *found is a repeat of name + // entry < found, so found - 1 is at worst entry + const QString &prev = *(found - 1); + if (uiLanguages.contains(prev)) + continue; + if (prev.size() > name.size() && prev.startsWith(name) && prev[name.size()] == cut) + return true; + } + return false; + }; + for (auto it = fallbacks.constBegin(); it < fallbacks.constEnd(); ++it) { + const QString &name = *it; + if (uiLanguages.contains(name) || exclude(it, uiLanguages)) + continue; + uiLanguages.append(name); + } return uiLanguages; } diff --git a/tests/auto/corelib/kernel/qtranslator/tst_qtranslator.cpp b/tests/auto/corelib/kernel/qtranslator/tst_qtranslator.cpp index 3a60040b71c..3124ef3e418 100644 --- a/tests/auto/corelib/kernel/qtranslator/tst_qtranslator.cpp +++ b/tests/auto/corelib/kernel/qtranslator/tst_qtranslator.cpp @@ -181,6 +181,9 @@ void tst_QTranslator::loadLocale_data() QTest::addRow("System, mixed languages") << QLocale::system() << QStringList{"en-NO", "nb-NO", "de-DE", "zh-Hant-NO"}; + QTest::addRow("System, mixed dialects") + << QLocale::system() + << QStringList{"en-AU", "en-NZ", "de-DE", "en-GB"}; } void tst_QTranslator::loadLocale() @@ -254,16 +257,6 @@ void tst_QTranslator::loadLocale() // more general alternatives, or to languages with lower priority. for (const auto &filePath : files) { QVERIFY(tor.load(wantedLocale, "foo", "-", path, ".qm")); - // we search 'en_Latn_US/AU', 'en_Latn', and 'en', but never 'en_US/AU' - if (filePath.endsWith("en_US") || filePath.endsWith("en_US.qm")) { - QEXPECT_FAIL("US English", - "QTBUG-124898 - we search 'en_Latn_US', 'en_Latn', and 'en', but never 'en_US", - Continue); - } else if (filePath.endsWith("en_AU") || filePath.endsWith("en_AU.qm")) { - QEXPECT_FAIL("Australia", - "QTBUG-124898 - we search 'en_Latn_AU', 'en_Latn', and 'en', but never 'en_AU", - Continue); - } QCOMPARE(tor.filePath(), filePath); QVERIFY2(file.remove(filePath), qPrintable(file.errorString())); } diff --git a/tests/auto/corelib/text/qlocale/tst_qlocale.cpp b/tests/auto/corelib/text/qlocale/tst_qlocale.cpp index 2757adbb936..6ec8fa9aecd 100644 --- a/tests/auto/corelib/text/qlocale/tst_qlocale.cpp +++ b/tests/auto/corelib/text/qlocale/tst_qlocale.cpp @@ -3685,32 +3685,36 @@ void tst_QLocale::uiLanguages_data() QTest::newRow("C") << QLocale::c() << QStringList{u"C"_s}; QTest::newRow("en_US") - << QLocale("en_US") << QStringList{u"en-Latn-US"_s, u"en-US"_s, u"en"_s}; - QTest::newRow("en_Latn_US") // Specifying the default script makes no difference - << QLocale("en_Latn_US") << QStringList{u"en-Latn-US"_s, u"en-US"_s, u"en"_s}; + << QLocale("en_US") << QStringList{u"en-Latn-US"_s, u"en-US"_s, u"en"_s, u"en-Latn"_s}; + QTest::newRow("en_Latn_US") + << QLocale("en_Latn_US") // Specifying the default script makes no difference + << QStringList{u"en-Latn-US"_s, u"en-US"_s, u"en"_s, u"en-Latn"_s}; + QTest::newRow("en_GB") - << QLocale("en_GB") << QStringList{u"en-Latn-GB"_s, u"en-GB"_s}; + << QLocale("en_GB") << QStringList{u"en-Latn-GB"_s, u"en-GB"_s, u"en-Latn"_s, u"en"_s}; QTest::newRow("en_Dsrt_US") - << QLocale("en_Dsrt_US") << QStringList{u"en-Dsrt-US"_s, u"en-Dsrt"_s}; + << QLocale("en_Dsrt_US") << QStringList{u"en-Dsrt-US"_s, u"en-Dsrt"_s, u"en"_s}; QTest::newRow("ru_RU") - << QLocale("ru_RU") << QStringList{u"ru-Cyrl-RU"_s, u"ru-RU"_s, u"ru"_s}; + << QLocale("ru_RU") << QStringList{u"ru-Cyrl-RU"_s, u"ru-RU"_s, u"ru"_s, u"ru-Cyrl"_s}; QTest::newRow("zh_Hant") - << QLocale("zh_Hant") << QStringList{u"zh-Hant-TW"_s, u"zh-TW"_s}; + << QLocale("zh_Hant") + << QStringList{u"zh-Hant-TW"_s, u"zh-TW"_s, u"zh-Hant"_s, u"zh"_s}; + QTest::newRow("zh_Hans_CN") << QLocale(QLocale::Chinese, QLocale::SimplifiedHanScript, QLocale::China) - << QStringList{u"zh-Hans-CN"_s, u"zh-CN"_s, u"zh"_s}; + << QStringList{u"zh-Hans-CN"_s, u"zh-CN"_s, u"zh"_s, u"zh-Hans"_s}; // GB has no native Punjabi locales, so is eliminated by likely subtag rules: QTest::newRow("pa_IN") - << QLocale("pa_IN") << QStringList{u"pa-Guru-IN"_s, u"pa-IN"_s, u"pa"_s}; + << QLocale("pa_IN") << QStringList{u"pa-Guru-IN"_s, u"pa-IN"_s, u"pa"_s, u"pa-Guru"_s}; QTest::newRow("pa_GB") - << QLocale("pa_GB") << QStringList{u"pa-Guru-IN"_s, u"pa-IN"_s, u"pa"_s}; + << QLocale("pa_GB") << QStringList{u"pa-Guru-IN"_s, u"pa-IN"_s, u"pa"_s, u"pa-Guru"_s}; QTest::newRow("pa_PK") - << QLocale("pa_PK") << QStringList{u"pa-Arab-PK"_s, u"pa-PK"_s}; + << QLocale("pa_PK") << QStringList{u"pa-Arab-PK"_s, u"pa-PK"_s, u"pa-Arab"_s, u"pa"_s}; QTest::newRow("pa_Arab_GB") - << QLocale("pa_Arab_GB") << QStringList{u"pa-Arab-PK"_s, u"pa-PK"_s}; + << QLocale("pa_Arab_GB") << QStringList{u"pa-Arab-PK"_s, u"pa-PK"_s, u"pa-Arab"_s, u"pa"_s}; // We presently map und (or any other unrecognized language) to C, ignoring // what a sub-tag lookup would surely find us. @@ -4120,71 +4124,70 @@ void tst_QLocale::mySystemLocale_data() QTest::addRow("catalan") << u"ca"_s << QLocale::Catalan - << QStringList{u"ca"_s, u"ca-Latn-ES"_s, u"ca-ES"_s}; + << QStringList{u"ca"_s, u"ca-Latn-ES"_s, u"ca-ES"_s, u"ca-Latn"_s}; QTest::addRow("catalan-spain") << u"ca-ES"_s << QLocale::Catalan - << QStringList{u"ca-ES"_s, u"ca-Latn-ES"_s, u"ca"_s}; + << QStringList{u"ca-ES"_s, u"ca-Latn-ES"_s, u"ca"_s, u"ca-Latn"_s}; QTest::addRow("catalan-latin") << u"ca-Latn"_s << QLocale::Catalan << QStringList{u"ca-Latn"_s, u"ca-Latn-ES"_s, u"ca-ES"_s, u"ca"_s}; QTest::addRow("ukrainian") << u"uk"_s << QLocale::Ukrainian - << QStringList{u"uk"_s, u"uk-Cyrl-UA"_s, u"uk-UA"_s}; + << QStringList{u"uk"_s, u"uk-Cyrl-UA"_s, u"uk-UA"_s, u"uk-Cyrl"_s}; + QTest::addRow("english-germany") << u"en-DE"_s << QLocale::English // First two were missed out before fix to QTBUG-104930: << QStringList{u"en-DE"_s, u"en-Latn-DE"_s, u"en-GB"_s, u"en-Latn-GB"_s, - u"de-DE"_s, u"de-Latn-DE"_s, u"de"_s}; + u"de-DE"_s, u"de-Latn-DE"_s, u"de"_s, + // Fallbacks implied by those: + u"en-Latn"_s, u"en"_s, u"de-Latn"_s}; + QTest::addRow("german") << u"de"_s << QLocale::German - << QStringList{u"de"_s, u"de-Latn-DE"_s, u"de-DE"_s}; + << QStringList{u"de"_s, u"de-Latn-DE"_s, u"de-DE"_s, u"de-Latn"_s}; QTest::addRow("german-britain") << u"de-GB"_s << QLocale::German - << QStringList{u"de-GB"_s, u"de-Latn-GB"_s}; + << QStringList{u"de-GB"_s, u"de-Latn-GB"_s, u"de-Latn"_s, u"de"_s}; QTest::addRow("chinese-min") << u"zh"_s << QLocale::Chinese - << QStringList{u"zh"_s, u"zh-Hans-CN"_s, u"zh-CN"_s}; + << QStringList{u"zh"_s, u"zh-Hans-CN"_s, u"zh-CN"_s, u"zh-Hans"_s}; QTest::addRow("chinese-full") << u"zh-Hans-CN"_s << QLocale::Chinese - << QStringList{u"zh-Hans-CN"_s, u"zh-CN"_s, u"zh"_s}; + << QStringList{u"zh-Hans-CN"_s, u"zh-CN"_s, u"zh"_s, u"zh-Hans"_s}; // For C, it should preserve what the system gave us but only add "C", never anything more: - QTest::addRow("C") - << u"C"_s << QLocale::C << QStringList{u"C"_s}; - QTest::addRow("C-Latn") - << u"C-Latn"_s << QLocale::C - << QStringList{u"C-Latn"_s, u"C"_s}; - QTest::addRow("C-US") - << u"C-US"_s << QLocale::C - << QStringList{u"C-US"_s, u"C"_s}; + QTest::addRow("C") << u"C"_s << QLocale::C << QStringList{u"C"_s}; + QTest::addRow("C-Latn") << u"C-Latn"_s << QLocale::C << QStringList{u"C-Latn"_s, u"C"_s}; + QTest::addRow("C-US") << u"C-US"_s << QLocale::C << QStringList{u"C-US"_s, u"C"_s}; QTest::addRow("C-Latn-US") - << u"C-Latn-US"_s << QLocale::C - << QStringList{u"C-Latn-US"_s, u"C"_s}; - QTest::addRow("C-Hans") - << u"C-Hans"_s << QLocale::C - << QStringList{u"C-Hans"_s, u"C"_s}; - QTest::addRow("C-CN") - << u"C-CN"_s << QLocale::C - << QStringList{u"C-CN"_s, u"C"_s}; + << u"C-Latn-US"_s << QLocale::C << QStringList{u"C-Latn-US"_s, u"C"_s}; + QTest::addRow("C-Hans") << u"C-Hans"_s << QLocale::C << QStringList{u"C-Hans"_s, u"C"_s}; + QTest::addRow("C-CN") << u"C-CN"_s << QLocale::C << QStringList{u"C-CN"_s, u"C"_s}; QTest::addRow("C-Hans-CN") - << u"C-Hans-CN"_s << QLocale::C - << QStringList{u"C-Hans-CN"_s, u"C"_s}; + << u"C-Hans-CN"_s << QLocale::C << QStringList{u"C-Hans-CN"_s, u"C"_s}; QTest::newRow("en-Dsrt-GB") << u"en-Dsrt-GB"_s << QLocale::English - << QStringList{u"en-Dsrt-GB"_s, u"en-GB"_s, u"en-Latn-GB"_s}; + << QStringList{u"en-Dsrt-GB"_s, u"en-GB"_s, u"en-Latn-GB"_s, + // Fallbacks - plain "en" last, not between the others: + u"en-Dsrt"_s, u"en-Latn"_s, u"en"_s}; QTest::newRow("en-mixed") << u"en-FO"_s << QLocale::English << QStringList{u"en-FO"_s, u"en-Latn-FO"_s, u"en-DK"_s, u"en-Latn-DK"_s, u"en-GB"_s, u"en-Latn-GB"_s, u"fo-FO"_s, u"fo-Latn-FO"_s, u"fo"_s, - u"da-FO"_s, u"da-Latn-FO"_s, u"da-DK"_s, u"da-Latn-DK"_s, u"da"_s}; + u"da-FO"_s, u"da-Latn-FO"_s, u"da-DK"_s, u"da-Latn-DK"_s, u"da"_s, + // Fallbacks implied by those: + u"en-Latn"_s, u"en"_s, u"fo-Latn"_s, u"da-Latn"_s}; QTest::newRow("polylingual-CA") << u"de-CA"_s << QLocale::German << QStringList{u"de-CA"_s, u"de-Latn-CA"_s, u"en-CA"_s, u"en-Latn-CA"_s, u"fr-CA"_s, u"fr-Latn-CA"_s, u"de-AT"_s, u"de-Latn-AT"_s, - u"en-GB"_s, u"en-Latn-GB"_s, u"fr-FR"_s, u"fr-Latn-FR"_s, u"fr"_s}; + u"en-GB"_s, u"en-Latn-GB"_s, u"fr-FR"_s, u"fr-Latn-FR"_s, u"fr"_s, + // Fallbacks: + u"de-Latn"_s, u"de"_s, u"en-Latn"_s, u"en"_s, u"fr-Latn"_s}; QTest::newRow("und-US") << u"und-US"_s << QLocale::C