Commit fed7a57f, authored by Eike Rathke

more flexible language tag override mechanism

* allow overriding higher level lll-Ssss-CC with lower level lll-CC for
  known MS-LangID mappings that use tags with suppress-script
* internal override "canonicalization", e.g. ca-XV => ca-ES-valencia

Change-Id: I067d7515fb9144a896697617ad1b079e294f1ced
parent 986fa38e
...@@ -160,7 +160,7 @@ LanguageType MsLangId::resolveSystemLanguageByScriptType( LanguageType nLang, sa ...@@ -160,7 +160,7 @@ LanguageType MsLangId::resolveSystemLanguageByScriptType( LanguageType nLang, sa
LanguageType nOrigLang = nLang; LanguageType nOrigLang = nLang;
if (bResolveSystem || nLang == LANGUAGE_DONTKNOW) if (bResolveSystem || nLang == LANGUAGE_DONTKNOW)
nLang = MsLangId::getRealLanguage( nLang); nLang = MsLangId::getRealLanguage( nLang);
convertLanguageToLocaleImpl( nLang, aLocale); convertLanguageToLocaleImpl( nLang, aLocale, true);
if (bResolveSystem && aLocale.Language.isEmpty() && simplifySystemLanguages( nOrigLang) == LANGUAGE_SYSTEM) if (bResolveSystem && aLocale.Language.isEmpty() && simplifySystemLanguages( nOrigLang) == LANGUAGE_SYSTEM)
{ {
// None found but resolve requested, last resort is "en-US". // None found but resolve requested, last resort is "en-US".
...@@ -454,7 +454,7 @@ LanguageType MsLangId::getReplacementForObsoleteLanguage( LanguageType nLang, bo ...@@ -454,7 +454,7 @@ LanguageType MsLangId::getReplacementForObsoleteLanguage( LanguageType nLang, bo
default: default:
break; // nothing break; // nothing
case LANGUAGE_OBSOLETE_USER_LATIN: case LANGUAGE_OBSOLETE_USER_LATIN:
nLang = LANGUAGE_LATIN; nLang = LANGUAGE_USER_LATIN_VATICAN;
break; break;
case LANGUAGE_OBSOLETE_USER_MAORI: case LANGUAGE_OBSOLETE_USER_MAORI:
nLang = LANGUAGE_MAORI_NEW_ZEALAND; nLang = LANGUAGE_MAORI_NEW_ZEALAND;
......
...@@ -924,7 +924,8 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const ...@@ -924,7 +924,8 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const
{ {
if (pImpl->mnLangID != LANGUAGE_DONTKNOW) if (pImpl->mnLangID != LANGUAGE_DONTKNOW)
{ {
// May have involved canonicalize(), so compare with pImpl->maBcp47! // May have involved canonicalize(), so compare with
// pImpl->maBcp47 instead of maBcp47!
aBcp47 = LanguageTagImpl::convertToBcp47( aBcp47 = LanguageTagImpl::convertToBcp47(
MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID, true)); MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID, true));
bInsert = (aBcp47 == pImpl->maBcp47); bInsert = (aBcp47 == pImpl->maBcp47);
...@@ -1156,6 +1157,19 @@ bool LanguageTagImpl::canonicalize() ...@@ -1156,6 +1157,19 @@ bool LanguageTagImpl::canonicalize()
meIsLiblangtagNeeded = DECISION_NO; // known fallback meIsLiblangtagNeeded = DECISION_NO; // known fallback
} }
} }
// We may have an internal override "canonicalization".
lang::Locale aNew( MsLangId::Conversion::getOverride( maLocale));
if (!aNew.Language.isEmpty() &&
(aNew.Language != maLocale.Language ||
aNew.Country != maLocale.Country ||
aNew.Variant != maLocale.Variant))
{
maBcp47 = LanguageTagImpl::convertToBcp47( aNew);
bChanged = true;
meIsIsoLocale = DECISION_DONTKNOW;
meIsIsoODF = DECISION_DONTKNOW;
meIsLiblangtagNeeded = DECISION_NO; // known locale
}
} }
if (bTemporaryLocale) if (bTemporaryLocale)
{ {
...@@ -1173,6 +1187,7 @@ bool LanguageTagImpl::canonicalize() ...@@ -1173,6 +1187,7 @@ bool LanguageTagImpl::canonicalize()
meIsValid = DECISION_YES; // really, known must be valid ... meIsValid = DECISION_YES; // really, known must be valid ...
return bChanged; // that's it return bChanged; // that's it
} }
meIsLiblangtagNeeded = DECISION_YES; meIsLiblangtagNeeded = DECISION_YES;
SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for '" << maBcp47 << "'"); SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for '" << maBcp47 << "'");
...@@ -1190,12 +1205,12 @@ bool LanguageTagImpl::canonicalize() ...@@ -1190,12 +1205,12 @@ bool LanguageTagImpl::canonicalize()
SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize '" << maBcp47 << "'"); SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize '" << maBcp47 << "'");
if (pTag) if (pTag)
{ {
OUString aOld( maBcp47); OUString aNew( OUString::createFromAscii( pTag));
maBcp47 = OUString::createFromAscii( pTag);
// Make the lt_tag_t follow the new string if different, which // Make the lt_tag_t follow the new string if different, which
// removes default script and such. // removes default script and such.
if (maBcp47 != aOld) if (maBcp47 != aNew)
{ {
maBcp47 = aNew;
bChanged = true; bChanged = true;
meIsIsoLocale = DECISION_DONTKNOW; meIsIsoLocale = DECISION_DONTKNOW;
meIsIsoODF = DECISION_DONTKNOW; meIsIsoODF = DECISION_DONTKNOW;
...@@ -2182,6 +2197,8 @@ LanguageTag & LanguageTag::makeFallback() ...@@ -2182,6 +2197,8 @@ LanguageTag & LanguageTag::makeFallback()
aTmp = aLanguage + "-" + aCountry + "-" + aVariants; aTmp = aLanguage + "-" + aCountry + "-" + aVariants;
if (aTmp != maBcp47) if (aTmp != maBcp47)
aVec.push_back( aTmp); aVec.push_back( aTmp);
if (maBcp47 == "ca-ES-valencia")
aVec.push_back( "ca-XV");
// Language with variant but without country before language // Language with variant but without country before language
// without variant but with country. // without variant but with country.
// But only if variant is not from a grandfathered tag that // But only if variant is not from a grandfathered tag that
......
...@@ -84,22 +84,22 @@ ...@@ -84,22 +84,22 @@
<FormatCode>0,00%</FormatCode> <FormatCode>0,00%</FormatCode>
</FormatElement> </FormatElement>
<FormatElement msgid="CurrencyFormatskey1" default="true" type="short" usage="CURRENCY" formatindex="12"> <FormatElement msgid="CurrencyFormatskey1" default="true" type="short" usage="CURRENCY" formatindex="12">
<FormatCode>[$€-476] #.##0;-[$€-476] #.##0</FormatCode> <FormatCode>[$€-8076] #.##0;-[$€-8076] #.##0</FormatCode>
</FormatElement> </FormatElement>
<FormatElement msgid="CurrencyFormatskey2" default="false" type="medium" usage="CURRENCY" formatindex="13"> <FormatElement msgid="CurrencyFormatskey2" default="false" type="medium" usage="CURRENCY" formatindex="13">
<FormatCode>[$€-476] #.##0;-[$€-476] #.##0</FormatCode> <FormatCode>[$€-8076] #.##0;-[$€-8076] #.##0</FormatCode>
</FormatElement> </FormatElement>
<FormatElement msgid="CurrencyFormatskey3" default="true" type="medium" usage="CURRENCY" formatindex="14"> <FormatElement msgid="CurrencyFormatskey3" default="true" type="medium" usage="CURRENCY" formatindex="14">
<FormatCode>[$€-476] #.##0;[RED]-[$€-476] #.##0</FormatCode> <FormatCode>[$€-8076] #.##0;[RED]-[$€-8076] #.##0</FormatCode>
</FormatElement> </FormatElement>
<FormatElement msgid="CurrencyFormatskey4" default="false" type="medium" usage="CURRENCY" formatindex="15"> <FormatElement msgid="CurrencyFormatskey4" default="false" type="medium" usage="CURRENCY" formatindex="15">
<FormatCode>[$€-476] #.##0;[RED]-[$€-476] #.##0</FormatCode> <FormatCode>[$€-8076] #.##0;[RED]-[$€-8076] #.##0</FormatCode>
</FormatElement> </FormatElement>
<FormatElement msgid="CurrencyFormatskey5" default="false" type="medium" usage="CURRENCY" formatindex="16"> <FormatElement msgid="CurrencyFormatskey5" default="false" type="medium" usage="CURRENCY" formatindex="16">
<FormatCode>#.##0 CCC</FormatCode> <FormatCode>#.##0 CCC</FormatCode>
</FormatElement> </FormatElement>
<FormatElement msgid="CurrencyFormatskey6" default="false" type="medium" usage="CURRENCY" formatindex="17"> <FormatElement msgid="CurrencyFormatskey6" default="false" type="medium" usage="CURRENCY" formatindex="17">
<FormatCode>[$€-476] #.##0;[RED]-[$€-476] #.##0</FormatCode> <FormatCode>[$€-8076] #.##0;[RED]-[$€-8076] #.##0</FormatCode>
</FormatElement> </FormatElement>
<FormatElement msgid="DateFormatskey1" default="true" type="short" usage="DATE" formatindex="18"> <FormatElement msgid="DateFormatskey1" default="true" type="short" usage="DATE" formatindex="18">
<FormatCode>DD/MM/YY</FormatCode> <FormatCode>DD/MM/YY</FormatCode>
......
...@@ -160,7 +160,7 @@ typedef unsigned short LanguageType; ...@@ -160,7 +160,7 @@ typedef unsigned short LanguageType;
#define LANGUAGE_BULGARIAN 0x0402 #define LANGUAGE_BULGARIAN 0x0402
#define LANGUAGE_BURMESE 0x0455 #define LANGUAGE_BURMESE 0x0455
#define LANGUAGE_CATALAN 0x0403 #define LANGUAGE_CATALAN 0x0403
#define LANGUAGE_CATALAN_VALENCIAN 0x0803 /* obsoletes LANGUAGE_USER_CATALAN_VALENCIAN */ #define LANGUAGE_CATALAN_VALENCIAN 0x0803 /* obsoletes LANGUAGE_USER_CATALAN_VALENCIAN 0x8003 */
#define LANGUAGE_CHEROKEE_UNITED_STATES 0x045C #define LANGUAGE_CHEROKEE_UNITED_STATES 0x045C
#define LANGUAGE_CHEROKEE_CHEROKEE_LSO 0x7C5C #define LANGUAGE_CHEROKEE_CHEROKEE_LSO 0x7C5C
#define LANGUAGE_CHINESE_HONGKONG 0x0C04 #define LANGUAGE_CHINESE_HONGKONG 0x0C04
...@@ -276,18 +276,18 @@ typedef unsigned short LanguageType; ...@@ -276,18 +276,18 @@ typedef unsigned short LanguageType;
#define LANGUAGE_KIRGHIZ 0x0440 /* AKA Kyrgyz */ #define LANGUAGE_KIRGHIZ 0x0440 /* AKA Kyrgyz */
#define LANGUAGE_KONKANI 0x0457 #define LANGUAGE_KONKANI 0x0457
#define LANGUAGE_KOREAN 0x0412 #define LANGUAGE_KOREAN 0x0412
#define LANGUAGE_KOREAN_JOHAB 0x0812 #define LANGUAGE_KOREAN_JOHAB 0x0812 /* not mentioned in MS-LCID.pdf, oh joy */
#define LANGUAGE_KURDISH_ARABIC_IRAQ 0x0492 /* TODO: obsoletes LANGUAGE_USER_KURDISH_IRAQ */ #define LANGUAGE_KURDISH_ARABIC_IRAQ 0x0492 /* TODO: obsoletes LANGUAGE_USER_KURDISH_IRAQ 0x0E26 */
#define LANGUAGE_KURDISH_ARABIC_LSO 0x7C92 #define LANGUAGE_KURDISH_ARABIC_LSO 0x7C92
#define LANGUAGE_LAO 0x0454 #define LANGUAGE_LAO 0x0454
#define LANGUAGE_LATIN 0x0476 /* obsoletes LANGUAGE_USER_LATIN 0x0610 */ #define LANGUAGE_LATIN_LSO 0x0476 /* obsoletes LANGUAGE_USER_LATIN 0x0610 */
#define LANGUAGE_LATVIAN 0x0426 #define LANGUAGE_LATVIAN 0x0426
#define LANGUAGE_LITHUANIAN 0x0427 #define LANGUAGE_LITHUANIAN 0x0427
#define LANGUAGE_LITHUANIAN_CLASSIC 0x0827 /* MS in its MS-LCID.pdf now says "Neither defined nor reserved" */ #define LANGUAGE_LITHUANIAN_CLASSIC 0x0827 /* MS in its MS-LCID.pdf now says "Neither defined nor reserved" */
#define LANGUAGE_LU_CHINA 0x0490 #define LANGUAGE_LU_CHINA 0x0490
#define LANGUAGE_LUXEMBOURGISH_LUXEMBOURG 0x046E /* obsoletes LANGUAGE_USER_LUXEMBOURGISH 0x0630 */ #define LANGUAGE_LUXEMBOURGISH_LUXEMBOURG 0x046E /* obsoletes LANGUAGE_USER_LUXEMBOURGISH 0x0630 */
#define LANGUAGE_MACEDONIAN 0x042F #define LANGUAGE_MACEDONIAN 0x042F
#define LANGUAGE_MALAGASY_PLATEAU 0x048D /* obsoletes LANGUAGE_USER_MALAGASY_PLATEAU */ #define LANGUAGE_MALAGASY_PLATEAU 0x048D /* obsoletes LANGUAGE_USER_MALAGASY_PLATEAU 0x064F */
#define LANGUAGE_MALAYALAM 0x044C /* in India */ #define LANGUAGE_MALAYALAM 0x044C /* in India */
#define LANGUAGE_MALAY_BRUNEI_DARUSSALAM 0x083E #define LANGUAGE_MALAY_BRUNEI_DARUSSALAM 0x083E
#define LANGUAGE_MALAY_MALAYSIA 0x043E #define LANGUAGE_MALAY_MALAYSIA 0x043E
...@@ -417,7 +417,7 @@ typedef unsigned short LanguageType; ...@@ -417,7 +417,7 @@ typedef unsigned short LanguageType;
#define LANGUAGE_TIGRIGNA_ETHIOPIA 0x0473 #define LANGUAGE_TIGRIGNA_ETHIOPIA 0x0473
#define LANGUAGE_TSONGA 0x0431 #define LANGUAGE_TSONGA 0x0431
#define LANGUAGE_TSWANA 0x0432 /* AKA Setsuana, for South Africa */ #define LANGUAGE_TSWANA 0x0432 /* AKA Setsuana, for South Africa */
#define LANGUAGE_TSWANA_BOTSWANA 0x0832 /* obsoletes LANGUAGE_USER_TSWANA_BOTSWANA */ #define LANGUAGE_TSWANA_BOTSWANA 0x0832 /* obsoletes LANGUAGE_USER_TSWANA_BOTSWANA 0x8032 */
#define LANGUAGE_TURKISH 0x041F #define LANGUAGE_TURKISH 0x041F
#define LANGUAGE_TURKMEN 0x0442 #define LANGUAGE_TURKMEN 0x0442
#define LANGUAGE_UIGHUR_CHINA 0x0480 #define LANGUAGE_UIGHUR_CHINA 0x0480
...@@ -444,11 +444,11 @@ typedef unsigned short LanguageType; ...@@ -444,11 +444,11 @@ typedef unsigned short LanguageType;
#define LANGUAGE_qps_ploca 0x05FE /* 'qps-ploca', qps is a reserved for local use code */ #define LANGUAGE_qps_ploca 0x05FE /* 'qps-ploca', qps is a reserved for local use code */
#define LANGUAGE_qps_plocm 0x09FF /* 'qps-plocm', qps is a reserved for local use code */ #define LANGUAGE_qps_plocm 0x09FF /* 'qps-plocm', qps is a reserved for local use code */
#define LANGUAGE_ar_Ploc_SA__reserved 0x4401 /* 'ar-Ploc-SA', 'Ploc'?? */ #define LANGUAGE_ar_Ploc_SA__reserved 0x4401 /* 'ar-Ploc-SA', 'Ploc'?? */
#define LANGUAGE_ja_Ploc_JP__reserved 0x0811 /* 'ja-Ploc-JP', 'Ploc'?? */ #define LANGUAGE_ja_Ploc_JP__reserved 0x0811 /* 'ja-Ploc-JP', 'Ploc'?? */
#define LANGUAGE_pap_029__reserved 0x0479 /* 'pap-029' */ #define LANGUAGE_pap_029__reserved 0x0479 /* 'pap-029' */
#define LANGUAGE_ar_145__reserved 0x4801 /* 'ar-145' */ #define LANGUAGE_ar_145__reserved 0x4801 /* 'ar-145' */
#define LANGUAGE_es_419 0x580A /* 'es-419', not reserved, used? */ #define LANGUAGE_es_419 0x580A /* 'es-419', not reserved, used? */
/* Seems these values were used or reserved at one point of time ... */ /* Seems these values were used or reserved at one point of time ... */
#define LANGUAGE_Neither_defined_nor_reserved_0x007B 0x007B #define LANGUAGE_Neither_defined_nor_reserved_0x007B 0x007B
...@@ -512,7 +512,8 @@ typedef unsigned short LanguageType; ...@@ -512,7 +512,8 @@ typedef unsigned short LanguageType;
* mapping ISO back to LANGID will return the new value. * mapping ISO back to LANGID will return the new value.
*/ */
#define LANGUAGE_OBSOLETE_USER_LATIN 0x0610 #define LANGUAGE_OBSOLETE_USER_LATIN 0x0610
#define LANGUAGE_USER_LATIN LANGUAGE_LATIN #define LANGUAGE_USER_LATIN LANGUAGE_LATIN_LSO
#define LANGUAGE_USER_LATIN_VATICAN 0x8076 /* makeLangID( 0x20, getPrimaryLanguage( LANGUAGE_LATIN_LSO)) */
#define LANGUAGE_USER_ESPERANTO 0x0611 /* no locale possible */ #define LANGUAGE_USER_ESPERANTO 0x0611 /* no locale possible */
#define LANGUAGE_USER_INTERLINGUA 0x0612 /* no locale, but conventions */ #define LANGUAGE_USER_INTERLINGUA 0x0612 /* no locale, but conventions */
#define LANGUAGE_OBSOLETE_USER_MAORI 0x0620 #define LANGUAGE_OBSOLETE_USER_MAORI 0x0620
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <vector> #include <vector>
struct IsoLanguageCountryEntry; struct IsoLanguageCountryEntry;
struct IsoLanguageScriptCountryEntry;
/** Methods related to Microsoft language IDs. For details about MS-LANGIDs /** Methods related to Microsoft language IDs. For details about MS-LANGIDs
please see lang.h */ please see lang.h */
...@@ -226,6 +227,10 @@ public: ...@@ -226,6 +227,10 @@ public:
I18NLANGTAG_DLLPRIVATE static LanguageType convertPrivateUseToLanguage( I18NLANGTAG_DLLPRIVATE static LanguageType convertPrivateUseToLanguage(
const OUString& rPriv ); const OUString& rPriv );
/** Used by LanguageTag::canonicalize() */
I18NLANGTAG_DLLPRIVATE static ::com::sun::star::lang::Locale getOverride(
const ::com::sun::star::lang::Locale & rLocale );
/** Used by convertLocaleToLanguage(Locale) */ /** Used by convertLocaleToLanguage(Locale) */
I18NLANGTAG_DLLPRIVATE static LanguageType convertIsoNamesToLanguage( I18NLANGTAG_DLLPRIVATE static LanguageType convertIsoNamesToLanguage(
const OUString& rLang, const OUString& rCountry ); const OUString& rLang, const OUString& rCountry );
...@@ -240,6 +245,10 @@ public: ...@@ -240,6 +245,10 @@ public:
I18NLANGTAG_DLLPRIVATE static com::sun::star::lang::Locale getLocale( I18NLANGTAG_DLLPRIVATE static com::sun::star::lang::Locale getLocale(
const IsoLanguageCountryEntry * pEntry ); const IsoLanguageCountryEntry * pEntry );
/** Used by lookupFallbackLocale(Locale) */
I18NLANGTAG_DLLPRIVATE static com::sun::star::lang::Locale getLocale(
const IsoLanguageScriptCountryEntry * pEntry );
/** Convert a LanguageType to a Locale. /** Convert a LanguageType to a Locale.
...@@ -252,14 +261,21 @@ public: ...@@ -252,14 +261,21 @@ public:
LanguageType nLang, bool bResolveSystem ); LanguageType nLang, bool bResolveSystem );
/** Used by convertLanguageToLocale(LanguageType,bool) and /** Used by convertLanguageToLocale(LanguageType,bool) and
getLocale(IsoLanguageCountryEntry*) getLocale(IsoLanguageCountryEntry*) and
getLocale(IsoLanguageScriptCountryEntry*)
@param bIgnoreOverride
If bIgnoreOverride==true, a matching entry is used even if
mnOverride is set, for conversion even to an outdated tag.
If bIgnoreOverride==false, a matching entry is skipped if
mnOverride is set and instead the override is followed.
@return rLocale set to mapped values, unchanged if no mapping was @return rLocale set to mapped values, unchanged if no mapping was
found. E.g. pass empty Locale to obtain empty SYSTEM locale found. E.g. pass empty Locale to obtain empty SYSTEM locale
for that case. for that case.
*/ */
I18NLANGTAG_DLLPRIVATE static void convertLanguageToLocaleImpl( I18NLANGTAG_DLLPRIVATE static void convertLanguageToLocaleImpl(
LanguageType nLang, ::com::sun::star::lang::Locale & rLocale ); LanguageType nLang, ::com::sun::star::lang::Locale & rLocale, bool bIgnoreOverride );
I18NLANGTAG_DLLPRIVATE static ::com::sun::star::lang::Locale lookupFallbackLocale( I18NLANGTAG_DLLPRIVATE static ::com::sun::star::lang::Locale lookupFallbackLocale(
......
...@@ -197,7 +197,7 @@ StringArray STR_ARR_SVT_LANGUAGE_TABLE ...@@ -197,7 +197,7 @@ StringArray STR_ARR_SVT_LANGUAGE_TABLE
< "Uzbek Latin" ; LANGUAGE_UZBEK_LATIN ; > ; < "Uzbek Latin" ; LANGUAGE_UZBEK_LATIN ; > ;
< "Uzbek Cyrillic" ; LANGUAGE_UZBEK_CYRILLIC ; > ; < "Uzbek Cyrillic" ; LANGUAGE_UZBEK_CYRILLIC ; > ;
< "Welsh" ; LANGUAGE_WELSH ; > ; < "Welsh" ; LANGUAGE_WELSH ; > ;
< "Latin" ; LANGUAGE_LATIN ; > ; < "Latin" ; LANGUAGE_USER_LATIN_VATICAN ; > ;
< "Esperanto" ; LANGUAGE_USER_ESPERANTO ; > ; < "Esperanto" ; LANGUAGE_USER_ESPERANTO ; > ;
< "Kinyarwanda (Rwanda)" ; LANGUAGE_USER_KINYARWANDA ; > ; < "Kinyarwanda (Rwanda)" ; LANGUAGE_USER_KINYARWANDA ; > ;
< "Maori" ; LANGUAGE_MAORI_NEW_ZEALAND ; > ; < "Maori" ; LANGUAGE_MAORI_NEW_ZEALAND ; > ;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment