Commit 16cd9748, authored by Caolán McNamara

Resolves: fdo#40292 Tamil grapheme cluster rules

Parent commit: f0a5e147
...@@ -80,10 +80,10 @@ private: ...@@ -80,10 +80,10 @@ private:
uno::Reference<i18n::XBreakIterator> m_xBreak; uno::Reference<i18n::XBreakIterator> m_xBreak;
}; };
//See https://bugs.freedesktop.org/show_bug.cgi?id=31271 for motivation //See https://bugs.freedesktop.org/show_bug.cgi?id=31271
void TestBreakIterator::testLineBreaking() void TestBreakIterator::testLineBreaking()
{ {
::rtl::OUString aTest1(RTL_CONSTASCII_USTRINGPARAM("(some text here)")); ::rtl::OUString aTest(RTL_CONSTASCII_USTRINGPARAM("(some text here)"));
i18n::LineBreakHyphenationOptions aHyphOptions; i18n::LineBreakHyphenationOptions aHyphOptions;
i18n::LineBreakUserOptions aUserOptions; i18n::LineBreakUserOptions aUserOptions;
...@@ -94,18 +94,19 @@ void TestBreakIterator::testLineBreaking() ...@@ -94,18 +94,19 @@ void TestBreakIterator::testLineBreaking()
{ {
//Here we want the line break to leave text here) on the next line //Here we want the line break to leave text here) on the next line
i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest1, strlen("(some tex"), aLocale, 0, aHyphOptions, aUserOptions); i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some tex"), aLocale, 0, aHyphOptions, aUserOptions);
CPPUNIT_ASSERT_MESSAGE("Expected a break at the the start of the word", aResult.breakIndex == 6); CPPUNIT_ASSERT_MESSAGE("Expected a break at the the start of the word", aResult.breakIndex == 6);
} }
{ {
//Here we want the line break to leave "here)" on the next line //Here we want the line break to leave "here)" on the next line
i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest1, strlen("(some text here"), aLocale, 0, aHyphOptions, aUserOptions); i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some text here"), aLocale, 0, aHyphOptions, aUserOptions);
CPPUNIT_ASSERT_MESSAGE("Expected a break at the the start of the word", aResult.breakIndex == 11); CPPUNIT_ASSERT_MESSAGE("Expected a break at the the start of the word", aResult.breakIndex == 11);
} }
} }
//See http://qa.openoffice.org/issues/show_bug.cgi?id=111152 for motivation //See http://qa.openoffice.org/issues/show_bug.cgi?id=111152
//See https://bugs.freedesktop.org/show_bug.cgi?id=40292
void TestBreakIterator::testGraphemeIteration() void TestBreakIterator::testGraphemeIteration()
{ {
lang::Locale aLocale; lang::Locale aLocale;
...@@ -114,46 +115,90 @@ void TestBreakIterator::testGraphemeIteration() ...@@ -114,46 +115,90 @@ void TestBreakIterator::testGraphemeIteration()
{ {
const sal_Unicode BA_HALANT_LA[] = { 0x09AC, 0x09CD, 0x09AF }; const sal_Unicode BA_HALANT_LA[] = { 0x09AC, 0x09CD, 0x09AF };
::rtl::OUString aTest1(BA_HALANT_LA, SAL_N_ELEMENTS(BA_HALANT_LA)); ::rtl::OUString aTest(BA_HALANT_LA, SAL_N_ELEMENTS(BA_HALANT_LA));
sal_Int32 nDone=0; sal_Int32 nDone=0;
sal_Int32 nPos; sal_Int32 nPos;
nPos = m_xBreak->nextCharacters(aTest1, 0, aLocale, nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(BA_HALANT_LA)); CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(BA_HALANT_LA));
nPos = m_xBreak->previousCharacters(aTest1, SAL_N_ELEMENTS(BA_HALANT_LA), aLocale, nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(BA_HALANT_LA), aLocale,
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0); CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
} }
{ {
const sal_Unicode HA_HALANT_NA_VOWELSIGNI[] = { 0x09B9, 0x09CD, 0x09A3, 0x09BF }; const sal_Unicode HA_HALANT_NA_VOWELSIGNI[] = { 0x09B9, 0x09CD, 0x09A3, 0x09BF };
::rtl::OUString aTest1(HA_HALANT_NA_VOWELSIGNI, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI)); ::rtl::OUString aTest(HA_HALANT_NA_VOWELSIGNI, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
sal_Int32 nDone=0; sal_Int32 nDone=0;
sal_Int32 nPos; sal_Int32 nPos;
nPos = m_xBreak->nextCharacters(aTest1, 0, aLocale, nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI)); CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI));
nPos = m_xBreak->previousCharacters(aTest1, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI), aLocale, nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(HA_HALANT_NA_VOWELSIGNI), aLocale,
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0); CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
} }
{ {
const sal_Unicode TA_HALANT_MA_HALANT_YA [] = { 0x09A4, 0x09CD, 0x09AE, 0x09CD, 0x09AF }; const sal_Unicode TA_HALANT_MA_HALANT_YA [] = { 0x09A4, 0x09CD, 0x09AE, 0x09CD, 0x09AF };
::rtl::OUString aTest1(TA_HALANT_MA_HALANT_YA, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA)); ::rtl::OUString aTest(TA_HALANT_MA_HALANT_YA, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
sal_Int32 nDone=0; sal_Int32 nDone=0;
sal_Int32 nPos; sal_Int32 nPos;
nPos = m_xBreak->nextCharacters(aTest1, 0, aLocale, nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA)); CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA));
nPos = m_xBreak->previousCharacters(aTest1, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA), aLocale, nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(TA_HALANT_MA_HALANT_YA), aLocale,
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0); CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
} }
aLocale.Language = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("ta"));
aLocale.Country = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("IN"));
{
const sal_Unicode KA_VIRAMA_SSA[] = { 0x0B95, 0x0BCD, 0x0BB7 };
::rtl::OUString aTest(KA_VIRAMA_SSA, SAL_N_ELEMENTS(KA_VIRAMA_SSA));
sal_Int32 nDone=0;
sal_Int32 nPos = 0;
nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == SAL_N_ELEMENTS(KA_VIRAMA_SSA));
nPos = m_xBreak->previousCharacters(aTest, SAL_N_ELEMENTS(KA_VIRAMA_SSA), aLocale,
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
CPPUNIT_ASSERT_MESSAGE("Should skip full grapheme", nPos == 0);
}
{
const sal_Unicode CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI[] =
{ 0x0B9A, 0x0BBF, 0x0BA4, 0x0BCD, 0x0BA4, 0x0BBF, 0x0BB0, 0x0BC8 };
::rtl::OUString aTest(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI,
SAL_N_ELEMENTS(CA_VOWELSIGNI_TA_VIRAMA_TA_VOWELSIGNI_RA_VOWELSIGNAI));
sal_Int32 nDone=0;
sal_Int32 nPos=0;
for (sal_Int32 i = 0; i < 4; ++i)
{
sal_Int32 nOldPos = nPos;
nPos = m_xBreak->nextCharacters(aTest, nPos, aLocale,
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
CPPUNIT_ASSERT_MESSAGE("Should skip 2 units", nPos == nOldPos+2);
}
for (sal_Int32 i = 0; i < 4; ++i)
{
sal_Int32 nOldPos = nPos;
nPos = m_xBreak->previousCharacters(aTest, nPos, aLocale,
i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
CPPUNIT_ASSERT_MESSAGE("Should skip 2 units", nPos == nOldPos-2);
}
}
{ {
const sal_Unicode ALEF_QAMATS [] = { 0x05D0, 0x05B8 }; const sal_Unicode ALEF_QAMATS [] = { 0x05D0, 0x05B8 };
::rtl::OUString aText(ALEF_QAMATS, SAL_N_ELEMENTS(ALEF_QAMATS)); ::rtl::OUString aText(ALEF_QAMATS, SAL_N_ELEMENTS(ALEF_QAMATS));
......
...@@ -40,8 +40,9 @@ $OriyaLetter = [\u0B05-\u0B39 \u0B5C-\u0B61 \u0B71]; ...@@ -40,8 +40,9 @@ $OriyaLetter = [\u0B05-\u0B39 \u0B5C-\u0B61 \u0B71];
$OriyaSignVirama = \u0B4D; $OriyaSignVirama = \u0B4D;
$GurmukhiLetter = [\u0A05-\u0A39 \u0A59-\u0A5E]; $GurmukhiLetter = [\u0A05-\u0A39 \u0A59-\u0A5E];
$GurmukhiSignVirama = \u0A4D; $GurmukhiSignVirama = \u0A4D;
$TamilLetter = [\u0B85-\u0BB9]; $TamilKa = \u0B95;
$TamilSignVirama = \u0BCD; $TamilSignVirama = \u0BCD;
$TamilSsa = \u0BB7;
$TeluguLetter = [\u0C05-\u0C39 \u0C58-\u0C61]; $TeluguLetter = [\u0C05-\u0C39 \u0C58-\u0C61];
$TeluguSignVirama = \u0C4D; $TeluguSignVirama = \u0C4D;
...@@ -70,7 +71,7 @@ $KannadaLetter ($KannadaSignVirama $KannadaLetter?)+; ...@@ -70,7 +71,7 @@ $KannadaLetter ($KannadaSignVirama $KannadaLetter?)+;
$MalayalamLetter ($MalayalamSignVirama $MalayalamLetter?)+; $MalayalamLetter ($MalayalamSignVirama $MalayalamLetter?)+;
$OriyaLetter ($OriyaSignVirama $OriyaLetter?)+; $OriyaLetter ($OriyaSignVirama $OriyaLetter?)+;
$GurmukhiLetter ($GurmukhiSignVirama $GurmukhiLetter?)+; $GurmukhiLetter ($GurmukhiSignVirama $GurmukhiLetter?)+;
$TamilLetter ($TamilSignVirama $TamilLetter?)+; $TamilKa $TamilSignVirama $TamilSsa;
$TeluguLetter ($TeluguSignVirama $TeluguLetter?)+; $TeluguLetter ($TeluguSignVirama $TeluguLetter?)+;
$L ($L | $V | $LV | $LVT); $L ($L | $V | $LV | $LVT);
...@@ -95,7 +96,7 @@ $LF $CR; ...@@ -95,7 +96,7 @@ $LF $CR;
($MalayalamLetter? $MalayalamSignVirama)+ $MalayalamLetter; ($MalayalamLetter? $MalayalamSignVirama)+ $MalayalamLetter;
($OriyaLetter? $OriyaSignVirama)+ $OriyaLetter; ($OriyaLetter? $OriyaSignVirama)+ $OriyaLetter;
($GurmukhiLetter? $GurmukhiSignVirama)+ $GurmukhiLetter; ($GurmukhiLetter? $GurmukhiSignVirama)+ $GurmukhiLetter;
($TamilLetter? $TamilSignVirama)+ $TamilLetter; $TamilSsa $TamilSignVirama $TamilKa;
($TeluguLetter? $TeluguSignVirama)+ $TeluguLetter; ($TeluguLetter? $TeluguSignVirama)+ $TeluguLetter;
($L | $V | $LV | $LVT) $L; ($L | $V | $LV | $LVT) $L;
($V | $T) ($LV | $V); ($V | $T) ($LV | $V);
......
...@@ -32,8 +32,9 @@ $OriyaLetter = [\u0B05-\u0B39 \u0B5C-\u0B61 \u0B71]; ...@@ -32,8 +32,9 @@ $OriyaLetter = [\u0B05-\u0B39 \u0B5C-\u0B61 \u0B71];
$OriyaSignVirama = \u0B4D; $OriyaSignVirama = \u0B4D;
$GurmukhiLetter = [\u0A05-\u0A39 \u0A59-\u0A5E]; $GurmukhiLetter = [\u0A05-\u0A39 \u0A59-\u0A5E];
$GurmukhiSignVirama = \u0A4D; $GurmukhiSignVirama = \u0A4D;
$TamilLetter = [\u0B85-\u0BB9]; $TamilKa = \u0B95;
$TamilSignVirama = \u0BCD; $TamilSignVirama = \u0BCD;
$TamilSsa = \u0BB7;
$TeluguLetter = [\u0C05-\u0C39 \u0C58-\u0C61]; $TeluguLetter = [\u0C05-\u0C39 \u0C58-\u0C61];
$TeluguSignVirama = \u0C4D; $TeluguSignVirama = \u0C4D;
...@@ -62,7 +63,7 @@ $KannadaLetter ($KannadaSignVirama $KannadaLetter?)+; ...@@ -62,7 +63,7 @@ $KannadaLetter ($KannadaSignVirama $KannadaLetter?)+;
$MalayalamLetter ($MalayalamSignVirama $MalayalamLetter?)+; $MalayalamLetter ($MalayalamSignVirama $MalayalamLetter?)+;
$OriyaLetter ($OriyaSignVirama $OriyaLetter?)+; $OriyaLetter ($OriyaSignVirama $OriyaLetter?)+;
$GurmukhiLetter ($GurmukhiSignVirama $GurmukhiLetter?)+; $GurmukhiLetter ($GurmukhiSignVirama $GurmukhiLetter?)+;
$TamilLetter ($TamilSignVirama $TamilLetter?)+; $TamilKa $TamilSignVirama $TamilSsa;
$TeluguLetter ($TeluguSignVirama $TeluguLetter?)+; $TeluguLetter ($TeluguSignVirama $TeluguLetter?)+;
$L ($L | $V | $LV | $LVT); $L ($L | $V | $LV | $LVT);
...@@ -86,7 +87,7 @@ $LF $CR; ...@@ -86,7 +87,7 @@ $LF $CR;
($MalayalamLetter? $MalayalamSignVirama)+ $MalayalamLetter; ($MalayalamLetter? $MalayalamSignVirama)+ $MalayalamLetter;
($OriyaLetter? $OriyaSignVirama)+ $OriyaLetter; ($OriyaLetter? $OriyaSignVirama)+ $OriyaLetter;
($GurmukhiLetter? $GurmukhiSignVirama)+ $GurmukhiLetter; ($GurmukhiLetter? $GurmukhiSignVirama)+ $GurmukhiLetter;
($TamilLetter? $TamilSignVirama)+ $TamilLetter; $TamilSsa $TamilSignVirama $TamilKa;
($TeluguLetter? $TeluguSignVirama)+ $TeluguLetter; ($TeluguLetter? $TeluguSignVirama)+ $TeluguLetter;
($L | $V | $LV | $LVT) $L; ($L | $V | $LV | $LVT) $L;
($V | $T) ($LV | $V); ($V | $T) ($LV | $V);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment