Commit 4647e778 authored by Mark Hung

tdf#81129 Support reading non-BMP characters in HTML documents.

1. Allow character entity ( &#nnnn; ) to exceed 0xffff in HTMLParser::ScanText()
2. Return a character as sal_uInt32 ( utf32 ) instead of sal_Unicode ( utf16 )
   from SvParser::GetNextChar().

Conflicts:
	sw/qa/extras/htmlexport/htmlexport.cxx

Change-Id: Ida455040970fae800f0f11471b27f53461fb78e4
Reviewed-on: https://gerrit.libreoffice.org/21152
Tested-by: Jenkins <ci@libreoffice.org>
Reviewed-by: Mark Hung <marklh9@gmail.com>
parent 35966131
...@@ -222,6 +222,19 @@ sal_uInt32 const surrogatesLowLast = 0xDFFF; ...@@ -222,6 +222,19 @@ sal_uInt32 const surrogatesLowLast = 0xDFFF;
} }
/// @endcond /// @endcond
/** Tell whether a 32-bit value lies within the Unicode code space.

    The code space ends at U+10FFFF, the highest code point that UTF-16 can
    represent (RFC 3629 restricts UTF-8 to the same upper bound).  Only the
    upper bound is tested; values in the surrogate range are not rejected.

    @param code  The value to test.

    @return  True if code is at most 0x10FFFF, false otherwise.

    @since LibreOffice 5.2
*/
inline bool isValidCodePoint( sal_uInt32 code)
{
    sal_uInt32 const nLastCodePoint = 0x10FFFF;
    return !( code > nLastCodePoint );
}
/** Check for high surrogate. /** Check for high surrogate.
@param code A Unicode code point. @param code A Unicode code point.
......
...@@ -59,7 +59,7 @@ protected: ...@@ -59,7 +59,7 @@ protected:
rtl_TextEncoding eSrcEnc; // Source encoding rtl_TextEncoding eSrcEnc; // Source encoding
sal_uLong nNextChPos; sal_uLong nNextChPos;
sal_Unicode nNextCh; // current character for the "lex" sal_uInt32 nNextCh; // current character codepoint in UTF32 for the "lex"
bool bDownloadingFile : 1; // true: An external file is bool bDownloadingFile : 1; // true: An external file is
...@@ -128,7 +128,7 @@ public: ...@@ -128,7 +128,7 @@ public:
inline void SetLineNr( sal_uLong nlNum ); // inline bottom inline void SetLineNr( sal_uLong nlNum ); // inline bottom
inline void SetLinePos( sal_uLong nlPos ); // inline bottom inline void SetLinePos( sal_uLong nlPos ); // inline bottom
sal_Unicode GetNextChar(); sal_uInt32 GetNextChar(); // Return next Unicode codepoint in UTF32.
void RereadLookahead(); void RereadLookahead();
inline bool IsParserWorking() const { return SVPAR_WORKING == eState; } inline bool IsParserWorking() const { return SVPAR_WORKING == eState; }
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include <tools/color.hxx> #include <tools/color.hxx>
#include <rtl/ustrbuf.hxx> #include <rtl/ustrbuf.hxx>
#include <rtl/strbuf.hxx> #include <rtl/strbuf.hxx>
#include <rtl/character.hxx>
#include <tools/tenccvt.hxx> #include <tools/tenccvt.hxx>
#include <tools/datetime.hxx> #include <tools/datetime.hxx>
...@@ -429,7 +430,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) ...@@ -429,7 +430,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
OUStringBuffer sTmpBuffer( MAX_LEN ); OUStringBuffer sTmpBuffer( MAX_LEN );
bool bContinue = true; bool bContinue = true;
bool bEqSignFound = false; bool bEqSignFound = false;
sal_Unicode cQuote = 0U; sal_uInt32 cQuote = 0U;
while( bContinue && IsParserWorking() ) while( bContinue && IsParserWorking() )
{ {
...@@ -445,7 +446,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) ...@@ -445,7 +446,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
sal_uLong nStreamPos = rInput.Tell(); sal_uLong nStreamPos = rInput.Tell();
sal_uLong nLinePos = GetLinePos(); sal_uLong nLinePos = GetLinePos();
sal_Unicode cChar = 0U; sal_uInt32 cChar = 0U;
if( '#' == (nNextCh = GetNextChar()) ) if( '#' == (nNextCh = GetNextChar()) )
{ {
nNextCh = GetNextChar(); nNextCh = GetNextChar();
...@@ -460,10 +461,10 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) ...@@ -460,10 +461,10 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
{ {
cChar = cChar * 16U + cChar = cChar * 16U +
( nNextCh <= '9' ( nNextCh <= '9'
? sal_Unicode( nNextCh - '0' ) ? sal_uInt32( nNextCh - '0' )
: ( nNextCh <= 'F' : ( nNextCh <= 'F'
? sal_Unicode( nNextCh - 'A' + 10 ) ? sal_uInt32( nNextCh - 'A' + 10 )
: sal_Unicode( nNextCh - 'a' + 10 ) ) ); : sal_uInt32( nNextCh - 'a' + 10 ) ) );
nNextCh = GetNextChar(); nNextCh = GetNextChar();
} }
} }
...@@ -471,7 +472,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) ...@@ -471,7 +472,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
{ {
do do
{ {
cChar = cChar * 10U + sal_Unicode( nNextCh - '0'); cChar = cChar * 10U + sal_uInt32( nNextCh - '0');
nNextCh = GetNextChar(); nNextCh = GetNextChar();
} }
while( HTML_ISDIGIT(nNextCh) ); while( HTML_ISDIGIT(nNextCh) );
...@@ -500,6 +501,9 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) ...@@ -500,6 +501,9 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
} }
else else
nNextCh = 0U; nNextCh = 0U;
if ( ! rtl::isValidCodePoint( cChar ) )
cChar = '?';
} }
else if( HTML_ISALPHA( nNextCh ) ) else if( HTML_ISALPHA( nNextCh ) )
{ {
...@@ -507,7 +511,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) ...@@ -507,7 +511,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
sal_Int32 nPos = 0L; sal_Int32 nPos = 0L;
do do
{ {
sEntityBuffer.append( nNextCh ); sEntityBuffer.appendUtf32( nNextCh );
nPos++; nPos++;
nNextCh = GetNextChar(); nNextCh = GetNextChar();
} }
...@@ -637,7 +641,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) ...@@ -637,7 +641,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
if( IsParserWorking() ) if( IsParserWorking() )
{ {
if( cChar ) if( cChar )
sTmpBuffer.append( cChar ); sTmpBuffer.appendUtf32( cChar );
} }
else if( SVPAR_PENDING==eState && '>'!=cBreak ) else if( SVPAR_PENDING==eState && '>'!=cBreak )
{ {
...@@ -661,7 +665,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) ...@@ -661,7 +665,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
case '=': case '=':
if( '>'==cBreak && !cQuote ) if( '>'==cBreak && !cQuote )
bEqSignFound = true; bEqSignFound = true;
sTmpBuffer.append( nNextCh ); sTmpBuffer.appendUtf32( nNextCh );
break; break;
case '\\': case '\\':
...@@ -684,7 +688,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) ...@@ -684,7 +688,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
else if( cQuote && (cQuote==nNextCh ) ) else if( cQuote && (cQuote==nNextCh ) )
cQuote = 0U; cQuote = 0U;
} }
sTmpBuffer.append( nNextCh ); sTmpBuffer.appendUtf32( nNextCh );
bEqSignFound = false; bEqSignFound = false;
break; break;
...@@ -695,14 +699,15 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) ...@@ -695,14 +699,15 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
} }
else else
{ {
sTmpBuffer.append( nNextCh ); sTmpBuffer.appendUtf32( nNextCh );
} }
break; break;
case '<': case '<':
bEqSignFound = false; bEqSignFound = false;
if( '>'==cBreak ) if( '>'==cBreak )
sTmpBuffer.append( nNextCh ); sTmpBuffer.appendUtf32( nNextCh );
else else
bContinue = false; // break, String zusammen bContinue = false; // break, String zusammen
break; break;
...@@ -725,7 +730,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) ...@@ -725,7 +730,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
if( '>'==cBreak ) if( '>'==cBreak )
{ {
// cr/lf in tag is handled in _GetNextToken() // cr/lf in tag is handled in _GetNextToken()
sTmpBuffer.append( nNextCh ); sTmpBuffer.appendUtf32( nNextCh );
break; break;
} }
else if( bReadListing || bReadXMP || bReadPRE || bReadTextArea ) else if( bReadListing || bReadXMP || bReadPRE || bReadTextArea )
...@@ -752,7 +757,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) ...@@ -752,7 +757,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
nNextCh = ' '; nNextCh = ' ';
// no break; // no break;
case ' ': case ' ':
sTmpBuffer.append( nNextCh ); sTmpBuffer.appendUtf32( nNextCh );
if( '>'!=cBreak && (!bReadListing && !bReadXMP && if( '>'!=cBreak && (!bReadListing && !bReadXMP &&
!bReadPRE && !bReadTextArea) ) !bReadPRE && !bReadTextArea) )
{ {
...@@ -787,7 +792,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak ) ...@@ -787,7 +792,7 @@ int HTMLParser::ScanText( const sal_Unicode cBreak )
{ {
do { do {
// All remaining characters make their way into the text. // All remaining characters make their way into the text.
sTmpBuffer.append( nNextCh ); sTmpBuffer.appendUtf32( nNextCh );
if( MAX_LEN == sTmpBuffer.getLength() ) if( MAX_LEN == sTmpBuffer.getLength() )
{ {
aToken += sTmpBuffer.makeStringAndClear(); aToken += sTmpBuffer.makeStringAndClear();
...@@ -864,7 +869,7 @@ int HTMLParser::_GetNextRawToken() ...@@ -864,7 +869,7 @@ int HTMLParser::_GetNextRawToken()
} }
else if( '!' == nNextCh ) else if( '!' == nNextCh )
{ {
sTmpBuffer.append( nNextCh ); sTmpBuffer.appendUtf32( nNextCh );
nNextCh = GetNextChar(); nNextCh = GetNextChar();
} }
...@@ -872,7 +877,7 @@ int HTMLParser::_GetNextRawToken() ...@@ -872,7 +877,7 @@ int HTMLParser::_GetNextRawToken()
while( (HTML_ISALPHA(nNextCh) || '-'==nNextCh) && while( (HTML_ISALPHA(nNextCh) || '-'==nNextCh) &&
IsParserWorking() && sTmpBuffer.getLength() < MAX_LEN ) IsParserWorking() && sTmpBuffer.getLength() < MAX_LEN )
{ {
sTmpBuffer.append( nNextCh ); sTmpBuffer.appendUtf32( nNextCh );
nNextCh = GetNextChar(); nNextCh = GetNextChar();
} }
...@@ -959,7 +964,7 @@ int HTMLParser::_GetNextRawToken() ...@@ -959,7 +964,7 @@ int HTMLParser::_GetNextRawToken()
} }
break; break;
case '-': case '-':
sTmpBuffer.append( nNextCh ); sTmpBuffer.appendUtf32( nNextCh );
if( bReadComment ) if( bReadComment )
{ {
bool bTwoMinus = false; bool bTwoMinus = false;
...@@ -970,7 +975,7 @@ int HTMLParser::_GetNextRawToken() ...@@ -970,7 +975,7 @@ int HTMLParser::_GetNextRawToken()
if( MAX_LEN == sTmpBuffer.getLength() ) if( MAX_LEN == sTmpBuffer.getLength() )
aToken += sTmpBuffer.makeStringAndClear(); aToken += sTmpBuffer.makeStringAndClear();
sTmpBuffer.append( nNextCh ); sTmpBuffer.appendUtf32( nNextCh );
nNextCh = GetNextChar(); nNextCh = GetNextChar();
} }
...@@ -1015,7 +1020,7 @@ int HTMLParser::_GetNextRawToken() ...@@ -1015,7 +1020,7 @@ int HTMLParser::_GetNextRawToken()
// no break // no break
default: default:
// all remaining characters are appended to the buffer // all remaining characters are appended to the buffer
sTmpBuffer.append( nNextCh ); sTmpBuffer.appendUtf32( nNextCh );
break; break;
} }
...@@ -1095,7 +1100,7 @@ int HTMLParser::_GetNextToken() ...@@ -1095,7 +1100,7 @@ int HTMLParser::_GetNextToken()
{ {
OUStringBuffer sTmpBuffer; OUStringBuffer sTmpBuffer;
do { do {
sTmpBuffer.append( nNextCh ); sTmpBuffer.appendUtf32( nNextCh );
if( MAX_LEN == sTmpBuffer.getLength() ) if( MAX_LEN == sTmpBuffer.getLength() )
aToken += sTmpBuffer.makeStringAndClear(); aToken += sTmpBuffer.makeStringAndClear();
nNextCh = GetNextChar(); nNextCh = GetNextChar();
...@@ -1166,10 +1171,10 @@ int HTMLParser::_GetNextToken() ...@@ -1166,10 +1171,10 @@ int HTMLParser::_GetNextToken()
} }
bDone = aToken.endsWith( "--" ); bDone = aToken.endsWith( "--" );
if( !bDone ) if( !bDone )
aToken += OUString(nNextCh); aToken += OUString(&nNextCh,1);
} }
else else
aToken += OUString(nNextCh); aToken += OUString(&nNextCh,1);
if( !bDone ) if( !bDone )
nNextCh = GetNextChar(); nNextCh = GetNextChar();
} }
...@@ -1261,7 +1266,7 @@ int HTMLParser::_GetNextToken() ...@@ -1261,7 +1266,7 @@ int HTMLParser::_GetNextToken()
bDone = '>'==nNextCh && aToken.endsWith("%"); bDone = '>'==nNextCh && aToken.endsWith("%");
if( !bDone ) if( !bDone )
{ {
aToken += OUString(nNextCh); aToken += OUString(&nNextCh,1);
nNextCh = GetNextChar(); nNextCh = GetNextChar();
} }
} }
......
...@@ -191,7 +191,7 @@ int SvRTFParser::_GetNextToken() ...@@ -191,7 +191,7 @@ int SvRTFParser::_GetNextToken()
// can be also \{, \}, \'88 // can be also \{, \}, \'88
for( sal_uInt8 m = 0; m < nUCharOverread; ++m ) for( sal_uInt8 m = 0; m < nUCharOverread; ++m )
{ {
sal_Unicode cAnsi = nNextCh; sal_uInt32 cAnsi = nNextCh;
while( 0xD == cAnsi ) while( 0xD == cAnsi )
cAnsi = GetNextChar(); cAnsi = GetNextChar();
while( 0xA == cAnsi ) while( 0xA == cAnsi )
...@@ -382,7 +382,7 @@ void SvRTFParser::ScanText( const sal_Unicode cBreak ) ...@@ -382,7 +382,7 @@ void SvRTFParser::ScanText( const sal_Unicode cBreak )
case '}': case '}':
case '{': case '{':
case '+': // I found in a RTF file case '+': // I found in a RTF file
aStrBuffer.append(nNextCh); aStrBuffer.append(sal_Unicode(nNextCh));
break; break;
case '~': // nonbreaking space case '~': // nonbreaking space
aStrBuffer.append(static_cast< sal_Unicode >(0xA0)); aStrBuffer.append(static_cast< sal_Unicode >(0xA0));
...@@ -484,7 +484,7 @@ void SvRTFParser::ScanText( const sal_Unicode cBreak ) ...@@ -484,7 +484,7 @@ void SvRTFParser::ScanText( const sal_Unicode cBreak )
{ {
do { do {
// all other characters end up in the text // all other characters end up in the text
aStrBuffer.append(nNextCh); aStrBuffer.appendUtf32(nNextCh);
if (sal_Unicode(EOF) == (nNextCh = GetNextChar())) if (sal_Unicode(EOF) == (nNextCh = GetNextChar()))
{ {
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <tools/debug.hxx> #include <tools/debug.hxx>
#include <rtl/textcvt.h> #include <rtl/textcvt.h>
#include <rtl/tencinfo.h> #include <rtl/tencinfo.h>
#include <rtl/character.hxx>
#include <vector> #include <vector>
...@@ -35,7 +36,7 @@ struct SvParser_Impl ...@@ -35,7 +36,7 @@ struct SvParser_Impl
long nTokenValue; // extra value (RTF) long nTokenValue; // extra value (RTF)
bool bTokenHasValue; // indicates whether nTokenValue is valid bool bTokenHasValue; // indicates whether nTokenValue is valid
int nToken; // actual Token int nToken; // actual Token
sal_Unicode nNextCh; // actual character sal_uInt32 nNextCh; // actual character
int nSaveToken; // the token from Continue int nSaveToken; // the token from Continue
rtl_TextToUnicodeConverter hConv; rtl_TextToUnicodeConverter hConv;
...@@ -148,9 +149,9 @@ void SvParser::RereadLookahead() ...@@ -148,9 +149,9 @@ void SvParser::RereadLookahead()
nNextCh = GetNextChar(); nNextCh = GetNextChar();
} }
sal_Unicode SvParser::GetNextChar() sal_uInt32 SvParser::GetNextChar()
{ {
sal_Unicode c = 0U; sal_uInt32 c = 0U;
// When reading multiple bytes, we don't have to care about the file // When reading multiple bytes, we don't have to care about the file
// position when we run into the pending state. The file position is // position when we run into the pending state. The file position is
...@@ -257,7 +258,7 @@ sal_Unicode SvParser::GetNextChar() ...@@ -257,7 +258,7 @@ sal_Unicode SvParser::GetNextChar()
) )
{ {
// no convserion shall take place // no convserion shall take place
c = (sal_Unicode)c1; c = reinterpret_cast<sal_uChar&>( c1 );
nChars = 1; nChars = 1;
} }
else else
...@@ -280,6 +281,7 @@ sal_Unicode SvParser::GetNextChar() ...@@ -280,6 +281,7 @@ sal_Unicode SvParser::GetNextChar()
// read enough characters. // read enough characters.
if( pImplData->hContext != reinterpret_cast<rtl_TextToUnicodeContext>(1) ) if( pImplData->hContext != reinterpret_cast<rtl_TextToUnicodeContext>(1) )
{ {
sal_Unicode sCh[2];
while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 ) while( (nInfo&RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL) != 0 )
{ {
rInput.ReadChar( c1 ); rInput.ReadChar( c1 );
...@@ -289,7 +291,7 @@ sal_Unicode SvParser::GetNextChar() ...@@ -289,7 +291,7 @@ sal_Unicode SvParser::GetNextChar()
nChars = rtl_convertTextToUnicode( nChars = rtl_convertTextToUnicode(
pImplData->hConv, pImplData->hContext, pImplData->hConv, pImplData->hContext,
&c1, 1, &cUC, 1, &c1, 1, sCh , 2,
RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR| RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR|
RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR| RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR|
RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR, RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
...@@ -299,7 +301,11 @@ sal_Unicode SvParser::GetNextChar() ...@@ -299,7 +301,11 @@ sal_Unicode SvParser::GetNextChar()
{ {
if( 1 == nChars && 0 == nInfo ) if( 1 == nChars && 0 == nInfo )
{ {
c = cUC; c = sal_uInt32( sCh[0] );
}
else if( 2 == nChars && 0 == nInfo )
{
c = rtl::combineSurrogates( sCh[0], sCh[1] );
} }
else if( 0 != nChars || 0 != nInfo ) else if( 0 != nChars || 0 != nInfo )
{ {
...@@ -311,7 +317,7 @@ sal_Unicode SvParser::GetNextChar() ...@@ -311,7 +317,7 @@ sal_Unicode SvParser::GetNextChar()
"there is a converted character, but an error" ); "there is a converted character, but an error" );
// There are still errors, but nothing we can // There are still errors, but nothing we can
// do // do
c = (sal_Unicode)'?'; c = (sal_uInt32)'?';
nChars = 1; nChars = 1;
} }
} }
...@@ -356,7 +362,7 @@ sal_Unicode SvParser::GetNextChar() ...@@ -356,7 +362,7 @@ sal_Unicode SvParser::GetNextChar()
// There are still errors, so we use the first // There are still errors, so we use the first
// character and restart after that. // character and restart after that.
c = (sal_Unicode)sBuffer[0]; c = reinterpret_cast<sal_uChar&>( sBuffer[0] );
rInput.SeekRel( -(nLen-1) ); rInput.SeekRel( -(nLen-1) );
nChars = 1; nChars = 1;
} }
...@@ -378,7 +384,7 @@ sal_Unicode SvParser::GetNextChar() ...@@ -378,7 +384,7 @@ sal_Unicode SvParser::GetNextChar()
"there is no converted character and no error" ); "there is no converted character and no error" );
// #73398#: If the character could not be converted, // #73398#: If the character could not be converted,
// because a conversion is not available, do no conversion at all. // because a conversion is not available, do no conversion at all.
c = (sal_Unicode)c1; c = reinterpret_cast<sal_uChar&>( c1 );
nChars = 1; nChars = 1;
} }
...@@ -387,6 +393,10 @@ sal_Unicode SvParser::GetNextChar() ...@@ -387,6 +393,10 @@ sal_Unicode SvParser::GetNextChar()
} }
while( 0 == nChars && !bErr ); while( 0 == nChars && !bErr );
} }
if ( ! rtl::isValidCodePoint( c ) )
c = (sal_uInt32) '?' ;
if( bErr ) if( bErr )
{ {
if( ERRCODE_IO_PENDING == rInput.GetError() ) if( ERRCODE_IO_PENDING == rInput.GetError() )
...@@ -405,6 +415,7 @@ sal_Unicode SvParser::GetNextChar() ...@@ -405,6 +415,7 @@ sal_Unicode SvParser::GetNextChar()
} }
else else
IncLinePos(); IncLinePos();
return c; return c;
} }
......
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8"/>
</head>
<body>
<p>𤭢</p>
<p>&#x24b62;</p>
</body>
</html>
...@@ -272,6 +272,19 @@ DECLARE_HTMLEXPORT_TEST(testTdf83890, "tdf83890.odt") ...@@ -272,6 +272,19 @@ DECLARE_HTMLEXPORT_TEST(testTdf83890, "tdf83890.odt")
assertXPath(pDoc, "/html/body/ol[2]/ol", "start", "2"); assertXPath(pDoc, "/html/body/ol[2]/ol", "start", "2");
} }
DECLARE_HTMLEXPORT_TEST(testExtbChars, "extb.html")
{
    // 0x24b62 is above 0xFFFF (outside the BMP), so the expected string is
    // built from the UTF-32 value rather than from a single sal_Unicode.
    sal_uInt32 const nCodePoint = 0x24b62;
    OUString const sExpected( &nCodePoint, 1 );

    // First paragraph: the character written directly as UTF-8 encoded text.
    CPPUNIT_ASSERT_EQUAL(sExpected, getRun(getParagraph(1), 1)->getString());

    // Second paragraph: the same character written as a numeric character
    // entity (&#x24b62;).
    CPPUNIT_ASSERT_EQUAL(sExpected, getRun(getParagraph(2), 1)->getString());
}
CPPUNIT_PLUGIN_IMPLEMENT(); CPPUNIT_PLUGIN_IMPLEMENT();
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment