Kaydet (Commit) b297f7bb authored tarafından Jan Holesovsky's avatar Jan Holesovsky

tdf#88821: Set the encoding correctly for HTML files with a BOM.

BOM (Byte Order Mark) in the HTML file changed the underlying eSrcEnc
encoding, but did not actually update the rtl_TextToUnicodeConverter hConv.

Subsequent changes of eSrcEnc in SetSrcEncoding() (triggered by
'content="application/xhtml+xml; charset=UTF-8"' in the HTML file) were then
ignored (eSrcEnc was already set to UTF-8), and the parser was happily using the
old (Windows-1250) hConv.

Change-Id: If432d59891d51c6abe3517e325ed73057d0f8610
üst 5d9d0f3c
...@@ -87,6 +87,7 @@ public: ...@@ -87,6 +87,7 @@ public:
void testTdf91979(); void testTdf91979();
// void testTdf40110(); // void testTdf40110();
void testTdf98657(); void testTdf98657();
void testTdf88821();
CPPUNIT_TEST_SUITE(ScFiltersTest); CPPUNIT_TEST_SUITE(ScFiltersTest);
CPPUNIT_TEST(testTdf64229); CPPUNIT_TEST(testTdf64229);
...@@ -96,6 +97,7 @@ public: ...@@ -96,6 +97,7 @@ public:
CPPUNIT_TEST(testTdf91979); CPPUNIT_TEST(testTdf91979);
// CPPUNIT_TEST(testTdf40110); // CPPUNIT_TEST(testTdf40110);
CPPUNIT_TEST(testTdf98657); CPPUNIT_TEST(testTdf98657);
CPPUNIT_TEST(testTdf88821);
CPPUNIT_TEST_SUITE_END(); CPPUNIT_TEST_SUITE_END();
private: private:
uno::Reference<uno::XInterface> m_xCalcComponent; uno::Reference<uno::XInterface> m_xCalcComponent;
...@@ -243,6 +245,16 @@ void ScFiltersTest::testTdf98657() ...@@ -243,6 +245,16 @@ void ScFiltersTest::testTdf98657()
CPPUNIT_ASSERT_EQUAL(double(285.0), rDoc.GetValue(ScAddress(1, 1, 0))); CPPUNIT_ASSERT_EQUAL(double(285.0), rDoc.GetValue(ScAddress(1, 1, 0)));
} }
void ScFiltersTest::testTdf88821()
{
ScDocShellRef xDocSh = loadDoc("tdf88821.", FORMAT_HTML);
ScDocument& rDoc = xDocSh->GetDocument();
// B2 should be 'Périmètre', not 'Périmètre'
CPPUNIT_ASSERT_EQUAL(OStringToOUString("P\xC3\xA9rim\xC3\xA8tre", RTL_TEXTENCODING_UTF8), rDoc.GetString(1, 1, 0));
xDocSh->DoClose();
}
ScFiltersTest::ScFiltersTest() ScFiltersTest::ScFiltersTest()
: ScBootstrapFixture( "/sc/qa/unit/data" ) : ScBootstrapFixture( "/sc/qa/unit/data" )
......
<meta http-equiv="Content-type" content="application/xhtml+xml; charset=UTF-8" xmlns:myObj="urn:ms-kb" xmlns:myObjConvertBool="urn:ms-bool" xmlns:myObjConvertDecimal="urn:ms-dec" xmlns:myObjConvertText="urn:ms-text" />
<HTML xmlns:myObj="urn:ms-kb" xmlns:myObjConvertBool="urn:ms-bool" xmlns:myObjConvertDecimal="urn:ms-dec" xmlns:myObjConvertText="urn:ms-text">
<HEAD>
<STYLE>.HDR { background-color:bisque;font-weight:bold }</STYLE>
</HEAD>
<BODY>
<TABLE>
<COLGROUP WIDTH="150" ALIGN="LEFT" />
<COLGROUP WIDTH="150" ALIGN="LEFT" />
<TD CLASS="HDR" ALIGN="CENTER">
Code de la liste</TD>
<TD CLASS="HDR" ALIGN="CENTER">
Libellé de la liste</TD>
<TR>
<TD CLASS="TDR">
ACT_PERIMETRE</TD>
<TD CLASS="TDR">
Périmètre</TD>
</TR>
</TABLE>
</BODY>
</HTML>
...@@ -104,7 +104,6 @@ void SvParser::ClearTxtConvContext() ...@@ -104,7 +104,6 @@ void SvParser::ClearTxtConvContext()
void SvParser::SetSrcEncoding( rtl_TextEncoding eEnc ) void SvParser::SetSrcEncoding( rtl_TextEncoding eEnc )
{ {
if( eEnc != eSrcEnc ) if( eEnc != eSrcEnc )
{ {
if( pImplData && pImplData->hConv ) if( pImplData && pImplData->hConv )
...@@ -172,13 +171,13 @@ sal_uInt32 SvParser::GetNextChar() ...@@ -172,13 +171,13 @@ sal_uInt32 SvParser::GetNextChar()
{ {
if( 0xfe == c1 && 0xff == c2 ) if( 0xfe == c1 && 0xff == c2 )
{ {
eSrcEnc = RTL_TEXTENCODING_UCS2; SetSrcEncoding(RTL_TEXTENCODING_UCS2);
bUCS2BSrcEnc = true; bUCS2BSrcEnc = true;
bSeekBack = false; bSeekBack = false;
} }
else if( 0xff == c1 && 0xfe == c2 ) else if( 0xff == c1 && 0xfe == c2 )
{ {
eSrcEnc = RTL_TEXTENCODING_UCS2; SetSrcEncoding(RTL_TEXTENCODING_UCS2);
bUCS2BSrcEnc = false; bUCS2BSrcEnc = false;
bSeekBack = false; bSeekBack = false;
} }
...@@ -198,7 +197,7 @@ sal_uInt32 SvParser::GetNextChar() ...@@ -198,7 +197,7 @@ sal_uInt32 SvParser::GetNextChar()
bErr = rInput.IsEof() || rInput.GetError(); bErr = rInput.IsEof() || rInput.GetError();
if( !bErr && ( 0xbf == c3 ) ) if( !bErr && ( 0xbf == c3 ) )
{ {
eSrcEnc = RTL_TEXTENCODING_UTF8; SetSrcEncoding(RTL_TEXTENCODING_UTF8);
bSeekBack = false; bSeekBack = false;
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment