Kaydet (Commit) e47a02b1 authored tarafından Michael Stahl's avatar Michael Stahl

fdo#77979: sw: RTF export: write non-ASCII font names encoded

Currently font names like "微软雅黑" (Microsoft YaHei) are
written as "????" in the RTF export; to avoid that, set the \fcharset
of the font entry to something that at least is able to encode
the font name and alternate name.

This requires a new function since the existing
rtl_TextEncodingToWinCharset was changed in
b88fe998 to return "default" 0x01
(for OOXML) which is quite unhelpful for RTF.

This is not entirely satisfactory, as of course that is no guarantee
that the encoding can represent all of the actual text that has the
font applied; hence there are some \'3f in the fall-back encoded text
of the heading of the bugdoc, which indicates that the detected
Shift-JIS is insufficient and GB-2312 would be required; but it's not
obvious how to do better here without iterating over all the text
twice, and that still leaves the possibility that all text that has a
particular font applied cannot be represented by a single non-Unicode
encoding.

But since we always write text as the \u Unicode + legacy fall-back,
this should not be a big problem since modern RTF readers will simply
read the Unicode.

Change-Id: Ie6a42294c501d014dd9f0df82638519412ca19bb
üst 04d5a280
......@@ -317,6 +317,13 @@ DECLARE_RTFEXPORT_TEST(testMathRuns, "math-runs.rtf")
CPPUNIT_ASSERT_EQUAL(OUString("\\{ left [ right ] left ( right ) \\}"), getFormula(getRun(getParagraph(1), 1)));
}
DECLARE_RTFEXPORT_TEST(testFdo77979, "fdo77979.odt")
{
    // The font name must be written using the \fcharset of its font entry,
    // otherwise the non-ASCII name does not survive.
    // 12 is the byte length of the UTF-8 literal (4 characters, 3 bytes each).
    OUString const aExpectedName("微软雅黑", 12, RTL_TEXTENCODING_UTF8);
    OUString const aActualName(
        getProperty<OUString>(getRun(getParagraph(1), 1), "CharFontName"));
    CPPUNIT_ASSERT_EQUAL(aExpectedName, aActualName);
}
DECLARE_RTFEXPORT_TEST(testFdo53113, "fdo53113.odt")
{
/*
......
......@@ -59,8 +59,7 @@ namespace sw
{
/** MSOffice appears to set the charset of unicode fonts to MS 932
Arial Unicode MS for example is a unicode font, but word sets
exported uses of it to the MS 932 charset
But we do "default", whatever that means.
@param eTextEncoding
the OOo encoding to convert from
......@@ -73,6 +72,15 @@ namespace sw
*/
sal_uInt8 rtl_TextEncodingToWinCharset(rtl_TextEncoding eTextEncoding);
/** Choose the Windows charset value that the RTF export writes as the
\fcharset of a font entry.

MSOffice appears to set the charset of unicode fonts to MS 932
Arial Unicode MS for example is a unicode font, but word sets
exported uses of it to the MS 932 charset

For an unknown or Unicode text encoding (DONTKNOW, UCS2, UTF7, UTF8,
JAVA_UTF8) the fallbacks Shift-JIS (0x80), GB-2312 (0x86), Big5 (0x88)
and EUC-KR (0x81) are tried in that order, and the first one that can
encode both font names is returned; if none can, 0x01 ("default") is
returned. For every other text encoding the result of
rtl_getBestWindowsCharsetFromTextEncoding is returned unchanged.

@param rFontName
the primary name of the font; must be encodable in a fallback charset
for that fallback to be chosen

@param rAltName
the alternate name of the font; must be encodable as well

@param eTextEncoding
the OOo encoding of the font to convert from

@return
the Windows charset value to use for the font
*/
sal_uInt8 rtl_TextEncodingToWinCharsetRTF(OUString const& rFontName,
OUString const& rAltName, rtl_TextEncoding eTextEncoding);
/** Import a MSWord XE field. Suitable for .doc and .rtf
@param rDoc
......
......@@ -2073,7 +2073,12 @@ void RtfAttributeOutput::CharFont(const SvxFontItem& rFont)
m_aStylesEnd.append(OOO_STRING_SVTOOLS_RTF_LOCH);
m_aStylesEnd.append(OOO_STRING_SVTOOLS_RTF_F);
m_aStylesEnd.append((sal_Int32)m_rExport.maFontHelper.GetId(rFont));
m_rExport.eCurrentEncoding = rtl_getTextEncodingFromWindowsCharset(rtl_getBestWindowsCharsetFromTextEncoding(rFont.GetCharSet()));
// FIXME: this may be a tad expensive... but the charset needs to be
// consistent with what wwFont::WriteRtf() does
FontMapExport aTmp(rFont.GetFamilyName());
m_rExport.eCurrentEncoding = rtl_getTextEncodingFromWindowsCharset(
sw::ms::rtl_TextEncodingToWinCharsetRTF(
aTmp.msPrimary, aTmp.msSecondary, rFont.GetCharSet()));
if (m_rExport.eCurrentEncoding == RTL_TEXTENCODING_DONTKNOW)
m_rExport.eCurrentEncoding = m_rExport.eDefaultEncoding;
}
......@@ -3270,20 +3275,27 @@ MSWordExportBase& RtfAttributeOutput::GetExport()
/// Start the font.
///
/// Writes the family name of the font into the export stream, encoded
/// with the current export encoding.
///
/// @param rFamilyName the font family name to write
void RtfAttributeOutput::StartFont(const OUString& rFamilyName) const
{
    // The name must be emitted exactly once. A preceding raw
    // OUStringToOString() write of the same name would duplicate it and
    // would output characters outside the encoding as '?'.

    // write the font name hex-encoded, but without Unicode - Word at least
    // cannot read *both* Unicode and fallback as written by OutString
    m_rExport.Strm().WriteCharPtr(
        msfilter::rtfutil::OutString(rFamilyName, m_rExport.eCurrentEncoding, false).getStr());
}
/// End the font.
///
/// Resets the current export encoding to the export's default encoding
/// and terminates the font entry in the stream.
void RtfAttributeOutput::EndFont() const
{
    // The encoding set while writing this font entry must not leak into
    // whatever is written next.
    m_rExport.eCurrentEncoding = m_rExport.eDefaultEncoding;

    // Terminator of the font entry.
    m_rExport.Strm().WriteCharPtr(";}");
}
/// Alternate name for the font.
///
/// Writes one brace-delimited group holding the alternate font name,
/// encoded with the current export encoding.
///
/// @param rName the alternate font name to write
void RtfAttributeOutput::FontAlternateName(const OUString& rName) const
{
    // Open the group with the IGNORE and FALT keywords.
    m_rExport.Strm().WriteChar('{').WriteCharPtr(OOO_STRING_SVTOOLS_RTF_IGNORE).WriteCharPtr(OOO_STRING_SVTOOLS_RTF_FALT).WriteChar(' ');

    // The name and the closing brace must be emitted exactly once. An
    // additional raw OUStringToOString() write followed by '}' would
    // duplicate the name and close one group too many.

    // write the font name hex-encoded, but without Unicode - Word at least
    // cannot read *both* Unicode and fallback as written by OutString
    m_rExport.Strm().WriteCharPtr(
        msfilter::rtfutil::OutString(rName, m_rExport.eCurrentEncoding, false).getStr()).WriteChar('}');
}
/// Font charset.
......@@ -3292,6 +3304,7 @@ void RtfAttributeOutput::FontCharset(sal_uInt8 nCharSet) const
m_rExport.Strm().WriteCharPtr(OOO_STRING_SVTOOLS_RTF_FCHARSET);
m_rExport.OutULong(nCharSet);
m_rExport.Strm().WriteChar(' ');
m_rExport.eCurrentEncoding =rtl_getTextEncodingFromWindowsCharset(nCharSet);
}
/// Font family.
......
......@@ -712,6 +712,55 @@ namespace sw
return nRet;
}
/** Check whether a string can be converted to a legacy text encoding
    without losing any character.

    @param rString
    the Unicode string to check

    @param eEncoding
    the target text encoding

    @return
    the result of OUString::convertToString with the error flags set,
    i.e. false as soon as a character is undefined in the target encoding
    or the input is invalid
*/
static bool
CanEncode(OUString const& rString, rtl_TextEncoding const eEncoding)
{
    rtl::OString tmp; // conversion result is discarded, only success matters
    // This is a Unicode-to-text conversion, so the RTL_UNICODETOTEXT_*
    // flag family is required; the RTL_TEXTTOUNICODE_* constants have
    // different bit assignments and would not request an error on
    // invalid input.
    return rString.convertToString(&tmp, eEncoding,
            RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR |
            RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR);
}
/** Choose the Windows charset for a font entry of the RTF export.

    Fonts with a concrete legacy encoding get the best matching Windows
    charset. Fonts with an unknown or Unicode encoding get the first
    fallback charset that is able to encode both the font name and the
    alternate name, or 0x01 if there is none.

    @param rFontName      primary font name that must be encodable
    @param rAltName       alternate font name that must be encodable
    @param eTextEncoding  text encoding of the font
    @return the Windows charset value
*/
sal_uInt8 rtl_TextEncodingToWinCharsetRTF(
        OUString const& rFontName, OUString const& rAltName,
        rtl_TextEncoding eTextEncoding)
{
    // Looked up unconditionally, before deciding whether it is used.
    sal_uInt8 const nBestCharset =
        rtl_getBestWindowsCharsetFromTextEncoding(eTextEncoding);

    bool const bUnknownOrUnicode =
           eTextEncoding == RTL_TEXTENCODING_DONTKNOW
        || eTextEncoding == RTL_TEXTENCODING_UCS2
        || eTextEncoding == RTL_TEXTENCODING_UTF7
        || eTextEncoding == RTL_TEXTENCODING_UTF8
        || eTextEncoding == RTL_TEXTENCODING_JAVA_UTF8;
    if (!bUnknownOrUnicode)
    {
        // a concrete legacy encoding: nothing to guess
        return nBestCharset;
    }

    // candidates in order of preference
    static struct { rtl_TextEncoding eEncoding; sal_uInt8 nCharset; }
        const s_aCandidates [] = {
            { RTL_TEXTENCODING_MS_932, 0x80 }, // Shift-JIS
            { RTL_TEXTENCODING_MS_936, 0x86 }, // GB-2312
            { RTL_TEXTENCODING_MS_950, 0x88 }, // Big5
            { RTL_TEXTENCODING_MS_949, 0x81 }, // EUC-KR
        };

    for (size_t nIdx = 0; nIdx != SAL_N_ELEMENTS(s_aCandidates); ++nIdx)
    {
        rtl_TextEncoding const eCandidate = s_aCandidates[nIdx].eEncoding;
        // the charset has to be able to represent at least both names
        // of the font
        if (!CanEncode(rFontName, eCandidate))
            continue;
        if (!CanEncode(rAltName, eCandidate))
            continue;
        return s_aCandidates[nIdx].nCharset;
    }

    SAL_INFO("sw.rtf", "no fallback charset found for font: "
             << rFontName << " " << rAltName);
    return 0x01; // all hope lost: "default", whatever that is
}
long DateTime2DTTM( const DateTime& rDT )
{
/*
......
......@@ -856,7 +856,8 @@ void wwFont::WriteRtf( const RtfAttributeOutput* rAttrOutput ) const
{
rAttrOutput->FontFamilyType( meFamily, *this );
rAttrOutput->FontPitchType( mePitch );
rAttrOutput->FontCharset( rtl_getBestWindowsCharsetFromTextEncoding( meChrSet ) );
rAttrOutput->FontCharset(
sw::ms::rtl_TextEncodingToWinCharsetRTF(msFamilyNm, msAltNm, meChrSet));
rAttrOutput->StartFont( msFamilyNm );
if ( mbAlt )
rAttrOutput->FontAlternateName( msAltNm );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment