Commit 6a736800, authored by Stephan Bergmann

Fix conversion of non-BMP chars

...which are apparently encoded as two consecutive \uXXXX\uXXXX escape sequences
representing a UTF-16 surrogate pair

Change-Id: Ic47a678dee5e28ab4dc43e115ae5c4efefb1db96
Reviewed-on: https://gerrit.libreoffice.org/67245
Tested-by: Jenkins
Reviewed-by: Stephan Bergmann <sbergman@redhat.com>
Parent: e9db8ece
...@@ -30,24 +30,24 @@ using namespace dbahsql; ...@@ -30,24 +30,24 @@ using namespace dbahsql;
namespace namespace
{ {
//Find ascii escaped unicode int getHexValue(sal_Unicode c)
sal_Int32 lcl_IndexOfUnicode(const OString& rSource, const sal_Int32 nFrom = 0)
{ {
const OString sHexDigits = "0123456789abcdefABCDEF"; if (c >= '0' && c <= '9')
sal_Int32 nIndex = rSource.indexOf("\\u", nFrom);
if (nIndex == -1)
{ {
return -1; return c - '0';
} }
bool bIsUnicode = true; else if (c >= 'A' && c <= 'F')
for (short nDist = 2; nDist <= 5; ++nDist)
{ {
if (sHexDigits.indexOf(rSource[nIndex + nDist]) == -1) return c - 'A' + 10;
{ }
bIsUnicode = false; else if (c >= 'a' && c <= 'f')
} {
return c - 'a' + 10;
}
else
{
return -1;
} }
return bIsUnicode ? nIndex : -1;
} }
} // unnamed namespace } // unnamed namespace
...@@ -55,17 +55,38 @@ sal_Int32 lcl_IndexOfUnicode(const OString& rSource, const sal_Int32 nFrom = 0) ...@@ -55,17 +55,38 @@ sal_Int32 lcl_IndexOfUnicode(const OString& rSource, const sal_Int32 nFrom = 0)
//Convert ascii escaped unicode to utf-8 //Convert ascii escaped unicode to utf-8
OUString utils::convertToUTF8(const OString& original) OUString utils::convertToUTF8(const OString& original)
{ {
OString sResult = original; OUString res = OStringToOUString(original, RTL_TEXTENCODING_UTF8);
sal_Int32 nIndex = lcl_IndexOfUnicode(sResult); for (sal_Int32 i = 0;;)
while (nIndex != -1 && nIndex < original.getLength())
{ {
const OString sHex = original.copy(nIndex + 2, 4); i = res.indexOf("\\u", i);
const sal_Unicode cDec = static_cast<sal_Unicode>(strtol(sHex.getStr(), nullptr, 16)); if (i == -1)
const OString sNewChar = OString(&cDec, 1, RTL_TEXTENCODING_UTF8); {
sResult = sResult.replaceAll("\\u" + sHex, sNewChar); break;
nIndex = lcl_IndexOfUnicode(original, nIndex + 1); }
i += 2;
if (res.getLength() - i >= 4)
{
bool escape = true;
sal_Unicode c = 0;
for (sal_Int32 j = 0; j != 4; ++j)
{
auto const n = getHexValue(res[i + j]);
if (n == -1)
{
escape = false;
break;
}
c = (c << 4) | n;
}
if (escape)
{
i -= 2;
res = res.replaceAt(i, 6, OUString(c));
++i;
}
}
} }
return OStringToOUString(sResult, RTL_TEXTENCODING_UTF8); return res;
} }
OUString utils::getTableNameFromStmt(const OUString& sSql) OUString utils::getTableNameFromStmt(const OUString& sSql)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment