Commit 41c083a7, authored by Caolán McNamara

expose an API to map from UScriptCode to i18n::ScriptType

üst c57ba947
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include <breakiteratorImpl.hxx> #include <breakiteratorImpl.hxx>
#include <unicode/uchar.h> #include <unicode/uchar.h>
#include <i18nutil/unicode.hxx>
#include <rtl/ustrbuf.hxx> #include <rtl/ustrbuf.hxx>
using namespace ::com::sun::star::uno; using namespace ::com::sun::star::uno;
...@@ -445,64 +446,10 @@ sal_Int16 SAL_CALL BreakIteratorImpl::getWordType( const OUString& /*Text*/, ...@@ -445,64 +446,10 @@ sal_Int16 SAL_CALL BreakIteratorImpl::getWordType( const OUString& /*Text*/,
namespace namespace
{ {
// Lookup table from script code to ScriptType class (LATIN / ASIAN /
// COMPLEX / WEAK). It is indexed by the integer script value obtained from
// u_getIntPropertyValue(ch, UCHAR_SCRIPT), so the entry order has to follow
// the script enumeration in unicode/uscript.h.
// The bare numeric comments below give the index of the entry that follows.
static sal_Int16 scriptTypes[] =
{
ScriptType::WEAK, ScriptType::WEAK, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX,
ScriptType::ASIAN, ScriptType::LATIN, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX,
ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::LATIN,
// index 15
ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::ASIAN, ScriptType::COMPLEX,
ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX,
ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN,
// index 30
ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX,
ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
ScriptType::LATIN, ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
// index 45
ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX,
ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN,
ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
// index 60
ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX,
ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::ASIAN,
// index 75
ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
// index 90
ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::WEAK, ScriptType::WEAK, ScriptType::COMPLEX,
// index 105
ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN,
// index 120
ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::WEAK, ScriptType::WEAK,
ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
// index 135
ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
ScriptType::COMPLEX,
ScriptType::WEAK
};
// Number of entries in scriptTypes.
// NOTE(review): the lookup visible below bounds-checks with
// SAL_N_ELEMENTS(scriptTypes) directly rather than this macro - confirm
// whether anything else still uses it.
# define scriptTypesCount SAL_N_ELEMENTS(scriptTypes)
sal_Int16 getScriptClassByUAX24Script(sal_uInt32 currentChar) sal_Int16 getScriptClassByUAX24Script(sal_uInt32 currentChar)
{ {
sal_Int16 nRet;
int32_t script = u_getIntPropertyValue(currentChar, UCHAR_SCRIPT); int32_t script = u_getIntPropertyValue(currentChar, UCHAR_SCRIPT);
if (script < 0) return unicode::getScriptClassFromUScriptCode(static_cast<UScriptCode>(script));
nRet = ScriptType::WEAK;
else if (static_cast<size_t>(script) >= SAL_N_ELEMENTS(scriptTypes))
nRet = ScriptType::COMPLEX; // anything new is going to be pretty wild
else
nRet = scriptTypes[script];
return nRet;
} }
struct UBlock2Script struct UBlock2Script
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <com/sun/star/i18n/UnicodeScript.hpp> #include <com/sun/star/i18n/UnicodeScript.hpp>
#include <sal/types.h> #include <sal/types.h>
#include <unicode/uscript.h>
#include "i18nutildllapi.h" #include "i18nutildllapi.h"
typedef struct _ScriptTypeList { typedef struct _ScriptTypeList {
...@@ -61,6 +62,9 @@ public: ...@@ -61,6 +62,9 @@ public:
static sal_Bool SAL_CALL isWhiteSpace( const sal_Unicode ch); static sal_Bool SAL_CALL isWhiteSpace( const sal_Unicode ch);
static sal_Bool SAL_CALL isAlphaDigit( const sal_Unicode ch); static sal_Bool SAL_CALL isAlphaDigit( const sal_Unicode ch);
static sal_Bool SAL_CALL isPunctuation( const sal_Unicode ch); static sal_Bool SAL_CALL isPunctuation( const sal_Unicode ch);
//Map an ISO 15924 script code to Latin/Asian/Complex/Weak
static sal_Int16 SAL_CALL getScriptClassFromUScriptCode(UScriptCode eScript);
}; };
#endif #endif
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <com/sun/star/i18n/UnicodeType.hpp> #include <com/sun/star/i18n/UnicodeType.hpp>
#include <com/sun/star/i18n/KCharacterType.hpp> #include <com/sun/star/i18n/KCharacterType.hpp>
#include <com/sun/star/i18n/ScriptType.hpp>
#include <i18nutil/unicode.hxx> #include <i18nutil/unicode.hxx>
#include "unicode_data.h" #include "unicode_data.h"
...@@ -497,5 +498,61 @@ sal_Int32 SAL_CALL unicode::getCharType( const sal_Unicode ch ) ...@@ -497,5 +498,61 @@ sal_Int32 SAL_CALL unicode::getCharType( const sal_Unicode ch )
} }
} }
/**
 * Map a script code to one of the i18n ScriptType classes
 * (LATIN, ASIAN, COMPLEX or WEAK).
 *
 * @param eScript script code, as enumerated in unicode/uscript.h
 * @return ScriptType::WEAK for codes below USCRIPT_COMMON,
 *         ScriptType::COMPLEX for codes past the end of the table,
 *         otherwise the table entry for that code.
 */
sal_Int16 SAL_CALL unicode::getScriptClassFromUScriptCode(UScriptCode eScript)
{
    // Indexed by UScriptCode value, so the entry order has to follow the
    // enumeration in unicode/uscript.h. The table is only ever read, hence
    // const. The bare numeric comments give the index of the next entry.
    static const sal_Int16 scriptTypes[] =
    {
        ScriptType::WEAK, ScriptType::WEAK, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX,
        ScriptType::ASIAN, ScriptType::LATIN, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX,
        ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::LATIN,
        // index 15
        ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::ASIAN, ScriptType::COMPLEX,
        ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX,
        ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN,
        // index 30
        ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX,
        ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
        ScriptType::LATIN, ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
        // index 45
        ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX,
        ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN,
        ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
        // index 60
        ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
        ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX,
        ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::ASIAN,
        // index 75
        ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
        ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
        ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
        // index 90
        ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
        ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
        ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::WEAK, ScriptType::WEAK, ScriptType::COMPLEX,
        // index 105
        ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
        ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
        ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN,
        // index 120
        ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
        ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::WEAK, ScriptType::WEAK,
        ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
        // index 135
        ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
        ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
        ScriptType::COMPLEX,
        ScriptType::WEAK
    };

    // Codes below USCRIPT_COMMON cannot index the table; classify them as weak.
    if (eScript < USCRIPT_COMMON)
        return ScriptType::WEAK;

    // Codes beyond the table are ones this table does not know about yet:
    // anything new is going to be pretty wild, so treat it as complex.
    if (static_cast<size_t>(eScript) >= SAL_N_ELEMENTS(scriptTypes))
        return ScriptType::COMPLEX;

    return scriptTypes[eScript];
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment