Commit f0b38738 authored by Caolán McNamara

Related: tdf#105426 upgrade to hunspell-1.6.0

Change-Id: I275222d1a7d78cbfb1ca046562fc8a3c314b0fae
Reviewed-on: https://gerrit.libreoffice.org/33454
Reviewed-by: Caolán McNamara <caolanm@redhat.com>
Tested-by: Caolán McNamara <caolanm@redhat.com>
parent 44cb1d16
......@@ -9543,7 +9543,7 @@ if test "$with_system_hunspell" = "yes"; then
HUNSPELL_LIBS=-lhunspell
fi
AC_LANG_POP([C++])
-libo_MINGW_CHECK_DLL([libhunspell-1.5])
+libo_MINGW_CHECK_DLL([libhunspell-1.6])
HUNSPELL_CFLAGS=$(printf '%s' "$HUNSPELL_CFLAGS" | sed -e "s/-I/${ISYSTEM?}/g")
FilterLibs "${HUNSPELL_LIBS}"
HUNSPELL_LIBS="${filteredlibs}"
......@@ -9554,7 +9554,7 @@ else
if test "$COM" = "MSC"; then
HUNSPELL_LIBS="${WORKDIR}/LinkTarget/StaticLibrary/hunspell.lib"
else
-HUNSPELL_LIBS="-L${WORKDIR}/UnpackedTarball/hunspell/src/hunspell/.libs -lhunspell-1.5"
+HUNSPELL_LIBS="-L${WORKDIR}/UnpackedTarball/hunspell/src/hunspell/.libs -lhunspell-1.6"
fi
BUILD_TYPE="$BUILD_TYPE HUNSPELL"
fi
......
......@@ -60,7 +60,7 @@ export GRAPHITE_TARBALL := 3069842a88b8f40c6b83ad2850cda293-graphite2-minimal-1.
export HARFBUZZ_MD5SUM := 5986e1bfcd983d1f6caa53ef64c4abc5
export HARFBUZZ_TARBALL := harfbuzz-1.3.2.tar.bz2
export HSQLDB_TARBALL := 17410483b5b5f267aa18b7e00b65e6e0-hsqldb_1_8_0.zip
-export HUNSPELL_TARBALL := 9849a2381bacbeb2714034ad825bede8-hunspell-1.5.4.tar.gz
+export HUNSPELL_TARBALL := 047c3feb121261b76dc16cdb62f54483-hunspell-1.6.0.tar.gz
export HYPHEN_TARBALL := 5ade6ae2a99bc1e9e57031ca88d36dad-hyphen-2.8.8.tar.gz
export ICU_TARBALL := 1901302aaff1c1633ef81862663d2917-icu4c-58_1-src.tgz
export JFREEREPORT_FLOW_ENGINE_TARBALL := ba2930200c9f019c2d93a8c88c651a0f-flow-engine-0.9.4.zip
......
This source diff could not be displayed because it is too large. You can view the blob instead.
From bf05e232805f6c1fae5dea3c223de8bdaab425e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com>
Date: Mon, 23 Jan 2017 13:26:53 +0000
Subject: [PATCH 1/3] unroll this a bit
---
src/hunspell/csutil.cxx | 49 ++++++++++++++++++++++++++++---------------------
1 file changed, 28 insertions(+), 21 deletions(-)
diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx
index ac5cd98..c1666a5 100644
--- a/src/hunspell/csutil.cxx
+++ b/src/hunspell/csutil.cxx
@@ -518,18 +518,20 @@ unsigned char ccase(const struct cs_info* csconv, int nIndex) {
w_char upper_utf(w_char u, int langnum) {
unsigned short idx = (u.h << 8) + u.l;
- if (idx != unicodetoupper(idx, langnum)) {
- u.h = (unsigned char)(unicodetoupper(idx, langnum) >> 8);
- u.l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF);
+ unsigned short upridx = unicodetoupper(idx, langnum);
+ if (idx != upridx) {
+ u.h = (unsigned char)(upridx >> 8);
+ u.l = (unsigned char)(upridx & 0x00FF);
}
return u;
}
w_char lower_utf(w_char u, int langnum) {
unsigned short idx = (u.h << 8) + u.l;
- if (idx != unicodetolower(idx, langnum)) {
- u.h = (unsigned char)(unicodetolower(idx, langnum) >> 8);
- u.l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF);
+ unsigned short lwridx = unicodetolower(idx, langnum);
+ if (idx != lwridx) {
+ u.h = (unsigned char)(lwridx >> 8);
+ u.l = (unsigned char)(lwridx & 0x00FF);
}
return u;
}
@@ -551,12 +553,13 @@ std::string& mkallsmall(std::string& s, const struct cs_info* csconv) {
}
std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u,
- int langnum) {
+ int langnum) {
for (size_t i = 0; i < u.size(); ++i) {
unsigned short idx = (u[i].h << 8) + u[i].l;
- if (idx != unicodetolower(idx, langnum)) {
- u[i].h = (unsigned char)(unicodetolower(idx, langnum) >> 8);
- u[i].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF);
+ unsigned short lwridx = unicodetolower(idx, langnum);
+ if (idx != lwridx) {
+ u[i].h = (unsigned char)(lwridx >> 8);
+ u[i].l = (unsigned char)(lwridx & 0x00FF);
}
}
return u;
@@ -565,9 +568,10 @@ std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u,
std::vector<w_char>& mkallcap_utf(std::vector<w_char>& u, int langnum) {
for (size_t i = 0; i < u.size(); i++) {
unsigned short idx = (u[i].h << 8) + u[i].l;
- if (idx != unicodetoupper(idx, langnum)) {
- u[i].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8);
- u[i].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF);
+ unsigned short upridx = unicodetoupper(idx, langnum);
+ if (idx != upridx) {
+ u[i].h = (unsigned char)(upridx >> 8);
+ u[i].l = (unsigned char)(upridx & 0x00FF);
}
}
return u;
@@ -583,9 +587,10 @@ std::string& mkinitcap(std::string& s, const struct cs_info* csconv) {
std::vector<w_char>& mkinitcap_utf(std::vector<w_char>& u, int langnum) {
if (!u.empty()) {
unsigned short idx = (u[0].h << 8) + u[0].l;
- if (idx != unicodetoupper(idx, langnum)) {
- u[0].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8);
- u[0].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF);
+ unsigned short upridx = unicodetoupper(idx, langnum);
+ if (idx != upridx) {
+ u[0].h = (unsigned char)(upridx >> 8);
+ u[0].l = (unsigned char)(upridx & 0x00FF);
}
}
return u;
@@ -601,9 +606,10 @@ std::string& mkinitsmall(std::string& s, const struct cs_info* csconv) {
std::vector<w_char>& mkinitsmall_utf(std::vector<w_char>& u, int langnum) {
if (!u.empty()) {
unsigned short idx = (u[0].h << 8) + u[0].l;
- if (idx != unicodetolower(idx, langnum)) {
- u[0].h = (unsigned char)(unicodetolower(idx, langnum) >> 8);
- u[0].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF);
+ unsigned short lwridx = unicodetolower(idx, langnum);
+ if (idx != lwridx) {
+ u[0].h = (unsigned char)(lwridx >> 8);
+ u[0].l = (unsigned char)(lwridx & 0x00FF);
}
}
return u;
@@ -2533,9 +2539,10 @@ int get_captype_utf8(const std::vector<w_char>& word, int langnum) {
size_t firstcap = 0;
for (size_t i = 0; i < word.size(); ++i) {
unsigned short idx = (word[i].h << 8) + word[i].l;
- if (idx != unicodetolower(idx, langnum))
+ unsigned short lwridx = unicodetolower(idx, langnum);
+ if (idx != lwridx)
ncap++;
- if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum))
+ if (unicodetoupper(idx, langnum) == lwridx)
nneutral++;
}
if (ncap) {
--
2.9.3
From 3a935abd0539143ee952d2f86ec513be6a056d5e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com>
Date: Mon, 23 Jan 2017 13:35:13 +0000
Subject: [PATCH 2/3] rename std::vector<w_char> to wide::string
---
src/hunspell/affixmgr.cxx | 8 ++---
src/hunspell/affixmgr.hxx | 10 +++----
src/hunspell/csutil.cxx | 28 +++++++++---------
src/hunspell/csutil.hxx | 26 ++++++++--------
src/hunspell/hashmgr.cxx | 12 ++++----
src/hunspell/hashmgr.hxx | 2 +-
src/hunspell/hunspell.cxx | 46 ++++++++++++++---------------
src/hunspell/hunspell.hxx | 2 +-
src/hunspell/suggestmgr.cxx | 72 ++++++++++++++++++++++-----------------------
src/hunspell/suggestmgr.hxx | 12 ++++----
src/hunspell/w_char.hxx | 6 ++++
src/parsers/textparser.cxx | 2 +-
src/tools/hunspell.cxx | 10 +++----
13 files changed, 121 insertions(+), 115 deletions(-)
diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx
index 21cf384..4f64721 100644
--- a/src/hunspell/affixmgr.cxx
+++ b/src/hunspell/affixmgr.cxx
@@ -1338,7 +1338,7 @@ int AffixMgr::cpdcase_check(const char* word, int pos) {
for (p = word + pos - 1; (*p & 0xc0) == 0x80; p--)
;
std::string pair(p);
- std::vector<w_char> pair_u;
+ wide::string pair_u;
u8_u16(pair_u, pair);
unsigned short a = pair_u.size() > 1 ? ((pair_u[1].h << 8) + pair_u[1].l) : 0;
unsigned short b = !pair_u.empty() ? ((pair_u[0].h << 8) + pair_u[0].l) : 0;
@@ -1523,7 +1523,7 @@ short AffixMgr::get_syllable(const std::string& word) {
}
}
} else if (!cpdvowels_utf16.empty()) {
- std::vector<w_char> w;
+ wide::string w;
u8_u16(w, word);
for (size_t i = 0; i < w.size(); ++i) {
if (std::binary_search(cpdvowels_utf16.begin(),
@@ -3505,7 +3505,7 @@ const char* AffixMgr::get_ignore() const {
}
// return the preferred ignore string for suggestions
-const std::vector<w_char>& AffixMgr::get_ignore_utf16() const {
+const wide::string& AffixMgr::get_ignore_utf16() const {
return ignorechars_utf16;
}
@@ -3528,7 +3528,7 @@ const std::string& AffixMgr::get_wordchars() const {
return wordchars;
}
-const std::vector<w_char>& AffixMgr::get_wordchars_utf16() const {
+const wide::string& AffixMgr::get_wordchars_utf16() const {
return wordchars_utf16;
}
diff --git a/src/hunspell/affixmgr.hxx b/src/hunspell/affixmgr.hxx
index 83a4b42..11f1a67 100644
--- a/src/hunspell/affixmgr.hxx
+++ b/src/hunspell/affixmgr.hxx
@@ -146,7 +146,7 @@ class AffixMgr {
int cpdwordmax;
int cpdmaxsyllable;
std::string cpdvowels; // vowels (for calculating of Hungarian compounding limit,
- std::vector<w_char> cpdvowels_utf16; //vowels for UTF-8 encoding
+ wide::string cpdvowels_utf16; //vowels for UTF-8 encoding
std::string cpdsyllablenum; // syllable count incrementing flag
const char* pfxappnd; // BUG: not stateless
const char* sfxappnd; // BUG: not stateless
@@ -157,9 +157,9 @@ class AffixMgr {
PfxEntry* pfx; // BUG: not stateless
int checknum;
std::string wordchars; // letters + spec. word characters
- std::vector<w_char> wordchars_utf16;
+ wide::string wordchars_utf16;
std::string ignorechars; // letters + spec. word characters
- std::vector<w_char> ignorechars_utf16;
+ wide::string ignorechars_utf16;
std::string version; // affix and dictionary file version string
std::string lang; // language
int langnum;
@@ -306,9 +306,9 @@ class AffixMgr {
char* get_key_string();
char* get_try_string() const;
const std::string& get_wordchars() const;
- const std::vector<w_char>& get_wordchars_utf16() const;
+ const wide::string& get_wordchars_utf16() const;
const char* get_ignore() const;
- const std::vector<w_char>& get_ignore_utf16() const;
+ const wide::string& get_ignore_utf16() const;
int get_compound() const;
FLAG get_compoundflag() const;
FLAG get_forbiddenword() const;
diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx
index c1666a5..2f59b3d 100644
--- a/src/hunspell/csutil.cxx
+++ b/src/hunspell/csutil.cxx
@@ -143,10 +143,10 @@ void myopen(std::ifstream& stream, const char* path, std::ios_base::openmode mod
stream.open(path, mode);
}
-std::string& u16_u8(std::string& dest, const std::vector<w_char>& src) {
+std::string& u16_u8(std::string& dest, const wide::string& src) {
dest.clear();
- std::vector<w_char>::const_iterator u2 = src.begin();
- std::vector<w_char>::const_iterator u2_max = src.end();
+ wide::string::const_iterator u2 = src.begin();
+ wide::string::const_iterator u2_max = src.end();
while (u2 < u2_max) {
signed char u8;
if (u2->h) { // > 0xFF
@@ -180,7 +180,7 @@ std::string& u16_u8(std::string& dest, const std::vector<w_char>& src) {
return dest;
}
-int u8_u16(std::vector<w_char>& dest, const std::string& src) {
+int u8_u16(wide::string& dest, const std::string& src) {
dest.clear();
std::string::const_iterator u8 = src.begin();
std::string::const_iterator u8_max = src.end();
@@ -474,7 +474,7 @@ size_t reverseword(std::string& word) {
// reverse word
size_t reverseword_utf(std::string& word) {
- std::vector<w_char> w;
+ wide::string w;
u8_u16(w, word);
std::reverse(w.begin(), w.end());
u16_u8(word, w);
@@ -552,7 +552,7 @@ std::string& mkallsmall(std::string& s, const struct cs_info* csconv) {
return s;
}
-std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u,
+wide::string& mkallsmall_utf(wide::string& u,
int langnum) {
for (size_t i = 0; i < u.size(); ++i) {
unsigned short idx = (u[i].h << 8) + u[i].l;
@@ -565,7 +565,7 @@ std::vector<w_char>& mkallsmall_utf(std::vector<w_char>& u,
return u;
}
-std::vector<w_char>& mkallcap_utf(std::vector<w_char>& u, int langnum) {
+wide::string& mkallcap_utf(wide::string& u, int langnum) {
for (size_t i = 0; i < u.size(); i++) {
unsigned short idx = (u[i].h << 8) + u[i].l;
unsigned short upridx = unicodetoupper(idx, langnum);
@@ -584,7 +584,7 @@ std::string& mkinitcap(std::string& s, const struct cs_info* csconv) {
return s;
}
-std::vector<w_char>& mkinitcap_utf(std::vector<w_char>& u, int langnum) {
+wide::string& mkinitcap_utf(wide::string& u, int langnum) {
if (!u.empty()) {
unsigned short idx = (u[0].h << 8) + u[0].l;
unsigned short upridx = unicodetoupper(idx, langnum);
@@ -603,7 +603,7 @@ std::string& mkinitsmall(std::string& s, const struct cs_info* csconv) {
return s;
}
-std::vector<w_char>& mkinitsmall_utf(std::vector<w_char>& u, int langnum) {
+wide::string& mkinitsmall_utf(wide::string& u, int langnum) {
if (!u.empty()) {
unsigned short idx = (u[0].h << 8) + u[0].l;
unsigned short lwridx = unicodetolower(idx, langnum);
@@ -2532,7 +2532,7 @@ int get_captype(const std::string& word, cs_info* csconv) {
return HUHCAP;
}
-int get_captype_utf8(const std::vector<w_char>& word, int langnum) {
+int get_captype_utf8(const wide::string& word, int langnum) {
// now determine the capitalization type of the first nl letters
size_t ncap = 0;
size_t nneutral = 0;
@@ -2565,9 +2565,9 @@ int get_captype_utf8(const std::vector<w_char>& word, int langnum) {
// strip all ignored characters in the string
size_t remove_ignored_chars_utf(std::string& word,
- const std::vector<w_char>& ignored_chars) {
- std::vector<w_char> w;
- std::vector<w_char> w2;
+ const wide::string& ignored_chars) {
+ wide::string w;
+ wide::string w2;
u8_u16(w, word);
for (size_t i = 0; i < w.size(); ++i) {
@@ -2626,7 +2626,7 @@ bool parse_string(const std::string& line, std::string& out, int ln) {
bool parse_array(const std::string& line,
std::string& out,
- std::vector<w_char>& out_utf16,
+ wide::string& out_utf16,
int utf8,
int ln) {
if (!parse_string(line, out, ln))
diff --git a/src/hunspell/csutil.hxx b/src/hunspell/csutil.hxx
index 302d7e9..313672e 100644
--- a/src/hunspell/csutil.hxx
+++ b/src/hunspell/csutil.hxx
@@ -134,10 +134,10 @@ LIBHUNSPELL_DLL_EXPORTED void myopen(std::ifstream& stream, const char* path,
// convert UTF-16 characters to UTF-8
LIBHUNSPELL_DLL_EXPORTED std::string& u16_u8(std::string& dest,
- const std::vector<w_char>& src);
+ const wide::string& src);
// convert UTF-8 characters to UTF-16
-LIBHUNSPELL_DLL_EXPORTED int u8_u16(std::vector<w_char>& dest,
+LIBHUNSPELL_DLL_EXPORTED int u8_u16(wide::string& dest,
const std::string& src);
// remove end of line char(s)
@@ -219,31 +219,31 @@ LIBHUNSPELL_DLL_EXPORTED std::string& mkinitcap(std::string& s,
const struct cs_info* csconv);
// convert first letter of UTF-8 string to capital
-LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
-mkinitcap_utf(std::vector<w_char>& u, int langnum);
+LIBHUNSPELL_DLL_EXPORTED wide::string&
+mkinitcap_utf(wide::string& u, int langnum);
// convert UTF-8 string to little
-LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
-mkallsmall_utf(std::vector<w_char>& u, int langnum);
+LIBHUNSPELL_DLL_EXPORTED wide::string&
+mkallsmall_utf(wide::string& u, int langnum);
// convert first letter of UTF-8 string to little
-LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
-mkinitsmall_utf(std::vector<w_char>& u, int langnum);
+LIBHUNSPELL_DLL_EXPORTED wide::string&
+mkinitsmall_utf(wide::string& u, int langnum);
// convert UTF-8 string to capital
-LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
-mkallcap_utf(std::vector<w_char>& u, int langnum);
+LIBHUNSPELL_DLL_EXPORTED wide::string&
+mkallcap_utf(wide::string& u, int langnum);
// get type of capitalization
LIBHUNSPELL_DLL_EXPORTED int get_captype(const std::string& q, cs_info*);
// get type of capitalization (UTF-8)
-LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(const std::vector<w_char>& q, int langnum);
+LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(const wide::string& q, int langnum);
// strip all ignored characters in the string
LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars_utf(
std::string& word,
- const std::vector<w_char>& ignored_chars);
+ const wide::string& ignored_chars);
// strip all ignored characters in the string
LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars(
@@ -256,7 +256,7 @@ LIBHUNSPELL_DLL_EXPORTED bool parse_string(const std::string& line,
LIBHUNSPELL_DLL_EXPORTED bool parse_array(const std::string& line,
std::string& out,
- std::vector<w_char>& out_utf16,
+ wide::string& out_utf16,
int utf8,
int ln);
diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx
index 1de1690..6d92e9b 100644
--- a/src/hunspell/hashmgr.cxx
+++ b/src/hunspell/hashmgr.cxx
@@ -345,7 +345,7 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word,
flags2[flagslen] = ONLYUPCASEFLAG;
if (utf8) {
std::string st;
- std::vector<w_char> w;
+ wide::string w;
u8_u16(w, word);
mkallsmall_utf(w, langnum);
mkinitcap_utf(w, langnum);
@@ -366,7 +366,7 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word,
int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
int len;
if (utf8) {
- std::vector<w_char> dest_utf;
+ wide::string dest_utf;
len = u8_u16(dest_utf, word);
*captype = get_captype_utf8(dest_utf, langnum);
} else {
@@ -688,7 +688,7 @@ int HashMgr::decode_flags(unsigned short** result, const std::string& flags, Fil
break;
}
case FLAG_UNI: { // UTF-8 characters
- std::vector<w_char> w;
+ wide::string w;
u8_u16(w, flags);
len = w.size();
*result = (unsigned short*)malloc(len * sizeof(unsigned short));
@@ -760,7 +760,7 @@ bool HashMgr::decode_flags(std::vector<unsigned short>& result, const std::strin
break;
}
case FLAG_UNI: { // UTF-8 characters
- std::vector<w_char> w;
+ wide::string w;
u8_u16(w, flags);
size_t len = w.size();
size_t origsize = result.size();
@@ -793,7 +793,7 @@ unsigned short HashMgr::decode_flag(const char* f) const {
s = (unsigned short)i;
break;
case FLAG_UNI: {
- std::vector<w_char> w;
+ wide::string w;
u8_u16(w, f);
if (!w.empty())
memcpy(&s, &w[0], 1 * sizeof(short));
@@ -820,7 +820,7 @@ char* HashMgr::encode_flag(unsigned short f) const {
ch = stream.str();
} else if (flag_mode == FLAG_UNI) {
const w_char* w_c = (const w_char*)&f;
- std::vector<w_char> w(w_c, w_c + 1);
+ wide::string w(w_c, w_c + 1);
u16_u8(ch, w);
} else {
ch.push_back((unsigned char)(f));
diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx
index 812171a..312c8ba 100644
--- a/src/hunspell/hashmgr.hxx
+++ b/src/hunspell/hashmgr.hxx
@@ -96,7 +96,7 @@ class HashMgr {
std::string lang;
struct cs_info* csconv;
std::string ignorechars;
- std::vector<w_char> ignorechars_utf16;
+ wide::string ignorechars_utf16;
int numaliasf; // flag vector `compression' with aliases
unsigned short** aliasf;
unsigned short* aliasflen;
diff --git a/src/hunspell/hunspell.cxx b/src/hunspell/hunspell.cxx
index a8d78dc..46f1df9 100644
--- a/src/hunspell/hunspell.cxx
+++ b/src/hunspell/hunspell.cxx
@@ -103,7 +103,7 @@ public:
bool spell(const std::string& word, int* info = NULL, std::string* root = NULL);
std::vector<std::string> suggest(const std::string& word);
const std::string& get_wordchars() const;
- const std::vector<w_char>& get_wordchars_utf16() const;
+ const wide::string& get_wordchars_utf16() const;
const std::string& get_dict_encoding() const;
int add(const std::string& word);
int add_with_affix(const std::string& word, const std::string& example);
@@ -127,15 +127,15 @@ private:
private:
void cleanword(std::string& dest, const std::string&, int* pcaptype, int* pabbrev);
size_t cleanword2(std::string& dest,
- std::vector<w_char>& dest_u,
+ wide::string& dest_u,
const std::string& src,
int* pcaptype,
size_t* pabbrev);
void mkinitcap(std::string& u8);
- int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
- int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
+ int mkinitcap2(std::string& u8, wide::string& u16);
+ int mkinitsmall2(std::string& u8, wide::string& u16);
void mkallcap(std::string& u8);
- int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
+ int mkallsmall2(std::string& u8, wide::string& u16);
struct hentry* checkword(const std::string& source, int* info, std::string* root);
std::string sharps_u8_l1(const std::string& source);
hentry*
@@ -231,7 +231,7 @@ int HunspellImpl::add_dic(const char* dpath, const char* key) {
// return the length of the "cleaned" (and UTF-8 encoded) word
size_t HunspellImpl::cleanword2(std::string& dest,
- std::vector<w_char>& dest_utf,
+ wide::string& dest_utf,
const std::string& src,
int* pcaptype,
size_t* pabbrev) {
@@ -313,7 +313,7 @@ void HunspellImpl::cleanword(std::string& dest,
// remember to terminate the destination string
firstcap = csconv[static_cast<unsigned char>(dest[0])].ccase;
} else {
- std::vector<w_char> t;
+ wide::string t;
u8_u16(t, src);
for (size_t i = 0; i < t.size(); ++i) {
unsigned short idx = (t[i].h << 8) + t[i].l;
@@ -346,7 +346,7 @@ void HunspellImpl::cleanword(std::string& dest,
void HunspellImpl::mkallcap(std::string& u8) {
if (utf8) {
- std::vector<w_char> u16;
+ wide::string u16;
u8_u16(u16, u8);
::mkallcap_utf(u16, langnum);
u16_u8(u8, u16);
@@ -355,7 +355,7 @@ void HunspellImpl::mkallcap(std::string& u8) {
}
}
-int HunspellImpl::mkallsmall2(std::string& u8, std::vector<w_char>& u16) {
+int HunspellImpl::mkallsmall2(std::string& u8, wide::string& u16) {
if (utf8) {
::mkallsmall_utf(u16, langnum);
u16_u8(u8, u16);
@@ -438,7 +438,7 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root)
size_t wl = 0;
std::string scw;
- std::vector<w_char> sunicw;
+ wide::string sunicw;
// input conversion
RepList* rl = pAMgr ? pAMgr->get_iconvtable() : NULL;
@@ -519,7 +519,7 @@ bool HunspellImpl::spell(const std::string& word, int* info, std::string* root)
std::string part1 = scw.substr(0, apos+1);
std::string part2 = scw.substr(apos+1);
if (utf8) {
- std::vector<w_char> part1u, part2u;
+ wide::string part1u, part2u;
u8_u16(part1u, part1);
u8_u16(part2u, part2);
mkinitcap2(part2, part2u);
@@ -704,7 +704,7 @@ struct hentry* HunspellImpl::checkword(const std::string& w, int* info, std::str
if (ignoredchars != NULL) {
w2.assign(w);
if (utf8) {
- const std::vector<w_char>& ignoredchars_utf16 =
+ const wide::string& ignoredchars_utf16 =
pAMgr->get_ignore_utf16();
remove_ignored_chars_utf(w2, ignoredchars_utf16);
} else {
@@ -855,7 +855,7 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
size_t wl = 0;
std::string scw;
- std::vector<w_char> sunicw;
+ wide::string sunicw;
// input conversion
RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
@@ -909,7 +909,7 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
std::string postdot = scw.substr(dot_pos + 1);
int captype_;
if (utf8) {
- std::vector<w_char> postdotu;
+ wide::string postdotu;
u8_u16(postdotu, postdot);
captype_ = get_captype_utf8(postdotu, langnum);
} else {
@@ -951,7 +951,7 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
if ((slen < wl) && strcmp(scw.c_str() + wl - slen, space + 1)) {
std::string first(slst[j].c_str(), space + 1);
std::string second(space + 1);
- std::vector<w_char> w;
+ wide::string w;
if (utf8)
u8_u16(w, second);
mkinitcap2(second, w);
@@ -1109,7 +1109,7 @@ std::vector<std::string> HunspellImpl::suggest(const std::string& word) {
for (size_t j = 0; j < slst.size(); ++j) {
if (slst[j].find(' ') == std::string::npos && !spell(slst[j])) {
std::string s;
- std::vector<w_char> w;
+ wide::string w;
if (utf8) {
u8_u16(w, slst[j]);
} else {
@@ -1262,17 +1262,17 @@ const std::string& HunspellImpl::get_wordchars() const {
return pAMgr->get_wordchars();
}
-const std::vector<w_char>& Hunspell::get_wordchars_utf16() const {
+const wide::string& Hunspell::get_wordchars_utf16() const {
return m_Impl->get_wordchars_utf16();
}
-const std::vector<w_char>& HunspellImpl::get_wordchars_utf16() const {
+const wide::string& HunspellImpl::get_wordchars_utf16() const {
return pAMgr->get_wordchars_utf16();
}
void HunspellImpl::mkinitcap(std::string& u8) {
if (utf8) {
- std::vector<w_char> u16;
+ wide::string u16;
u8_u16(u16, u8);
::mkinitcap_utf(u16, langnum);
u16_u8(u8, u16);
@@ -1281,7 +1281,7 @@ void HunspellImpl::mkinitcap(std::string& u8) {
}
}
-int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
+int HunspellImpl::mkinitcap2(std::string& u8, wide::string& u16) {
if (utf8) {
::mkinitcap_utf(u16, langnum);
u16_u8(u8, u16);
@@ -1291,7 +1291,7 @@ int HunspellImpl::mkinitcap2(std::string& u8, std::vector<w_char>& u16) {
return u8.size();
}
-int HunspellImpl::mkinitsmall2(std::string& u8, std::vector<w_char>& u16) {
+int HunspellImpl::mkinitsmall2(std::string& u8, wide::string& u16) {
if (utf8) {
::mkinitsmall_utf(u16, langnum);
u16_u8(u8, u16);
@@ -1379,7 +1379,7 @@ std::vector<std::string> HunspellImpl::analyze(const std::string& word) {
size_t wl = 0;
std::string scw;
- std::vector<w_char> sunicw;
+ wide::string sunicw;
// input conversion
RepList* rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
@@ -1994,7 +1994,7 @@ std::vector<std::string> HunspellImpl::suffix_suggest(const std::string& root_wo
if (ignoredchars != NULL) {
w2.assign(root_word);
if (utf8) {
- const std::vector<w_char>& ignoredchars_utf16 =
+ const wide::string& ignoredchars_utf16 =
pAMgr->get_ignore_utf16();
remove_ignored_chars_utf(w2, ignoredchars_utf16);
} else {
diff --git a/src/hunspell/hunspell.hxx b/src/hunspell/hunspell.hxx
index 43af66b..375a7da 100644
--- a/src/hunspell/hunspell.hxx
+++ b/src/hunspell/hunspell.hxx
@@ -215,7 +215,7 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {
/* get extra word characters definied in affix file for tokenization */
const char* get_wordchars() const;
const std::string& get_wordchars_cpp() const;
- const std::vector<w_char>& get_wordchars_utf16() const;
+ const wide::string& get_wordchars_utf16() const;
struct cs_info* get_csconv();
diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
index 54a474f..1deec96 100644
--- a/src/hunspell/suggestmgr.cxx
+++ b/src/hunspell/suggestmgr.cxx
@@ -179,7 +179,7 @@ void SuggestMgr::suggest(std::vector<std::string>& slst,
const char* w,
int* onlycompoundsug) {
int nocompoundtwowords = 0;
- std::vector<w_char> word_utf;
+ wide::string word_utf;
int wl = 0;
size_t nsugorig = slst.size();
std::string w2;
@@ -313,7 +313,7 @@ void SuggestMgr::capchars_utf(std::vector<std::string>& wlst,
const w_char* word,
int wl,
int cpdsuggest) {
- std::vector<w_char> candidate_utf(word, word + wl);
+ wide::string candidate_utf(word, word + wl);
mkallcap_utf(candidate_utf, langnum);
std::string candidate;
u16_u8(candidate, candidate_utf);
@@ -491,7 +491,7 @@ int SuggestMgr::doubletwochars_utf(std::vector<std::string>& wlst,
if (word[i] == word[i - 2]) {
state++;
if (state == 3) {
- std::vector<w_char> candidate_utf(word, word + i - 1);
+ wide::string candidate_utf(word, word + i - 1);
candidate_utf.insert(candidate_utf.end(), word + i + 1, word + wl);
std::string candidate;
u16_u8(candidate, candidate_utf);
@@ -549,7 +549,7 @@ int SuggestMgr::badcharkey_utf(std::vector<std::string>& wlst,
int wl,
int cpdsuggest) {
std::string candidate;
- std::vector<w_char> candidate_utf(word, word + wl);
+ wide::string candidate_utf(word, word + wl);
// swap out each char one by one and try all the tryme
// chars in its place to see if that makes a good word
for (int i = 0; i < wl; i++) {
@@ -614,7 +614,7 @@ int SuggestMgr::badchar_utf(std::vector<std::string>& wlst,
const w_char* word,
int wl,
int cpdsuggest) {
- std::vector<w_char> candidate_utf(word, word + wl);
+ wide::string candidate_utf(word, word + wl);
std::string candidate;
clock_t timelimit = clock();
int timer = MINTIMER;
@@ -641,7 +641,7 @@ int SuggestMgr::extrachar_utf(std::vector<std::string>& wlst,
const w_char* word,
int wl,
int cpdsuggest) {
- std::vector<w_char> candidate_utf(word, word + wl);
+ wide::string candidate_utf(word, word + wl);
if (candidate_utf.size() < 2)
return wlst.size();
// try omitting one char of word at a time
@@ -703,7 +703,7 @@ int SuggestMgr::forgotchar_utf(std::vector<std::string>& wlst,
const w_char* word,
int wl,
int cpdsuggest) {
- std::vector<w_char> candidate_utf(word, word + wl);
+ wide::string candidate_utf(word, word + wl);
clock_t timelimit = clock();
int timer = MINTIMER;
@@ -852,7 +852,7 @@ int SuggestMgr::swapchar_utf(std::vector<std::string>& wlst,
const w_char* word,
int wl,
int cpdsuggest) {
- std::vector<w_char> candidate_utf(word, word + wl);
+ wide::string candidate_utf(word, word + wl);
if (candidate_utf.size() < 2)
return wlst.size();
@@ -909,10 +909,10 @@ int SuggestMgr::longswapchar_utf(std::vector<std::string>& wlst,
const w_char* word,
int wl,
int cpdsuggest) {
- std::vector<w_char> candidate_utf(word, word + wl);
+ wide::string candidate_utf(word, word + wl);
// try swapping not adjacent chars
- for (std::vector<w_char>::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) {
- for (std::vector<w_char>::iterator q = candidate_utf.begin(); q < candidate_utf.end(); ++q) {
+ for (wide::string::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) {
+ for (wide::string::iterator q = candidate_utf.begin(); q < candidate_utf.end(); ++q) {
if (std::abs(std::distance(q, p)) > 1) {
std::swap(*p, *q);
std::string candidate;
@@ -962,13 +962,13 @@ int SuggestMgr::movechar_utf(std::vector<std::string>& wlst,
const w_char* word,
int wl,
int cpdsuggest) {
- std::vector<w_char> candidate_utf(word, word + wl);
+ wide::string candidate_utf(word, word + wl);
if (candidate_utf.size() < 2)
return wlst.size();
// try moving a char
- for (std::vector<w_char>::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) {
- for (std::vector<w_char>::iterator q = p + 1; q < candidate_utf.end() && std::distance(p, q) < 10; ++q) {
+ for (wide::string::iterator p = candidate_utf.begin(); p < candidate_utf.end(); ++p) {
+ for (wide::string::iterator q = p + 1; q < candidate_utf.end() && std::distance(p, q) < 10; ++q) {
std::swap(*q, *(q - 1));
if (std::distance(p, q) < 2)
continue; // omit swap char
@@ -979,8 +979,8 @@ int SuggestMgr::movechar_utf(std::vector<std::string>& wlst,
std::copy(word, word + candidate_utf.size(), candidate_utf.begin());
}
- for (std::vector<w_char>::reverse_iterator p = candidate_utf.rbegin(); p < candidate_utf.rend(); ++p) {
- for (std::vector<w_char>::reverse_iterator q = p + 1; q < candidate_utf.rend() && std::distance(p, q) < 10; ++q) {
+ for (wide::string::reverse_iterator p = candidate_utf.rbegin(); p < candidate_utf.rend(); ++p) {
+ for (wide::string::reverse_iterator q = p + 1; q < candidate_utf.rend() && std::distance(p, q) < 10; ++q) {
std::swap(*q, *(q - 1));
if (std::distance(p, q) < 2)
continue; // omit swap char
@@ -1032,7 +1032,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
word = w2.c_str();
}
- std::vector<w_char> u8;
+ wide::string u8;
int nc = strlen(word);
int n = (utf8) ? u8_u16(u8, word) : nc;
@@ -1050,7 +1050,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
phonetable* ph = (pAMgr) ? pAMgr->get_phonetable() : NULL;
std::string target;
std::string candidate;
- std::vector<w_char> w_candidate;
+ wide::string w_candidate;
if (ph) {
if (utf8) {
u8_u16(w_candidate, word);
@@ -1069,16 +1069,16 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
FLAG nongramsuggest = pAMgr ? pAMgr->get_nongramsuggest() : FLAG_NULL;
FLAG onlyincompound = pAMgr ? pAMgr->get_onlyincompound() : FLAG_NULL;
- std::vector<w_char> w_word, w_target;
+ wide::string w_word, w_target;
if (utf8) {
u8_u16(w_word, word);
u8_u16(w_target, target);
}
- std::vector<w_char> w_entry;
+ wide::string w_entry;
std::string f;
- std::vector<w_char> w_f;
- std::vector<w_char> w_target2;
+ wide::string w_f;
+ wide::string w_target2;
for (size_t i = 0; i < rHMgr.size(); ++i) {
while (0 != (hp = rHMgr[i]->walk_hashtable(col, hp))) {
@@ -1168,7 +1168,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
// find minimum threshold for a passable suggestion
// mangle original word three differnt ways
// and score them to generate a minimum acceptable score
- std::vector<w_char> w_mw;
+ wide::string w_mw;
int thresh = 0;
for (int sp = 1; sp < 4; sp++) {
if (utf8) {
@@ -1210,7 +1210,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
return;
}
- std::vector<w_char> w_glst_word;
+ wide::string w_glst_word;
for (int i = 0; i < MAX_ROOTS; i++) {
if (roots[i]) {
struct hentry* rp = roots[i];
@@ -1288,7 +1288,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
fact = (10.0 - maxd) / 5.0;
}
- std::vector<w_char> w_gl;
+ wide::string w_gl;
for (int i = 0; i < MAX_GUESS; i++) {
if (guess[i]) {
// lowering guess[i]
@@ -1789,8 +1789,8 @@ std::string SuggestMgr::suggest_gen(const std::vector<std::string>& desc, const
// generate an n-gram score comparing s1 and s2, UTF16 version
int SuggestMgr::ngram(int n,
- const std::vector<w_char>& su1,
- const std::vector<w_char>& su2,
+ const wide::string& su1,
+ const wide::string& su2,
int opt) {
int nscore = 0;
int ns;
@@ -1803,8 +1803,8 @@ int SuggestMgr::ngram(int n,
if (l2 == 0)
return 0;
// lowering dictionary word
- const std::vector<w_char>* p_su2 = &su2;
- std::vector<w_char> su2_copy;
+ const wide::string* p_su2 = &su2;
+ wide::string su2_copy;
if (opt & NGRAM_LOWERING) {
su2_copy = su2;
mkallsmall_utf(su2_copy, langnum);
@@ -1894,8 +1894,8 @@ int SuggestMgr::ngram(int n,
// length of the left common substring of s1 and (decapitalised) s2, UTF version
int SuggestMgr::leftcommonsubstring(
- const std::vector<w_char>& su1,
- const std::vector<w_char>& su2) {
+ const wide::string& su1,
+ const wide::string& su2) {
int l1 = su1.size();
int l2 = su2.size();
// decapitalize dictionary word
@@ -1948,8 +1948,8 @@ int SuggestMgr::commoncharacterpositions(const char* s1,
int diffpos[2];
*is_swap = 0;
if (utf8) {
- std::vector<w_char> su1;
- std::vector<w_char> su2;
+ wide::string su1;
+ wide::string su2;
int l1 = u8_u16(su1, s1);
int l2 = u8_u16(su2, s2);
@@ -2004,7 +2004,7 @@ int SuggestMgr::commoncharacterpositions(const char* s1,
int SuggestMgr::mystrlen(const char* word) {
if (utf8) {
- std::vector<w_char> w;
+ wide::string w;
return u8_u16(w, word);
} else
return strlen(word);
@@ -2044,8 +2044,8 @@ void SuggestMgr::lcs(const char* s,
int* l2,
char** result) {
int n, m;
- std::vector<w_char> su;
- std::vector<w_char> su2;
+ wide::string su;
+ wide::string su2;
char* b;
char* c;
int i;
diff --git a/src/hunspell/suggestmgr.hxx b/src/hunspell/suggestmgr.hxx
index 6ba9dc8..9bfa933 100644
--- a/src/hunspell/suggestmgr.hxx
+++ b/src/hunspell/suggestmgr.hxx
@@ -107,11 +107,11 @@ class SuggestMgr {
private:
char* ckey;
size_t ckeyl;
- std::vector<w_char> ckey_utf;
+ wide::string ckey_utf;
char* ctry;
size_t ctryl;
- std::vector<w_char> ctry_utf;
+ wide::string ctry_utf;
AffixMgr* pAMgr;
unsigned int maxSug;
@@ -173,12 +173,12 @@ class SuggestMgr {
const std::vector<mapentry>&,
int*,
clock_t*);
- int ngram(int n, const std::vector<w_char>& su1,
- const std::vector<w_char>& su2, int opt);
+ int ngram(int n, const wide::string& su1,
+ const wide::string& su2, int opt);
int ngram(int n, const std::string& s1, const std::string& s2, int opt);
int mystrlen(const char* word);
- int leftcommonsubstring(const std::vector<w_char>& su1,
- const std::vector<w_char>& su2);
+ int leftcommonsubstring(const wide::string& su1,
+ const wide::string& su2);
int leftcommonsubstring(const char* s1, const char* s2);
int commoncharacterpositions(const char* s1, const char* s2, int* is_swap);
void bubblesort(char** rwd, char** rwd2, int* rsc, int n);
diff --git a/src/hunspell/w_char.hxx b/src/hunspell/w_char.hxx
index c561ffc..84ae13c 100644
--- a/src/hunspell/w_char.hxx
+++ b/src/hunspell/w_char.hxx
@@ -42,6 +42,7 @@
#define W_CHAR_HXX_
#include <string>
+#include <vector>
#ifndef GCC
struct w_char {
@@ -72,4 +73,9 @@ struct replentry {
std::string outstrings[4]; // med, ini, fin, isol
};
+namespace wide
+{
+ typedef std::vector<w_char> string;
+}
+
#endif
diff --git a/src/parsers/textparser.cxx b/src/parsers/textparser.cxx
index 53548e4..8e43f79 100644
--- a/src/parsers/textparser.cxx
+++ b/src/parsers/textparser.cxx
@@ -81,7 +81,7 @@ int TextParser::is_wordchar(const char* w) {
if (*w == '\0')
return 0;
if (utf8) {
- std::vector<w_char> wc;
+ wide::string wc;
unsigned short idx;
u8_u16(wc, w);
if (wc.empty())
diff --git a/src/tools/hunspell.cxx b/src/tools/hunspell.cxx
index 3172409..c39f148 100644
--- a/src/tools/hunspell.cxx
+++ b/src/tools/hunspell.cxx
@@ -199,7 +199,7 @@ enum { FMT_TEXT, FMT_LATEX, FMT_HTML, FMT_MAN, FMT_FIRST, FMT_XML, FMT_ODF };
std::string wordchars;
char* dicpath = NULL;
const w_char* wordchars_utf16 = NULL;
-std::vector<w_char> new_wordchars_utf16;
+wide::string new_wordchars_utf16;
int wordchars_utf16_len;
char* dicname = NULL;
char* privdicname = NULL;
@@ -311,7 +311,7 @@ TextParser* get_parser(int format, const char* extension, Hunspell* pMS) {
}
if (io_utf8) {
- const std::vector<w_char>& vec_wordchars_utf16 = pMS->get_wordchars_utf16();
+ const wide::string& vec_wordchars_utf16 = pMS->get_wordchars_utf16();
const std::string& vec_wordchars = pMS->get_wordchars_cpp();
wordchars_utf16_len = vec_wordchars_utf16.size();
wordchars_utf16 = wordchars_utf16_len ? &vec_wordchars_utf16[0] : NULL;
@@ -356,7 +356,7 @@ TextParser* get_parser(int format, const char* extension, Hunspell* pMS) {
ch[1] = '\0';
size_t res = iconv(conv, (ICONV_CONST char**)&ch8bit, &c1, &dest, &c2);
if (res != (size_t)-1) {
- std::vector<w_char> w;
+ wide::string w;
u8_u16(w, std::string(u8, dest));
unsigned short idx = w.empty() ? 0 : (w[0].h << 8) + w[0].l;
if (unicodeisalpha(idx)) {
@@ -395,7 +395,7 @@ TextParser* get_parser(int format, const char* extension, Hunspell* pMS) {
}
#else
if (strcmp(denc, "UTF-8") == 0) {
- const std::vector<w_char>& vec_wordchars_utf16 = pMS->get_wordchars_utf16();
+ const wide::string& vec_wordchars_utf16 = pMS->get_wordchars_utf16();
wordchars_utf16 = &vec_wordchars_utf16[0];
wordchars_utf16_len = vec_wordchars_utf16.size();
io_utf8 = 1;
@@ -1199,7 +1199,7 @@ void dialogscreen(TextParser* parser,
std::string lower_first_char(const std::string& token, const char* ioenc, int langnum) {
std::string utf8str(token);
chenc(utf8str, ioenc, "UTF-8");
- std::vector<w_char> u;
+ wide::string u;
u8_u16(u, utf8str);
if (!u.empty()) {
unsigned short idx = (u[0].h << 8) + u[0].l;
--
2.9.3
From 7c7f56e1c6fe510a2c5e826cc49aeae3f6614f86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com>
Date: Mon, 23 Jan 2017 13:36:20 +0000
Subject: [PATCH 3/3] Related: hunspell#406 use a basic_string<w_char> instead
of vector
kcachegrind reports 1,066,887,723 -> 894,015,631 on
echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL
---
src/hunspell/w_char.hxx | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/src/hunspell/w_char.hxx b/src/hunspell/w_char.hxx
index 84ae13c..e112b5c 100644
--- a/src/hunspell/w_char.hxx
+++ b/src/hunspell/w_char.hxx
@@ -42,7 +42,6 @@
#define W_CHAR_HXX_
#include <string>
-#include <vector>
#ifndef GCC
struct w_char {
@@ -75,7 +74,7 @@ struct replentry {
namespace wide
{
- typedef std::vector<w_char> string;
+ typedef std::basic_string<w_char> string;
}
#endif
--
2.9.3
From 1393bd64581d6010a65d368e1031641391bdb154 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com>
Date: Mon, 23 Jan 2017 14:30:13 +0000
Subject: [PATCH 1/2] use a per-hashmgr persistent wide string scratch buffer
kcachegrind reports 894,015,631 -> 845,183,693
---
src/hunspell/hashmgr.cxx | 40 ++++++++++++++++++----------------------
src/hunspell/hashmgr.hxx | 1 +
2 files changed, 19 insertions(+), 22 deletions(-)
diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx
index 6d92e9b..8d6189b 100644
--- a/src/hunspell/hashmgr.cxx
+++ b/src/hunspell/hashmgr.cxx
@@ -345,11 +345,10 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word,
flags2[flagslen] = ONLYUPCASEFLAG;
if (utf8) {
std::string st;
- wide::string w;
- u8_u16(w, word);
- mkallsmall_utf(w, langnum);
- mkinitcap_utf(w, langnum);
- u16_u8(st, w);
+ u8_u16(workbuf, word);
+ mkallsmall_utf(workbuf, langnum);
+ mkinitcap_utf(workbuf, langnum);
+ u16_u8(st, workbuf);
return add_word(st, wcl, flags2, flagslen + 1, dp, true);
} else {
std::string new_word(word);
@@ -366,9 +365,8 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word,
int HashMgr::get_clen_and_captype(const std::string& word, int* captype) {
int len;
if (utf8) {
- wide::string dest_utf;
- len = u8_u16(dest_utf, word);
- *captype = get_captype_utf8(dest_utf, langnum);
+ len = u8_u16(workbuf, word);
+ *captype = get_captype_utf8(workbuf, langnum);
} else {
len = word.size();
*captype = get_captype(word, csconv);
@@ -688,13 +686,12 @@ int HashMgr::decode_flags(unsigned short** result, const std::string& flags, Fil
break;
}
case FLAG_UNI: { // UTF-8 characters
- wide::string w;
- u8_u16(w, flags);
- len = w.size();
+ u8_u16(workbuf, flags);
+ len = workbuf.size();
*result = (unsigned short*)malloc(len * sizeof(unsigned short));
if (!*result)
return -1;
- memcpy(*result, &w[0], len * sizeof(short));
+ memcpy(*result, &workbuf[0], len * sizeof(short));
break;
}
default: { // Ispell's one-character flags (erfg -> e r f g)
@@ -760,12 +757,11 @@ bool HashMgr::decode_flags(std::vector<unsigned short>& result, const std::strin
break;
}
case FLAG_UNI: { // UTF-8 characters
- wide::string w;
- u8_u16(w, flags);
- size_t len = w.size();
+ u8_u16(workbuf, flags);
+ size_t len = workbuf.size();
size_t origsize = result.size();
result.resize(origsize + len);
- memcpy(&result[origsize], &w[0], len * sizeof(short));
+ memcpy(&result[origsize], &workbuf[0], len * sizeof(short));
break;
}
default: { // Ispell's one-character flags (erfg -> e r f g)
@@ -793,10 +789,9 @@ unsigned short HashMgr::decode_flag(const char* f) const {
s = (unsigned short)i;
break;
case FLAG_UNI: {
- wide::string w;
- u8_u16(w, f);
- if (!w.empty())
- memcpy(&s, &w[0], 1 * sizeof(short));
+ u8_u16(workbuf, f);
+ if (!workbuf.empty())
+ memcpy(&s, &workbuf[0], 1 * sizeof(short));
break;
}
default:
@@ -820,8 +815,9 @@ char* HashMgr::encode_flag(unsigned short f) const {
ch = stream.str();
} else if (flag_mode == FLAG_UNI) {
const w_char* w_c = (const w_char*)&f;
- wide::string w(w_c, w_c + 1);
- u16_u8(ch, w);
+ workbuf.clear();
+ workbuf.push_back(*w_c);
+ u16_u8(ch, workbuf);
} else {
ch.push_back((unsigned char)(f));
}
diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx
index 312c8ba..78ffb44 100644
--- a/src/hunspell/hashmgr.hxx
+++ b/src/hunspell/hashmgr.hxx
@@ -97,6 +97,7 @@ class HashMgr {
struct cs_info* csconv;
std::string ignorechars;
wide::string ignorechars_utf16;
+ mutable wide::string workbuf;
int numaliasf; // flag vector `compression' with aliases
unsigned short** aliasf;
unsigned short* aliasflen;
--
2.9.3
From 5c7bfa8d36b87a0649f6f88b20624c38a3a5f0ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com>
Date: Mon, 23 Jan 2017 14:43:19 +0000
Subject: [PATCH 2/2] use a per-suggestmgr persistent wide string scratch buffer
kcachegrind reports 845,183,693 -> 812,760,392
---
src/hunspell/suggestmgr.cxx | 55 ++++++++++++++++++++-------------------------
src/hunspell/suggestmgr.hxx | 3 +++
2 files changed, 27 insertions(+), 31 deletions(-)
diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
index 1deec96..f5ea01b 100644
--- a/src/hunspell/suggestmgr.cxx
+++ b/src/hunspell/suggestmgr.cxx
@@ -491,10 +491,11 @@ int SuggestMgr::doubletwochars_utf(std::vector<std::string>& wlst,
if (word[i] == word[i - 2]) {
state++;
if (state == 3) {
- wide::string candidate_utf(word, word + i - 1);
- candidate_utf.insert(candidate_utf.end(), word + i + 1, word + wl);
+ workbuf1.clear();
+ workbuf1.insert(workbuf1.end(), word, word + i - 1);
+ workbuf1.insert(workbuf1.end(), word + i + 1, word + wl);
std::string candidate;
- u16_u8(candidate, candidate_utf);
+ u16_u8(candidate, workbuf1);
testsug(wlst, candidate, cpdsuggest, NULL, NULL);
state = 0;
}
@@ -1050,12 +1051,11 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
phonetable* ph = (pAMgr) ? pAMgr->get_phonetable() : NULL;
std::string target;
std::string candidate;
- wide::string w_candidate;
if (ph) {
if (utf8) {
- u8_u16(w_candidate, word);
- mkallcap_utf(w_candidate, langnum);
- u16_u8(candidate, w_candidate);
+ u8_u16(workbuf1, word);
+ mkallcap_utf(workbuf1, langnum);
+ u16_u8(candidate, workbuf1);
} else {
candidate.assign(word);
if (!nonbmp)
@@ -1121,10 +1121,9 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
int scphon = -20000;
if (ph && (sc > 2) && (abs(n - (int)hp->clen) <= 3)) {
if (utf8) {
- w_candidate.clear();
- u8_u16(w_candidate, HENTRY_WORD(hp));
- mkallcap_utf(w_candidate, langnum);
- u16_u8(candidate, w_candidate);
+ u8_u16(workbuf1, HENTRY_WORD(hp));
+ mkallcap_utf(workbuf1, langnum);
+ u16_u8(candidate, workbuf1);
} else {
candidate = HENTRY_WORD(hp);
mkallcap(candidate, csconv);
@@ -1804,11 +1803,10 @@ int SuggestMgr::ngram(int n,
return 0;
// lowering dictionary word
const wide::string* p_su2 = &su2;
- wide::string su2_copy;
if (opt & NGRAM_LOWERING) {
- su2_copy = su2;
- mkallsmall_utf(su2_copy, langnum);
- p_su2 = &su2_copy;
+ workbuf1 = su2;
+ mkallsmall_utf(workbuf1, langnum);
+ p_su2 = &workbuf1;
}
for (int j = 1; j <= n; j++) {
ns = 0;
@@ -1948,22 +1946,20 @@ int SuggestMgr::commoncharacterpositions(const char* s1,
int diffpos[2];
*is_swap = 0;
if (utf8) {
- wide::string su1;
- wide::string su2;
- int l1 = u8_u16(su1, s1);
- int l2 = u8_u16(su2, s2);
+ int l1 = u8_u16(workbuf1, s1);
+ int l2 = u8_u16(workbuf2, s2);
if (l1 <= 0 || l2 <= 0)
return 0;
// decapitalize dictionary word
if (complexprefixes) {
- su2[l2 - 1] = lower_utf(su2[l2 - 1], langnum);
+ workbuf2[l2 - 1] = lower_utf(workbuf2[l2 - 1], langnum);
} else {
- su2[0] = lower_utf(su2[0], langnum);
+ workbuf2[0] = lower_utf(workbuf2[0], langnum);
}
for (int i = 0; (i < l1) && (i < l2); i++) {
- if (su1[i] == su2[i]) {
+ if (workbuf1[i] == workbuf2[i]) {
num++;
} else {
if (diff < 2)
@@ -1972,8 +1968,8 @@ int SuggestMgr::commoncharacterpositions(const char* s1,
}
}
if ((diff == 2) && (l1 == l2) &&
- (su1[diffpos[0]] == su2[diffpos[1]]) &&
- (su1[diffpos[1]] == su2[diffpos[0]]))
+ (workbuf1[diffpos[0]] == workbuf2[diffpos[1]]) &&
+ (workbuf1[diffpos[1]] == workbuf2[diffpos[0]]))
*is_swap = 1;
} else {
size_t i;
@@ -2004,8 +2000,7 @@ int SuggestMgr::commoncharacterpositions(const char* s1,
int SuggestMgr::mystrlen(const char* word) {
if (utf8) {
- wide::string w;
- return u8_u16(w, word);
+ return u8_u16(workbuf1, word);
} else
return strlen(word);
}
@@ -2044,15 +2039,13 @@ void SuggestMgr::lcs(const char* s,
int* l2,
char** result) {
int n, m;
- wide::string su;
- wide::string su2;
char* b;
char* c;
int i;
int j;
if (utf8) {
- m = u8_u16(su, s);
- n = u8_u16(su2, s2);
+ m = u8_u16(workbuf1, s);
+ n = u8_u16(workbuf2, s2);
} else {
m = strlen(s);
n = strlen(s2);
@@ -2073,7 +2066,7 @@ void SuggestMgr::lcs(const char* s,
c[j] = 0;
for (i = 1; i <= m; i++) {
for (j = 1; j <= n; j++) {
- if (((utf8) && (su[i - 1] == su2[j - 1])) ||
+ if (((utf8) && (workbuf1[i - 1] == workbuf2[j - 1])) ||
((!utf8) && (s[i - 1] == s2[j - 1]))) {
c[i * (n + 1) + j] = c[(i - 1) * (n + 1) + j - 1] + 1;
b[i * (n + 1) + j] = LCS_UPLEFT;
diff --git a/src/hunspell/suggestmgr.hxx b/src/hunspell/suggestmgr.hxx
index 9bfa933..80b0fd7 100644
--- a/src/hunspell/suggestmgr.hxx
+++ b/src/hunspell/suggestmgr.hxx
@@ -105,6 +105,9 @@ class SuggestMgr {
SuggestMgr& operator=(const SuggestMgr&);
private:
+ mutable wide::string workbuf1;
+ mutable wide::string workbuf2;
+
char* ckey;
size_t ckeyl;
wide::string ckey_utf;
--
2.9.3
......@@ -27,8 +27,10 @@ endif
$(call gb_ExternalProject_get_state_target,hunspell,build):
$(call gb_ExternalProject_run,build,\
$(if $(filter IOS MACOSX,$(OS)),ACLOCAL="aclocal -I $(SRCDIR)/m4/mac") \
LIBS="$(gb_STDLIBS) $(LIBS)" \
./configure --disable-shared --disable-nls --with-pic \
autoreconf && \
$(SHELL) ./configure --disable-shared --disable-nls --with-pic \
$(if $(CROSS_COMPILING),--build=$(BUILD_PLATFORM) --host=$(HOST_PLATFORM))\
$(if $(filter AIX,$(OS)),CFLAGS="-D_LINUX_SOURCE_COMPAT") \
$(if $(filter-out WNTGCC,$(OS)$(COM)),,LDFLAGS="-Wl,--enable-runtime-pseudo-reloc-v2") \
......
......@@ -17,10 +17,15 @@ $(eval $(call gb_UnpackedTarball_set_post_action,hunspell,\
))
endif
$(eval $(call gb_UnpackedTarball_set_patchlevel,hunspell,0))
$(eval $(call gb_UnpackedTarball_set_patchlevel,hunspell,1))
$(eval $(call gb_UnpackedTarball_add_patches,hunspell, \
external/hunspell/clangcl-werror.patch \
external/hunspell/0001-Revert-Remove-autotools-autogenerated-files.patch \
external/hunspell/0001-unroll-this-a-bit.patch \
external/hunspell/0002-rename-std-vector-w_char-to-wide-string.patch \
external/hunspell/0003-Related-hunspell-406-use-a-basic_string-w_char-inste.patch \
external/hunspell/0004-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch \
external/hunspell/0005-use-a-per-hashmgr-persistent-wide-string-scratch-buf.patch \
))
# vim: set noet sw=4 ts=4:
--- src/hunspell/hunspell.hxx
+++ src/hunspell/hunspell.hxx
@@ -85,7 +85,7 @@
#define MAXSHARPS 5
#define MAXWORDLEN 176
-#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
+#if defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
# define H_DEPRECATED __attribute__((__deprecated__))
#elif defined(_MSC_VER) && (_MSC_VER >= 1300)
# define H_DEPRECATED __declspec(deprecated)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment