Commit 5955a98b, authored by Caolán McNamara

upgrade to hunspell 1.3.3

Change-Id: Ibf8345885e99ae8eb9dd6d64ccd9b5407d8d610e
Reviewed-on: https://gerrit.libreoffice.org/11136
Reviewed-by: Caolán McNamara <caolanm@redhat.com>
Tested-by: Caolán McNamara <caolanm@redhat.com>
üst 14fa2698
......@@ -63,7 +63,7 @@ export GRAPHITE_TARBALL := graphite2-1.2.4.tgz
export HARFBUZZ_MD5SUM := a4a9b548577e2ee22f0887937da5fd6c
export HARFBUZZ_TARBALL := harfbuzz-0.9.23.tar.bz2
export HSQLDB_TARBALL := 17410483b5b5f267aa18b7e00b65e6e0-hsqldb_1_8_0.zip
export HUNSPELL_TARBALL := 3121aaf3e13e5d88dfff13fb4a5f1ab8-hunspell-1.3.2.tar.gz
export HUNSPELL_TARBALL := 4967da60b23413604c9e563beacc63b4-hunspell-1.3.3.tar.gz
export HYPHEN_TARBALL := ecaf645cb09bd7b6ad0497b8a91fbd22-hyphen-2.8.7.tar.gz
export ICU_TARBALL := b73baa6fbdfef197608d1f69300919b9-icu4c-53_1-src.tgz
export JFREEREPORT_FLOW_ENGINE_TARBALL := ba2930200c9f019c2d93a8c88c651a0f-flow-engine-0.9.4.zip
......
......@@ -12,15 +12,11 @@ $(eval $(call gb_UnpackedTarball_UnpackedTarball,hunspell))
$(eval $(call gb_UnpackedTarball_set_tarball,hunspell,$(HUNSPELL_TARBALL)))
$(eval $(call gb_UnpackedTarball_add_patches,hunspell,\
external/hunspell/hunspell-twoaffixcompound.patch \
external/hunspell/hunspell-solaris.patch \
external/hunspell/hunspell-1.3.2-overflow.patch \
external/hunspell/hunspell-android.patch \
external/hunspell/hunspell-1.3.2-nullptr.patch \
external/hunspell/hunspell-1.3.2-literal.patch \
external/hunspell/hunspell-1.3.2-compound.patch \
external/hunspell/hunspell.rhbz918938.patch \
external/hunspell/hunspell-wundef.patch.1 \
external/hunspell/hunspell-fdo48017-wfopen.patch \
))
......
--- misc/hunspell-1.3.2/src/hunspell/affixmgr.cxx 2010-02-27 12:59:53.000000000 +0100
+++ misc/build/hunspell-1.3.2/src/hunspell/affixmgr.cxx 2012-09-18 11:50:04.535803282 +0200
@@ -2125,7 +2125,7 @@
}
if (!rv) {
- if (onlycpdrule) break;
+ if (onlycpdrule && strlen(*result) > MAXLNLEN/10) break;
if (compoundflag &&
!(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
......@@ -7,5 +7,5 @@
-#define HUNSPELL_PIPE_HEADING "@(#) International Ispell Version 3.2.06 (but really Hunspell "VERSION")\n"
+#define HUNSPELL_PIPE_HEADING "@(#) International Ispell Version 3.2.06 (but really Hunspell " VERSION ")\n"
#define HUNSPELL_HEADING "Hunspell "
//for debugging only
#define ODF_EXT "odt|ott|odp|otp|odg|otg|ods|ots"
#define ENTITY_APOS "&apos;"
......@@ -18,25 +18,3 @@
| -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
| -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
| -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
--- misc/hunspell-1.3.2/src/tools/munch.c
+++ misc/build/hunspell-1.3.2/src/tools/munch.c
@@ -8,7 +8,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
-#ifdef __linux__
+#if defined(__linux__) && !defined(__ANDROID__)
#include <error.h>
#include <errno.h>
#include <sys/mman.h>
--- misc/hunspell-1.3.2/src/tools/unmunch.c
+++ misc/build/hunspell-1.3.2/src/tools/unmunch.c
@@ -8,7 +8,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
-#ifdef __linux__
+#if defined(__linux__) && !defined(__ANDROID__)
#include <error.h>
#include <errno.h>
#include <sys/mman.h>
diff -ru hunspell/src/hunspell/csutil.cxx build/hunspell/src/hunspell/csutil.cxx
--- hunspell/src/hunspell/csutil.cxx 2011-02-02 11:35:43.000000000 +0100
+++ build/hunspell/src/hunspell/csutil.cxx 2014-04-24 19:42:01.373285409 +0200
@@ -17,6 +17,11 @@
unsigned short clower;
};
+#ifdef _WIN32
+#include <windows.h>
+#include <wchar.h>
+#endif
+
#ifdef OPENOFFICEORG
# include <unicode/uchar.h>
#else
@@ -51,6 +51,26 @@
static struct unicode_info2 * utf_tbl = NULL;
static int utf_tbl_count = 0; // utf_tbl can be used by multiple Hunspell instances
+FILE * myfopen(const char * path, const char * mode) {
+#ifdef _WIN32
+#define WIN32_LONG_PATH_PREFIX "\\\\?\\"
+ if (strncmp(path, WIN32_LONG_PATH_PREFIX, 4) == 0) {
+ int len = MultiByteToWideChar(CP_UTF8, 0, path, -1, NULL, 0);
+ wchar_t *buff = (wchar_t *) malloc(len * sizeof(wchar_t));
@@ -57,9 +57,14 @@
if (strncmp(path, WIN32_LONG_PATH_PREFIX, 4) == 0) {
int len = MultiByteToWideChar(CP_UTF8, 0, path, -1, NULL, 0);
wchar_t *buff = (wchar_t *) malloc(len * sizeof(wchar_t));
+ wchar_t *buff2 = (wchar_t *) malloc(len * sizeof(wchar_t));
+ FILE * f = NULL;
+ MultiByteToWideChar(CP_UTF8, 0, path, -1, buff, len);
MultiByteToWideChar(CP_UTF8, 0, path, -1, buff, len);
- FILE * f = _wfopen(buff, (strcmp(mode, "r") == 0) ? L"r" : L"rb");
+ if (_wfullpath( buff2, buff, len ) != NULL) {
+ f = _wfopen(buff2, (strcmp(mode, "r") == 0) ? L"r" : L"rb");
+ }
+ free(buff);
free(buff);
+ free(buff2);
+ return f;
+ }
+#endif
+ return fopen(path, mode);
+}
+
/* only UTF-16 (BMP) implementation */
char * u16_u8(char * dest, int size, const w_char * src, int srclen) {
signed char * u8 = (signed char *)dest;
diff -ru hunspell/src/hunspell/csutil.hxx build/hunspell/src/hunspell/csutil.hxx
--- hunspell/src/hunspell/csutil.hxx 2010-09-06 09:58:53.000000000 +0200
+++ build/hunspell/src/hunspell/csutil.hxx 2014-04-24 19:42:01.373285409 +0200
@@ -52,6 +52,9 @@
#define FORBIDDENWORD 65510
#define ONLYUPCASEFLAG 65511
+// fopen or optional _wfopen to fix long pathname problem of WIN32
+LIBHUNSPELL_DLL_EXPORTED FILE * myfopen(const char * path, const char * mode);
+
// convert UTF-16 characters to UTF-8
LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
diff -ru hunspell/src/hunspell/dictmgr.cxx build/hunspell/src/hunspell/dictmgr.cxx
--- hunspell/src/hunspell/dictmgr.cxx 2010-06-02 21:33:59.000000000 +0200
+++ build/hunspell/src/hunspell/dictmgr.cxx 2014-04-24 19:42:01.381285408 +0200
@@ -5,6 +5,7 @@
#include <stdio.h>
#include "dictmgr.hxx"
+#include "csutil.hxx"
DictMgr::DictMgr(const char * dictpath, const char * etype) : numdict(0)
{
@@ -57,7 +58,7 @@
// open the dictionary list file
FILE * dictlst;
- dictlst = fopen(dictpath,"r");
+ dictlst = myfopen(dictpath,"r");
if (!dictlst) {
return 1;
return f;
}
diff -ru hunspell/src/hunspell/filemgr.cxx build/hunspell/src/hunspell/filemgr.cxx
--- hunspell/src/hunspell/filemgr.cxx 2010-04-14 11:42:03.000000000 +0200
+++ build/hunspell/src/hunspell/filemgr.cxx 2014-04-25 00:44:05.049789586 +0200
@@ -6,6 +6,7 @@
#include <stdio.h>
#include "filemgr.hxx"
+#include "csutil.hxx"
int FileMgr::fail(const char * err, const char * par) {
fprintf(stderr, err, par);
@@ -15,7 +16,7 @@
FileMgr::FileMgr(const char * file, const char * key) {
linenum = 0;
hin = NULL;
- fin = fopen(file, "r");
+ fin = myfopen(file, "r");
if (!fin) {
// check hzipped file
char * st = (char *) malloc(strlen(file) + strlen(HZIP_EXTENSION) + 1);
diff -ru hunspell/src/hunspell/hunzip.cxx build/hunspell/src/hunspell/hunzip.cxx
--- hunspell/src/hunspell/hunzip.cxx 2010-04-27 16:07:14.000000000 +0200
+++ build/hunspell/src/hunspell/hunzip.cxx 2014-04-24 19:42:01.381285408 +0200
@@ -3,6 +3,7 @@
#include <stdio.h>
#include "hunzip.hxx"
+#include "csutil.hxx"
#define CODELEN 65536
#define BASEBITREC 5000
@@ -38,7 +38,7 @@
if (!filename) return -1;
- fin = fopen(filename, "rb");
+ fin = myfopen(filename, "rb");
if (!fin) return -1;
// read magic number
#endif
--- misc/hunspell-1.3.2/src/hunspell/affixmgr.cxx 2010-06-17 15:56:41.000000000 +0200
+++ misc/build/hunspell-1.3.2/src/hunspell/affixmgr.cxx 2011-02-10 20:47:22.000000000 +0100
@@ -48,6 +48,7 @@
compoundroot = FLAG_NULL; // compound word signing flag
compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word
compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word
+ compoundmoresuffixes = 0; // allow more suffixes within compound words
checkcompounddup = 0; // forbid double words in compounds
checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution)
checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds
@@ -404,6 +405,10 @@
}
}
+ if (strncmp(line,"COMPOUNDMORESUFFIXES",20) == 0) {
+ compoundmoresuffixes = 1;
+ }
+
if (strncmp(line,"CHECKCOMPOUNDDUP",16) == 0) {
checkcompounddup = 1;
}
@@ -1626,8 +1631,9 @@
if (onlycpdrule) break;
if (compoundflag &&
!(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
- if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
- FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
+ if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
+ FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+ (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && !hu_mov_rule &&
sfx->getCont() &&
((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag,
sfx->getContLen())) || (compoundend &&
@@ -1640,9 +1646,11 @@
if (rv ||
(((wordnum == 0) && compoundbegin &&
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+ (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundbegin))) || // twofold suffixes + compound
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
((wordnum > 0) && compoundmiddle &&
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+ (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundmiddle))) || // twofold suffixes + compound
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
) checked_prefix = 1;
// else check forbiddenwords and needaffix
@@ -2118,8 +2126,9 @@
if (onlycpdrule) break;
if (compoundflag &&
!(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
- if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
- FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
+ if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
+ FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+ (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && !hu_mov_rule &&
sfx->getCont() &&
((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag,
sfx->getContLen())) || (compoundend &&
@@ -2132,9 +2141,11 @@
if (rv ||
(((wordnum == 0) && compoundbegin &&
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+ (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundbegin))) || // twofold suffix+compound
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
((wordnum > 0) && compoundmiddle &&
((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
+ (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundmiddle))) || // twofold suffix+compound
(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
) {
// char * p = prefix_check_morph(st, i, 0, compound);
--- misc/hunspell-1.3.2/src/hunspell/affixmgr.hxx 2010-06-17 15:56:41.000000000 +0200
+++ misc/build/hunspell-1.3.2/src/hunspell/affixmgr.hxx 2011-02-10 20:47:22.000000000 +0100
@@ -41,6 +41,7 @@
FLAG compoundroot;
FLAG compoundforbidflag;
FLAG compoundpermitflag;
+ int compoundmoresuffixes;
int checkcompounddup;
int checkcompoundrep;
int checkcompoundcase;
--- a/src/hunspell/hunvisapi.h.in 2013-04-01 14:41:16.507546705 +0200
+++ b/src/hunspell/hunvisapi.h.in 2013-04-01 14:41:24.537547190 +0200
@@ -9,7 +9,7 @@
# else
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
# endif
-#elif BUILDING_LIBHUNSPELL && @HAVE_VISIBILITY@
+#elif defined(BUILDING_LIBHUNSPELL) && @HAVE_VISIBILITY@
# define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default")))
#else
# define LIBHUNSPELL_DLL_EXPORTED
--- misc/hunspell-1.3.2/src/hunspell/hunspell.cxx 2011-02-02 12:04:29.000000000 +0000
+++ misc/build/hunspell-1.3.2/src/hunspell/hunspell.cxx 2013-03-13 16:50:50.667928521 +0000
@@ -12,6 +12,8 @@
#endif
#include "csutil.hxx"
+#include <string>
+
Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
{
encoding = NULL;
@@ -1710,6 +1712,19 @@
return n;
}
+namespace
+{
+ void myrep(std::string& str, const std::string& search, const std::string& replace)
+ {
+ size_t pos = 0;
+ while ((pos = str.find(search, pos)) != std::string::npos)
+ {
+ str.replace(pos, search.length(), replace);
+ pos += replace.length();
+ }
+ }
+}
+
int Hunspell::spellml(char*** slst, const char * word)
{
char *q, *q2;
@@ -1721,26 +1736,26 @@
q2 = strstr(q2, "<word");
if (!q2) return 0; // bad XML input
if (check_xml_par(q, "type=", "analyze")) {
- int n = 0, s = 0;
+ int n = 0;
if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw);
if (n == 0) return 0;
// convert the result to <code><a>ana1</a><a>ana2</a></code> format
- for (int i = 0; i < n; i++) s+= strlen((*slst)[i]);
- char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->&amp;
- if (!r) return 0;
- strcpy(r, "<code>");
+ std::string r;
+ r.append("<code>");
for (int i = 0; i < n; i++) {
- int l = strlen(r);
- strcpy(r + l, "<a>");
- strcpy(r + l + 3, (*slst)[i]);
- mystrrep(r + l + 3, "\t", " ");
- mystrrep(r + l + 3, "<", "&lt;");
- mystrrep(r + l + 3, "&", "&amp;");
- strcat(r, "</a>");
+ r.append("<a>");
+
+ std::string entry((*slst)[i]);
free((*slst)[i]);
+ myrep(entry, "\t", " ");
+ myrep(entry, "&", "&amp;");
+ myrep(entry, "<", "&lt;");
+ r.append(entry);
+
+ r.append("</a>");
}
- strcat(r, "</code>");
- (*slst)[0] = r;
+ r.append("</code>");
+ (*slst)[0] = mystrdup(r.c_str());
return 1;
} else if (check_xml_par(q, "type=", "stem")) {
if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment