Commit 27aa4dcc, authored by Caolán McNamara

bump hyphen to 2.8.3

Parent commit: 5c1dc778
--- misc/build/hyphen-2.7.1/hyphen.c.old 2011-10-10 15:58:33.317260138 +0200
+++ misc/build/hyphen-2.7.1/hyphen.c 2011-10-10 15:58:55.221260136 +0200
@@ -226,115 +226,61 @@
}
#ifdef VERBOSE
-HashTab *global;
+HashTab *global[1];
static char *
-get_state_str (int state)
+get_state_str (int state, int level)
{
int i;
HashEntry *e;
for (i = 0; i < HASH_SIZE; i++)
- for (e = global->entries[i]; e; e = e->next)
+ for (e = global[level]->entries[i]; e; e = e->next)
if (e->val == state)
return e->key;
return NULL;
}
#endif
-HyphenDict *
-hnj_hyphen_load (const char *fn)
-{
- HyphenDict *dict[2];
- HashTab *hashtab;
- FILE *f;
- char buf[MAX_CHARS];
+void hnj_hyphen_load_line(char * buf, HyphenDict * dict, HashTab * hashtab) {
+ int i, j;
char word[MAX_CHARS];
char pattern[MAX_CHARS];
char * repl;
signed char replindex;
signed char replcut;
- int state_num = 0, last_state;
- int i, j, k;
+ int state_num = 0;
+ int last_state;
char ch;
int found;
- HashEntry *e;
- int nextlevel = 0;
-
- f = fopen (fn, "r");
- if (f == NULL)
- return NULL;
-
-// loading one or two dictionaries (separated by NEXTLEVEL keyword)
-for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
- hashtab = hnj_hash_new ();
-#ifdef VERBOSE
- global = hashtab;
-#endif
- hnj_hash_insert (hashtab, "", 0);
- dict[k] = hnj_malloc (sizeof(HyphenDict));
- dict[k]->num_states = 1;
- dict[k]->states = hnj_malloc (sizeof(HyphenState));
- dict[k]->states[0].match = NULL;
- dict[k]->states[0].repl = NULL;
- dict[k]->states[0].fallback_state = -1;
- dict[k]->states[0].num_trans = 0;
- dict[k]->states[0].trans = NULL;
- dict[k]->nextlevel = NULL;
- dict[k]->lhmin = 0;
- dict[k]->rhmin = 0;
- dict[k]->clhmin = 0;
- dict[k]->crhmin = 0;
- dict[k]->nohyphen = NULL;
- dict[k]->nohyphenl = 0;
-
- /* read in character set info */
- if (k == 0) {
- for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
- fgets(dict[k]->cset, sizeof(dict[k]->cset),f);
- for (i=0;i<MAX_NAME;i++)
- if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
- dict[k]->cset[i] = 0;
- dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
- } else {
- strcpy(dict[k]->cset, dict[0]->cset);
- dict[k]->utf8 = dict[0]->utf8;
- }
- while (fgets (buf, sizeof(buf), f) != NULL)
- {
- if (buf[0] != '%')
- {
- if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
- nextlevel = 1;
- break;
- } else if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
- dict[k]->lhmin = atoi(buf + 13);
- continue;
+ if (strncmp(buf, "LEFTHYPHENMIN", 13) == 0) {
+ dict->lhmin = atoi(buf + 13);
+ return;
} else if (strncmp(buf, "RIGHTHYPHENMIN", 14) == 0) {
- dict[k]->rhmin = atoi(buf + 14);
- continue;
+ dict->rhmin = atoi(buf + 14);
+ return;
} else if (strncmp(buf, "COMPOUNDLEFTHYPHENMIN", 21) == 0) {
- dict[k]->clhmin = atoi(buf + 21);
- continue;
+ dict->clhmin = atoi(buf + 21);
+ return;
} else if (strncmp(buf, "COMPOUNDRIGHTHYPHENMIN", 22) == 0) {
- dict[k]->crhmin = atoi(buf + 22);
- continue;
+ dict->crhmin = atoi(buf + 22);
+ return;
} else if (strncmp(buf, "NOHYPHEN", 8) == 0) {
char * space = buf + 8;
while (*space != '\0' && (*space == ' ' || *space == '\t')) space++;
- if (*buf != '\0') dict[k]->nohyphen = hnj_strdup(space);
- if (dict[k]->nohyphen) {
- char * nhe = dict[k]->nohyphen + strlen(dict[k]->nohyphen) - 1;
+ if (*buf != '\0') dict->nohyphen = hnj_strdup(space);
+ if (dict->nohyphen) {
+ char * nhe = dict->nohyphen + strlen(dict->nohyphen) - 1;
*nhe = 0;
- for (nhe = nhe - 1; nhe > dict[k]->nohyphen; nhe--) {
+ for (nhe = nhe - 1; nhe > dict->nohyphen; nhe--) {
if (*nhe == ',') {
- dict[k]->nohyphenl++;
+ dict->nohyphenl++;
*nhe = 0;
}
}
}
- continue;
+ return;
}
j = 0;
pattern[j] = '0';
@@ -379,7 +325,7 @@
} else {
if (*word == '.') i++;
/* convert UTF-8 char. positions of discretionary hyph. replacements to 8-bit */
- if (dict[k]->utf8) {
+ if (dict->utf8) {
int pu = -1; /* unicode character position */
int ps = -1; /* unicode start position (original replindex) */
int pc = (*word == '.') ? 1: 0; /* 8-bit character position */
@@ -403,14 +349,14 @@
printf ("word %s pattern %s, j = %d repl: %s\n", word, pattern + i, j, repl);
#endif
found = hnj_hash_lookup (hashtab, word);
- state_num = hnj_get_state (dict[k], hashtab, word);
- dict[k]->states[state_num].match = hnj_strdup (pattern + i);
- dict[k]->states[state_num].repl = repl;
- dict[k]->states[state_num].replindex = replindex;
+ state_num = hnj_get_state (dict, hashtab, word);
+ dict->states[state_num].match = hnj_strdup (pattern + i);
+ dict->states[state_num].repl = repl;
+ dict->states[state_num].replindex = replindex;
if (!replcut) {
- dict[k]->states[state_num].replcut = (signed char) strlen(word);
+ dict->states[state_num].replcut = (signed char) strlen(word);
} else {
- dict[k]->states[state_num].replcut = replcut;
+ dict->states[state_num].replcut = replcut;
}
/* now, put in the prefix transitions */
@@ -420,11 +366,82 @@
ch = word[j - 1];
word[j - 1] = '\0';
found = hnj_hash_lookup (hashtab, word);
- state_num = hnj_get_state (dict[k], hashtab, word);
- hnj_add_trans (dict[k], state_num, last_state, ch);
+ state_num = hnj_get_state (dict, hashtab, word);
+ hnj_add_trans (dict, state_num, last_state, ch);
}
- }
+}
+
+HyphenDict *
+hnj_hyphen_load (const char *fn)
+{
+ HyphenDict *dict[2];
+ HashTab *hashtab;
+ FILE *f;
+ char buf[MAX_CHARS];
+ int nextlevel = 0;
+ int i, j, k;
+ HashEntry *e;
+ int state_num = 0;
+
+ f = fopen (fn, "r");
+ if (f == NULL)
+ return NULL;
+
+// loading one or two dictionaries (separated by NEXTLEVEL keyword)
+for (k = 0; k < 2; k++) {
+ hashtab = hnj_hash_new ();
+#ifdef VERBOSE
+ global[k] = hashtab;
+#endif
+ hnj_hash_insert (hashtab, "", 0);
+ dict[k] = hnj_malloc (sizeof(HyphenDict));
+ dict[k]->num_states = 1;
+ dict[k]->states = hnj_malloc (sizeof(HyphenState));
+ dict[k]->states[0].match = NULL;
+ dict[k]->states[0].repl = NULL;
+ dict[k]->states[0].fallback_state = -1;
+ dict[k]->states[0].num_trans = 0;
+ dict[k]->states[0].trans = NULL;
+ dict[k]->nextlevel = NULL;
+ dict[k]->lhmin = 0;
+ dict[k]->rhmin = 0;
+ dict[k]->clhmin = 0;
+ dict[k]->crhmin = 0;
+ dict[k]->nohyphen = NULL;
+ dict[k]->nohyphenl = 0;
+
+ /* read in character set info */
+ if (k == 0) {
+ for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
+ fgets(dict[k]->cset, sizeof(dict[k]->cset),f);
+ for (i=0;i<MAX_NAME;i++)
+ if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
+ dict[k]->cset[i] = 0;
+ dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
+ } else {
+ strcpy(dict[k]->cset, dict[0]->cset);
+ dict[k]->utf8 = dict[0]->utf8;
+ }
+
+ if (k == 0 || nextlevel) {
+ while (fgets (buf, sizeof(buf), f) != NULL) {
+ if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
+ nextlevel = 1;
+ break;
+ } else if (buf[0] != '%') hnj_hyphen_load_line(buf, dict[k], hashtab);
+ }
+ } else if (k == 1) {
+ /* default first level: hyphen and ASCII apostrophe */
+ if (!dict[0]->utf8) hnj_hyphen_load_line("NOHYPHEN '\n", dict[k], hashtab);
+ else hnj_hyphen_load_line("NOHYPHEN ',\xe2\x80\x93,\xe2\x80\x99\n", dict[k], hashtab);
+ strcpy(buf, "1-1\n"); // buf rewritten by hnj_hyphen_load here
+ hnj_hyphen_load_line(buf, dict[k], hashtab); /* remove hyphen */
+ hnj_hyphen_load_line("1'1\n", dict[k], hashtab); /* ASCII apostrophe */
+ if (dict[0]->utf8) {
+ hnj_hyphen_load_line("1\xe2\x80\x93" "1\n", dict[k], hashtab); /* endash */
+ hnj_hyphen_load_line("1\xe2\x80\x99" "1\n", dict[k], hashtab); /* apostrophe */
}
+ }
/* Could do unioning of matches here (instead of the preprocessor script).
If we did, the pseudocode would look something like this:
@@ -476,7 +493,20 @@
state_num = 0;
}
fclose(f);
- if (k == 2) dict[0]->nextlevel = dict[1];
+ if (nextlevel) dict[0]->nextlevel = dict[1];
+ else {
+ dict[1] -> nextlevel = dict[0];
+ dict[1]->lhmin = dict[0]->lhmin;
+ dict[1]->rhmin = dict[0]->rhmin;
+ dict[1]->clhmin = (dict[0]->clhmin) ? dict[0]->clhmin : ((dict[0]->lhmin) ? dict[0]->lhmin : 3);
+ dict[1]->crhmin = (dict[0]->crhmin) ? dict[0]->crhmin : ((dict[0]->rhmin) ? dict[0]->rhmin : 3);
+#ifdef VERBOSE
+ HashTab *r = global[0];
+ global[0] = global[1];
+ global[1] = r;
+#endif
+ return dict[1];
+ }
return dict[0];
}
@@ -527,8 +557,13 @@
j = 0;
prep_word[j++] = '.';
- for (i = 0; i < word_size; i++)
+ for (i = 0; i < word_size; i++) {
+ if (word[i] <= '9' && word[i] >= '0') {
+ prep_word[j++] = '.';
+ } else {
prep_word[j++] = word[i];
+ }
+ }
prep_word[j++] = '.';
prep_word[j] = '\0';
@@ -557,7 +592,7 @@
#ifdef VERBOSE
char *state_str;
- state_str = get_state_str (state);
+ state_str = get_state_str (state, 0);
for (k = 0; k < i - strlen (state_str); k++)
putchar (' ');
@@ -670,6 +705,9 @@
i += hnj_ligature(word[2]);
}
+ // ignore numbers
+ for (j = 0; word[j] <= '9' && word[j] >= '0'; j++) i--;
+
for (j = 0; i < lhmin && word[j] != '\0'; i++) do {
// check length of the non-standard part
if (*rep && *pos && *cut && (*rep)[j]) {
@@ -696,9 +734,13 @@
int hnj_hyphen_rhmin(int utf8, const char *word, int word_size, char * hyphens,
char *** rep, int ** pos, int ** cut, int rhmin)
{
- int i;
- int j = word_size - 2;
- for (i = 1; i < rhmin && j > 0; j--) {
+ int i = 1;
+ int j;
+
+ // ignore numbers
+ for (j = word_size - 1; j > 0 && word[j] <= '9' && word[j] >= '0'; j--) i--;
+
+ for (j = word_size - 2; i < rhmin && j > 0; j--) {
// check length of the non-standard part
if (*rep && *pos && *cut && (*rep)[j]) {
char * rh = strchr((*rep)[j], '=');
@@ -756,8 +798,15 @@
j = 0;
prep_word[j++] = '.';
- for (i = 0; i < word_size; i++)
+ for (i = 0; i < word_size; i++) {
+ if (word[i] <= '9' && word[i] >= '0') {
+ prep_word[j++] = '.';
+ } else {
prep_word[j++] = word[i];
+ }
+ }
+
+
prep_word[j++] = '.';
prep_word[j] = '\0';
@@ -786,7 +835,7 @@
#ifdef VERBOSE
char *state_str;
- state_str = get_state_str (state);
+ state_str = get_state_str (state, 1);
for (k = 0; k < i - strlen (state_str); k++)
putchar (' ');
@@ -1033,6 +1082,9 @@
}
}
hyphens[j + 1] = '\0';
+#ifdef VERBOSE
+ printf ("nums: %s\n", hyphens);
+#endif
return 0;
}
@@ -1074,8 +1126,8 @@
for (nhi = 0; nhi <= dict->nohyphenl; nhi++) {
char * nhy = (char *) strstr(word, nh);
while (nhy) {
- hyphens[nhy - word + strlen(nh) - 1] = 0;
- if (nhy - word - 1 >= 0) hyphens[nhy - word - 1] = 0;
+ hyphens[nhy - word + strlen(nh) - 1] = '0';
+ if (nhy - word - 1 >= 0) hyphens[nhy - word - 1] = '0';
nhy = (char *) strstr(nhy + 1, nh);
}
nh = nh + strlen(nh) + 1;
@@ -1084,6 +1136,9 @@
if (hyphword) hnj_hyphen_hyphword(word, word_size, hyphens, hyphword, rep, pos, cut);
if (dict->utf8) return hnj_hyphen_norm(word, word_size, hyphens, rep, pos, cut);
+#ifdef VERBOSE
+ printf ("nums: %s\n", hyphens);
+#endif
return 0;
}
@@ -1093,8 +1148,10 @@
char *hyphword, char *** rep, int ** pos, int ** cut,
int lhmin, int rhmin, int clhmin, int crhmin)
{
- lhmin = (lhmin > 0 ? lhmin : dict->lhmin);
- rhmin = (rhmin > 0 ? rhmin : dict->rhmin);
+ lhmin = (lhmin > dict->lhmin) ? lhmin : dict->lhmin;
+ rhmin = (rhmin > dict->rhmin) ? rhmin : dict->rhmin;
+ clhmin = (clhmin > dict->clhmin) ? clhmin : dict->clhmin;
+ crhmin = (crhmin > dict->crhmin) ? crhmin : dict->crhmin;
hnj_hyphen_hyph_(dict, word, word_size, hyphens, rep, pos, cut,
clhmin, crhmin, 1, 1);
hnj_hyphen_lhmin(dict->utf8, word, word_size, hyphens,
--- misc/hyphen-2.7.1/hyphen.c 2010-12-01 01:47:22.000000000 +0100
+++ misc/build/hyphen-2.7.1/hyphen.c 2011-01-18 16:26:50.953125000 +0100
@@ -291,13 +291,10 @@
/* read in character set info */
if (k == 0) {
for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
- if (fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) {
- for (i=0;i<MAX_NAME;i++)
- if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
- dict[k]->cset[i] = 0;
- } else {
- dict[k]->cset[0] = 0;
- }
+ fgets(dict[k]->cset, sizeof(dict[k]->cset),f);
+ for (i=0;i<MAX_NAME;i++)
+ if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
+ dict[k]->cset[i] = 0;
dict[k]->utf8 = (strcmp(dict[k]->cset, "UTF-8") == 0);
} else {
strcpy(dict[k]->cset, dict[0]->cset);
--- misc/hyphen-2.7.1/config.sub
+++ misc/build/hyphen-2.7.1/config.sub
--- misc/hyphen-2.8.3/config.sub
+++ misc/build/hyphen-2.8.3/config.sub
@@ -120,7 +120,7 @@
# Here we must recognize all the valid KERNEL-OS combinations.
maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
......
--- misc/hyphen-2.7.1/Makefile.am 2010-07-19 11:23:17.000000000 +0200
+++ misc/build/hyphen-2.7.1/Makefile.am 2010-12-02 10:15:44.390625000 +0100
@@ -25,14 +25,13 @@
--- misc/hyphen-2.8.3/Makefile.am 2010-07-19 11:23:17.000000000 +0200
+++ misc/build/hyphen-2.8.3/Makefile.am 2010-12-02 10:15:44.390625000 +0100
@@ -25,13 +25,13 @@
hyphen.us3:
cp -f $(srcdir)/hyphen.tex hyphen.us
......@@ -11,16 +11,15 @@
cat $(srcdir)/ligpatch.txt >>hyphen.us3
-hyph_en_US.dic: hyphen.us3
- perl $(srcdir)/substrings.pl hyphen.us3 hyphen.us4 UTF-8 2 3 >/dev/null
- cat hyphen.us4 | $(SED) -f $(srcdir)/ooopatch.sed >hyph_en_US.dic
- perl $(srcdir)/substrings.pl hyphen.us3 hyph_en_US.dic UTF-8 2 3 >/dev/null
+hyph_en_US.dic:
+ @echo "hyph_en_US.txt distributed with Hyphen library"
clean-local:
rm -rf hyphen.us* hyph_en_US.dic
--- misc/hyphen-2.7.1/Makefile.in 2010-12-01 02:31:29.000000000 +0100
+++ misc/build/hyphen-2.7.1/Makefile.in 2010-12-02 10:17:16.546875000 +0100
@@ -940,14 +940,13 @@
--- misc/hyphen-2.8.3/Makefile.in 2010-12-01 02:31:29.000000000 +0100
+++ misc/build/hyphen-2.8.3/Makefile.in 2010-12-02 10:17:16.546875000 +0100
@@ -940,13 +940,13 @@
hyphen.us3:
cp -f $(srcdir)/hyphen.tex hyphen.us
......@@ -31,15 +30,14 @@
cat $(srcdir)/ligpatch.txt >>hyphen.us3
-hyph_en_US.dic: hyphen.us3
- perl $(srcdir)/substrings.pl hyphen.us3 hyphen.us4 UTF-8 2 3 >/dev/null
- cat hyphen.us4 | $(SED) -f $(srcdir)/ooopatch.sed >hyph_en_US.dic
- perl $(srcdir)/substrings.pl hyphen.us3 hyph_en_US.dic UTF-8 2 3 >/dev/null
+hyph_en_US.dic:
+ @echo "hyph_en_US.txt distributed with Hyphen library"
clean-local:
rm -rf hyphen.us* hyph_en_US.dic
--- misc/hyphen-2.7.1/makefile.mk 2010-12-02 10:35:40.265625000 +0100
+++ misc/build/hyphen-2.7.1/makefile.mk 2010-12-02 10:25:45.750000000 +0100
--- misc/hyphen-2.8.3/makefile.mk 2010-12-02 10:35:40.265625000 +0100
+++ misc/build/hyphen-2.8.3/makefile.mk 2010-12-02 10:25:45.750000000 +0100
@@ -1 +1,28 @@
-dummy
+PRJ = ..$/..$/..$/..
......
......@@ -36,15 +36,13 @@ TARGET=hyphen
# --- Files --------------------------------------------------------
TARFILE_NAME=hyphen-2.7.1
TARFILE_MD5=48a9f787f43a09c0a9b7b00cd1fddbbf
TARFILE_NAME=hyphen-2.8.3
TARFILE_MD5=86261f06c097d3e425a2f6d0b0635380
ADDITIONAL_FILES += makefile.mk
PATCH_FILES= \
hyphen-2.7.1.patch \
hyphen-2.7.1-read-charset.patch \
hyphen-2.7.1-2.8.3.patch \
hyphen-build.patch \
hyphen-android.patch
.IF "$(GUI)"=="UNX"
......
..\%__SRC%\slb\hyphen.lib %_DEST%\lib\hyphen.lib
..\%__SRC%\inc\hyphen.h %_DEST%\inc\hyphen.h
..\%__SRC%\misc\build\hyphen-2.7.1\.libs\libhyphen.a %_DEST%\lib\libhyphen.a
..\%__SRC%\misc\build\hyphen-2.7.1\hyph_en_US.dic %_DEST%\bin\hyph_en_US.dic
..\%__SRC%\misc\build\hyphen-2.8.3\.libs\libhyphen.a %_DEST%\lib\libhyphen.a
..\%__SRC%\misc\build\hyphen-2.8.3\hyph_en_US.dic %_DEST%\bin\hyph_en_US.dic
http://dev-www.libreoffice.org/src
48a9f787f43a09c0a9b7b00cd1fddbbf-hyphen-2.7.1.tar.gz
86261f06c097d3e425a2f6d0b0635380-hyphen-2.8.3.tar.gz
63ddc5116488985e820075e65fbe6aa4-openssl-0.9.8o.tar.gz
db5ffcd50064421176e8afb7b85fd1a7-pixman-0.24.0.tar.bz2
0b49ede71c21c0599b0cc19b353a6cb3-README_apache-commons.txt
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment