Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
core
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
LibreOffice
core
Commits
bebbe972
Kaydet (Commit)
bebbe972
authored
Eyl 12, 2011
tarafından
Caolán McNamara
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
wtratree unbuilt
üst
f2306b94
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
0 additions
and
581 deletions
+0
-581
wtratree.hxx
l10ntools/inc/wtratree.hxx
+0
-161
wtratree.cxx
l10ntools/source/wtratree.cxx
+0
-420
No files found.
l10ntools/inc/wtratree.hxx
deleted
100644 → 0
Dosyayı görüntüle @
f2306b94
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*************************************************************************
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* Copyright 2000, 2010 Oracle and/or its affiliates.
*
* OpenOffice.org - a multi-platform office productivity suite
*
* This file is part of OpenOffice.org.
*
* OpenOffice.org is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 3
* only, as published by the Free Software Foundation.
*
* OpenOffice.org is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License version 3 for more details
* (a copy is included in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU Lesser General Public License
* version 3 along with OpenOffice.org. If not, see
* <http://www.openoffice.org/license.html>
* for a copy of the LGPLv3 License.
*
************************************************************************/
#ifndef TX3_WTRATREE_HXX
#define TX3_WTRATREE_HXX
// USED
// Base Classes
// Components
// Parameters
#include <tools/string.hxx>
// NOTE(review): not referenced anywhere in the code visible here; the meaning
// of "WTT results" should be confirmed against the users of this header.
const INT16 C_NR_OF_WTT_RESULTS = 5;

// Size of the lookup table WordTransTree::cChar2Branch, which is indexed
// by a u_char value.
const INT16 C_NR_OF_POSSIBLE_CHARS = 256;

// Unsigned character type; used for the input/output buffers and as index
// into the char -> branch table.
typedef unsigned char u_char;

// Pointer to constant chars (read-only C string).
typedef const char * constr;

// Node type of the parsing tree. Only used through pointers in this header.
class WTT_Node;
/** @task
    This class implements the functionality that class WordTransformer
    offers.
    WordTransformer is dependent on this class, but NOT the other way!

    The class owns two heap resources through raw pointers (the output
    buffer sOutput and the parsing tree dpParsingTreeTop, both released in
    the destructor). It is therefore not copyable.
**/
class WordTransTree
{
  public:
    /// Result of the transformation of one token.
    enum E_Result
    {
        OK = 0,
        HOTKEY_LOST,        ///< The hotkey letter of the token does not occur in the replacement.
        OUTPUT_OVERFLOW     ///< Reserved for: the output buffer is too small for the result.
    };

    //  LIFECYCLE
                        WordTransTree(
                            CharSet             i_nWorkingCharSet = RTL_TEXTENCODING_MS_1252 );
    void                SetCharSet(
                            CharSet             i_nWorkingCharSet );
                        ~WordTransTree();

    /// Registers i_sOldString as a word to be replaced by i_sReplaceString.
    void                AddWordPair(
                            const ByteString &  i_sOldString,
                            const ByteString &  i_sReplaceString );

    //  OPERATIONS
    /// Sets the text to work on and (re)positions input and output at their start.
    void                InitTransformation(
                            const char *        i_sInput,               /// [!=0], a range of i_nInputLength must be valid memory for read.
                            UINT32              i_nInputLength,
                            UINT32              i_nOutputMaxLength = STRING_MAXLEN - 12 );
    /// Processes the next token of the input and returns its result code.
    E_Result            TransformNextToken();

    //  INQUIRY
    /// True, when the input position has arrived at the end of the input.
    sal_Bool            TextEndReached() const;
    /// The output text once the text end is reached, an empty string before.
    const char *        Output() const;

    // The following functions are valid between two calls of
    //   TransformNextToken():
    E_Result            CurResult() const;
    ByteString          CurReplacedString() const;
    ByteString          CurReplacingString() const;
    char                CurHotkey() const;

  private:
    // Not copyable: sOutput and dpParsingTreeTop are deleted in the
    //   destructor, so a memberwise copy would delete them twice.
    //   Declared, but intentionally not defined.
                        WordTransTree(
                            const WordTransTree & );
    WordTransTree &     operator=(
                            const WordTransTree & );

    // SERVICE FUNCTIONS
    /// Looks up the branch number of a character in cChar2Branch.
    UINT8               CalculateBranch(
                            u_char              i_cInputChar ) const;

    void                Handle_Hotkey();
    void                Handle_TokenToKeep();
    void                Handle_TokenToTransform();

    // DATA
        // Fixed data
    const u_char *      sInput;             // Start of the input text (not owned).
    UINT32              nInputLength;
    const u_char *      pInputEnd;          // One past the last input char.

    u_char *            sOutput;            // DYN
    UINT32              nOutputMaxLength;   // Allocated size of sOutput.

    WTT_Node *          dpParsingTreeTop;   // DYN
    WTT_Node *          pUnknownAlpha;      // Deleted as part of the parsing tree.
    u_char              cChar2Branch[C_NR_OF_POSSIBLE_CHARS];
                                            // Maps each char to its branch number.
    u_char              c_AE, c_OE, c_UE, c_ae, c_oe, c_ue;
                                            // German umlauts in the working char set.

        // Working data
    const u_char *      pInputCurTokenStart;
    const u_char *      pInputPosition;
    u_char *            pOutputPosition;
    WTT_Node *          pCurParseNode;

        // Data which are valid only after a completed call to TransformNextToken()
    E_Result            eCurResult;
    u_char              cCurHotkey;         // Letter which is used as hotkey
    u_char              cCurHotkeySign;     // Letter which is used to assign hotkey ('~' or '&').
};
/// Returns true as soon as the read position has arrived at the end of
/// the input range, i.e. no input char is left to be processed.
inline sal_Bool
WordTransTree::TextEndReached() const
{
    return pInputEnd == pInputPosition;
}
inline
const
char
*
WordTransTree
::
Output
()
const
{
return
TextEndReached
()
?
(
constr
)
sOutput
:
""
;
}
/// Result code stored for the current token (eCurResult).
inline WordTransTree::E_Result
WordTransTree::CurResult() const
{
    return eCurResult;
}
/// Returns a copy of the input range of the current token, i.e. the chars
/// from pInputCurTokenStart up to (excluding) pInputPosition.
inline ByteString
WordTransTree::CurReplacedString() const
{
    return ByteString( reinterpret_cast< constr >(pInputCurTokenStart),
                       pInputPosition - pInputCurTokenStart );
}
/// Hotkey letter stored for the current token (cCurHotkey); 0 if none was found.
inline char
WordTransTree::CurHotkey() const
{
    return static_cast< char >(cCurHotkey);
}
/// Maps an input char to its branch number by a lookup in cChar2Branch.
/// The table has one entry for each possible u_char value, so every
/// argument is a valid index.
inline UINT8
WordTransTree::CalculateBranch( u_char i_cInputChar ) const
{
    return cChar2Branch[i_cInputChar];
}
#endif
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
l10ntools/source/wtratree.cxx
deleted
100644 → 0
Dosyayı görüntüle @
f2306b94
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*************************************************************************
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* Copyright 2000, 2010 Oracle and/or its affiliates.
*
* OpenOffice.org - a multi-platform office productivity suite
*
* This file is part of OpenOffice.org.
*
* OpenOffice.org is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 3
* only, as published by the Free Software Foundation.
*
* OpenOffice.org is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License version 3 for more details
* (a copy is included in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU Lesser General Public License
* version 3 along with OpenOffice.org. If not, see
* <http://www.openoffice.org/license.html>
* for a copy of the LGPLv3 License.
*
************************************************************************/
// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_l10ntools.hxx"
#include "wtratree.hxx"
/** @ATTENTION
For reasons of speed, class WordTransTree works with two plain
char arrays, sOutput and sInput, instead of safe containers or
streams. So be extremely careful when changing this code!
**/
// NOT FULLY DECLARED SERVICES
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include "wtranode.hxx"
// Branch numbers of the parsing tree. SetCharSet() assigns one of them to
// every possible char value (table cChar2Branch).

// Branch 0. No char is mapped to it by SetCharSet(); the constructor
// explicitly clears this branch on the backslash node.
const BRANCH_T BR_END = 0;
// Default branch: SetCharSet() gives it to every char that gets no other branch.
const BRANCH_T BR_NONALPHA = 1;
// Branch of the hotkey signs '~' and '&'.
const BRANCH_T BR_HOTKEY = 2;
// Branch of the backslash node that the constructor puts into the tree.
// NOTE(review): SetCharSet() does not map any char to this branch in the
// code visible here - confirm whether '\\' was meant to be mapped.
const BRANCH_T BR_BACKSLASH = 3;
// Branch of 'a'/'A'; the other letters follow up to 'z'/'Z' (BR_ALPHABASE + 25).
const BRANCH_T BR_ALPHABASE = 4;
                                /// @ATTENTION All branches not valid for words must be smaller than this value!
// Branches of the German special letters (upper and lower case umlauts
// share one branch each).
const BRANCH_T BR_AE = 30;
const BRANCH_T BR_OE = 31;
const BRANCH_T BR_UE = 32;
const BRANCH_T BR_SZ = 33;
// Number of branches; the constructor checks it against C_NR_OF_BRANCHES.
const BRANCH_T BR_MAX = 34;     /// @ATTENTION Must be updated always!

// Value given to the top node of the parsing tree.
const BRANCH_T BR_START = 0;
/** Creates a tree which does not contain any word pairs yet, and sets up
    the character tables for i_nWorkingCharSet.

    The initial tree consists of the top node, a node for non-alphabetic
    chars, a node for the backslash and pUnknownAlpha, the node used for
    alphabetic chars which are not part of a registered word.

    If the branch constants of this file disagree with those coming with
    WTT_Node (C_BR_ALPHABASE, C_NR_OF_BRANCHES), a message is written to
    stderr and the program is terminated by exit(1).
**/
WordTransTree::WordTransTree( CharSet i_nWorkingCharSet )
    :   sInput(0),
        nInputLength(0),
        pInputEnd(0),
        sOutput(0),
        nOutputMaxLength(0),
        dpParsingTreeTop(0),
        pUnknownAlpha(0),
        // cChar2Branch is filled by SetCharSet(), see below
        c_AE(u_char('\xC4')),
        c_OE(u_char('\xD6')),
        c_UE(u_char('\xDC')),
        c_ae(u_char('\xE4')),
        c_oe(u_char('\xF6')),
        c_ue(u_char('\xFC')),
        pInputCurTokenStart(0),
        pInputPosition(0),
        pOutputPosition(0),
        pCurParseNode(0),
        eCurResult(OK),
        cCurHotkey(0),
        cCurHotkeySign(u_char('~'))
{
    // Node for unknown alphabetic chars: all its branches from BR_ALPHABASE
    // upwards are set to the node itself.
    // It will be deleted as part of the parsing tree.
    pUnknownAlpha = new WTT_Node(BR_ALPHABASE, 0, 0);
    for ( UINT8 nBranch = BR_ALPHABASE; nBranch < C_NR_OF_BRANCHES; ++nBranch )
        pUnknownAlpha->SetBranch(nBranch, pUnknownAlpha);

    // Top of the tree:
    dpParsingTreeTop = new WTT_Node(BR_START, 0, pUnknownAlpha);

    // Non-alphabetic chars: the node refers to itself for further
    // non-alphabetic chars.
    WTT_Node * pNonAlphaNode = new WTT_Node(BR_NONALPHA, 0, 0);
    pNonAlphaNode->SetBranch(BR_NONALPHA, pNonAlphaNode);
    dpParsingTreeTop->SetBranch(BR_NONALPHA, pNonAlphaNode);

    // Backslash: reachable from the top node and from the non-alpha node.
    WTT_Node * pBackslashNode = new WTT_Node(BR_BACKSLASH, pNonAlphaNode, pNonAlphaNode);
    pBackslashNode->SetBranch(BR_END, 0);
    dpParsingTreeTop->SetBranch(BR_BACKSLASH, pBackslashNode);
    pNonAlphaNode->SetBranch(BR_BACKSLASH, pBackslashNode);

    // Character tables:
    SetCharSet(i_nWorkingCharSet);

    // The constants of this file have to agree with those of WTT_Node.
    const bool bConstantsAgree = C_BR_ALPHABASE == BR_ALPHABASE
                                 && C_NR_OF_BRANCHES == BR_MAX;
    if ( !bConstantsAgree )
    {
        fprintf( stderr, "Assertion failed: file %s line %d.", __FILE__, __LINE__ );
        exit(1);
    }
}
/** Fills the table cChar2Branch and the umlaut members c_AE .. c_ue for
    the given working character set.

    Every char gets the branch BR_NONALPHA, except:
      - 'a'..'z' and 'A'..'Z', which get BR_ALPHABASE + their position in
        the alphabet (case is ignored),
      - the German special letters, which get BR_AE, BR_OE, BR_UE, BR_SZ,
      - the hotkey signs '~' and '&', which get BR_HOTKEY.

    NOTE(review): no char is mapped to BR_BACKSLASH here - confirm whether
    that is intended.
**/
void
WordTransTree::SetCharSet( CharSet i_nWorkingCharSet )
{
    // The special letters (AE, OE, UE, ae, oe, ue, sz) are given in MS-1252
    // and converted into the working character set.
    ByteString sConvert("\xC4\xD6\xDC\xE4\xF6\xFC\xDF");
    const u_char * pConvert = (const u_char *) ( sConvert.Convert(RTL_TEXTENCODING_MS_1252, i_nWorkingCharSet).GetBuffer() );

    // Default for all chars:
    memset( cChar2Branch, BR_NONALPHA, sizeof(cChar2Branch) );

    // Letters, independent of case:
    for ( INT16 nLower = 'a'; nLower <= 'z'; ++nLower )
        cChar2Branch[nLower] = BR_ALPHABASE + nLower - 'a';
    for ( INT16 nUpper = 'A'; nUpper <= 'Z'; ++nUpper )
        cChar2Branch[nUpper] = BR_ALPHABASE + nUpper - 'A';

    // Special letters, in the order of sConvert:
    const BRANCH_T aSpecialBranches[7]
        = { BR_AE, BR_OE, BR_UE, BR_AE, BR_OE, BR_UE, BR_SZ };
    for ( int nSpecial = 0; nSpecial < 7; ++nSpecial )
        cChar2Branch[pConvert[nSpecial]] = aSpecialBranches[nSpecial];

    // Hotkey signs:
    cChar2Branch[u_char('~')] = BR_HOTKEY;
    cChar2Branch[u_char('&')] = BR_HOTKEY;

    // Remember the umlauts of the working character set:
    c_AE = pConvert[0];
    c_OE = pConvert[1];
    c_UE = pConvert[2];
    c_ae = pConvert[3];
    c_oe = pConvert[4];
    c_ue = pConvert[5];
}
/// Releases the parsing tree and the output buffer.
WordTransTree::~WordTransTree()
{
    delete dpParsingTreeTop;
    // delete[] does nothing for a null pointer, so no check is needed.
    delete [] sOutput;
}
void
WordTransTree
::
AddWordPair
(
const
ByteString
&
i_sOldString
,
const
ByteString
&
i_sReplaceString
)
{
if
(
i_sOldString
.
Len
()
==
0
)
return
;
pCurParseNode
=
dpParsingTreeTop
;
WTT_Node
*
pBranch
=
0
;
char
cBranch
=
0
;
for
(
constr
pOld
=
i_sOldString
.
GetBuffer
();
*
pOld
!=
0
;
pOld
++
)
{
cBranch
=
CalculateBranch
(
*
pOld
);
pBranch
=
pCurParseNode
->
GetNextNode
(
cBranch
);
if
(
pBranch
==
0
||
pBranch
==
pUnknownAlpha
)
{
pBranch
=
new
WTT_Node
(
cBranch
,
0
,
pUnknownAlpha
);
pCurParseNode
->
SetBranch
(
cBranch
,
pBranch
);
}
pCurParseNode
=
pBranch
;
}
// end for
pCurParseNode
->
SetAsTokenToReplace
(
i_sReplaceString
);
}
void
WordTransTree
::
InitTransformation
(
const
char
*
i_sInput
,
UINT32
i_nInputLength
,
UINT32
i_nOutputMaxLength
)
{
sInput
=
(
const
u_char
*
)
i_sInput
;
nInputLength
=
i_nInputLength
;
pInputEnd
=
&
sInput
[
i_nInputLength
];
pInputCurTokenStart
=
sInput
;
pInputPosition
=
sInput
;
if
(
nOutputMaxLength
<
i_nOutputMaxLength
)
{
if
(
sOutput
!=
0
)
delete
[]
sOutput
;
sOutput
=
new
unsigned
char
[
i_nOutputMaxLength
];
nOutputMaxLength
=
i_nOutputMaxLength
;
}
pOutputPosition
=
sOutput
;
}
/** Reads the input up to the end of the next token and hands the token
    over to Handle_TokenToKeep() or Handle_TokenToTransform(), depending
    on the type of the node reached in the parsing tree.

    pInputCurTokenStart and pCurParseNode are updated only at the start of
    this function. After it has returned, they must not be changed until
    this function is called again.
    Outside this function, pInputPosition and pOutputPosition both point to
    the first not yet transformed char in their respective array.

    @return The result code of the token, which is also stored in eCurResult.
**/
WordTransTree::E_Result
WordTransTree::TransformNextToken()
{
    pInputCurTokenStart = pInputPosition;
    pCurParseNode = dpParsingTreeTop;
    cCurHotkey = 0;
    eCurResult = OK;

    while ( pInputPosition != pInputEnd )
    {
        const UINT8 nBranch = CalculateBranch(*pInputPosition);
        WTT_Node * const pNext = pCurParseNode->GetNextNode(nBranch);

        if ( pNext != 0 )
        {
            // The token goes on.
            pCurParseNode = pNext;
            ++pInputPosition;
            continue;
        }

        if ( nBranch == BR_HOTKEY )     // current letter is '~' or '&'.
        {
            // There are 9 possible cases -
            // A = alphabetic letter, NA = non alphabetic, TB = token begin,
            // Eot = end of text:
            //   1. A~A          set hotkey to following letter, continue
            //   2. A~NA         token end
            //   3. A~Eot        token end
            //   4. NA~A         token end
            //   5. NA~NA        continue
            //   6. NA~Eot       continue
            //   7. TB~A         set hotkey to following letter, continue
            //   8. TB~NA        continue
            //   9. TB~Eot       continue

            // Both flags are true, if there is an alphabetic letter:
            const sal_Bool bNextIsAlpha
                = pInputPosition + 1 != pInputEnd
                        ?   CalculateBranch(pInputPosition[1]) >= BR_ALPHABASE
                        :   sal_False;
            const sal_Bool bPrevIsAlpha = pCurParseNode->Value() >= BR_ALPHABASE;

            if ( bNextIsAlpha && ( bPrevIsAlpha || pCurParseNode == dpParsingTreeTop ) )
            {
                // case 1. and 7.
                Handle_Hotkey();
                ++pInputPosition;
                continue;
            }
            if ( !bPrevIsAlpha && !bNextIsAlpha )
            {
                // case 5., 6., 8., 9.
                ++pInputPosition;
                continue;
            }
            // case 2., 3., 4.: handled as the end of a token.
        }

        // Token end; the current char already belongs to the next token.
        break;
    }

    // Here either a token end was found or the text end is reached.
    if ( pCurParseNode->TokenType() == WTT_Node::token_to_keep )
        Handle_TokenToKeep();
    else
        Handle_TokenToTransform();

    return eCurResult;
}
/// Returns the replace string of the node the parser currently stands on
/// (pCurParseNode).
ByteString
WordTransTree::CurReplacingString() const
{
    return (*pCurParseNode).ReplaceString();
}
void
WordTransTree
::
Handle_Hotkey
()
{
if
(
cCurHotkey
==
0
)
// Avoid to replace the first found hotkey by
// a later one - though this shouldn't happen anyway.
{
cCurHotkey
=
(
pInputPosition
+
1
)
!=
pInputEnd
?
pInputPosition
[
1
]
:
0
;
cCurHotkeySign
=
*
pInputPosition
;
}
}
void
WordTransTree
::
Handle_TokenToKeep
()
{
UINT32
nTokenLength
=
pInputPosition
-
pInputCurTokenStart
;
memcpy
(
pOutputPosition
,
pInputCurTokenStart
,
nTokenLength
);
pOutputPosition
+=
nTokenLength
;
*
pOutputPosition
=
'\0'
;
}
void
WordTransTree
::
Handle_TokenToTransform
()
{
sal_Bool
bHaveHotkey
=
CalculateBranch
(
cCurHotkey
)
>=
BR_ALPHABASE
;
const
ByteString
&
rReplace
=
pCurParseNode
->
ReplaceString
();
// Find position of hotkey in replace-string:
sal_uInt16
nHotkeyPos
=
bHaveHotkey
?
rReplace
.
Search
(
char
(
cCurHotkey
))
:
STRING_NOTFOUND
;
if
(
nHotkeyPos
==
STRING_NOTFOUND
&&
bHaveHotkey
)
{
if
(
cCurHotkey
<
128
)
{
if
(
islower
(
cCurHotkey
))
nHotkeyPos
=
rReplace
.
Search
(
toupper
(
char
(
cCurHotkey
)));
else
nHotkeyPos
=
rReplace
.
Search
(
tolower
(
char
(
cCurHotkey
)));
}
else
// cCurHotkey >= 128
{
if
(
cCurHotkey
==
c_ae
)
nHotkeyPos
=
rReplace
.
Search
(
char
(
c_AE
));
else
if
(
cCurHotkey
==
c_oe
)
nHotkeyPos
=
rReplace
.
Search
(
char
(
c_OE
));
else
if
(
cCurHotkey
==
c_ue
)
nHotkeyPos
=
rReplace
.
Search
(
char
(
c_UE
));
else
if
(
cCurHotkey
==
c_AE
)
nHotkeyPos
=
rReplace
.
Search
(
char
(
c_ae
));
else
if
(
cCurHotkey
==
c_OE
)
nHotkeyPos
=
rReplace
.
Search
(
char
(
c_oe
));
else
if
(
cCurHotkey
==
c_UE
)
nHotkeyPos
=
rReplace
.
Search
(
char
(
c_ue
));
}
// endif (cCurHotkey < 128) else
if
(
nHotkeyPos
==
STRING_NOTFOUND
)
{
eCurResult
=
HOTKEY_LOST
;
bHaveHotkey
=
sal_False
;
}
}
// endif (nHotkeyPos == STRING_NOT_FOUND && bHaveHotkey)
UINT32
nOutputTokenLength
=
rReplace
.
Len
()
+
(
bHaveHotkey
?
1
:
0
);
if
(
bHaveHotkey
)
{
memcpy
(
pOutputPosition
,
pCurParseNode
->
ReplaceString
().
GetBuffer
(),
nHotkeyPos
);
*
(
pOutputPosition
+
nHotkeyPos
)
=
cCurHotkeySign
;
memcpy
(
pOutputPosition
+
nHotkeyPos
+
1
,
pCurParseNode
->
ReplaceString
().
GetBuffer
()
+
nHotkeyPos
,
nOutputTokenLength
-
nHotkeyPos
-
1
);
}
else
{
memcpy
(
pOutputPosition
,
pCurParseNode
->
ReplaceString
().
GetBuffer
(),
nOutputTokenLength
);
}
// Convert first letter into upper if necessary:
u_char
cInStart
=
CalculateBranch
(
*
pInputCurTokenStart
)
==
BR_HOTKEY
?
pInputCurTokenStart
[
1
]
:
pInputCurTokenStart
[
0
]
;
u_char
*
pOutStart
=
nHotkeyPos
==
0
?
pOutputPosition
+
1
:
pOutputPosition
;
if
(
isupper
(
cInStart
)
||
cInStart
>
127
)
{
// Possibly cInStart is upper character:
if
(
isupper
(
cInStart
)
||
cInStart
==
c_AE
||
cInStart
==
c_OE
||
cInStart
==
c_UE
)
{
// Surely cInStart is upper character:
u_char
cOutStart
=
*
pOutStart
;
if
(
cOutStart
<
128
)
*
pOutStart
=
toupper
(
cOutStart
);
else
if
(
cOutStart
==
c_ae
)
*
pOutStart
=
c_AE
;
else
if
(
cOutStart
==
c_oe
)
*
pOutStart
=
c_OE
;
else
if
(
cOutStart
==
c_ue
)
*
pOutStart
=
c_UE
;
}
}
// endif (isupper(cInStart) || cInStart > 127)
pOutputPosition
+=
nOutputTokenLength
;
*
pOutputPosition
=
'\0'
;
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment