wtratree unbuilt

bebbe972 · Caolán McNamara · f2306b94 · f2306b94 · f2306b94
Kaydet (Commit) bebbe972 authored Eyl 12, 2011 tarafından Caolán McNamara
Show whitespace changes
Inline Side-by-side

Showing with 0 additions and 581 deletions

wtratree.hxx l10ntools/inc/wtratree.hxx +0 -161

wtratree.cxx l10ntools/source/wtratree.cxx +0 -420

No files found.
--- a/l10ntools/inc/wtratree.hxx
+++ b/l10ntools/inc/wtratree.hxx
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/*************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2000, 2010 Oracle and/or its affiliates.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org.  If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-#ifndef TX3_WTRATREE_HXX
-#define TX3_WTRATREE_HXX
-// USED
-    // Base Classes
-    // Components
-    // Parameters
-#include <tools/string.hxx>
-const INT16     C_NR_OF_WTT_RESULTS = 5;
-const INT16     C_NR_OF_POSSIBLE_CHARS = 256;
-typedef unsigned char u_char;
-typedef const char * constr;
-class WTT_Node;
-/** @task
-    This class implements the functionality that class WordTransformer
-    offers.
-    WordTransformer depends on this class, but NOT the other way round!
-**/
-class WordTransTree
-{
-  public:
-    enum E_Result
-    {
-        OK = 0,
-        HOTKEY_LOST,
-        OUTPUT_OVERFLOW
-    };
-    //  LIFECYCLE
-                        WordTransTree(
-                            CharSet             i_nWorkingCharSet = RTL_TEXTENCODING_MS_1252);
-    void                SetCharSet(
-                            CharSet             i_nWorkingCharSet);
-                        ~WordTransTree();
-    void                AddWordPair(
-                            const ByteString &      i_sOldString,
-                            const ByteString &      i_sReplaceString );
-    // OPERATIONS
-    void                InitTransformation(
-                            const char *        i_sInput,               /// [!=0], a range of i_nInputLength must be valid memory for read.
-                            UINT32              i_nInputLength,
-                            UINT32              i_nOutputMaxLength = STRING_MAXLEN - 12 );
-    E_Result            TransformNextToken();
-    // INQUIRY
-    sal_Bool                TextEndReached() const;
-    const char *        Output() const;
-        // The following functions are valid between two calls of
-        //   TransformNextToken():
-    E_Result            CurResult() const;
-    ByteString          CurReplacedString() const;
-    ByteString          CurReplacingString() const;
-    char                CurHotkey() const;
-  private:
-    // SERVICE FUNCTIONS
-    UINT8               CalculateBranch(
-                            u_char              i_cInputChar ) const;
-    void                Handle_Hotkey();
-    void                Handle_TokenToKeep();
-    void                Handle_TokenToTransform();
-    // DATA
-        // Fixed data
-    const u_char *      sInput;
-    UINT32              nInputLength;
-    const u_char *      pInputEnd;
-    u_char *            sOutput;                // DYN
-    UINT32              nOutputMaxLength;
-    WTT_Node *          dpParsingTreeTop;       // DYN
-    WTT_Node *          pUnknownAlpha;
-    u_char              cChar2Branch[C_NR_OF_POSSIBLE_CHARS];
-    u_char              c_AE, c_OE, c_UE, c_ae, c_oe, c_ue;
-        // Working data
-    const u_char *      pInputCurTokenStart;
-    const u_char *      pInputPosition;
-    u_char *            pOutputPosition;
-    WTT_Node *          pCurParseNode;
-        // Data which are valid only after a completed call to TransformNextToken()
-    E_Result            eCurResult;
-    u_char              cCurHotkey;             // Letter wich is used as hotkey
-    u_char              cCurHotkeySign;         // Letter which is used to assign hotkey ('~'or '&') .
-};
-inline sal_Bool
-WordTransTree::TextEndReached() const
-    { return pInputPosition == pInputEnd; }
-inline const char *
-WordTransTree::Output() const
-    { return TextEndReached() ? (constr) sOutput : ""; }
-inline WordTransTree::E_Result
-WordTransTree::CurResult() const
-    { return eCurResult; }
-inline ByteString
-WordTransTree::CurReplacedString() const
-    { return ByteString((constr) pInputCurTokenStart,pInputPosition-pInputCurTokenStart); }
-inline char
-WordTransTree::CurHotkey() const
-    { return cCurHotkey; }
-inline UINT8
-WordTransTree::CalculateBranch(u_char i_cInputChar) const
-    { return cChar2Branch[i_cInputChar]; }
-#endif
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
--- a/l10ntools/source/wtratree.cxx
+++ b/l10ntools/source/wtratree.cxx
-/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
-/*************************************************************************
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * Copyright 2000, 2010 Oracle and/or its affiliates.
- *
- * OpenOffice.org - a multi-platform office productivity suite
- *
- * This file is part of OpenOffice.org.
- *
- * OpenOffice.org is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 3
- * only, as published by the Free Software Foundation.
- *
- * OpenOffice.org is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License version 3 for more details
- * (a copy is included in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU Lesser General Public License
- * version 3 along with OpenOffice.org.  If not, see
- * <http://www.openoffice.org/license.html>
- * for a copy of the LGPLv3 License.
- *
- ************************************************************************/
-// MARKER(update_precomp.py): autogen include statement, do not remove
-#include "precompiled_l10ntools.hxx"
-#include "wtratree.hxx"
-/** @ATTENTION
-    For reasons of speed, class WordTransTree works with two simple
-    char arrays, sOutput and sInput, instead of secure containers or
-    streams. So be extremely careful, when changing this code!!!
-**/
-// NOT FULLY DECLARED SERVICES
-#include <string.h>
-#include <stdio.h>
-#include <ctype.h>
-#include "wtranode.hxx"
-const BRANCH_T  BR_END          = 0;
-const BRANCH_T  BR_NONALPHA     = 1;
-const BRANCH_T  BR_HOTKEY       = 2;
-const BRANCH_T  BR_BACKSLASH    = 3;
-const BRANCH_T  BR_ALPHABASE    = 4;    /// @ATTENTION  All branches not valid for words must be smaller than this value!
-const BRANCH_T  BR_AE           = 30;
-const BRANCH_T  BR_OE           = 31;
-const BRANCH_T  BR_UE           = 32;
-const BRANCH_T  BR_SZ           = 33;
-const BRANCH_T  BR_MAX          = 34;   /// @ATTENTION  Must be updated always!
-const BRANCH_T  BR_START        = 0;
-WordTransTree::WordTransTree(CharSet  i_nWorkingCharSet)
-    :   sInput(0),
-        nInputLength(0),
-        pInputEnd(0),
-        sOutput(0),
-        nOutputMaxLength(0),
-        dpParsingTreeTop(0),
-        pUnknownAlpha(0),
-        // cChar2Branch
-        c_AE(u_char('\xC4')), c_OE(u_char('\xD6')), c_UE(u_char('\xDC')),
-        c_ae(u_char('\xE4')), c_oe(u_char('\xF6')), c_ue(u_char('\xFC')),
-        pInputCurTokenStart(0),
-        pInputPosition(0),
-        pOutputPosition(0),
-        pCurParseNode(0),
-        eCurResult(OK),
-        cCurHotkey(0),
-        cCurHotkeySign(u_char('~'))
-{
-    // Initialize parsing tree:
-    pUnknownAlpha = new WTT_Node(BR_ALPHABASE,0,0); // This will be deleted as part of the parsing tree.
-    for ( UINT8 i = BR_ALPHABASE; i < C_NR_OF_BRANCHES; i++)
-    {
-        pUnknownAlpha->SetBranch(i,pUnknownAlpha);
-    }  // end for
-    dpParsingTreeTop = new WTT_Node(BR_START,0,pUnknownAlpha);
-    WTT_Node * dpNonAlpha = new WTT_Node(BR_NONALPHA,0,0);
-    dpNonAlpha->SetBranch(BR_NONALPHA,dpNonAlpha);
-    dpParsingTreeTop->SetBranch(BR_NONALPHA,dpNonAlpha);
-    WTT_Node * dpBackslash = new WTT_Node(BR_BACKSLASH,dpNonAlpha,dpNonAlpha);
-    dpBackslash->SetBranch(BR_END,0);
-    dpParsingTreeTop->SetBranch(BR_BACKSLASH,dpBackslash);
-    dpNonAlpha->SetBranch(BR_BACKSLASH,dpBackslash);
-    // Initialize character set:
-    SetCharSet(i_nWorkingCharSet);
-    if (C_BR_ALPHABASE != BR_ALPHABASE || C_NR_OF_BRANCHES != BR_MAX)
-    {
-        fprintf(stderr, "Assertion failed: file %s line %d.", __FILE__,  __LINE__);
-        exit(1);
-    }
-}
-void
-WordTransTree::SetCharSet(CharSet i_nWorkingCharSet)
-{
-    ByteString sConvert("\xC4\xD6\xDC\xE4\xF6\xFC\xDF");
-    const u_char * pConvert = (const u_char * ) ( sConvert.Convert(RTL_TEXTENCODING_MS_1252, i_nWorkingCharSet).GetBuffer() );
-    INT16 i = 0;
-    for ( ; i < C_NR_OF_POSSIBLE_CHARS; ++i )
-    {
-        cChar2Branch[i] = BR_NONALPHA;
-    }  // end for
-    for ( i = 'a'; i <= 'z'; ++i )
-    {
-        cChar2Branch[i] = BR_ALPHABASE + i - 'a';
-    }  // end for
-    for ( i = 'A'; i <= 'Z'; ++i )
-    {
-        cChar2Branch[i] = BR_ALPHABASE + i - 'A';
-    }  // end for
-    cChar2Branch[pConvert[0]] = BR_AE;
-    cChar2Branch[pConvert[1]] = BR_OE;
-    cChar2Branch[pConvert[2]] = BR_UE;
-    cChar2Branch[pConvert[3]] = BR_AE;
-    cChar2Branch[pConvert[4]] = BR_OE;
-    cChar2Branch[pConvert[5]] = BR_UE;
-    cChar2Branch[pConvert[6]] = BR_SZ;
-    cChar2Branch[u_char('~')] = BR_HOTKEY;
-    cChar2Branch[u_char('&')] = BR_HOTKEY;
-    c_AE = pConvert[0];
-    c_OE = pConvert[1];
-    c_UE = pConvert[2];
-    c_ae = pConvert[3];
-    c_oe = pConvert[4];
-    c_ue = pConvert[5];
-}
-WordTransTree::~WordTransTree()
-{
-    delete dpParsingTreeTop;
-    if (sOutput != 0)
-        delete [] sOutput;
-}
-void
-WordTransTree::AddWordPair( const ByteString &      i_sOldString,
-                            const ByteString &      i_sReplaceString )
-{
-    if (i_sOldString.Len() == 0)
-        return;
-    pCurParseNode = dpParsingTreeTop;
-    WTT_Node * pBranch = 0;
-    char cBranch = 0;
-    for ( constr pOld = i_sOldString.GetBuffer();
-          *pOld != 0;
-          pOld++ )
-    {
-        cBranch = CalculateBranch(*pOld);
-        pBranch = pCurParseNode->GetNextNode(cBranch);
-        if (pBranch == 0 || pBranch == pUnknownAlpha)
-        {
-            pBranch = new WTT_Node(cBranch,0,pUnknownAlpha);
-            pCurParseNode->SetBranch(cBranch,pBranch);
-        }
-        pCurParseNode = pBranch;
-    }   // end for
-    pCurParseNode->SetAsTokenToReplace(i_sReplaceString);
-}
-void
-WordTransTree::InitTransformation( const char * i_sInput,
-                                   UINT32       i_nInputLength,
-                                   UINT32       i_nOutputMaxLength )
-{
-    sInput = (const u_char *)i_sInput;
-    nInputLength = i_nInputLength;
-    pInputEnd = &sInput[i_nInputLength];
-    pInputCurTokenStart = sInput;
-    pInputPosition = sInput;
-    if (nOutputMaxLength < i_nOutputMaxLength)
-    {
-        if (sOutput != 0)
-            delete [] sOutput;
-        sOutput = new unsigned char[i_nOutputMaxLength];
-        nOutputMaxLength = i_nOutputMaxLength;
-    }
-    pOutputPosition = sOutput;
-}
-/** pInputCurTokenStart and pCurParseNode are updated just when
-    starting this function. After its end they must not be changed
-    till this function is called again.
-    Outside this function pInputPosition and pOutputPosition are both
-    on the first not yet transformed char in their respective array.
-**/
-WordTransTree::E_Result
-WordTransTree::TransformNextToken()
-{
-    pInputCurTokenStart = pInputPosition;
-    pCurParseNode = dpParsingTreeTop;
-    cCurHotkey = 0;
-    eCurResult = OK;
-    WTT_Node * pBranch = 0;
-    UINT8 cBranch = 0;
-    for ( pCurParseNode = dpParsingTreeTop;
-          pInputPosition != pInputEnd;
-          ++pInputPosition )
-    {
-        cBranch = CalculateBranch(*pInputPosition);
-        pBranch = pCurParseNode->GetNextNode( cBranch );
-        if (pBranch != 0)
-        {
-            pCurParseNode = pBranch;
-        }
-        else
-        {
-            if (cBranch == BR_HOTKEY)   // current letter is '~' or '&'.
-            {
-                // Logic of the following. There are 9 possible cases -
-                // A = alphabetic letter, NA = non alphabetic, TB = token begin,
-                // Eot = end of text:
-                //   1. A~A          set hotkey to following letter, continue
-                //   2. A~NA         token end
-                //   3. A~Eot        token end
-                //   4. NA~A         token end
-                //   5. NA~NA        continue
-                //   6. NA~Eot       continue
-                //   7. TB~A         set hotkey to following letter, continue
-                //   8. TB~NA        continue
-                //   9. TB~Eot       continue
-                // bNext and bPrev are true if the following / preceding character is an alphabetic letter:
-                sal_Bool bNext =  pInputPosition + 1 != pInputEnd
-                                    ?   CalculateBranch(pInputPosition[1]) >= BR_ALPHABASE
-                                    :   sal_False;
-                sal_Bool bPrev = pCurParseNode->Value() >= BR_ALPHABASE;
-                if ( bNext && (bPrev || pCurParseNode == dpParsingTreeTop) )
-                {   // case 1. and 7.
-                    Handle_Hotkey();
-                    continue;
-                }
-                else if  (!bPrev && !bNext)
-                {   // case 5.,6.,8.,9.
-                    continue;
-                }
-                // Case 2.,3.,4. :
-                //  so this should be handled as an end of a token.
-            }
-            if (pCurParseNode->TokenType() == WTT_Node::token_to_keep)
-            {
-                Handle_TokenToKeep();
-                return eCurResult;
-            }
-            else
-            {
-                Handle_TokenToTransform();
-                return eCurResult;
-            }   // endif (pCurParseNode->TokenType() == WTT_Node::token_to_keep)
-        }   // endif (pBranch == 0) else
-    }   // end for
-    // If here, the text end is reached
-    if (pCurParseNode->TokenType() == WTT_Node::token_to_keep)
-    {
-        Handle_TokenToKeep();
-        return eCurResult;
-    }
-    else
-    {
-        Handle_TokenToTransform();
-        return eCurResult;
-    }
-}
-ByteString
-WordTransTree::CurReplacingString() const
-{
-    return pCurParseNode->ReplaceString();
-}
-void
-WordTransTree::Handle_Hotkey()
-{
-    if (cCurHotkey == 0)    // Avoid to replace the first found hotkey by
-                            //   a later one - though this shouldn't happen anyway.
-    {
-        cCurHotkey = (pInputPosition+1) != pInputEnd ? pInputPosition[1] : 0;
-        cCurHotkeySign = *pInputPosition;
-    }
-}
-void
-WordTransTree::Handle_TokenToKeep()
-{
-    UINT32 nTokenLength = pInputPosition-pInputCurTokenStart;
-    memcpy(pOutputPosition,pInputCurTokenStart,nTokenLength);
-    pOutputPosition += nTokenLength;
-    *pOutputPosition = '\0';
-}
-void
-WordTransTree::Handle_TokenToTransform()
-{
-    sal_Bool bHaveHotkey = CalculateBranch(cCurHotkey) >= BR_ALPHABASE;
-    const ByteString & rReplace = pCurParseNode->ReplaceString();
-    // Find position of hotkey in replace-string:
-    sal_uInt16 nHotkeyPos = bHaveHotkey
-                            ?   rReplace.Search(char(cCurHotkey))
-                            :   STRING_NOTFOUND;
-    if (nHotkeyPos == STRING_NOTFOUND && bHaveHotkey)
-    {
-        if (cCurHotkey < 128)
-        {
-            if (islower(cCurHotkey))
-                nHotkeyPos = rReplace.Search(toupper(char(cCurHotkey)));
-            else
-                nHotkeyPos = rReplace.Search(tolower(char(cCurHotkey)));
-        }
-        else    // cCurHotkey >= 128
-        {
-            if (cCurHotkey == c_ae)
-                nHotkeyPos = rReplace.Search(char(c_AE));
-            else if (cCurHotkey == c_oe)
-                nHotkeyPos = rReplace.Search(char(c_OE));
-            else if (cCurHotkey == c_ue)
-                nHotkeyPos = rReplace.Search(char(c_UE));
-            else if (cCurHotkey == c_AE)
-                nHotkeyPos = rReplace.Search(char(c_ae));
-            else if (cCurHotkey == c_OE)
-                nHotkeyPos = rReplace.Search(char(c_oe));
-            else if (cCurHotkey == c_UE)
-                nHotkeyPos = rReplace.Search(char(c_ue));
-        }   // endif (cCurHotkey < 128) else
-        if (nHotkeyPos == STRING_NOTFOUND)
-        {
-            eCurResult = HOTKEY_LOST;
-            bHaveHotkey = sal_False;
-        }
-    }   // endif (nHotkeyPos == STRING_NOT_FOUND && bHaveHotkey)
-    UINT32 nOutputTokenLength = rReplace.Len() + (bHaveHotkey ? 1 : 0);
-    if (bHaveHotkey)
-    {
-        memcpy( pOutputPosition,
-                pCurParseNode->ReplaceString().GetBuffer(),
-                nHotkeyPos );
-        *(pOutputPosition + nHotkeyPos) = cCurHotkeySign;
-        memcpy( pOutputPosition + nHotkeyPos + 1,
-                pCurParseNode->ReplaceString().GetBuffer() + nHotkeyPos,
-                nOutputTokenLength - nHotkeyPos - 1);
-    }
-    else
-    {
-        memcpy( pOutputPosition,
-                pCurParseNode->ReplaceString().GetBuffer(),
-                nOutputTokenLength );
-    }
-    // Convert first letter into upper if necessary:
-    u_char cInStart = CalculateBranch(*pInputCurTokenStart) == BR_HOTKEY
-                            ?   pInputCurTokenStart[1]
-                            :   pInputCurTokenStart[0] ;
-    u_char * pOutStart = nHotkeyPos == 0
-                            ?   pOutputPosition + 1
-                            :   pOutputPosition ;
-    if (isupper(cInStart) || cInStart > 127)
-    {   // Possibly cInStart is upper character:
-        if (isupper(cInStart) || cInStart == c_AE || cInStart == c_OE || cInStart == c_UE)
-        {   // Surely cInStart is upper character:
-            u_char cOutStart = *pOutStart;
-            if (cOutStart < 128)
-                *pOutStart = toupper(cOutStart);
-            else if (cOutStart == c_ae)
-                *pOutStart = c_AE;
-            else if (cOutStart == c_oe)
-                *pOutStart = c_OE;
-            else if (cOutStart == c_ue)
-                *pOutStart = c_UE;
-        }
-    }   // endif (isupper(cInStart) || cInStart > 127)
-    pOutputPosition += nOutputTokenLength;
-    *pOutputPosition = '\0';
-}
-/* vim:set shiftwidth=4 softtabstop=4 expandtab: */