Kaydet (Commit) 983a62b5 authored tarafından Michael Meeks's avatar Michael Meeks

re-base on ALv2 code. Includes:

    Patches from Herbert Duerr:
    #i118925# fix old word-boundary emulation
    http://svn.apache.org/viewvc?view=revision&revision=1301596
    #i119031# preserving ASCII in ignore-* transliterations allows full SIMPLE_TRANS_MASK
    http://svn.apache.org/viewvc?view=revision&revision=1301104
    #i118887# ignore zero-length matches in regexp search
    http://svn.apache.org/viewvc?view=revision&revision=1242235
    fix transliteration flags for search pattern preprocessing
    http://svn.apache.org/viewvc?view=revision&revision=1236199
    fix minor typo in comment
    http://svn.apache.org/viewvc?view=revision&revision=1234799
    improve standard conformance regarding unicode UAX#29 word boundaries
    http://svn.apache.org/viewvc?view=revision&revision=1234786
    emulate word boundary matching of old regex engine
    The new ICU regex engine has much improved unicode capabilities.
    The old regex engine had the extensions \< and \> for matching word boundaries.
    For the convenience of a smooth upgrade experience these artifacts now get mapped
    to \b which is supported by almost all regex engines.
    http://svn.apache.org/viewvc?view=revision&revision=1234777
    #i118723# fill in regexp matcher group details
    http://svn.apache.org/viewvc?view=revision&revision=1228084
    use ICU regexp instead of LGPL i18nregexp
    http://svn.apache.org/viewvc?view=revision&revision=1177610
    #i119031# default to preserve ASCII in ignore-width transliteration
    http://svn.apache.org/viewvc?view=revision&revision=1301101
üst bf83aaf6
...@@ -42,7 +42,8 @@ $(eval $(call gb_Library_use_libraries,i18nsearch,\ ...@@ -42,7 +42,8 @@ $(eval $(call gb_Library_use_libraries,i18nsearch,\
comphelper \ comphelper \
cppu \ cppu \
cppuhelper \ cppuhelper \
i18nregexp \ icuuc \
icui18n \
sal \ sal \
$(gb_UWINAPI) \ $(gb_UWINAPI) \
)) ))
......
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/************************************************************************* /*
* * This file is part of the LibreOffice project.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* Copyright 2000, 2010 Oracle and/or its affiliates.
*
* OpenOffice.org - a multi-platform office productivity suite
*
* This file is part of OpenOffice.org.
*
* OpenOffice.org is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 3
* only, as published by the Free Software Foundation.
* *
* OpenOffice.org is distributed in the hope that it will be useful, * This Source Code Form is subject to the terms of the Mozilla Public
* but WITHOUT ANY WARRANTY; without even the implied warranty of * License, v. 2.0. If a copy of the MPL was not distributed with this
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * file, You can obtain one at http://mozilla.org/MPL/2.0/.
* GNU Lesser General Public License version 3 for more details
* (a copy is included in the LICENSE file that accompanied this code).
* *
* You should have received a copy of the GNU Lesser General Public License * This file incorporates work covered by the following license notice:
* version 3 along with OpenOffice.org. If not, see
* <http://www.openoffice.org/license.html>
* for a copy of the LGPLv3 License.
* *
************************************************************************/ * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#include "textsearch.hxx" #include "textsearch.hxx"
#include "levdis.hxx" #include "levdis.hxx"
#include <regexp/reclass.hxx>
#include <com/sun/star/lang/Locale.hpp> #include <com/sun/star/lang/Locale.hpp>
#include <com/sun/star/lang/XMultiServiceFactory.hpp> #include <com/sun/star/lang/XMultiServiceFactory.hpp>
#include <comphelper/processfactory.hxx> #include <comphelper/processfactory.hxx>
...@@ -59,7 +48,7 @@ using namespace ::com::sun::star::util; ...@@ -59,7 +48,7 @@ using namespace ::com::sun::star::util;
using namespace ::com::sun::star::uno; using namespace ::com::sun::star::uno;
using namespace ::com::sun::star::lang; using namespace ::com::sun::star::lang;
using namespace ::com::sun::star::i18n; using namespace ::com::sun::star::i18n;
using namespace ::rtl; using namespace ::com::sun::star;
static sal_Int32 COMPLEX_TRANS_MASK_TMP = static sal_Int32 COMPLEX_TRANS_MASK_TMP =
TransliterationModules_ignoreBaFa_ja_JP | TransliterationModules_ignoreBaFa_ja_JP |
...@@ -70,11 +59,8 @@ static sal_Int32 COMPLEX_TRANS_MASK_TMP = ...@@ -70,11 +59,8 @@ static sal_Int32 COMPLEX_TRANS_MASK_TMP =
TransliterationModules_ignoreIandEfollowedByYa_ja_JP | TransliterationModules_ignoreIandEfollowedByYa_ja_JP |
TransliterationModules_ignoreKiKuFollowedBySa_ja_JP | TransliterationModules_ignoreKiKuFollowedBySa_ja_JP |
TransliterationModules_ignoreProlongedSoundMark_ja_JP; TransliterationModules_ignoreProlongedSoundMark_ja_JP;
static const sal_Int32 SIMPLE_TRANS_MASK = 0xffffffff ^ COMPLEX_TRANS_MASK_TMP; static const sal_Int32 COMPLEX_TRANS_MASK = COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_KANA | TransliterationModules_FULLWIDTH_HALFWIDTH;
static const sal_Int32 COMPLEX_TRANS_MASK = static const sal_Int32 SIMPLE_TRANS_MASK = ~COMPLEX_TRANS_MASK;
COMPLEX_TRANS_MASK_TMP |
TransliterationModules_IGNORE_KANA |
TransliterationModules_IGNORE_WIDTH;
// Above 2 transliteration is simple but need to take effect in // Above 2 transliteration is simple but need to take effect in
// complex transliteration // complex transliteration
...@@ -82,7 +68,7 @@ TextSearch::TextSearch(const Reference < XComponentContext > & rxContext) ...@@ -82,7 +68,7 @@ TextSearch::TextSearch(const Reference < XComponentContext > & rxContext)
: m_xContext( rxContext ) : m_xContext( rxContext )
, pJumpTable( 0 ) , pJumpTable( 0 )
, pJumpTable2( 0 ) , pJumpTable2( 0 )
, pRegExp( 0 ) , pRegexMatcher( NULL )
, pWLD( 0 ) , pWLD( 0 )
{ {
SearchOptions aOpt; SearchOptions aOpt;
...@@ -94,7 +80,7 @@ TextSearch::TextSearch(const Reference < XComponentContext > & rxContext) ...@@ -94,7 +80,7 @@ TextSearch::TextSearch(const Reference < XComponentContext > & rxContext)
TextSearch::~TextSearch() TextSearch::~TextSearch()
{ {
delete pRegExp; delete pRegexMatcher;
delete pWLD; delete pWLD;
delete pJumpTable; delete pJumpTable;
delete pJumpTable2; delete pJumpTable2;
...@@ -104,7 +90,7 @@ void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeExcep ...@@ -104,7 +90,7 @@ void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeExcep
{ {
aSrchPara = rOptions; aSrchPara = rOptions;
delete pRegExp, pRegExp = 0; delete pRegexMatcher, pRegexMatcher = NULL;
delete pWLD, pWLD = 0; delete pWLD, pWLD = 0;
delete pJumpTable, pJumpTable = 0; delete pJumpTable, pJumpTable = 0;
delete pJumpTable2, pJumpTable2 = 0; delete pJumpTable2, pJumpTable2 = 0;
...@@ -113,10 +99,7 @@ void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeExcep ...@@ -113,10 +99,7 @@ void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeExcep
if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
{ {
if( !xTranslit.is() ) if( !xTranslit.is() )
{
xTranslit.set( Transliteration::create( m_xContext ) ); xTranslit.set( Transliteration::create( m_xContext ) );
}
// Load transliteration module
xTranslit->loadModule( xTranslit->loadModule(
(TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ), (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ),
aSrchPara.Locale); aSrchPara.Locale);
...@@ -128,9 +111,7 @@ void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeExcep ...@@ -128,9 +111,7 @@ void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeExcep
if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
{ {
if( !xTranslit2.is() ) if( !xTranslit2.is() )
{
xTranslit2.set( Transliteration::create( m_xContext ) ); xTranslit2.set( Transliteration::create( m_xContext ) );
}
// Load transliteration module // Load transliteration module
xTranslit2->loadModule( xTranslit2->loadModule(
(TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ), (TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ),
...@@ -138,19 +119,17 @@ void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeExcep ...@@ -138,19 +119,17 @@ void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeExcep
} }
if ( !xBreak.is() ) if ( !xBreak.is() )
{ xBreak = com::sun::star::i18n::BreakIterator::create( m_xContext );
xBreak = BreakIterator::create(m_xContext);
}
sSrchStr = aSrchPara.searchString; sSrchStr = aSrchPara.searchString;
// use transliteration here, but only if not RegEx, which does it different // use transliteration here
if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit.is() && if ( xTranslit.is() &&
aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ) aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
sSrchStr = xTranslit->transliterateString2String( sSrchStr = xTranslit->transliterateString2String(
aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit2.is() && if ( xTranslit2.is() &&
aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ) aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
sSrchStr2 = xTranslit2->transliterateString2String( sSrchStr2 = xTranslit2->transliterateString2String(
aSrchPara.searchString, 0, aSrchPara.searchString.getLength()); aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
...@@ -162,17 +141,15 @@ void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeExcep ...@@ -162,17 +141,15 @@ void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeExcep
checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr, checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr,
sSrchStr.getLength()-1) == ScriptType::COMPLEX)); sSrchStr.getLength()-1) == ScriptType::COMPLEX));
if ( aSrchPara.algorithmType == SearchAlgorithms_REGEXP ) switch( aSrchPara.algorithmType)
{ {
case SearchAlgorithms_REGEXP:
fnForward = &TextSearch::RESrchFrwrd; fnForward = &TextSearch::RESrchFrwrd;
fnBackward = &TextSearch::RESrchBkwrd; fnBackward = &TextSearch::RESrchBkwrd;
RESrchPrepare( aSrchPara);
break;
pRegExp = new Regexpr( aSrchPara, xTranslit ); case SearchAlgorithms_APPROXIMATE:
}
else
{
if ( aSrchPara.algorithmType == SearchAlgorithms_APPROXIMATE )
{
fnForward = &TextSearch::ApproxSrchFrwrd; fnForward = &TextSearch::ApproxSrchFrwrd;
fnBackward = &TextSearch::ApproxSrchBkwrd; fnBackward = &TextSearch::ApproxSrchBkwrd;
...@@ -181,12 +158,12 @@ void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeExcep ...@@ -181,12 +158,12 @@ void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeExcep
0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) ); 0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) );
nLimit = pWLD->GetLimit(); nLimit = pWLD->GetLimit();
} break;
else
{ default:
fnForward = &TextSearch::NSrchFrwrd; fnForward = &TextSearch::NSrchFrwrd;
fnBackward = &TextSearch::NSrchBkwrd; fnBackward = &TextSearch::NSrchBkwrd;
} break;
} }
} }
...@@ -383,9 +360,7 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st ...@@ -383,9 +360,7 @@ SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 st
return sres; return sres;
} }
//---------------------------------------------------------------------
//--------------- die Wort-Trennner ----------------------------------
bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const
{ {
...@@ -393,9 +368,7 @@ bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const ...@@ -393,9 +368,7 @@ bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const
if( '\x7f' != rStr[nPos]) if( '\x7f' != rStr[nPos])
{ {
if ( !xCharClass.is() ) if ( !xCharClass.is() )
{
xCharClass = CharacterClassification::create( m_xContext ); xCharClass = CharacterClassification::create( m_xContext );
}
sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos, sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos,
aSrchPara.Locale ); aSrchPara.Locale );
if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA | if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA |
...@@ -405,10 +378,8 @@ bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const ...@@ -405,10 +378,8 @@ bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const
return bRet; return bRet;
} }
// --------- helper methods for Boyer-Moore like text searching ----------
// TODO: use ICU's regex UREGEX_LITERAL mode instead when it becomes available
// --------- methods for the kind of boyer-morre search ------------------
void TextSearch::MakeForwardTab() void TextSearch::MakeForwardTab()
{ {
...@@ -690,10 +661,42 @@ SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startP ...@@ -690,10 +661,42 @@ SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startP
return aRet; return aRet;
} }
void TextSearch::RESrchPrepare( const ::com::sun::star::util::SearchOptions& rOptions)
{
// select the transliterated pattern string
const OUString& rPatternStr =
(rOptions.transliterateFlags & SIMPLE_TRANS_MASK) ? sSrchStr
: ((rOptions.transliterateFlags & COMPLEX_TRANS_MASK) ? sSrchStr2 : rOptions.searchString);
sal_uInt32 nIcuSearchFlags = UREGEX_UWORD; // request UAX#29 unicode capability
// map com::sun::star::util::SearchFlags to ICU uregex.h flags
// TODO: REG_EXTENDED, REG_NOT_BEGINOFLINE, REG_NOT_ENDOFLINE
// REG_NEWLINE is neither properly defined nor used anywhere => not implemented
// REG_NOSUB is not used anywhere => not implemented
// NORM_WORD_ONLY is only used for SearchAlgorithm==Absolute
// LEV_RELAXED is only used for SearchAlgorithm==Approximate
// why is even ALL_IGNORE_CASE deprecated in UNO? because of transliteration taking care of it???
if( (rOptions.searchFlag & com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0)
nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE;
UErrorCode nIcuErr = U_ZERO_ERROR;
// assumption: transliteration didn't mangle regexp control chars
IcuUniString aIcuSearchPatStr( (const UChar*)rPatternStr.getStr(), rPatternStr.getLength());
#ifndef DISABLE_WORDBOUND_EMULATION
// for conveniance specific syntax elements of the old regex engine are emulated
// by using regular word boundary matching \b to replace \< and \>
static const IcuUniString aChevronPattern( "\\\\<|\\\\>", -1, IcuUniString::kInvariant);
static const IcuUniString aChevronReplace( "\\\\b", -1, IcuUniString::kInvariant);
static RegexMatcher aChevronMatcher( aChevronPattern, 0, nIcuErr);
aChevronMatcher.reset( aIcuSearchPatStr);
aIcuSearchPatStr = aChevronMatcher.replaceAll( aChevronReplace, nIcuErr);
aChevronMatcher.reset();
#endif
pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr);
if( nIcuErr)
{ delete pRegexMatcher; pRegexMatcher = NULL;}
}
//--------------------------------------------------------------------------- //---------------------------------------------------------------------------
// ------- Methoden fuer die Suche ueber Regular-Expressions --------------
SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr, SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr,
sal_Int32 startPos, sal_Int32 endPos ) sal_Int32 startPos, sal_Int32 endPos )
...@@ -701,121 +704,97 @@ SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr, ...@@ -701,121 +704,97 @@ SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr,
{ {
SearchResult aRet; SearchResult aRet;
aRet.subRegExpressions = 0; aRet.subRegExpressions = 0;
OUString aStr( searchStr ); if( !pRegexMatcher)
return aRet;
bool bSearchInSel = (0 != (( SearchFlags::REG_NOT_BEGINOFLINE |
SearchFlags::REG_NOT_ENDOFLINE ) & aSrchPara.searchFlag ));
pRegExp->set_line(aStr.getStr(), bSearchInSel ? endPos : aStr.getLength());
struct re_registers regs; if( endPos > searchStr.getLength())
endPos = searchStr.getLength();
// Clear structure // use the ICU RegexMatcher to find the matches
memset((void *)&regs, 0, sizeof(struct re_registers)); UErrorCode nIcuErr = U_ZERO_ERROR;
if ( ! pRegExp->re_search(&regs, startPos) ) const IcuUniString aSearchTargetStr( (const UChar*)searchStr.getStr(), endPos);
{ pRegexMatcher->reset( aSearchTargetStr);
if( regs.num_of_match > 0 && // search until there is a valid match
(regs.start[0] != -1 && regs.end[0] != -1) ) for(;;)
{ {
aRet.startOffset.realloc(regs.num_of_match); if( !pRegexMatcher->find( startPos, nIcuErr))
aRet.endOffset.realloc(regs.num_of_match); return aRet;
sal_Int32 i = 0, j = 0; // #i118887# ignore zero-length matches e.g. "a*" in "bc"
while( j < regs.num_of_match ) int nStartOfs = pRegexMatcher->start( nIcuErr);
{ int nEndOfs = pRegexMatcher->end( nIcuErr);
if( regs.start[j] != -1 && regs.end[j] != -1 ) if( nStartOfs < nEndOfs)
{ break;
aRet.startOffset[i] = regs.start[j]; // try at next position if there was a zero-length match
aRet.endOffset[i] = regs.end[j]; if( ++startPos >= endPos)
++i; return aRet;
}
++j;
}
aRet.subRegExpressions = i;
}
if ( regs.num_regs > 0 )
{
if ( regs.start )
free(regs.start);
if ( regs.end )
free(regs.end);
} }
// extract the result of the search
const int nGroupCount = pRegexMatcher->groupCount();
aRet.subRegExpressions = nGroupCount + 1;
aRet.startOffset.realloc( aRet.subRegExpressions);
aRet.endOffset.realloc( aRet.subRegExpressions);
aRet.startOffset[0] = pRegexMatcher->start( nIcuErr);
aRet.endOffset[0] = pRegexMatcher->end( nIcuErr);
for( int i = 1; i <= nGroupCount; ++i) {
aRet.startOffset[i] = pRegexMatcher->start( i, nIcuErr);
aRet.endOffset[i] = pRegexMatcher->end( i, nIcuErr);
} }
return aRet; return aRet;
} }
/*
* Sucht das Muster aSrchPara.sSrchStr rueckwaerts im String rStr
*/
SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr, SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr,
sal_Int32 startPos, sal_Int32 endPos ) sal_Int32 startPos, sal_Int32 endPos )
throw(RuntimeException) throw(RuntimeException)
{ {
// NOTE: for backwards search callers provide startPos/endPos inverted!
SearchResult aRet; SearchResult aRet;
aRet.subRegExpressions = 0; aRet.subRegExpressions = 0;
OUString aStr( searchStr ); if( !pRegexMatcher)
return aRet;
sal_Int32 nOffset = 0;
sal_Int32 nStrEnde = aStr.getLength() == endPos ? 0 : endPos;
bool bSearchInSel = (0 != (( SearchFlags::REG_NOT_BEGINOFLINE |
SearchFlags::REG_NOT_ENDOFLINE ) & aSrchPara.searchFlag ));
if( startPos ) if( startPos > searchStr.getLength())
nOffset = startPos - 1; startPos = searchStr.getLength();
// search only in the subString // use the ICU RegexMatcher to find the matches
if( bSearchInSel && nStrEnde ) // TODO: use ICU's backward searching once it becomes available
{ // as its replacement using forward search is not as good as the real thing
aStr = aStr.copy( nStrEnde, aStr.getLength() - nStrEnde ); UErrorCode nIcuErr = U_ZERO_ERROR;
if( nOffset > nStrEnde ) const IcuUniString aSearchTargetStr( (const UChar*)searchStr.getStr(), startPos);
nOffset = nOffset - nStrEnde; pRegexMatcher->reset( aSearchTargetStr);
else if( !pRegexMatcher->find( endPos, nIcuErr))
nOffset = 0; return aRet;
}
// set the length to negative for reverse search // find the last match
pRegExp->set_line( aStr.getStr(), -(aStr.getLength()) ); int nLastPos = 0;
struct re_registers regs; do {
nLastPos = pRegexMatcher->start( nIcuErr);
} while( pRegexMatcher->find( nLastPos + 1, nIcuErr));
// Clear structure // find last match again to get its details
memset((void *)&regs, 0, sizeof(struct re_registers)); pRegexMatcher->find( nLastPos, nIcuErr);
if ( ! pRegExp->re_search(&regs, nOffset) )
{
if( regs.num_of_match > 0 &&
(regs.start[0] != -1 && regs.end[0] != -1) )
{
nOffset = bSearchInSel ? nStrEnde : 0;
aRet.startOffset.realloc(regs.num_of_match);
aRet.endOffset.realloc(regs.num_of_match);
sal_Int32 i = 0, j = 0; // fill in the details of the last match
while( j < regs.num_of_match ) const int nGroupCount = pRegexMatcher->groupCount();
{ aRet.subRegExpressions = nGroupCount + 1;
if( regs.start[j] != -1 && regs.end[j] != -1 ) aRet.startOffset.realloc( aRet.subRegExpressions);
{ aRet.endOffset.realloc( aRet.subRegExpressions);
aRet.startOffset[i] = regs.end[j] + nOffset; // NOTE: existing users of backward search seem to expect startOfs/endOfs being inverted!
aRet.endOffset[i] = regs.start[j] + nOffset; aRet.startOffset[0] = pRegexMatcher->end( nIcuErr);
++i; aRet.endOffset[0] = pRegexMatcher->start( nIcuErr);
} for( int i = 1; i <= nGroupCount; ++i) {
++j; aRet.startOffset[i] = pRegexMatcher->end( i, nIcuErr);
} aRet.endOffset[i] = pRegexMatcher->start( i, nIcuErr);
aRet.subRegExpressions = i;
}
if ( regs.num_regs > 0 )
{
if ( regs.start )
free(regs.start);
if ( regs.end )
free(regs.end);
}
} }
return aRet; return aRet;
} }
// Phonetische Suche von Worten //---------------------------------------------------------------------------
// search for words phonetically
SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr, SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr,
sal_Int32 startPos, sal_Int32 endPos ) sal_Int32 startPos, sal_Int32 endPos )
throw(RuntimeException) throw(RuntimeException)
...@@ -932,7 +911,7 @@ sal_Bool SAL_CALL ...@@ -932,7 +911,7 @@ sal_Bool SAL_CALL
TextSearch::supportsService(const OUString& rServiceName) TextSearch::supportsService(const OUString& rServiceName)
throw( RuntimeException ) throw( RuntimeException )
{ {
return !rServiceName.compareToAscii( cSearchName ); return rServiceName == cSearchName;
} }
Sequence< OUString > SAL_CALL Sequence< OUString > SAL_CALL
...@@ -950,14 +929,16 @@ SAL_CALL TextSearch_CreateInstance( ...@@ -950,14 +929,16 @@ SAL_CALL TextSearch_CreateInstance(
{ {
return ::com::sun::star::uno::Reference< return ::com::sun::star::uno::Reference<
::com::sun::star::uno::XInterface >( ::com::sun::star::uno::XInterface >(
(::cppu::OWeakObject*) new TextSearch( comphelper::getComponentContext(rxMSF) ) ); (::cppu::OWeakObject*) new TextSearch(
comphelper::getComponentContext( rxMSF ) ) );
} }
extern "C" extern "C"
{ {
SAL_DLLPUBLIC_EXPORT void* SAL_CALL
SAL_DLLPUBLIC_EXPORT void* SAL_CALL i18nsearch_component_getFactory( const sal_Char* sImplementationName, i18nsearch_component_getFactory( const sal_Char* sImplementationName,
void* _pServiceManager, SAL_UNUSED_PARAMETER void* /*_pRegistryKey*/ ) void* _pServiceManager,
SAL_UNUSED_PARAMETER void* )
{ {
void* pRet = NULL; void* pRet = NULL;
...@@ -969,7 +950,7 @@ SAL_DLLPUBLIC_EXPORT void* SAL_CALL i18nsearch_component_getFactory( const sal_C ...@@ -969,7 +950,7 @@ SAL_DLLPUBLIC_EXPORT void* SAL_CALL i18nsearch_component_getFactory( const sal_C
if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) ) if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) )
{ {
::com::sun::star::uno::Sequence< ::rtl::OUString > aServiceNames(1); ::com::sun::star::uno::Sequence< OUString > aServiceNames(1);
aServiceNames[0] = getServiceName_Static(); aServiceNames[0] = getServiceName_Static();
xFactory = ::cppu::createSingleFactory( xFactory = ::cppu::createSingleFactory(
pServiceManager, getImplementationName_Static(), pServiceManager, getImplementationName_Static(),
......
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/************************************************************************* /*
* This file is part of the LibreOffice project.
* *
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
* *
* Copyright 2000, 2010 Oracle and/or its affiliates. * This file incorporates work covered by the following license notice:
* *
* OpenOffice.org - a multi-platform office productivity suite * Licensed to the Apache Software Foundation (ASF) under one or more
* * contributor license agreements. See the NOTICE file distributed
* This file is part of OpenOffice.org. * with this work for additional information regarding copyright
* * ownership. The ASF licenses this file to you under the Apache
* OpenOffice.org is free software: you can redistribute it and/or modify * License, Version 2.0 (the "License"); you may not use this file
* it under the terms of the GNU Lesser General Public License version 3 * except in compliance with the License. You may obtain a copy of
* only, as published by the Free Software Foundation. * the License at http://www.apache.org/licenses/LICENSE-2.0 .
* */
* OpenOffice.org is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License version 3 for more details
* (a copy is included in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU Lesser General Public License
* version 3 along with OpenOffice.org. If not, see
* <http://www.openoffice.org/license.html>
* for a copy of the LGPLv3 License.
*
************************************************************************/
#ifndef INCLUDED_I18NPOOL_TEXTSEARCH_HXX #ifndef INCLUDED_I18NPOOL_TEXTSEARCH_HXX
#define INCLUDED_I18NPOOL_TEXTSEARCH_HXX #define INCLUDED_I18NPOOL_TEXTSEARCH_HXX
#include <cppuhelper/implbase2.hxx>
#include <com/sun/star/util/XTextSearch.hpp> #include <com/sun/star/util/XTextSearch.hpp>
#include <com/sun/star/i18n/XBreakIterator.hpp> #include <com/sun/star/i18n/XBreakIterator.hpp>
#include <cppuhelper/implbase2.hxx> // helper for implementations
#include <com/sun/star/i18n/XExtendedTransliteration.hpp> #include <com/sun/star/i18n/XExtendedTransliteration.hpp>
#include <com/sun/star/i18n/XCharacterClassification.hpp> #include <com/sun/star/i18n/XCharacterClassification.hpp>
#include <com/sun/star/lang/XServiceInfo.hpp> #include <com/sun/star/lang/XServiceInfo.hpp>
...@@ -40,13 +30,13 @@ ...@@ -40,13 +30,13 @@
#include <map> #include <map>
class Regexpr; #include <unicode/regex.h>
using namespace U_ICU_NAMESPACE;
typedef U_ICU_NAMESPACE::UnicodeString IcuUniString;
class WLevDistance; class WLevDistance;
typedef ::std::map< sal_Unicode, sal_Int32 > TextSearchJumpTable; typedef ::std::map< sal_Unicode, sal_Int32 > TextSearchJumpTable;
// ----------------------------------------------------
// class SearchClass
// ----------------------------------------------------
class TextSearch: public cppu::WeakImplHelper2 class TextSearch: public cppu::WeakImplHelper2
< <
::com::sun::star::util::XTextSearch, ::com::sun::star::util::XTextSearch,
...@@ -95,7 +85,7 @@ class TextSearch: public cppu::WeakImplHelper2 ...@@ -95,7 +85,7 @@ class TextSearch: public cppu::WeakImplHelper2
throw(::com::sun::star::uno::RuntimeException); throw(::com::sun::star::uno::RuntimeException);
// Members and methods for the regular expression search // Members and methods for the regular expression search
Regexpr* pRegExp; RegexMatcher* pRegexMatcher;
::com::sun::star::util::SearchResult SAL_CALL ::com::sun::star::util::SearchResult SAL_CALL
RESrchFrwrd( const ::rtl::OUString& searchStr, RESrchFrwrd( const ::rtl::OUString& searchStr,
sal_Int32 startPos, sal_Int32 endPos ) sal_Int32 startPos, sal_Int32 endPos )
...@@ -104,6 +94,7 @@ class TextSearch: public cppu::WeakImplHelper2 ...@@ -104,6 +94,7 @@ class TextSearch: public cppu::WeakImplHelper2
RESrchBkwrd( const ::rtl::OUString& searchStr, RESrchBkwrd( const ::rtl::OUString& searchStr,
sal_Int32 startPos, sal_Int32 endPos ) sal_Int32 startPos, sal_Int32 endPos )
throw(::com::sun::star::uno::RuntimeException); throw(::com::sun::star::uno::RuntimeException);
void RESrchPrepare( const ::com::sun::star::util::SearchOptions&);
// Members and methods for the "Weight Levenshtein-Distance" search // Members and methods for the "Weight Levenshtein-Distance" search
int nLimit; int nLimit;
...@@ -152,7 +143,6 @@ public: ...@@ -152,7 +143,6 @@ public:
throw( ::com::sun::star::uno::RuntimeException ); throw( ::com::sun::star::uno::RuntimeException );
}; };
#endif #endif
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/************************************************************************* /*
* This file is part of the LibreOffice project.
* *
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
* *
* Copyright 2000, 2010 Oracle and/or its affiliates. * This file incorporates work covered by the following license notice:
* *
* OpenOffice.org - a multi-platform office productivity suite * Licensed to the Apache Software Foundation (ASF) under one or more
* * contributor license agreements. See the NOTICE file distributed
* This file is part of OpenOffice.org. * with this work for additional information regarding copyright
* * ownership. The ASF licenses this file to you under the Apache
* OpenOffice.org is free software: you can redistribute it and/or modify * License, Version 2.0 (the "License"); you may not use this file
* it under the terms of the GNU Lesser General Public License version 3 * except in compliance with the License. You may obtain a copy of
* only, as published by the Free Software Foundation. * the License at http://www.apache.org/licenses/LICENSE-2.0 .
* */
* OpenOffice.org is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License version 3 for more details
* (a copy is included in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU Lesser General Public License
* version 3 along with OpenOffice.org. If not, see
* <http://www.openoffice.org/license.html>
* for a copy of the LGPLv3 License.
*
************************************************************************/
// prevent internal compiler error with MSVC6SP3 // prevent internal compiler error with MSVC6SP3
#include <utility> #include <utility>
...@@ -47,17 +37,16 @@ OUString SAL_CALL ...@@ -47,17 +37,16 @@ OUString SAL_CALL
ignoreWidth::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset ) ignoreWidth::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset )
throw(RuntimeException) throw(RuntimeException)
{ {
halfwidthToFullwidth t1; fullwidthToHalfwidth t1;
return t1.transliterate(inStr, startPos, nCount, offset); return t1.transliterate(inStr, startPos, nCount, offset);
} }
Sequence< OUString > SAL_CALL Sequence< OUString > SAL_CALL
ignoreWidth::transliterateRange( const OUString& str1, const OUString& str2 ) ignoreWidth::transliterateRange( const OUString& str1, const OUString& str2 )
throw(RuntimeException) throw(RuntimeException)
{ {
halfwidthToFullwidth t1; fullwidthToHalfwidth t1;
fullwidthToHalfwidth t2; halfwidthToFullwidth t2;
return transliteration_Ignore::transliterateRange(str1, str2, t1, t2); return transliteration_Ignore::transliterateRange(str1, str2, t1, t2);
} }
...@@ -65,7 +54,7 @@ ignoreWidth::transliterateRange( const OUString& str1, const OUString& str2 ) ...@@ -65,7 +54,7 @@ ignoreWidth::transliterateRange( const OUString& str1, const OUString& str2 )
sal_Unicode SAL_CALL sal_Unicode SAL_CALL
ignoreWidth::transliterateChar2Char( sal_Unicode inChar) throw(RuntimeException, MultipleCharsOutputException) ignoreWidth::transliterateChar2Char( sal_Unicode inChar) throw(RuntimeException, MultipleCharsOutputException)
{ {
halfwidthToFullwidth t1; fullwidthToHalfwidth t1;
return t1.transliterateChar2Char(inChar); return t1.transliterateChar2Char(inChar);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment