Commit e18c4c5f authored by Eike Rathke

Support occurrence number as REGEX() 4th argument, tdf#113977 follow-up

REGEX( Text ; Expression [ ; [ Replacement ] [ ; Flags|Occurrence ] ] )

REGEX(Text;Expression) extracts the first match of Expression in
Text. If there is no match, #N/A is returned.

REGEX(Text;Expression;Replacement) replaces the first match of
Expression in Text with Replacement, not extracted. If there is no
match, Text is returned unmodified.

REGEX(Text;Expression;Replacement;"g") replaces all matches of
Expression in Text with Replacement, not extracted. If there is no
match, Text is returned unmodified.

REGEX(Text;Expression;;Occurrence) extracts the n-th match of
Expression in Text. If there is no n-th match, #N/A is returned.
If Occurrence is 0, Text is returned unmodified.

REGEX(Text;Expression;Replacement;Occurrence) replaces the n-th
match of Expression in Text with Replacement, not extracted. If
there is no n-th match, Text is returned unmodified. If Occurrence
is 0, Text is returned unmodified.

Change-Id: Iadb705e4c76415c57bf510489410ec029344cca7
Reviewed-on: https://gerrit.libreoffice.org/64199
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
(cherry picked from commit e3af4947)
Reviewed-on: https://gerrit.libreoffice.org/64219
parent ceef38f0
......@@ -3826,8 +3826,8 @@ const char* SC_OPCODE_REGEX_ARY[] =
NC_("SC_OPCODE_REGEX", "The regular expression pattern to be matched."),
NC_("SC_OPCODE_REGEX", "Replacement"),
NC_("SC_OPCODE_REGEX", "The replacement text and references to capture groups."),
NC_("SC_OPCODE_REGEX", "Flags"),
NC_("SC_OPCODE_REGEX", "Text specifying option flags, \"g\" for global replacement.")
NC_("SC_OPCODE_REGEX", "Flags or Occurrence"),
NC_("SC_OPCODE_REGEX", "Text specifying option flags, \"g\" for global replacement. Or number of occurrence to match or replace.")
};
// -=*# Resource for function BASE #*=-
......
......@@ -9226,17 +9226,48 @@ void ScInterpreter::ScSearch()
void ScInterpreter::ScRegex()
{
sal_uInt8 nParamCount = GetByte();
if (MustHaveParamCount( nParamCount, 2, 4))
{
const sal_uInt8 nParamCount = GetByte();
if (!MustHaveParamCount( nParamCount, 2, 4))
return;
// Flags are supported only for replacement, search match flags can be
// individually and much more flexible set in the regular expression
// pattern using (?ismwx-ismwx)
bool bGlobalReplacement = false;
sal_Int32 nOccurrence = 1; // default first occurrence, if any
if (nParamCount == 4)
{
// Argument can be either string or double.
double fOccurrence;
svl::SharedString aFlagsString;
bool bDouble;
if (!IsMissing())
bDouble = GetDoubleOrString( fOccurrence, aFlagsString);
else
{
// For an omitted argument keep the default.
PopError();
bDouble = true;
fOccurrence = nOccurrence;
}
if (nGlobalError != FormulaError::NONE)
{
PushError( nGlobalError);
return;
}
if (bDouble)
{
if (!CheckStringPositionArgument( fOccurrence))
{
PushError( FormulaError::IllegalArgument);
return;
}
nOccurrence = static_cast<sal_Int32>(fOccurrence);
}
else
{
const OUString aFlags( aFlagsString.getString());
// Empty flags string is valid => no flag set.
OUString aFlags( GetString().getString());
if (aFlags.getLength() > 1)
{
// Only one flag supported.
......@@ -9255,14 +9286,17 @@ void ScInterpreter::ScRegex()
}
}
}
}
bool bReplacement = false;
OUString aReplacement;
if (nParamCount >= 3)
{
// A missing argument is not an empty string to replace the match.
if (IsMissing())
Pop();
// nOccurrence==0 forces no replacement, so simply discard the
// argument.
if (IsMissing() || nOccurrence == 0)
PopError();
else
{
aReplacement = GetString().getString();
......@@ -9281,6 +9315,13 @@ void ScInterpreter::ScRegex()
return;
}
// 0-th match or replacement is none, return original string early.
if (nOccurrence == 0)
{
PushString( aText);
return;
}
const icu::UnicodeString aIcuExpression(
reinterpret_cast<const UChar*>(aExpression.getStr()), aExpression.getLength());
UErrorCode status = U_ZERO_ERROR;
......@@ -9293,15 +9334,25 @@ void ScInterpreter::ScRegex()
}
// Guard against pathological patterns, limit steps of engine, see
// https://ssl.icu-project.org/apiref/icu4c/classicu_1_1RegexMatcher.html#a6ebcfcab4fe6a38678c0291643a03a00
aRegexMatcher.setTimeLimit ( 23*1000, status);
aRegexMatcher.setTimeLimit( 23*1000, status);
const icu::UnicodeString aIcuText( reinterpret_cast<const UChar*>(aText.getStr()), aText.getLength());
aRegexMatcher.reset( aIcuText);
if (!bReplacement)
{
// Find first occurrence.
if (!aRegexMatcher.find())
// Find n-th occurrence.
sal_Int32 nCount = 0;
while (aRegexMatcher.find( status) && U_SUCCESS(status) && ++nCount < nOccurrence)
;
if (U_FAILURE(status))
{
// Some error.
PushIllegalArgument();
return;
}
// n-th match found?
if (nCount != nOccurrence)
{
PushError( FormulaError::NotAvailable);
return;
......@@ -9319,14 +9370,34 @@ void ScInterpreter::ScRegex()
return;
}
// Replace first occurrence of match with replacement.
const icu::UnicodeString aIcuReplacement(
reinterpret_cast<const UChar*>(aReplacement.getStr()), aReplacement.getLength());
icu::UnicodeString aReplaced;
if (bGlobalReplacement)
// Replace all occurrences of match with replacement.
aReplaced = aRegexMatcher.replaceAll( aIcuReplacement, status);
else
else if (nOccurrence == 1)
// Replace first occurrence of match with replacement.
aReplaced = aRegexMatcher.replaceFirst( aIcuReplacement, status);
else
{
// Replace n-th occurrence of match with replacement.
sal_Int32 nCount = 0;
while (aRegexMatcher.find( status) && U_SUCCESS(status))
{
// XXX NOTE: After several RegexMatcher::find() the
// RegexMatcher::appendReplacement() still starts at the
// beginning (or after the last appendReplacement() position
// which is none here) and copies the original text up to the
// current found match and then replaces the found match.
if (++nCount == nOccurrence)
{
aRegexMatcher.appendReplacement( aReplaced, aIcuReplacement, status);
break;
}
}
aRegexMatcher.appendTail( aReplaced);
}
if (U_FAILURE(status))
{
// Some error, e.g. extraneous $1 without group.
......@@ -9335,7 +9406,6 @@ void ScInterpreter::ScRegex()
}
OUString aResult( reinterpret_cast<const sal_Unicode*>(aReplaced.getBuffer()), aReplaced.length());
PushString( aResult);
}
}
void ScInterpreter::ScMid()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment