Commit e3af4947, authored by Eike Rathke

Support occurrence number as REGEX() 4th argument, tdf#113977 follow-up

REGEX( Text ; Expression [ ; [ Replacement ] [ ; Flags|Occurrence ] ] )

REGEX(Text;Expression) extracts the first match of Expression in
Text. If there is no match, #N/A is returned.

REGEX(Text;Expression;Replacement) replaces the first match of
Expression in Text with Replacement, not extracted. If there is no
match, Text is returned unmodified.

REGEX(Text;Expression;Replacement;"g") replaces all matches of
Expression in Text with Replacement, not extracted. If there is no
match, Text is returned unmodified.

REGEX(Text;Expression;;Occurrence) extracts the n-th match of
Expression in Text. If there is no n-th match, #N/A is returned.
If Occurrence is 0, Text is returned unmodified.

REGEX(Text;Expression;Replacement;Occurrence) replaces the n-th
match of Expression in Text with Replacement, not extracted. If
there is no n-th match, Text is returned unmodified. If Occurrence
is 0, Text is returned unmodified.

Change-Id: Iadb705e4c76415c57bf510489410ec029344cca7
Reviewed-on: https://gerrit.libreoffice.org/64199
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
üst 00df4a5a
...@@ -3826,8 +3826,8 @@ const char* SC_OPCODE_REGEX_ARY[] = ...@@ -3826,8 +3826,8 @@ const char* SC_OPCODE_REGEX_ARY[] =
NC_("SC_OPCODE_REGEX", "The regular expression pattern to be matched."), NC_("SC_OPCODE_REGEX", "The regular expression pattern to be matched."),
NC_("SC_OPCODE_REGEX", "Replacement"), NC_("SC_OPCODE_REGEX", "Replacement"),
NC_("SC_OPCODE_REGEX", "The replacement text and references to capture groups."), NC_("SC_OPCODE_REGEX", "The replacement text and references to capture groups."),
NC_("SC_OPCODE_REGEX", "Flags"), NC_("SC_OPCODE_REGEX", "Flags or Occurrence"),
NC_("SC_OPCODE_REGEX", "Text specifying option flags, \"g\" for global replacement.") NC_("SC_OPCODE_REGEX", "Text specifying option flags, \"g\" for global replacement. Or number of occurrence to match or replace.")
}; };
// -=*# Resource for function BASE #*=- // -=*# Resource for function BASE #*=-
......
...@@ -9226,17 +9226,48 @@ void ScInterpreter::ScSearch() ...@@ -9226,17 +9226,48 @@ void ScInterpreter::ScSearch()
void ScInterpreter::ScRegex() void ScInterpreter::ScRegex()
{ {
sal_uInt8 nParamCount = GetByte(); const sal_uInt8 nParamCount = GetByte();
if (MustHaveParamCount( nParamCount, 2, 4)) if (!MustHaveParamCount( nParamCount, 2, 4))
return;
// Flags are supported only for replacement, search match flags can be
// individually and much more flexible set in the regular expression
// pattern using (?ismwx-ismwx)
bool bGlobalReplacement = false;
sal_Int32 nOccurrence = 1; // default first occurrence, if any
if (nParamCount == 4)
{ {
// Flags are supported only for replacement, search match flags can be // Argument can be either string or double.
// individually and much more flexible set in the regular expression double fOccurrence;
// pattern using (?ismwx-ismwx) svl::SharedString aFlagsString;
bool bGlobalReplacement = false; bool bDouble;
if (nParamCount == 4) if (!IsMissing())
bDouble = GetDoubleOrString( fOccurrence, aFlagsString);
else
{
// For an omitted argument keep the default.
PopError();
bDouble = true;
fOccurrence = nOccurrence;
}
if (nGlobalError != FormulaError::NONE)
{
PushError( nGlobalError);
return;
}
if (bDouble)
{
if (!CheckStringPositionArgument( fOccurrence))
{
PushError( FormulaError::IllegalArgument);
return;
}
nOccurrence = static_cast<sal_Int32>(fOccurrence);
}
else
{ {
const OUString aFlags( aFlagsString.getString());
// Empty flags string is valid => no flag set. // Empty flags string is valid => no flag set.
OUString aFlags( GetString().getString());
if (aFlags.getLength() > 1) if (aFlags.getLength() > 1)
{ {
// Only one flag supported. // Only one flag supported.
...@@ -9255,87 +9286,126 @@ void ScInterpreter::ScRegex() ...@@ -9255,87 +9286,126 @@ void ScInterpreter::ScRegex()
} }
} }
} }
}
bool bReplacement = false; bool bReplacement = false;
OUString aReplacement; OUString aReplacement;
if (nParamCount >= 3) if (nParamCount >= 3)
{
// A missing argument is not an empty string to replace the match.
// nOccurrence==0 forces no replacement, so simply discard the
// argument.
if (IsMissing() || nOccurrence == 0)
PopError();
else
{ {
// A missing argument is not an empty string to replace the match. aReplacement = GetString().getString();
if (IsMissing()) bReplacement = true;
Pop();
else
{
aReplacement = GetString().getString();
bReplacement = true;
}
} }
// If bGlobalReplacement==true and bReplacement==false then }
// bGlobalReplacement is silently ignored. // If bGlobalReplacement==true and bReplacement==false then
// bGlobalReplacement is silently ignored.
OUString aExpression = GetString().getString(); OUString aExpression = GetString().getString();
OUString aText = GetString().getString(); OUString aText = GetString().getString();
if (nGlobalError != FormulaError::NONE) if (nGlobalError != FormulaError::NONE)
{ {
PushError( nGlobalError); PushError( nGlobalError);
return; return;
} }
const icu::UnicodeString aIcuExpression( // 0-th match or replacement is none, return original string early.
reinterpret_cast<const UChar*>(aExpression.getStr()), aExpression.getLength()); if (nOccurrence == 0)
UErrorCode status = U_ZERO_ERROR; {
icu::RegexMatcher aRegexMatcher( aIcuExpression, 0, status); PushString( aText);
return;
}
const icu::UnicodeString aIcuExpression(
reinterpret_cast<const UChar*>(aExpression.getStr()), aExpression.getLength());
UErrorCode status = U_ZERO_ERROR;
icu::RegexMatcher aRegexMatcher( aIcuExpression, 0, status);
if (U_FAILURE(status))
{
// Invalid regex.
PushIllegalArgument();
return;
}
// Guard against pathological patterns, limit steps of engine, see
// https://ssl.icu-project.org/apiref/icu4c/classicu_1_1RegexMatcher.html#a6ebcfcab4fe6a38678c0291643a03a00
aRegexMatcher.setTimeLimit( 23*1000, status);
const icu::UnicodeString aIcuText( reinterpret_cast<const UChar*>(aText.getStr()), aText.getLength());
aRegexMatcher.reset( aIcuText);
if (!bReplacement)
{
// Find n-th occurrence.
sal_Int32 nCount = 0;
while (aRegexMatcher.find( status) && U_SUCCESS(status) && ++nCount < nOccurrence)
;
if (U_FAILURE(status)) if (U_FAILURE(status))
{ {
// Invalid regex. // Some error.
PushIllegalArgument(); PushIllegalArgument();
return; return;
} }
// Guard against pathological patterns, limit steps of engine, see // n-th match found?
// https://ssl.icu-project.org/apiref/icu4c/classicu_1_1RegexMatcher.html#a6ebcfcab4fe6a38678c0291643a03a00 if (nCount != nOccurrence)
aRegexMatcher.setTimeLimit ( 23*1000, status);
const icu::UnicodeString aIcuText( reinterpret_cast<const UChar*>(aText.getStr()), aText.getLength());
aRegexMatcher.reset( aIcuText);
if (!bReplacement)
{ {
// Find first occurrence. PushError( FormulaError::NotAvailable);
if (!aRegexMatcher.find())
{
PushError( FormulaError::NotAvailable);
return;
}
// Extract matched text.
icu::UnicodeString aMatch( aRegexMatcher.group( status));
if (U_FAILURE(status))
{
// Some error.
PushIllegalArgument();
return;
}
OUString aResult( reinterpret_cast<const sal_Unicode*>(aMatch.getBuffer()), aMatch.length());
PushString( aResult);
return; return;
} }
// Extract matched text.
// Replace first occurrence of match with replacement. icu::UnicodeString aMatch( aRegexMatcher.group( status));
const icu::UnicodeString aIcuReplacement(
reinterpret_cast<const UChar*>(aReplacement.getStr()), aReplacement.getLength());
icu::UnicodeString aReplaced;
if (bGlobalReplacement)
aReplaced = aRegexMatcher.replaceAll( aIcuReplacement, status);
else
aReplaced = aRegexMatcher.replaceFirst( aIcuReplacement, status);
if (U_FAILURE(status)) if (U_FAILURE(status))
{ {
// Some error, e.g. extraneous $1 without group. // Some error.
PushIllegalArgument(); PushIllegalArgument();
return; return;
} }
OUString aResult( reinterpret_cast<const sal_Unicode*>(aReplaced.getBuffer()), aReplaced.length()); OUString aResult( reinterpret_cast<const sal_Unicode*>(aMatch.getBuffer()), aMatch.length());
PushString( aResult); PushString( aResult);
return;
}
const icu::UnicodeString aIcuReplacement(
reinterpret_cast<const UChar*>(aReplacement.getStr()), aReplacement.getLength());
icu::UnicodeString aReplaced;
if (bGlobalReplacement)
// Replace all occurrences of match with replacement.
aReplaced = aRegexMatcher.replaceAll( aIcuReplacement, status);
else if (nOccurrence == 1)
// Replace first occurrence of match with replacement.
aReplaced = aRegexMatcher.replaceFirst( aIcuReplacement, status);
else
{
// Replace n-th occurrence of match with replacement.
sal_Int32 nCount = 0;
while (aRegexMatcher.find( status) && U_SUCCESS(status))
{
// XXX NOTE: After several RegexMatcher::find() the
// RegexMatcher::appendReplacement() still starts at the
// beginning (or after the last appendReplacement() position
// which is none here) and copies the original text up to the
// current found match and then replaces the found match.
if (++nCount == nOccurrence)
{
aRegexMatcher.appendReplacement( aReplaced, aIcuReplacement, status);
break;
}
}
aRegexMatcher.appendTail( aReplaced);
}
if (U_FAILURE(status))
{
// Some error, e.g. extraneous $1 without group.
PushIllegalArgument();
return;
} }
OUString aResult( reinterpret_cast<const sal_Unicode*>(aReplaced.getBuffer()), aReplaced.length());
PushString( aResult);
} }
void ScInterpreter::ScMid() void ScInterpreter::ScMid()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment