Kaydet (Commit) 7d3092d7 authored tarafından Eike Rathke's avatar Eike Rathke

Use Welford one-pass variance algorithm for DataPilot / pivot table

... instead of the naïve algorithm. Short test case with 4 values:

10000000007
10000000011
10000000013
10000000017

     Naïve Var: -21845.3333333333
   Welford Var: 17.3333314259847
VAR() two-pass: 17.3333333333333

Change-Id: I2f27ab91166551e96c0e467f41bd6e6d49b50295
Reviewed-on: https://gerrit.libreoffice.org/64993Reviewed-by: 's avatarEike Rathke <erack@redhat.com>
Tested-by: Jenkins
(cherry picked from commit 3c964980)
Reviewed-on: https://gerrit.libreoffice.org/64994
üst 6e4d7556
......@@ -24,6 +24,7 @@
#include "dpfilteredcache.hxx"
#include "calcmacros.hxx"
#include "dpitemdata.hxx"
#include "subtotal.hxx"
#include <com/sun/star/sheet/DataPilotFieldOrientation.hpp>
#include <com/sun/star/sheet/DataPilotFieldReference.hpp>
......@@ -144,18 +145,19 @@ struct ScDPRelativePos
// Possible values for the nCount member:
// (greater than 0 counts the collected values)
const long SC_DPAGG_EMPTY = 0; // empty during data collection
const long SC_DPAGG_DATA_ERROR = -1; // error during data collection
const long SC_DPAGG_RESULT_EMPTY = -2; // empty result calculated
const long SC_DPAGG_RESULT_VALID = -3; // valid result calculated
const long SC_DPAGG_RESULT_ERROR = -4; // error in calculated result
const sal_Int64 SC_DPAGG_EMPTY = 0; // empty during data collection
const sal_Int64 SC_DPAGG_DATA_ERROR = -1; // error during data collection
const sal_Int64 SC_DPAGG_RESULT_EMPTY = -2; // empty result calculated
const sal_Int64 SC_DPAGG_RESULT_VALID = -3; // valid result calculated
const sal_Int64 SC_DPAGG_RESULT_ERROR = -4; // error in calculated result
class ScDPAggData
{
private:
WelfordRunner maWelford;
double fVal;
double fAux;
long nCount;
sal_Int64 nCount;
std::unique_ptr<ScDPAggData> pChild;
std::vector<double> mSortedValues;
......
......@@ -428,15 +428,7 @@ void ScDPAggData::Update( const ScDPValue& rNext, ScSubTotalFunc eFunc, const Sc
case SUBTOTAL_FUNC_STDP:
case SUBTOTAL_FUNC_VAR:
case SUBTOTAL_FUNC_VARP:
{
// fAux is used to sum up squares
if ( !SubTotal::SafePlus( fVal, rNext.mfValue ) )
nCount = -1; // -1 for error
double fAdd = rNext.mfValue;
if ( !SubTotal::SafeMult( fAdd, rNext.mfValue ) ||
!SubTotal::SafePlus( fAux, fAdd ) )
nCount = -1; // -1 for error
}
maWelford.update( rNext.mfValue);
break;
case SUBTOTAL_FUNC_MED:
{
......@@ -486,14 +478,19 @@ void ScDPAggData::Calculate( ScSubTotalFunc eFunc, const ScDPSubTotalState& rSub
case SUBTOTAL_FUNC_MED:
case SUBTOTAL_FUNC_MAX:
case SUBTOTAL_FUNC_MIN:
bError = ( nCount <= 0 ); // no data is an error
break;
case SUBTOTAL_FUNC_STDP:
case SUBTOTAL_FUNC_VARP:
bError = ( nCount <= 0 ); // no data is an error
assert(bError || nCount == static_cast<sal_Int64>(maWelford.getCount()));
break;
case SUBTOTAL_FUNC_STD:
case SUBTOTAL_FUNC_VAR:
bError = ( nCount < 2 ); // need at least 2 values
assert(bError || nCount == static_cast<sal_Int64>(maWelford.getCount()));
break;
default:
......@@ -525,23 +522,33 @@ void ScDPAggData::Calculate( ScSubTotalFunc eFunc, const ScDPSubTotalState& rSub
fResult = fVal / static_cast<double>(nCount);
break;
//TODO: use safe mul for fVal * fVal
case SUBTOTAL_FUNC_STD:
if ( nCount >= 2 )
fResult = sqrt((fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount-1));
{
fResult = maWelford.getVarianceSample();
if (fResult < 0.0)
bError = true;
else
fResult = sqrt( fResult);
}
break;
case SUBTOTAL_FUNC_VAR:
if ( nCount >= 2 )
fResult = (fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount-1);
fResult = maWelford.getVarianceSample();
break;
case SUBTOTAL_FUNC_STDP:
if ( nCount > 0 )
fResult = sqrt((fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount));
{
fResult = maWelford.getVariancePopulation();
if (fResult < 0.0)
bError = true;
else
fResult = sqrt( fResult);
}
break;
case SUBTOTAL_FUNC_VARP:
if ( nCount > 0 )
fResult = (fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount);
fResult = maWelford.getVariancePopulation();
break;
case SUBTOTAL_FUNC_MED:
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment