Kaydet (Commit) e791fbfc authored tarafından Haidong Lian's avatar Haidong Lian Kaydeden (comit) Kohei Yoshida

Patch for milestone1-0829-v4.

1. Add the parser based on RPN;
2. For test sample1 named "ground-water-daily.xls", using the compound formula
   to do calculation;
Add the compound kernels:
Formulae include "AVERAGE,MAX and MIN".Compound formulae include "AVERAGE
-(+,*,/)","MAX -(+,*,/)" and "MIN -(+,*,/)";
3. For formulae which do not work in GPU, they'll work in CPU;
4. For compound operators(-,+,*,/), they'll be calculated one by one in GPU as
the sequence of RPN;
5. Add the start and end position to fit for the sliding window;
6. Modify kernels by using vector for AMD GPU.

Conflicts:
	sc/source/core/opencl/formulagroupcl.cxx
	sc/source/core/opencl/openclwrapper.cxx

Change-Id: I6157008575ce89ddd3e7bf552a87812474af4125
üst ccf7b15c
......@@ -19,6 +19,10 @@
#include "openclwrapper.hxx"
#define SRCDATASIZE 100
#define SINGLEARRAYLEN 100
#define DOUBLEARRAYLEN 100
#define SVDOUBLELEN 100
namespace sc {
// A single public entry point for a factory function:
......@@ -38,447 +42,918 @@ double getTimeDiff(const TimeValue& t1, const TimeValue& t2)
}//dbg-t
TimeValue aTimeBefore, aTimeAfter;
///////////////////////////////////////
class SourceData
{
const double *mdpSrcPtr;
unsigned int mnDataSize;
const char *mcpFormulaName;
unsigned int mnCol;
int eOp;
public:
SourceData( const double *dpData, unsigned int nSize, uint nCol = 1,const char *cpFormulaName = NULL):mdpSrcPtr(dpData),mnDataSize(nSize),mcpFormulaName(cpFormulaName),mnCol(nCol)
{
}
SourceData():mdpSrcPtr(NULL),mnDataSize(0)
{
}
void setSrcPtr( const double *dpTmpDataPtr)
{
mdpSrcPtr = dpTmpDataPtr;
}
void setSrcSize( int nSize )
{
mnDataSize = nSize;
}
const double * getDouleData()
{
return mdpSrcPtr;
}
unsigned int getDataSize()
{
return mnDataSize;
}
void print()
{
for( uint i=0; i<mnDataSize; i++ )
printf( " The SourceData is %f and data size is %d\n",mdpSrcPtr[i],mnDataSize );
}
void printFormula()
{
printf("--------The formulaname is %s and the eOp is %d---------\n",mcpFormulaName,eOp);
}
void setFormulaName(const char *cpFormulaName)
{
this->mcpFormulaName = cpFormulaName;
}
const char *getFormulaName()
{
return mcpFormulaName;
}
void seteOp(int op)
{
this->eOp = op;
}
int geteOp()
{
return eOp;
}
int getColNum()
{
return mnCol;
}
};
class FormulaGroupInterpreterOpenCL : public FormulaGroupInterpreterSoftware
{
SourceData *mSrcDataStack[SRCDATASIZE];
unsigned int mnStackPointer,mnDoublePtrCount;
uint * mnpOclStartPos;
uint * mnpOclEndPos;
SingleVectorFormula *mSingleArray[SINGLEARRAYLEN];
DoubleVectorFormula *mDoubleArray[DOUBLEARRAYLEN];
double mdpSvdouble[SVDOUBLELEN];
double *mdpSrcDoublePtr[SVDOUBLELEN];
uint mnSingleCount;
uint mnDoubleCount;
uint mnSvDoubleCount;
uint mnOperatorGroup[100];
uint mnOperatorCount;
char mcHostName[100];
uint mnPositonLen;
size_t mnRowSize;
public:
FormulaGroupInterpreterOpenCL() :
FormulaGroupInterpreterSoftware()
{
OclCalc::InitEnv();
mnStackPointer = 0;
mnpOclEndPos = NULL;
mnpOclStartPos = NULL;
mnSingleCount = 0;
mnDoubleCount = 0;
mnSvDoubleCount = 0;
mnOperatorCount = 0;
mnPositonLen = 0;
mnDoublePtrCount = 0;
OclCalc::initEnv();
}
virtual ~FormulaGroupInterpreterOpenCL()
{
OclCalc::ReleaseOpenclRunEnv();
OclCalc::releaseOpenclRunEnv();
}
virtual ScMatrixRef inverseMatrix(const ScMatrix& rMat);
virtual bool interpret(ScDocument& rDoc, const ScAddress& rTopPos,
const ScFormulaCellGroupRef& xGroup, ScTokenArray& rCode);
};
ScMatrixRef FormulaGroupInterpreterOpenCL::inverseMatrix(const ScMatrix& rMat)
{
SCSIZE nC, nR;
rMat.GetDimensions(nC, nR);
if (nC != nR || nC == 0)
// Input matrix must be square. Return an empty matrix on failure and
// the caller will calculate it via CPU.
return ScMatrixRef();
// This vector will contain a series of doubles from the first column to
// the last, chained together in a single array.
std::vector<double> aDoubles;
rMat.GetDoubleArray(aDoubles);
float * fpOclMatrixSrc = NULL;
float * fpOclMatrixDst = NULL;
double * dpOclMatrixSrc = NULL;
double * dpOclMatrixDst = NULL;
uint nMatrixSize = nC * nR;
static OclCalc aOclCalc;
if ( aOclCalc.GetOpenclState() )
virtual ScMatrixRef inverseMatrix( const ScMatrix& rMat );
virtual bool interpret( ScDocument& rDoc, const ScAddress& rTopPos,
const ScFormulaCellGroupRef& xGroup, ScTokenArray& rCode );
void collectDoublePointers( double *temp )
{
if ( aOclCalc.gpuEnv.mnKhrFp64Flag == 1 || aOclCalc.gpuEnv.mnAmdFp64Flag == 1 )
if( mnDoublePtrCount < SRCDATASIZE )
{
aOclCalc.CreateBuffer64Bits( dpOclMatrixSrc, dpOclMatrixDst, nMatrixSize );
for ( uint i = 0; i < nC; i++ )
for ( uint j = 0; j < nR; j++ )
dpOclMatrixSrc[i*nC+j] = aDoubles[j*nR+i];
aOclCalc.OclHostMatrixInverse64Bits( "oclFormulaMtxInv", dpOclMatrixSrc, dpOclMatrixDst,aDoubles, nR );
mdpSrcDoublePtr[mnDoublePtrCount++] = temp;
}
else
{
aOclCalc.CreateBuffer32Bits( fpOclMatrixSrc, fpOclMatrixDst, nMatrixSize );
for ( uint i = 0; i < nC; i++ )
for ( uint j = 0; j < nR; j++ )
fpOclMatrixSrc[i*nC+j] = (float) aDoubles[j*nR+i];
aOclCalc.OclHostMatrixInverse32Bits( "oclFormulaMtxInv", fpOclMatrixSrc, fpOclMatrixDst, aDoubles, nR );
printf( "The mdpSrcDoublePtr is full now.\n" );
double *dtmp = NULL;
if ( (dtmp = mdpSrcDoublePtr[--mnDoublePtrCount]) != NULL )
{
free( dtmp );
dtmp = NULL;
}
}
}
// TODO: Inverse this matrix and put the result back into xInv. Right now,
// I'll just put the original, non-inversed matrix values back, just to
// demonstrate how to put the values back after inversion. There are two
// ways to put the values back (depending on what the GPU output is).
ScMatrixRef xInv(new ScMatrix(nR, nR, 0.0));
#if 0
// One way is to put the whole value as one array. This method assumes
// that the array size equals column x row, and is oriented column-wise.
// This method is slightly more efficient than the second, but I wouldn't
// expect too much of a difference.
xInv->PutDouble(&aDoubles[0], aDoubles.size(), 0, 0);
#else
// Another way is to put the values one column at a time.
const double* p = &aDoubles[0];
for (SCSIZE i = 0; i < nC; ++i)
void freeDoublePointers()
{
xInv->PutDouble(p, nR, i, 0);
p += nR;
while( mnDoublePtrCount > 0 )
{
double *dtmp = NULL;
if ( (dtmp = mdpSrcDoublePtr[--mnDoublePtrCount]) != NULL )
{
free( dtmp );
dtmp = NULL;
}
}
}
#endif
return xInv;
}
bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& rDoc, const ScAddress& rTopPos,
const ScFormulaCellGroupRef& xGroup, ScTokenArray& rCode)
{
generateRPNCode(rDoc, rTopPos, rCode);
size_t rowSize = xGroup->mnLength;
fprintf(stderr,"rowSize at begin is ...%ld.\n",(long)rowSize);
// The row quantity can be gotten from p2->GetArrayLength()
uint nCount1 = 0, nCount2 = 0, nCount3 = 0;
int nOclOp = 0;
double *rResult = NULL; // Point to the output data from GPU
rResult = (double *)malloc(sizeof(double) * rowSize*2);// For 2 columns(B,C)
if(NULL==rResult)
void srdDataPush( SourceData *temp )
{
printf("malloc err\n");
return false;
if( mnStackPointer < SRCDATASIZE )
{
mSrcDataStack[mnStackPointer++] = temp;
}
memset(rResult,0,rowSize);
float * fpOclSrcData = NULL; // Point to the input data from CPU
double * dpOclSrcData = NULL;
uint * npOclStartPos = NULL; // The first position for calculation,for example,the A1 in (=MAX(A1:A100))
uint * npOclEndPos = NULL; // The last position for calculation,for example, the A100 in (=MAX(A1:A100))
float * fpLeftData = NULL; // Left input for binary operator(+,-,*,/),for example,(=leftData+rightData)
float * fpRightData = NULL; // Right input for binary operator(+,-,*,/),for example,(=leftData/rightData)
// The rightData can't be zero for "/"
double * dpLeftData = NULL;
double * dpRightData = NULL;
float * fpSaveData=NULL; //It is a temp pointer point the preparing memory;
float * fpSumProMergeLfData = NULL; //It merge the more col to one col is the left operator
float * fpSumProMergeRtData = NULL; //It merge the more col to one col is the right operator
double * dpSaveData=NULL;
double * dpSumProMergeLfData = NULL;
double * dpSumProMergeRtData = NULL;
uint * npSumSize=NULL; //It is a array to save the matix sizt(col *row)
int nSumproductSize=0; //It is the merge array size
bool aIsAlloc=false; //It is a flag to judge the fpSumProMergeLfData existed
unsigned int nCountMatix=0; //It is a count to save the calculate times
static OclCalc ocl_calc;
bool isSumProduct=false;
if(ocl_calc.GetOpenclState())
else
printf( "The stack is full now.\n" );
}
SourceData *srdDataPop( void )
{
// Don't know how large the size will be applied previously, so create them as the rowSize or 65536
// Don't know which formulae will be used previously, so create buffers for different formulae used probably
if(ocl_calc.gpuEnv.mnKhrFp64Flag==1 || ocl_calc.gpuEnv.mnAmdFp64Flag==1)
if( mnStackPointer <= 0 )
{
ocl_calc.CreateBuffer64Bits(dpOclSrcData,npOclStartPos,npOclEndPos,rowSize);
ocl_calc.CreateBuffer64Bits(dpLeftData,dpRightData,rowSize);
printf( "The stack was empty\n" );
return NULL;
}
else
return mSrcDataStack[--mnStackPointer];
}
unsigned int getDataSize()
{
ocl_calc.CreateBuffer32Bits(fpOclSrcData,npOclStartPos,npOclEndPos,rowSize);
ocl_calc.CreateBuffer32Bits(fpLeftData,fpRightData,rowSize);
return mnStackPointer;
}
//printf("pptrr is %d,%d,%d\n",fpOclSrcData,npOclStartPos,npOclEndPos);
void printStackInfo()
{
printf( "/********The stack size is %d*********\\\n",mnStackPointer );
for ( int i = mnStackPointer - 1; i >= 0; i-- )
mSrcDataStack[i]->print();
}
///////////////////////////////////////////////////////////////////////////////////////////
// Until we implement group calculation for real, decompose the group into
// individual formula token arrays for individual calculation.
ScAddress aTmpPos = rTopPos;
for (sal_Int32 i = 0; i < xGroup->mnLength; ++i)
bool getPosition(const ScTokenArray& rCode,const ScFormulaCellGroupRef& xGroup,uint nRowSize,uint *&npOclStartPos,uint *&npOclEndPos,uint *nPositonLen);
bool chooseFunction(OclCalc &ocl_calc,double *&dpResult);
bool isStockHistory();
bool isGroundWater();
};
bool FormulaGroupInterpreterOpenCL::getPosition(const ScTokenArray& rCode,const ScFormulaCellGroupRef& xGroup,uint nRowSize,uint *&npOclStartPos,uint *&npOclEndPos,uint *nPositonLen)
{
uint nColPosition = 0;
ScTokenArray * rCodePos = rCode.Clone();
static int nCountPosSize = nRowSize;
bool isAllocFormulaOclBuf = true;
for ( const formula::FormulaToken* p = rCodePos->First(); p; p = rCodePos->Next() )
{
aTmpPos.SetRow(xGroup->mnStart + i);
ScTokenArray aCode2;
for (const formula::FormulaToken* p = rCode.First(); p; p = rCode.Next())
switch ( p->GetType() )
{
switch (p->GetType())
case formula::svDoubleVectorRef:
{
case formula::svSingleVectorRef:
nColPosition++;
break;
}
}
}
int nPositionSize = nColPosition * nRowSize;
npOclStartPos = (unsigned int*) malloc( nPositionSize * sizeof(unsigned int) );
npOclEndPos = (unsigned int*) malloc( nPositionSize * sizeof(unsigned int) );
if ( nCountPosSize < nPositionSize )
{
const formula::SingleVectorRefToken* p2 = static_cast<const formula::SingleVectorRefToken*>(p);
const double* pArray = p2->GetArray();
aCode2.AddDouble(static_cast<size_t>(i) < p2->GetArrayLength() ? pArray[i] : 0.0);
nCountPosSize = nPositionSize;
isAllocFormulaOclBuf = false;
}
break;
for ( sal_Int32 i = 0; i < xGroup->mnLength; ++i )
{
ScTokenArray * rCodeTemp = rCode.Clone();
int j = 0;
for ( const formula::FormulaToken* p = rCodeTemp->First(); p; p = rCodeTemp->Next() )
{
switch (p->GetType())
{
case formula::svDoubleVectorRef:
{
const formula::DoubleVectorRefToken* p2 = static_cast<const formula::DoubleVectorRefToken*>(p);
const std::vector<const double*>& rArrays = p2->GetArrays();
size_t nColSize = rArrays.size();
size_t nRowStart = p2->IsStartFixed() ? 0 : i;
size_t nRowEnd = p2->GetRefRowSize() - 1;
if (!p2->IsEndFixed())
nRowEnd += i;
size_t nRowSize = nRowEnd - nRowStart + 1;
//store the a matix`s rowsize and colsize,use it to calculate the matix`s size
ocl_calc.nFormulaRowSize = nRowSize;
ocl_calc.nFormulaColSize = nColSize;
ScMatrixRef pMat(new ScMatrix(nColSize, nRowSize, 0.0));
if(ocl_calc.GetOpenclState())
{
npOclStartPos[i] = nRowStart; // record the start position
npOclEndPos[i] = nRowEnd; // record the end position
}
int nTempOpcode;
const formula::FormulaToken* pTemp = p;
pTemp=aCode2.Next();
nTempOpcode=pTemp->GetOpCode();
while(1)
{
nTempOpcode=pTemp->GetOpCode();
if(nTempOpcode!=ocOpen && nTempOpcode!=ocPush)
break;
pTemp=aCode2.Next();
npOclStartPos[j*nRowSize+i] = nRowStart;//record the start position
npOclEndPos[j*nRowSize+i] = nRowEnd;//record the end position
j++;
}
}
}
if((!aIsAlloc) && (ocl_calc.GetOpenclState())&& (nTempOpcode == ocSumProduct))
}
*nPositonLen = nPositionSize;
//Now the pos array is 0 1 2 3 4 5 0 1 2 3 4 5;
return isAllocFormulaOclBuf;
}
bool FormulaGroupInterpreterOpenCL::isStockHistory()
{
bool isHistory = false;
if( (mnOperatorGroup[0]== 224) && (mnOperatorGroup[1]== 227) && (mnOperatorGroup[2]== 41) && (mnOperatorGroup[3]== 43) && (mnOperatorGroup[4]== 41) )
{
//nColSize * rowSize is the data size , but except the the head of data will use less the nRowSize
//the other all use nRowSize times . and it must aligen so add nRowSize-1.
nSumproductSize = nRowSize+nColSize * rowSize*nRowSize-1;
if(ocl_calc.gpuEnv.mnKhrFp64Flag==1 || ocl_calc.gpuEnv.mnAmdFp64Flag==1)
ocl_calc.CreateBuffer64Bits(dpSumProMergeLfData,dpSumProMergeRtData,npSumSize,nSumproductSize,rowSize);
else
ocl_calc.CreateBuffer32Bits(fpSumProMergeLfData,fpSumProMergeRtData,npSumSize,nSumproductSize,rowSize);
aIsAlloc = true;
isSumProduct=true;
strcpy(mcHostName,"OclOperationColumnN");
isHistory = true;
}
if(isSumProduct)
else if( (mnOperatorGroup[0] == 226) && (mnOperatorGroup[1] == 42) )
{
if(ocl_calc.gpuEnv.mnKhrFp64Flag==1 || ocl_calc.gpuEnv.mnAmdFp64Flag==1)
strcpy(mcHostName,"OclOperationColumnH");
isHistory = true;
}
else if((mnOperatorGroup[0] == 213) && (mnOperatorGroup[1] == 43) && (mnOperatorGroup[2] == 42) )
{
if(nCountMatix%2==0)
dpSaveData = dpSumProMergeLfData;
else
dpSaveData = dpSumProMergeRtData;
strcpy(mcHostName,"OclOperationColumnJ");
isHistory = true;
}
else
return isHistory;
}
bool FormulaGroupInterpreterOpenCL::isGroundWater()
{
bool GroundWater=false;
if((mnOperatorGroup[0] == ocAverage && 1 == mnSingleCount )||(mnOperatorGroup[0] == ocMax && 1 == mnSingleCount )||
(mnOperatorGroup[0] == ocMin && 1 == mnSingleCount )||(mnOperatorGroup[0] == ocSub && mnSvDoubleCount==1))
{
if(nCountMatix%2==0)
fpSaveData = fpSumProMergeLfData;
else
fpSaveData = fpSumProMergeRtData;
GroundWater = true;
}
return GroundWater;
}
bool FormulaGroupInterpreterOpenCL::chooseFunction( OclCalc &ocl_calc, double *&dpResult )
{
const double * dpOclSrcData = NULL;
unsigned int nSrcDataSize = 0;
const double *dpLeftData = NULL;
const double *dpRightData = NULL;
if((mnOperatorGroup[0] == ocAverage && 1 == mnSingleCount )||(mnOperatorGroup[0] == ocMax && 1 == mnSingleCount )||
(mnOperatorGroup[0] == ocMin && 1 == mnSingleCount )||(mnOperatorGroup[0] == ocSub && mnSvDoubleCount==1))
{
double delta = 0.0;
const double *pArrayToSubtractOneElementFrom;
const double *pGroundWaterDataArray;
uint nSrcData = 0;
if( mnSvDoubleCount!=1 )
{
pArrayToSubtractOneElementFrom= mSingleArray[0]->mdpInputLeftData;
pGroundWaterDataArray= mDoubleArray[0]->mdpInputData;
nSrcData = mDoubleArray[0]->mnInputDataSize;
}
for (size_t nCol = 0; nCol < nColSize; ++nCol)
else
{
const double* pArray = rArrays[nCol];
if( NULL==pArray )
pArrayToSubtractOneElementFrom= mSingleArray[0]->mdpInputLeftData;
pGroundWaterDataArray=NULL;
delta = mdpSvdouble[0];
}
ocl_calc.oclGroundWaterGroup( mnOperatorGroup,mnOperatorCount,pGroundWaterDataArray,pArrayToSubtractOneElementFrom,nSrcData,mnRowSize,delta,mnpOclStartPos,mnpOclEndPos,dpResult);
}
else if( isStockHistory() )
{
fprintf(stderr,"Error: pArray is NULL!\n");
free(rResult);
return false;
}
if(ocl_calc.GetOpenclState())
else if(((mnSvDoubleCount==0)&&(mnSingleCount==0)&&(mnDoubleCount==1)) &&
((mnOperatorGroup[0] == ocAverage)||(mnOperatorGroup[0] == ocMax)||(mnOperatorGroup[0] == ocMin)))
{
for( size_t u=nRowStart; u<=nRowEnd; u++ )
if(mnOperatorGroup[0] == ocAverage)
strcpy(mcHostName,"oclFormulaAverage");
if(mnOperatorGroup[0] == ocMax)
strcpy(mcHostName,"oclFormulaMax");
if(mnOperatorGroup[0] == ocMin)
strcpy(mcHostName,"oclFormulaMin");
DoubleVectorFormula * doubleTemp = mDoubleArray[--mnDoubleCount];
nSrcDataSize = doubleTemp->mnInputDataSize;
dpOclSrcData = doubleTemp->mdpInputData;
if ( ocl_calc.getOpenclState())
{
if(ocl_calc.gpuEnv.mnKhrFp64Flag==1 || ocl_calc.gpuEnv.mnAmdFp64Flag==1)
if ( ocl_calc.gpuEnv.mnKhrFp64Flag==1 || ocl_calc.gpuEnv.mnAmdFp64Flag == 1 )
{
dpOclSrcData[u] = pArray[u];
//fprintf(stderr,"dpOclSrcData[%d] is %f.\n",u,dpOclSrcData[u]);
if(isSumProduct)
dpSaveData[u+nRowSize*nCol + nRowStart* nColSize * nRowSize-nRowStart] = pArray[u];
ocl_calc.createFormulaBuf64Bits( nSrcDataSize, mnRowSize );
ocl_calc.mapAndCopy64Bits( dpOclSrcData,mnpOclStartPos,mnpOclEndPos,nSrcDataSize,mnRowSize );
ocl_calc.oclHostFormulaStatistics64Bits( mcHostName, dpResult, mnRowSize );
}
else
{
// Many video cards can't support double type in kernel, so need transfer the double to float
fpOclSrcData[u] = (float)pArray[u];
//fprintf(stderr,"fpOclSrcData[%d] is %f.\n",u,fpOclSrcData[u]);
if(isSumProduct)
fpSaveData[u+nRowSize*nCol + nRowStart* nColSize * nRowSize-nRowStart] = (float)pArray[u];
ocl_calc.createFormulaBuf32Bits( nSrcDataSize, mnPositonLen );
ocl_calc.mapAndCopy32Bits( dpOclSrcData, mnpOclStartPos, mnpOclEndPos, nSrcDataSize, mnRowSize);
ocl_calc.oclHostFormulaStatistics32Bits( mcHostName, dpResult, mnRowSize );
}
}
}
for (size_t nRow = 0; nRow < nRowSize; ++nRow)
else if((mnSvDoubleCount==0)&&(mnSingleCount==1)&&(mnDoubleCount==0))
{
if (nRowStart + nRow < p2->GetArrayLength())
dpLeftData = mSingleArray[0]->mdpInputLeftData;
dpRightData = mSingleArray[0]->mdpInputRightData;
if(mnOperatorGroup[0] == ocAdd)
strcpy(mcHostName,"oclSignedAdd");
if(mnOperatorGroup[0] == ocSub)
strcpy(mcHostName,"oclSignedSub");
if(mnOperatorGroup[0] == ocMul)
strcpy(mcHostName,"oclSignedMul");
if(mnOperatorGroup[0] == ocDiv)
strcpy(mcHostName,"oclSignedDiv");
if ( ocl_calc.getOpenclState())
{
double fVal = pArray[nRowStart+nRow];
pMat->PutDouble(fVal, nCol, nRow);
}
}
if ( ocl_calc.gpuEnv.mnKhrFp64Flag == 1 || ocl_calc.gpuEnv.mnAmdFp64Flag == 1 )
{
ocl_calc.createArithmeticOptBuf64Bits( mnRowSize );
ocl_calc.mapAndCopy64Bits(dpLeftData,dpRightData,mnRowSize);
ocl_calc.oclHostArithmeticOperator64Bits( mcHostName,dpResult,mnRowSize );
}
ScMatrixToken aTok(pMat);
aCode2.AddToken(aTok);
if(isSumProduct)
else
{
npSumSize[nCountMatix/2] =nRowSize*nColSize;
nCountMatix++;
ocl_calc.createArithmeticOptBuf32Bits( mnRowSize );
ocl_calc.mapAndCopy32Bits(dpLeftData,dpRightData,mnRowSize);
ocl_calc.oclHostArithmeticOperator32Bits( mcHostName,dpResult,mnRowSize );
}
}
break;
default:
aCode2.AddToken(*p);
}
else if( (mnSingleCount>1) && (mnSvDoubleCount==0) && (mnDoubleCount==0) )
{
const double* dpArray[100] = {};
int j=0;
for( uint i = 0; i < mnSingleCount; i++ )
{
dpArray[j++] = mSingleArray[i]->mdpInputLeftData;
if( NULL != mSingleArray[i]->mdpInputRightData )
dpArray[j++] = mSingleArray[i]->mdpInputRightData;
}
ScFormulaCell* pDest = rDoc.GetFormulaCell(aTmpPos);
if (!pDest)
double *dpMoreColArithmetic = (double *)malloc( sizeof(double) * j * mnRowSize );
if( NULL == dpMoreColArithmetic )
{
free(rResult);
printf("Memory alloc error!\n");
return false;
}
if(ocl_calc.GetOpenclState())
{
const formula::FormulaToken *pCur = aCode2.First();
aCode2.Reset();
while( ( pCur = aCode2.Next() ) != NULL )
for( uint i = 0; i < j*mnRowSize; i++ )
{
OpCode eOp = pCur->GetOpCode();
if(eOp==0)
dpMoreColArithmetic[i] = dpArray[i/mnRowSize][i%mnRowSize];
}
if ( ocl_calc.getOpenclState())
{
if(ocl_calc.gpuEnv.mnKhrFp64Flag==1 || ocl_calc.gpuEnv.mnAmdFp64Flag==1)
if ( ocl_calc.gpuEnv.mnKhrFp64Flag == 1 || ocl_calc.gpuEnv.mnAmdFp64Flag == 1 )
{
if(nCount3%2==0)
dpLeftData[nCount1++] = pCur->GetDouble();
else
dpRightData[nCount2++] = pCur->GetDouble();
nCount3++;
ocl_calc.createMoreColArithmeticBuf64Bits( j * mnRowSize, mnOperatorCount );
ocl_calc.mapAndCopyMoreColArithmetic64Bits( dpMoreColArithmetic, mnRowSize * j, mnOperatorGroup, mnOperatorCount );
ocl_calc.oclMoreColHostArithmeticOperator64Bits( mnRowSize, mnOperatorCount, dpResult,mnRowSize );
}
else
{
if(nCount3%2==0)
fpLeftData[nCount1++] = (float)pCur->GetDouble();
else
fpRightData[nCount2++] = (float)pCur->GetDouble();
nCount3++;
}
ocl_calc.createMoreColArithmeticBuf32Bits( j* mnRowSize, mnOperatorCount );
ocl_calc.mapAndCopyMoreColArithmetic32Bits(dpMoreColArithmetic, mnRowSize * j, mnOperatorGroup, mnOperatorCount);
ocl_calc.oclMoreColHostArithmeticOperator32Bits( mnRowSize, mnOperatorCount, dpResult, mnRowSize );
}
else if( eOp!=ocOpen && eOp!=ocClose &&eOp != ocSep)
nOclOp = eOp;
// if(count1>0){//dbg
// fprintf(stderr,"leftData is %f.\n",fpLeftData[count1-1]);
// count1--;
// }
// if(count2>0){//dbg
// fprintf(stderr,"rightData is %f.\n",fpRightData[count2-1]);
// count2--;
// }
}
}
if(!getenv("SC_GPU")||!ocl_calc.GetOpenclState())
else
{
//fprintf(stderr,"ccCPU flow...\n\n");
generateRPNCode(rDoc, aTmpPos, aCode2);
ScInterpreter aInterpreter(pDest, &rDoc, aTmpPos, aCode2);
aInterpreter.Interpret();
pDest->SetResultToken(aInterpreter.GetResultToken().get());
pDest->ResetDirty();
pDest->SetChanged(true);
return false;
}
} // for loop end (xGroup->mnLength)
return true;
}
// For GPU calculation
if(getenv("SC_GPU")&&ocl_calc.GetOpenclState())
{
fprintf(stderr,"ggGPU flow...\n\n");
printf(" oclOp is... %d\n",nOclOp);
osl_getSystemTime(&aTimeBefore); //timer
if(ocl_calc.gpuEnv.mnKhrFp64Flag==1 || ocl_calc.gpuEnv.mnAmdFp64Flag==1)
class agency
{
public:
double *calculate(int nOclOp,int rowSize,OclCalc &ocl_calc,uint *npOclStartPos,uint *npOclEndPos,FormulaGroupInterpreterOpenCL *formulaInterprt);
};
double * agency::calculate( int nOclOp,int rowSize,OclCalc &ocl_calc,uint *npOclStartPos,uint *npOclEndPos,FormulaGroupInterpreterOpenCL *formulaInterprt)
{
const double *dpLeftData = NULL;
const double *dpRightData = NULL;
const double *dpOclSrcData=NULL;
if ( ocl_calc.gpuEnv.mnKhrFp64Flag == 1 || ocl_calc.gpuEnv.mnAmdFp64Flag == 1 )
{
fprintf(stderr,"ggGPU double precision flow...\n\n");
//double precision
switch(nOclOp)
switch( nOclOp )
{
case ocAdd:
ocl_calc.OclHostArithmeticOperator64Bits("oclSignedAdd",dpLeftData,dpRightData,rResult,nCount1);
{
unsigned int nDataSize = 0;
SourceData *temp = formulaInterprt->srdDataPop();
SourceData *temp2 = formulaInterprt->srdDataPop();
nDataSize = temp2->getDataSize();
dpLeftData = temp2->getDouleData();
dpRightData = temp->getDouleData();
nDataSize = temp2->getDataSize();
double *rResult = NULL; // Point to the output data from GPU
rResult = (double *)malloc( sizeof(double) * nDataSize );
memset(rResult,0,rowSize);
ocl_calc.oclHostArithmeticStash64Bits( "oclSignedAdd",dpLeftData,dpRightData,rResult,temp->getDataSize() );
formulaInterprt->srdDataPush( new SourceData( rResult,nDataSize ) );
break;
}
case ocSub:
ocl_calc.OclHostArithmeticOperator64Bits("oclSignedSub",dpLeftData,dpRightData,rResult,nCount1);
{
unsigned int nDataSize = 0;
SourceData *temp = formulaInterprt->srdDataPop();
SourceData *temp2 = formulaInterprt->srdDataPop();
nDataSize = temp2->getDataSize();
dpLeftData = temp2->getDouleData();
dpRightData = temp->getDouleData();
nDataSize = temp2->getDataSize();
double *rResult = NULL; // Point to the output data from GPU
rResult = ( double * )malloc( sizeof(double) * nDataSize );
memset( rResult,0,rowSize );
ocl_calc.oclHostArithmeticStash64Bits( "oclSignedSub",dpLeftData,dpRightData,rResult,temp->getDataSize() );
formulaInterprt->srdDataPush( new SourceData(rResult,nDataSize) );
break;
}
case ocMul:
ocl_calc.OclHostArithmeticOperator64Bits("oclSignedMul",dpLeftData,dpRightData,rResult,nCount1);
{
unsigned int nDataSize = 0;
SourceData *temp = formulaInterprt->srdDataPop();
SourceData *temp2 = formulaInterprt->srdDataPop();
nDataSize = temp2->getDataSize();
dpLeftData = temp2->getDouleData();
dpRightData = temp->getDouleData();
nDataSize = temp2->getDataSize();
double *rResult = NULL; // Point to the output data from GPU
rResult = (double *)malloc( sizeof(double) * nDataSize );
memset( rResult,0,rowSize );
ocl_calc.oclHostArithmeticStash64Bits( "oclSignedMul",dpLeftData,dpRightData,rResult,temp->getDataSize() );
formulaInterprt->srdDataPush( new SourceData( rResult,nDataSize ) );
break;
}
case ocDiv:
ocl_calc.OclHostArithmeticOperator64Bits("oclSignedDiv",dpLeftData,dpRightData,rResult,nCount1);
{
unsigned int nDataSize = 0;
SourceData *temp = formulaInterprt->srdDataPop();
SourceData *temp2 = formulaInterprt->srdDataPop();
nDataSize = temp2->getDataSize();
dpLeftData = temp2->getDouleData();
dpRightData = temp->getDouleData();
nDataSize = temp2->getDataSize();
double *rResult = NULL; // Point to the output data from GPU
rResult = ( double * )malloc( sizeof(double) * nDataSize );
memset( rResult,0,rowSize );
ocl_calc.oclHostArithmeticStash64Bits( "oclSignedDiv",dpLeftData,dpRightData,rResult,temp->getDataSize() );
formulaInterprt->srdDataPush( new SourceData( rResult,nDataSize ) );
break;
}
case ocMax:
ocl_calc.OclHostFormulaStatistics64Bits("oclFormulaMax",dpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
{
unsigned int nDataSize = 0;
SourceData *temp = formulaInterprt->srdDataPop();
nDataSize = temp->getDataSize();
dpOclSrcData = temp->getDouleData();
double *rResult = NULL; // Point to the output data from GPU
rResult = (double *)malloc( sizeof(double) * rowSize );
memset( rResult,0,rowSize );
ocl_calc.oclHostFormulaStash64Bits( "oclFormulaMax",dpOclSrcData,npOclStartPos,npOclEndPos,rResult,nDataSize,rowSize );
formulaInterprt->srdDataPush( new SourceData( rResult,rowSize ) );
break;
}
case ocMin:
ocl_calc.OclHostFormulaStatistics64Bits("oclFormulaMin",dpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
{
unsigned int nDataSize = 0;
SourceData *temp = formulaInterprt->srdDataPop();
nDataSize = temp->getDataSize();
dpOclSrcData = temp->getDouleData();
double *rResult = NULL; // Point to the output data from GPU
rResult = (double *)malloc( sizeof(double) * rowSize );
memset( rResult,0,rowSize );
ocl_calc.oclHostFormulaStash64Bits( "oclFormulaMin",dpOclSrcData,npOclStartPos,npOclEndPos,rResult,nDataSize,rowSize );
formulaInterprt->srdDataPush( new SourceData( rResult,rowSize ) );
break;
}
case ocAverage:
ocl_calc.OclHostFormulaStatistics64Bits("oclFormulaAverage",dpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
break;
case ocSum:
ocl_calc.OclHostFormulaStatistics64Bits("oclFormulaSum",dpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
break;
case ocCount:
ocl_calc.OclHostFormulaCount64Bits(npOclStartPos,npOclEndPos,rResult,rowSize);
break;
case ocSumProduct:
ocl_calc.OclHostFormulaSumProduct64Bits(dpSumProMergeLfData,dpSumProMergeRtData,npSumSize,rResult,rowSize);
{
unsigned int nDataSize = 0;
SourceData *temp = formulaInterprt->srdDataPop();
nDataSize = temp->getDataSize();
dpOclSrcData = temp->getDouleData();
double *rResult = NULL; // Point to the output data from GPU
rResult = (double *)malloc( sizeof(double) * rowSize );
memset( rResult,0,rowSize );
ocl_calc.oclHostFormulaStash64Bits( "oclFormulaAverage",dpOclSrcData,npOclStartPos,npOclEndPos,rResult,nDataSize,rowSize );
formulaInterprt->srdDataPush( new SourceData( rResult,rowSize ) );
break;
}
default:
fprintf(stderr,"No OpenCL function for this calculation.\n");
fprintf( stderr,"No OpenCL function for this calculation.\n" );
break;
}
}
else
{
fprintf(stderr,"ggGPU float precision flow...\n\n");
//float precision
switch(nOclOp)
switch( nOclOp )
{
case ocAdd:
ocl_calc.OclHostArithmeticOperator32Bits("oclSignedAdd",fpLeftData,fpRightData,rResult,nCount1);
{
unsigned int nDataSize = 0;
SourceData *temp = formulaInterprt->srdDataPop();
SourceData *temp2 = formulaInterprt->srdDataPop();
nDataSize = temp2->getDataSize();
dpLeftData = temp2->getDouleData();
dpRightData = temp->getDouleData();
nDataSize = temp2->getDataSize();
double *rResult = NULL; // Point to the output data from GPU
rResult = (double *)malloc( sizeof(double) * nDataSize );
memset(rResult,0,rowSize);
ocl_calc.oclHostArithmeticStash32Bits( "oclSignedAdd", dpLeftData, dpRightData, rResult, temp->getDataSize() );
formulaInterprt->srdDataPush( new SourceData(rResult, nDataSize) );
break;
}
case ocSub:
ocl_calc.OclHostArithmeticOperator32Bits("oclSignedSub",fpLeftData,fpRightData,rResult,nCount1);
{
unsigned int nDataSize = 0;
SourceData *temp = formulaInterprt->srdDataPop();
SourceData *temp2 = formulaInterprt->srdDataPop();
nDataSize = temp2->getDataSize();
dpLeftData = temp2->getDouleData();
dpRightData = temp->getDouleData();
nDataSize = temp2->getDataSize();
double *rResult = NULL; // Point to the output data from GPU
rResult = (double *)malloc( sizeof(double) * nDataSize );
memset( rResult, 0, rowSize );
ocl_calc.oclHostArithmeticStash32Bits( "oclSignedSub", dpLeftData, dpRightData, rResult, temp->getDataSize() );
formulaInterprt->srdDataPush( new SourceData( rResult,nDataSize ) );
break;
}
case ocMul:
ocl_calc.OclHostArithmeticOperator32Bits("oclSignedMul",fpLeftData,fpRightData,rResult,nCount1);
{
unsigned int nDataSize = 0;
SourceData *temp = formulaInterprt->srdDataPop();
SourceData *temp2 = formulaInterprt->srdDataPop();
nDataSize = temp2->getDataSize();
dpLeftData = temp2->getDouleData();
dpRightData = temp->getDouleData();
nDataSize = temp2->getDataSize();
double *rResult = NULL; // Point to the output data from GPU
rResult = (double *)malloc(sizeof(double) * nDataSize );
memset( rResult, 0, rowSize );
ocl_calc.oclHostArithmeticStash32Bits( "oclSignedMul", dpLeftData, dpRightData, rResult, temp->getDataSize() );
formulaInterprt->srdDataPush( new SourceData( rResult, nDataSize ) );
break;
}
case ocDiv:
ocl_calc.OclHostArithmeticOperator32Bits("oclSignedDiv",fpLeftData,fpRightData,rResult,nCount1);
{
unsigned int nDataSize = 0;
SourceData *temp = formulaInterprt->srdDataPop();
SourceData *temp2 = formulaInterprt->srdDataPop();
nDataSize = temp2->getDataSize();
dpLeftData = temp2->getDouleData();
dpRightData = temp->getDouleData();
nDataSize = temp2->getDataSize();
double *rResult = NULL; // Point to the output data from GPU
rResult = (double *)malloc( sizeof(double) * nDataSize );
memset( rResult, 0, rowSize );
ocl_calc.oclHostArithmeticStash32Bits( "oclSignedDiv", dpLeftData, dpRightData, rResult, temp->getDataSize() );
formulaInterprt->srdDataPush( new SourceData(rResult, nDataSize) );
break;
}
case ocMax:
ocl_calc.OclHostFormulaStatistics32Bits("oclFormulaMax",fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
{
unsigned int nDataSize = 0;
SourceData *temp = formulaInterprt->srdDataPop();
nDataSize = temp->getDataSize();
dpOclSrcData = temp->getDouleData();
double *rResult = NULL; // Point to the output data from GPU
rResult = (double *)malloc(sizeof(double) * nDataSize );
memset(rResult,0,rowSize);
ocl_calc.oclHostFormulaStash32Bits( "oclFormulaMax", dpOclSrcData, npOclStartPos, npOclEndPos, rResult,nDataSize, rowSize );
formulaInterprt->srdDataPush( new SourceData( rResult, rowSize ) );
break;
}
case ocMin:
ocl_calc.OclHostFormulaStatistics32Bits("oclFormulaMin",fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
{
unsigned int nDataSize = 0;
SourceData *temp = formulaInterprt->srdDataPop();
nDataSize = temp->getDataSize();
dpOclSrcData = temp->getDouleData();
double *rResult = NULL; // Point to the output data from GPU
rResult = (double *)malloc( sizeof(double) * nDataSize );
memset( rResult, 0, rowSize );
ocl_calc.oclHostFormulaStash32Bits( "oclFormulaMin", dpOclSrcData, npOclStartPos, npOclEndPos, rResult, nDataSize, rowSize );
formulaInterprt->srdDataPush( new SourceData( rResult, rowSize) );
break;
}
case ocAverage:
ocl_calc.OclHostFormulaStatistics32Bits("oclFormulaAverage",fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
break;
case ocSum:
ocl_calc.OclHostFormulaStatistics32Bits("oclFormulaSum",fpOclSrcData,npOclStartPos,npOclEndPos,rResult,rowSize);
break;
case ocCount:
ocl_calc.OclHostFormulaCount32Bits(npOclStartPos,npOclEndPos,rResult,rowSize);
break;
case ocSumProduct:
ocl_calc.OclHostFormulaSumProduct32Bits(fpSumProMergeLfData,fpSumProMergeRtData,npSumSize,rResult,rowSize);
{
unsigned int nDataSize = 0;
SourceData *temp = formulaInterprt->srdDataPop();
nDataSize = temp->getDataSize();
dpOclSrcData = temp->getDouleData();
double *rResult = NULL; // Point to the output data from GPU
rResult = (double *)malloc( sizeof(double) * nDataSize );
memset( rResult, 0, rowSize);
ocl_calc.oclHostFormulaStash32Bits( "oclFormulaAverage", dpOclSrcData, npOclStartPos, npOclEndPos, rResult, nDataSize, rowSize );
formulaInterprt->srdDataPush( new SourceData( rResult, rowSize) );
break;
}
default:
fprintf(stderr,"No OpenCL function for this calculation.\n");
break;
}
}
return NULL;
}
ScMatrixRef FormulaGroupInterpreterOpenCL::inverseMatrix( const ScMatrix& rMat )
{
SCSIZE nC, nR;
rMat.GetDimensions( nC, nR );
if ( nC != nR || nC == 0 )
// Input matrix must be square. Return an empty matrix on failure and
// the caller will calculate it via CPU.
return ScMatrixRef();
// This vector will contain a series of doubles from the first column to
// the last, chained together in a single array.
std::vector<double> aDoubles;
rMat.GetDoubleArray(aDoubles);
/////////////////////////////////////////////////////
osl_getSystemTime(&aTimeAfter);
double diff = getTimeDiff(aTimeAfter, aTimeBefore);
//if (diff >= 1.0)
float * fpOclMatrixSrc = NULL;
float * fpOclMatrixDst = NULL;
double * dpOclMatrixSrc = NULL;
double * dpOclMatrixDst = NULL;
uint nMatrixSize = nC * nR;
static OclCalc aOclCalc;
if ( aOclCalc.getOpenclState() )
{
if ( aOclCalc.gpuEnv.mnKhrFp64Flag == 1 || aOclCalc.gpuEnv.mnAmdFp64Flag == 1 )
{
fprintf(stderr,"OpenCL,diff...%f.\n",diff);
aOclCalc.createBuffer64Bits( dpOclMatrixSrc, dpOclMatrixDst, nMatrixSize );
for ( uint i = 0; i < nC; i++ )
for ( uint j = 0; j < nR; j++ )
dpOclMatrixSrc[i*nC+j] = aDoubles[j*nR+i];
aOclCalc.oclHostMatrixInverse64Bits( "oclFormulaMtxInv", dpOclMatrixSrc, dpOclMatrixDst,aDoubles, nR );
}
else
{
aOclCalc.createBuffer32Bits( fpOclMatrixSrc, fpOclMatrixDst, nMatrixSize );
for ( uint i = 0; i < nC; i++ )
for ( uint j = 0; j < nR; j++ )
fpOclMatrixSrc[i*nC+j] = (float) aDoubles[j*nR+i];
aOclCalc.oclHostMatrixInverse32Bits( "oclFormulaMtxInv", fpOclMatrixSrc, fpOclMatrixDst, aDoubles, nR );
}
}
/////////////////////////////////////////////////////
//rResult[i];
// for(sal_Int32 i = 0; i < rowSize; ++i){//dbg output results
// fprintf(stderr,"After GPU,rRsults[%d] is ...%f\n",i,rResult[i]);
// }
// TODO: Inverse this matrix and put the result back into xInv. Right now,
// I'll just put the original, non-inversed matrix values back, just to
// demonstrate how to put the values back after inversion. There are two
// ways to put the values back (depending on what the GPU output is).
ScMatrixRef xInv(new ScMatrix(nR, nR, 0.0));
// Insert the double data, in rResult[i] back into the document
rDoc.SetFormulaResults(rTopPos, rResult, xGroup->mnLength);
#if 0
// One way is to put the whole value as one array. This method assumes
// that the array size equals column x row, and is oriented column-wise.
// This method is slightly more efficient than the second, but I wouldn't
// expect too much of a difference.
xInv->PutDouble(&aDoubles[0], aDoubles.size(), 0, 0);
#else
// Another way is to put the values one column at a time.
const double* p = &aDoubles[0];
for( SCSIZE i = 0; i < nC; ++i )
{
xInv->PutDouble( p, nR, i, 0 );
p += nR;
}
#endif
return xInv;
}
bool FormulaGroupInterpreterOpenCL::interpret( ScDocument& rDoc, const ScAddress& rTopPos,
const ScFormulaCellGroupRef& xGroup, ScTokenArray& rCode )
{
generateRPNCode( rDoc, rTopPos, rCode );
mnRowSize = xGroup->mnLength;
fprintf( stderr,"mnRowSize at begin is ...%ld.\n",(long)mnRowSize );
// The row quantity can be gotten from p2->GetArrayLength()
int nOclOp = 0;
const double * dpOclSrcData = NULL;
const double * dpBinaryData = NULL;
static OclCalc ocl_calc;
unsigned int nSrcDataSize = 0;
free(rResult);
const double *dpResult = NULL;
double *pResult = (double *)malloc(sizeof(double) * mnRowSize);
double *dpSvDouble = NULL;
bool isSample = false;
mnSingleCount = 0;
mnDoubleCount = 0;
mnSvDoubleCount = 0;
mnOperatorCount = 0;
mnPositonLen = 0;
if ( ocl_calc.getOpenclState() )
{
getPosition(rCode,xGroup,mnRowSize,mnpOclStartPos,mnpOclEndPos,&mnPositonLen);
const formula::FormulaToken* p = rCode.FirstRPN();
bool isSingle = false;
int nCountNum=0;
do
{
if ( ocPush != p->GetOpCode())
{
nOclOp = p->GetOpCode();
mnOperatorGroup[mnOperatorCount++] = nOclOp;
}
else if( ocPush == p->GetOpCode() && formula::svSingleVectorRef == p->GetType() )
{
mnSingleCount++;
}
if ( ocPush == p->GetOpCode() && formula::svDouble == p->GetType() )
{
mnSvDoubleCount++;
}
} while ( NULL != ( p = rCode.NextRPN() ) );
if( isGroundWater() )
{
isSample = true;
}
mnOperatorCount = 0;
mnSingleCount = 0;
mnSvDoubleCount = 0;
p = rCode.FirstRPN();
if(isSample)
{
do
{
if ( ocPush == p->GetOpCode() && formula::svDouble == p->GetType() )
{
mdpSvdouble[mnSvDoubleCount++] = p->GetDouble();
}
else if( ocPush == p->GetOpCode() && formula::svDoubleVectorRef == p->GetType())
{
const formula::DoubleVectorRefToken* pDvr = static_cast< const formula::DoubleVectorRefToken* >( p );
const std::vector< const double* >& rArrays = pDvr->GetArrays();
uint rArraysSize = rArrays.size();
int nMoreColSize = 0;
DoubleVectorFormula *SvDoubleTemp = new DoubleVectorFormula();
if( rArraysSize > 1 )
{
double *dpMoreColData = NULL;
for ( uint loop=0; loop < rArraysSize; loop++ )
{
dpOclSrcData = rArrays[loop];
nSrcDataSize = pDvr->GetArrayLength();
nMoreColSize += nSrcDataSize;
dpMoreColData = (double *) realloc(dpMoreColData,nMoreColSize * sizeof(double));
for ( uint j = nMoreColSize - nSrcDataSize, i = 0; i < nSrcDataSize; i++, j++ )
{
dpMoreColData[j] = dpOclSrcData[i];
}
}
dpOclSrcData = dpMoreColData;
nSrcDataSize = nMoreColSize;
}
else
{
dpOclSrcData = rArrays[0];
nSrcDataSize = pDvr->GetArrayLength();
SvDoubleTemp->mdpInputData = dpOclSrcData;
SvDoubleTemp->mnInputDataSize = nSrcDataSize;
SvDoubleTemp->mnInputStartPosition = mnpOclStartPos[nCountNum*mnRowSize];
SvDoubleTemp->mnInputEndPosition = mnpOclEndPos[nCountNum*mnRowSize];
SvDoubleTemp->mnInputStartOffset = mnpOclStartPos[nCountNum*mnRowSize+1]-mnpOclStartPos[nCountNum*mnRowSize];
SvDoubleTemp->mnInputEndOffset = mnpOclEndPos[nCountNum*mnRowSize+1]-mnpOclEndPos[nCountNum*mnRowSize];
mDoubleArray[mnDoubleCount++] = SvDoubleTemp;
nCountNum++;
}
}
else if( ocPush == p->GetOpCode() && formula::svSingleVectorRef == p->GetType() )
{
const formula::SingleVectorRefToken* pSvr = static_cast<const formula::SingleVectorRefToken*>( p );
dpBinaryData = pSvr->GetArray();
uint nArrayLen = pSvr->GetArrayLength();
SingleVectorFormula *SignleTemp = new SingleVectorFormula() ;
if(isSingle)
{
SignleTemp = mSingleArray[--mnSingleCount];
SignleTemp->mdpInputRightData = dpBinaryData;
SignleTemp->mnInputRightDataSize = nArrayLen;
SignleTemp->mnInputRightStartPosition = 0;
SignleTemp->mnInputRightOffset = 0;
isSingle = false;
}
else
{
SignleTemp = new SingleVectorFormula();
SignleTemp->mdpInputLeftData = dpBinaryData;
SignleTemp->mnInputLeftDataSize = nArrayLen;
SignleTemp->mdpInputRightData = NULL;
SignleTemp->mnInputRightDataSize = 0;
SignleTemp->mnInputLeftStartPosition = 0;
SignleTemp->mnInputLeftOffset = 0;
isSingle = true;
}
mSingleArray[mnSingleCount++] = SignleTemp;
}
else
{
nOclOp = p->GetOpCode();
mnOperatorGroup[mnOperatorCount++] = nOclOp;
}
} while ( NULL != ( p = rCode.NextRPN() ) );
if ( !chooseFunction( ocl_calc, pResult ) )
return false;
else
dpResult = pResult;
}
else
{
agency aChooseAction;
do
{
if ( ocPush == p->GetOpCode() && formula::svDouble == p->GetType() )
{
dpSvDouble = (double *) malloc( sizeof(double ) * mnRowSize );
double dTempValue = p->GetDouble();
for ( uint i = 0; i < mnRowSize; i++ )
dpSvDouble[i] = dTempValue;
srdDataPush( new SourceData( dpSvDouble, mnRowSize ) );
collectDoublePointers( dpSvDouble );
}
else if( ocPush == p->GetOpCode() && formula::svDoubleVectorRef == p->GetType())
{
const formula::DoubleVectorRefToken* pDvr = static_cast< const formula::DoubleVectorRefToken* >( p );
const std::vector< const double* >& rArrays = pDvr->GetArrays();
unsigned int rArraysSize = rArrays.size();
int nMoreColSize = 0;
if(rArraysSize > 1)
{
double *dpMoreColData = NULL;
for( uint loop=0; loop < rArraysSize; loop++ )
{
dpOclSrcData = rArrays[loop];
nSrcDataSize = pDvr->GetArrayLength();
nMoreColSize += nSrcDataSize;
dpMoreColData = (double *) realloc(dpMoreColData,nMoreColSize * sizeof(double));
for(uint j=nMoreColSize-nSrcDataSize,i=0;i<nSrcDataSize;i++,j++)
{
dpMoreColData[j] = dpOclSrcData[i];
}
}
dpOclSrcData = dpMoreColData;
nSrcDataSize = nMoreColSize;
collectDoublePointers( dpMoreColData );
}
else
{
dpOclSrcData = rArrays[0];
nSrcDataSize = pDvr->GetArrayLength();
}
srdDataPush( new SourceData( dpOclSrcData,nSrcDataSize,rArraysSize ) );
}
else if( ocPush == p->GetOpCode() && formula::svSingleVectorRef == p->GetType() )
{
const formula::SingleVectorRefToken* pSvr = static_cast<const formula::SingleVectorRefToken*>( p );
dpBinaryData = pSvr->GetArray();
nSrcDataSize = pSvr->GetArrayLength();
srdDataPush( new SourceData( dpBinaryData, nSrcDataSize ) );
}
else
{
nOclOp = p->GetOpCode();
aChooseAction.calculate(nOclOp,mnRowSize,ocl_calc,mnpOclStartPos,mnpOclEndPos,this);
mnSingleCount = 0;
mnDoubleCount = 0;
mnSvDoubleCount = 0;
mnOperatorCount = 0;
mnPositonLen = 0;
}
} while ( NULL != ( p = rCode.NextRPN() ) );
SourceData * sResult = srdDataPop();
dpResult = sResult->getDouleData();
}
rDoc.SetFormulaResults( rTopPos, dpResult, mnRowSize );
freeDoublePointers();
if ( pResult )
{
free( pResult );
pResult = NULL;
}
if ( mnpOclStartPos )
{
free( mnpOclStartPos );
mnpOclStartPos = NULL;
}
if ( mnpOclEndPos )
{
free( mnpOclEndPos );
mnpOclEndPos = NULL;
}
return true;
} // getOpenclState() End
else
return false;
}
/// Special case of formula compiler for groundwatering
......@@ -489,11 +964,11 @@ public:
FormulaGroupInterpreterSoftware()
{
fprintf(stderr,"\n\n ***** Groundwater Backend *****\n\n\n");
OclCalc::InitEnv();
OclCalc::initEnv();
}
virtual ~FormulaGroupInterpreterGroundwater()
{
OclCalc::ReleaseOpenclRunEnv();
OclCalc::releaseOpenclRunEnv();
}
virtual ScMatrixRef inverseMatrix(const ScMatrix& /* rMat */) { return ScMatrixRef(); }
......@@ -569,7 +1044,7 @@ bool FormulaGroupInterpreterGroundwater::interpretCL(ScDocument& rDoc, const ScA
fprintf (stderr, "Calculate !");
double *pResult = ocl_calc.OclSimpleDeltaOperation( eOp, pGroundWaterDataArray,
double *pResult = ocl_calc.oclSimpleDeltaOperation( eOp, pGroundWaterDataArray,
pArrayToSubtractOneElementFrom,
(size_t) xGroup->mnLength, delta );
RETURN_IF_FAIL(pResult != NULL, "buffer alloc / calculaton failed");
......
......@@ -7,8 +7,8 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#ifndef _OCL_KERNEL_H_
#define _OCL_KERNEL_H_
#ifndef SC_OCLKERNELS_HXX
#define SC_OCLKERNELS_HXX
#ifndef USE_EXTERNAL_KERNEL
#define KERNEL( ... )# __VA_ARGS__
......@@ -24,6 +24,97 @@ const char *kernel_src = KERNEL(
\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n
\n#else\n
\n#endif\n
inline fp_t oclAverage( const uint id,__global fp_t *values,__global uint *startArray,__global uint *endArray)
{
uint start = startArray[id];
uint end = endArray[id];
fp_t fSum = 0.0;
fp_t zero[16] = {0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f,0.0f};
fp_t16 vSum=vload16(0,zero);
fp_t16 ptr;
__global fp_t *p = values;
p+= start;
for(int i = 0; i < (end - start + 1)/16; ++i)
{
ptr=vload16(0,p);
vSum += ptr;
p+=16;
}
int lastData = (end-start+1)%16;
for(int i = 0; i <lastData; i++)
{
fSum += *p;
p+=1;
}
vSum.s01234567 = vSum.s01234567+vSum.s89abcdef;
vSum.s0123 = vSum.s0123+vSum.s4567;
vSum.s01 = vSum.s01+vSum.s23;
vSum.s0 = vSum.s0+vSum.s1;
fSum = vSum.s0+fSum;
fp_t fVal = fSum/(end-start+1);
return fVal;
}
inline fp_t oclMax( const uint id,__global fp_t *values,__global uint *startArray,__global uint *endArray)
{
uint start = startArray[id];
uint end = endArray[id];
fp_t fMax = values[start];
fp_t zero[16] = {fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax,fMax};
fp_t16 vMax=vload16(0,zero);
//Max
fp_t16 ptr;
__global fp_t *p = values;
p+= start;
for(int i = 0; i < (end - start + 1)/16; ++i)
{
ptr=vload16(0,p);
vMax = fmax(vMax,ptr);
p+=16;
}
int lastData = (end-start+1)%16;
for(int i = 0; i <lastData; i++)
{
fMax = fmax(fMax,*p);
p+=1;
}
vMax.s01234567 = fmax(vMax.s01234567, vMax.s89abcdef);
vMax.s0123 = fmax(vMax.s0123, vMax.s4567);
vMax.s01 = fmax(vMax.s01, vMax.s23);
vMax.s0 = fmax(vMax.s0, vMax.s1);
fMax = fmax(vMax.s0, fMax);
return fMax;
}
inline fp_t oclMin( const uint id,__global fp_t *values,__global uint *startArray,__global uint *endArray)
{
uint start = startArray[id];
uint end = endArray[id];
fp_t fMin = values[start];
fp_t zero[16] = {fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin,fMin};
fp_t16 vMin=vload16(0,zero);
//Min
fp_t16 ptr;
__global fp_t *p = values;
p+= start;
for(int i = 0; i < (end - start + 1)/16; ++i)
{
ptr=vload16(0,p);
vMin = fmin(vMin,ptr);
p+=16;
}
int lastData = (end-start+1)%16;
for(int i = 0; i <lastData; i++)
{
fMin = fmin(fMin,*p);
p+=1;
}
vMin.s01234567 = fmin(vMin.s01234567, vMin.s89abcdef);
vMin.s0123 = fmin(vMin.s0123, vMin.s4567);
vMin.s01 = fmin(vMin.s01, vMin.s23);
vMin.s0 = fmin(vMin.s0, vMin.s1);
fMin = fmin(vMin.s0, fMin);
return fMin;
}
__kernel void oclSignedAdd(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
{
......@@ -31,7 +122,6 @@ __kernel void oclSignedAdd(__global fp_t *ltData,__global fp_t *rtData,__global
otData[id] = ltData[id] + rtData[id];
}
__kernel void oclSignedSub(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
{
const unsigned int id = get_global_id(0);
......@@ -41,39 +131,31 @@ __kernel void oclSignedSub(__global fp_t *ltData,__global fp_t *rtData,__global
__kernel void oclSignedMul(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
{
int id = get_global_id(0);
otData[id] =ltData[id] * rtData[id];
otData[id] = ltData[id] * rtData[id];
}
__kernel void oclSignedDiv(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
{
const unsigned int id = get_global_id(0);
otData[id] = ltData[id] / rtData[id];
fp_t divisor = rtData[id];
if ( divisor != 0 )
otData[id] = ltData[id] / divisor;
else
otData[id] = 0.0;
}
__kernel void oclFormulaMin(__global fp_t *input,__global uint *start,__global uint *end,__global fp_t *output)
{
const unsigned int id = get_global_id(0);
unsigned int startFlag = start[id];
unsigned int endFlag = end[id];
fp_t fMinVal = input[startFlag];
for(int i=startFlag;i<=endFlag;i++)
{
fMinVal = fmin( fMinVal, input[i] );
}
output[id] = fMinVal;
fp_t fVal = oclMin(id,input,start,end);
output[id] = fVal ;
}
__kernel void oclFormulaMax(__global fp_t *input,__global uint *start,__global uint *end,__global fp_t *output)
{
const unsigned int id = get_global_id(0);
unsigned int startFlag = start[id];
unsigned int endFlag = end[id];
fp_t fMaxVal = input[startFlag];
for ( int i = startFlag; i <= endFlag; i++ )
{
fMaxVal = fmax( fMaxVal, input[i] );
}
output[id] = fMaxVal;
fp_t fVal = oclMax(id,input,start,end);
output[id] = fVal ;
}
//Sum
__kernel void oclFormulaSum(__global fp_t *input,__global uint *start,__global uint *end,__global fp_t *output)
......@@ -94,12 +176,10 @@ __kernel void oclFormulaCount(__global uint *start,__global uint *end,__global f
__kernel void oclFormulaAverage(__global fp_t *input,__global uint *start,__global uint *end,__global fp_t *output)
{
const unsigned int id = get_global_id(0);
fp_t sum=0.0;
for(int i = start[id];i<=end[id];i++)
sum += input[i];
output[id] = sum / (end[id]-start[id]+1);
}
fp_t fVal = oclAverage(id,input,start,end);
output[id] = fVal ;
}
//Sumproduct
__kernel void oclFormulaSumproduct(__global fp_t *firstCol,__global uint* npSumSize,__global fp_t *output,uint nMatixSize)
{
......@@ -147,7 +227,7 @@ __kernel void oclMinDelta(__global fp_t *values, __global fp_t *subtract, uint s
// Min
fp_t fMinVal = values[start];
for(int i=start+1;i < end;i++)
for ( int i = start + 1; i < end; i++ )
{
if(values[i]<fMinVal)
fMinVal = values[i];
......@@ -177,14 +257,14 @@ __kernel void oclFormulaMtxInv(__global fp_t * fpMatrixInput, __global fp_t * fp
fpP[nOffset*nDimension+nId] = fpP[nMax*nDimension+nId];
fpP[nMax*nDimension+nId] = dMovebuffer;
}
__kernel void oclMatrixSolve(__global fp_t * fpMatrixInput,__global fp_t * fpMatrixOutput,__global fp_t * fpP,__global fp_t * fpY)
__kernel void oclMatrixSolve(__global fp_t * fpMatrixInput,__global fp_t * fpMatrixOutput,__global fp_t * fpP,__global fp_t * fpY,__global uint* npDim)
{
int nId = get_global_id(0);
int nDimension = get_global_size(0);
int nDimension = npDim[nId];
fp_t fsum = 0.0;
for ( int yi=0; yi < nDimension; yi++ )
{
fp_t fsum = 0.0;
fsum = 0.0;
for ( int yj=0; yj < nDimension; yj++ )
{
fsum += fpMatrixInput[yi*nDimension+yj] * fpY[nId+yj*nDimension];
......@@ -194,7 +274,7 @@ __kernel void oclMatrixSolve(__global fp_t * fpMatrixInput,__global fp_t * fpMat
}
for ( int xi = nDimension - 1; xi >= 0; xi-- )
{
fp_t fsum = 0.0;
fsum = 0.0;
for ( int xj = 0; xj < nDimension; xj++ )
{
fsum += fpMatrixInput[xi*nDimension+xj] * fpMatrixOutput[nId+nDimension*xj];
......@@ -203,6 +283,101 @@ __kernel void oclMatrixSolve(__global fp_t * fpMatrixInput,__global fp_t * fpMat
}
}
__kernel void oclAverageAdd(__global fp_t *values,__global fp_t *addend, __global uint *startArray, __global uint *endArray, __global fp_t *output)
{
const unsigned int id = get_global_id(0);
fp_t fVal = oclAverage(id,values,startArray,endArray);
output[id] = fVal + addend[id];
}
__kernel void oclAverageSub(__global fp_t *values,__global fp_t *subtract, __global uint *startArray, __global uint *endArray, __global fp_t *output)
{
const unsigned int id = get_global_id(0);
fp_t fVal = oclAverage(id,values,startArray,endArray);
output[id] = fVal - subtract[id];
}
__kernel void oclAverageMul(__global fp_t *values,__global fp_t *multiplier, __global uint *startArray, __global uint *endArray, __global fp_t *output)
{
const unsigned int id = get_global_id(0);
fp_t fVal = oclAverage(id,values,startArray,endArray);
output[id] = fVal * multiplier[id];
}
__kernel void oclAverageDiv(__global fp_t *values,__global fp_t *div, __global uint *startArray, __global uint *endArray, __global fp_t *output)
{
const unsigned int id = get_global_id(0);
fp_t fVal = oclAverage(id,values,startArray,endArray);
fp_t divisor = div[id];
if ( divisor != 0 )
output[id] = fVal / divisor;
else
output[id] = 0.0;
}
__kernel void oclMinAdd(__global fp_t *values, __global fp_t *addend, __global uint *startArray, __global uint *endArray, __global fp_t *output)
{
const unsigned int id = get_global_id(0);
fp_t fMin = oclMin(id,values,startArray,endArray);
output[id] = fMin + addend[id];
}
__kernel void oclMinSub(__global fp_t *values, __global fp_t *subtract, __global uint *startArray, __global uint *endArray, __global fp_t *output)
{
const unsigned int id = get_global_id(0);
fp_t fMin = oclMin(id,values,startArray,endArray);
output[id] = fMin - subtract[id];
}
__kernel void oclMinMul(__global fp_t *values, __global fp_t *multiplier, __global uint *startArray, __global uint *endArray, __global fp_t *output)
{
const unsigned int id = get_global_id(0);
fp_t fMin = oclMin(id,values,startArray,endArray);
output[id] = fMin * multiplier[id];
}
__kernel void oclMinDiv(__global fp_t *values, __global fp_t *div, __global uint *startArray, __global uint *endArray, __global fp_t *output)
{
const unsigned int id = get_global_id(0);
fp_t fMin = oclMin(id,values,startArray,endArray);
fp_t divisor = div[id];
if ( divisor != 0 )
output[id] = fMin / divisor;
else
output[id] = 0.0;
}
__kernel void oclMaxAdd(__global fp_t *values, __global fp_t *addend, __global uint *startArray, __global uint *endArray, __global fp_t *output)
{
const unsigned int id = get_global_id(0);
fp_t fMax = oclMax(id,values,startArray,endArray);
output[id] = fMax + addend[id];
}
__kernel void oclMaxSub(__global fp_t *values, __global fp_t *subtract, __global uint *startArray, __global uint *endArray, __global fp_t *output)
{
const unsigned int id = get_global_id(0);
fp_t fMax = oclMax(id,values,startArray,endArray);
output[id] = fMax - subtract[id];
}
__kernel void oclMaxMul(__global fp_t *values, __global fp_t *multiplier, __global uint *startArray, __global uint *endArray, __global fp_t *output)
{
const unsigned int id = get_global_id(0);
fp_t fMax = oclMax(id,values,startArray,endArray);
output[id] = fMax * multiplier[id];
}
__kernel void oclMaxDiv(__global fp_t *values, __global fp_t *div, __global uint *startArray, __global uint *endArray, __global fp_t *output)
{
const unsigned int id = get_global_id(0);
fp_t fMax = oclMax(id,values,startArray,endArray);
fp_t divisor = div[id];
if ( divisor != 0 )
output[id] = fMax / divisor;
else
output[id] = 0.0;
}
__kernel void oclSub( fp_t ltData, __global fp_t *rtData, __global fp_t *outData )
{
const unsigned int id = get_global_id(0);
outData[id] = ltData - rtData[id];
}
);
#endif // USE_EXTERNAL_KERNEL
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -7,8 +7,8 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#ifndef SC_OPENCL_WRAPPER_H
#define SC_OPENCL_WRAPPER_H
#ifndef SC_OPENCLWRAPPER_HXX
#define SC_OPENCLWRAPPER_HXX
#include <config_features.h>
#include <formula/opcode.hxx>
......@@ -87,6 +87,19 @@ if( status != CL_SUCCESS ) \
return 0; \
}
#define CHECK_OPENCL_VOID(status,name) \
if( status != CL_SUCCESS ) \
{ \
printf ("OpenCL error code is %d at " SAL_DETAIL_WHERE " when %s .\n", status, name); \
}
#define CHECK_OPENCL_RELEASE(status,name) \
if ( name != NULL ) \
clReleaseMemObject( name ); \
if( status != CL_SUCCESS ) \
{ \
printf ("OpenCL error code is %d at " SAL_DETAIL_WHERE " when clReleaseMemObject( %s ).\n", status, #name); \
}
#define MAX_KERNEL_STRING_LEN 64
#define MAX_CLFILE_NUM 50
......@@ -119,25 +132,48 @@ typedef struct
char kernelName[MAX_KERNEL_NAME_LEN + 1];
char *kernelStr;
} kernel_node;
typedef struct _SingleVectorFormula
{
const double *mdpInputLeftData;
const double *mdpInputRightData;
size_t mnInputLeftDataSize;
size_t mnInputRightDataSize;
uint mnInputLeftStartPosition;
uint mnInputRightStartPosition;
int mnInputLeftOffset;
int mnInputRightOffset;
} SingleVectorFormula;
typedef struct _DoubleVectorFormula
{
const double *mdpInputData;
size_t mnInputDataSize;
uint mnInputStartPosition;
uint mnInputEndPosition;
int mnInputStartOffset;
int mnInputEndOffset;
} DoubleVectorFormula;
class OpenclCalcBase
{
public:
OpenclCalcBase(){};
virtual ~OpenclCalcBase(){};
virtual int OclHostArithmeticOperator64Bits( const char* aKernelName, double *fpLeftData, double *fpRightData, double *&rResult, int nRowSize )=0;
virtual int OclHostFormulaStatistics64Bits( const char* aKernelName, double *fpSrcData, uint *npStartPos, uint *npEndPos, double *&output, int outputSize )=0;
virtual int OclHostFormulaCount64Bits( uint *npStartPos, uint *npEndPos, double *&dpOutput, int nSize)=0;
virtual int OclHostFormulaSumProduct64Bits( double *fpSumProMergeLfData, double *fpSumProMergeRrData, uint *npSumSize, double *&dpOutput, int nSize )=0;
virtual int OclHostMatrixInverse64Bits( const char* aKernelName, double *dpOclMatrixSrc, double *dpOclMatrixDst, std::vector<double>& dpResult, uint nDim)=0;
virtual int oclHostArithmeticOperator64Bits( const char* aKernelName, double *&rResult, int nRowSize )=0;
virtual int oclMoreColHostArithmeticOperator64Bits( int nDataSize,int neOpSize,double *rResult, int nRowSize )=0;
virtual int oclHostFormulaStatistics64Bits( const char* aKernelName,double *&output, int outputSize )=0;
virtual int oclHostFormulaCount64Bits( uint *npStartPos, uint *npEndPos, double *&dpOutput, int nSize)=0;
virtual int oclHostFormulaSumProduct64Bits( double *fpSumProMergeLfData, double *fpSumProMergeRrData, uint *npSumSize, double *&dpOutput, int nSize )=0;
virtual int oclHostMatrixInverse64Bits( const char* aKernelName, double *dpOclMatrixSrc, double *dpOclMatrixDst,std::vector<double>&dpResult, uint nDim)=0;
virtual int oclMoreColHostArithmeticOperator32Bits( int nDataSize,int neOpSize,double *rResult, int nRowSize )=0;
virtual int OclHostArithmeticOperator32Bits( const char* aKernelName, float *fpLeftData, float *fpRightData, double *rResult, int nRowSize )=0;
virtual int OclHostFormulaStatistics32Bits( const char* aKernelName, float *fpSrcData, uint *npStartPos, uint *npEndPos, double *output, int outputSize )=0;
virtual int OclHostFormulaCount32Bits( uint *npStartPos, uint *npEndPos, double *dpOutput, int nSize)=0;
virtual int OclHostFormulaSumProduct32Bits( float *fpSumProMergeLfData, float *fpSumProMergeRrData, uint *npSumSize, double *dpOutput, int nSize )=0;
virtual int OclHostMatrixInverse32Bits( const char* aKernelName, float *fpOclMatrixSrc, float *fpOclMatrixDst, std::vector<double>& dpResult, uint nDim )=0;
virtual int oclHostArithmeticOperator32Bits( const char* aKernelName, double *rResult, int nRowSize )=0;
virtual int oclHostFormulaStatistics32Bits( const char* aKernelName,double *output, int outputSize )=0;
virtual int oclHostFormulaCount32Bits( uint *npStartPos, uint *npEndPos, double *dpOutput, int nSize)=0;
virtual int oclHostFormulaSumProduct32Bits( float *fpSumProMergeLfData, float *fpSumProMergeRrData, uint *npSumSize, double *dpOutput, int nSize )=0;
virtual int oclHostMatrixInverse32Bits( const char* aKernelName, float *fpOclMatrixSrc, float *fpOclMatrixDst, std::vector<double>& dpResult, uint nDim )=0;
virtual double *OclSimpleDeltaOperation( OpCode eOp, const double *pOpArray, const double *pSubtractSingle, size_t nElements, double delta )=0;
virtual int oclGroundWaterGroup( uint *eOp, uint eOpNum, const double *pOpArray, const double *pSubtractSingle, size_t nSrcDataSize,size_t nElements, double delta,uint *nStartPos,uint *nEndPos ,double *deResult)=0;
virtual double *oclSimpleDeltaOperation( OpCode eOp, const double *pOpArray, const double *pSubtractSingle, size_t nElements, double delta )=0;
};
......@@ -151,40 +187,40 @@ public:
static int isInited;
OpenclDevice();
~OpenclDevice();
static int InitEnv();
static int RegistOpenclKernel();
static int ReleaseOpenclRunEnv();
static int InitOpenclRunEnv( GPUEnv *gpu );
static int ReleaseOpenclEnv( GPUEnv *gpuInfo );
static int CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption );
static int InitOpenclRunEnv( int argc );
static int CachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * clFileName );
static int GeneratBinFromKernelSource( cl_program program, const char * clFileName );
static int WriteBinaryToFile( const char* fileName, const char* birary, size_t numBytes );
static int BinaryGenerated( const char * clFileName, FILE ** fhandle );
static int CompileKernelFile( const char *filename, GPUEnv *gpuInfo, const char *buildOption );
int InitOpenclAttr( OpenCLEnv * env );
int ReleaseKernel( KernelEnv * env );
int SetKernelEnv( KernelEnv *envInfo );
int CreateKernel( char * kernelname, KernelEnv * env );
int RunKernel( const char *kernelName, void **userdata );
int ConvertToString( const char *filename, char **source );
int CheckKernelName( KernelEnv *envInfo, const char *kernelName );
int RegisterKernelWrapper( const char *kernelName, cl_kernel_function function );
int RunKernelWrapper( cl_kernel_function function, const char * kernelName, void **usrdata );
int GetKernelEnvAndFunc( const char *kernelName, KernelEnv *env, cl_kernel_function *function );
static int initEnv();
static int registOpenclKernel();
static int releaseOpenclRunEnv();
static int initOpenclRunEnv( GPUEnv *gpu );
static int releaseOpenclEnv( GPUEnv *gpuInfo );
static int compileKernelFile( GPUEnv *gpuInfo, const char *buildOption );
static int initOpenclRunEnv( int argc );
static int cachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * clFileName );
static int generatBinFromKernelSource( cl_program program, const char * clFileName );
static int writeBinaryToFile( const char* fileName, const char* birary, size_t numBytes );
static int binaryGenerated( const char * clFileName, FILE ** fhandle );
static int compileKernelFile( const char *filename, GPUEnv *gpuInfo, const char *buildOption );
int initOpenclAttr( OpenCLEnv * env );
int releaseKernel( KernelEnv * env );
int setKernelEnv( KernelEnv *envInfo );
int createKernel( char * kernelname, KernelEnv * env );
int runKernel( const char *kernelName, void **userdata );
int convertToString( const char *filename, char **source );
int checkKernelName( KernelEnv *envInfo, const char *kernelName );
int registerKernelWrapper( const char *kernelName, cl_kernel_function function );
int runKernelWrapper( cl_kernel_function function, const char * kernelName, void **usrdata );
int getKernelEnvAndFunc( const char *kernelName, KernelEnv *env, cl_kernel_function *function );
#ifdef WIN32
static int LoadOpencl();
static int OpenclInite();
static void FreeOpenclDll();
static int loadOpencl();
static int openclInite();
static void freeOpenclDll();
#endif
int GetOpenclState();
void SetOpenclState( int state );
inline static int AddKernelConfig( int kCount, const char *kName );
int getOpenclState();
void setOpenclState( int state );
inline static int addKernelConfig( int kCount, const char *kName );
};
......@@ -201,6 +237,10 @@ public:
cl_mem mpClmemMergeLfData;
cl_mem mpClmemMergeRtData;
cl_mem mpClmemMatixSumSize;
cl_mem mpClmemeOp;
unsigned int nArithmeticLen;
unsigned int nFormulaLen;
unsigned int nClmemLen;
unsigned int nFormulaColSize;
unsigned int nFormulaRowSize;
......@@ -208,27 +248,49 @@ public:
~OclCalc();
// for 64bits double
int OclHostArithmeticOperator64Bits( const char* aKernelName, double *fpLeftData, double *fpRightData, double *&rResult, int nRowSize );
int OclHostFormulaStatistics64Bits( const char* aKernelName, double *fpSrcData, uint *npStartPos, uint *npEndPos, double *&output, int outputSize);
int OclHostFormulaCount64Bits( uint *npStartPos, uint *npEndPos, double *&dpOutput, int nSize );
int OclHostFormulaSumProduct64Bits( double *fpSumProMergeLfData, double *fpSumProMergeRrData, uint *npSumSize, double *&dpOutput, int nSize);
int OclHostMatrixInverse64Bits( const char* aKernelName, double *dpOclMatrixSrc, double *dpOclMatrixDst, std::vector<double>&dpResult, uint nDim );
int oclHostArithmeticOperator64Bits( const char* aKernelName, double *&rResult, int nRowSize );
int oclMoreColHostArithmeticOperator64Bits( int nDataSize,int neOpSize,double *rResult, int nRowSize );
int oclHostFormulaStatistics64Bits( const char* aKernelName, double *&output, int outputSize);
int oclHostFormulaStash64Bits( const char* aKernelName, const double* dpSrcData, uint *nStartPos, uint *nEndPos, double *output, int nBufferSize, int size);
int oclHostFormulaCount64Bits( uint *npStartPos, uint *npEndPos, double *&dpOutput, int nSize );
int oclHostFormulaSumProduct64Bits( double *fpSumProMergeLfData, double *fpSumProMergeRrData, uint *npSumSize, double *&dpOutput, int nSize);
int oclHostMatrixInverse64Bits( const char* aKernelName, double *dpOclMatrixSrc, double *dpOclMatrixDst, std::vector<double>&dpResult, uint nDim );
// for 32bits float
int OclHostArithmeticOperator32Bits( const char* aKernelName, float *fpLeftData, float *fpRightData, double *rResult, int nRowSize );
int OclHostFormulaStatistics32Bits( const char* aKernelName, float *fpSrcData, uint *npStartPos, uint *npEndPos, double *output, int outputSize);
int OclHostFormulaCount32Bits( uint *npStartPos, uint *npEndPos, double *dpOutput, int nSize );
int OclHostFormulaSumProduct32Bits( float *fpSumProMergeLfData, float *fpSumProMergeRrData, uint *npSumSize, double *dpOutput, int nSize );
int OclHostMatrixInverse32Bits( const char* aKernelName, float *fpOclMatrixSrc, float *fpOclMatrixDst, std::vector<double>& dpResult, uint nDim );
int oclHostArithmeticOperator32Bits( const char* aKernelName, double *rResult, int nRowSize );
int oclMoreColHostArithmeticOperator32Bits( int nDataSize,int neOpSize,double *rResult, int nRowSize );
int oclHostFormulaStatistics32Bits( const char* aKernelName, double *output, int outputSize);
int oclHostFormulaCount32Bits( uint *npStartPos, uint *npEndPos, double *dpOutput, int nSize );
int oclHostArithmeticStash64Bits( const char* aKernelName, const double *dpLeftData, const double *dpRightData, double *rResult,int nRowSize );
int oclHostFormulaSumProduct32Bits( float *fpSumProMergeLfData, float *fpSumProMergeRrData, uint *npSumSize, double *dpOutput, int nSize );
int oclHostMatrixInverse32Bits( const char* aKernelName, float *fpOclMatrixSrc, float *fpOclMatrixDst, std::vector<double>& dpResult, uint nDim );
// for groundwater
double *OclSimpleDeltaOperation( OpCode eOp, const double *pOpArray, const double *pSubtractSingle, size_t nElements, double delta );
int oclGroundWaterGroup( uint *eOp, uint eOpNum, const double *pOpArray, const double *pSubtractSingle,size_t nSrcDataSize, size_t nElements, double delta ,uint *nStartPos,uint *nEndPos,double *deResult);
double *oclSimpleDeltaOperation( OpCode eOp, const double *pOpArray, const double *pSubtractSingle, size_t nElements, double delta );
///////////////////////////////////////////////////////////////
int CreateBuffer64Bits( double *&dpSrcData, uint *&npStartPos, uint *&npEndPos, int nBufferSize );
int CreateBuffer64Bits( double *&dpLeftData, double *&dpRightData, int nBufferSize );
int CreateBuffer64Bits( double *&dpSumProMergeLfData, double *&dpSumProMergeRtData, uint *&npSumSize, int nMatixSize, int nBufferSize );
int CreateBuffer32Bits( float *&fpSrcData, uint *&npStartPos, uint *&npEndPos, int nBufferSize );
int CreateBuffer32Bits( float *&fpLeftData, float *&fpRightData, int nBufferSize );
int CreateBuffer32Bits( float *&fpSumProMergeLfData, float *&fpSumProMergeRtData, uint *&npSumSize, int nMatixSize, int nBufferSize );
int createBuffer64Bits( double *&dpLeftData, double *&dpRightData, int nBufferSize );
int mapAndCopy64Bits(const double *dpTempLeftData,const double *dpTempRightData,int nBufferSize );
int mapAndCopy64Bits(const double *dpTempSrcData,unsigned int *unStartPos,unsigned int *unEndPos,int nBufferSize ,int nRowsize);
int mapAndCopyArithmetic64Bits( const double *dpMoreArithmetic,int nBufferSize );
int mapAndCopyMoreColArithmetic64Bits( const double *dpMoreColArithmetic,int nBufferSize ,uint *npeOp,uint neOpSize );
int createMoreColArithmeticBuf64Bits( int nBufferSize, int neOpSize );
int createFormulaBuf64Bits( int nBufferSize, int rowSize );
int createArithmeticOptBuf64Bits( int nBufferSize );
int createBuffer32Bits( float *&fpLeftData, float *&fpRightData, int nBufferSize );
int mapAndCopy32Bits(const double *dpTempLeftData,const double *dpTempRightData,int nBufferSize );
int mapAndCopy32Bits(const double *dpTempSrcData,unsigned int *unStartPos,unsigned int *unEndPos,int nBufferSize ,int nRowsize);
int mapAndCopyArithmetic32Bits( const double *dpMoreColArithmetic, int nBufferSize );
int mapAndCopyMoreColArithmetic32Bits( const double *dpMoreColArithmetic,int nBufferSize ,uint *npeOp,uint neOpSize );
int createMoreColArithmeticBuf32Bits( int nBufferSize, int neOpSize );
int createFormulaBuf32Bits( int nBufferSize, int rowSize );
int createArithmeticOptBuf32Bits( int nBufferSize );
int oclHostFormulaStash32Bits( const char* aKernelName, const double* dpSrcData, uint *nStartPos, uint *nEndPos, double *output, int nBufferSize, int size );
int oclHostArithmeticStash32Bits( const char* aKernelName, const double *dpLeftData, const double *dpRightData, double *rResult,int nRowSize );
int releaseOclBuffer(void);
friend class agency;
};
#endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment