Kaydet (Commit) ff983712 authored tarafından Wei Wei Kaydeden (commit) I-Jui (Ray) Sung

GPU calc: refactor code for sum reduction and sumifs naming rule

Change-Id: I685d263337bebe236befa5e5f45356336936c998
Signed-off-by: I-Jui (Ray) Sung <ray@multicorewareinc.com>
üst f9973682
...@@ -411,7 +411,7 @@ class DynamicKernelSlidingArgument: public Base ...@@ -411,7 +411,7 @@ class DynamicKernelSlidingArgument: public Base
public: public:
DynamicKernelSlidingArgument(const std::string &s, DynamicKernelSlidingArgument(const std::string &s,
FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen): FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen):
Base(s, ft), mpCodeGen(CodeGen), needReductionKernel(true), mpClmem2(NULL) Base(s, ft), mpCodeGen(CodeGen), mpClmem2(NULL)
{ {
FormulaToken *t = ft->GetFormulaToken(); FormulaToken *t = ft->GetFormulaToken();
if (t->GetType() != formula::svDoubleVectorRef) if (t->GetType() != formula::svDoubleVectorRef)
...@@ -428,7 +428,7 @@ public: ...@@ -428,7 +428,7 @@ public:
(!GetStartFixed() && !GetEndFixed()) ) ; (!GetStartFixed() && !GetEndFixed()) ) ;
} }
virtual void GenSlidingWindowFunction(std::stringstream &ss) { virtual void GenSlidingWindowFunction(std::stringstream &ss) {
if (dynamic_cast<OpSum*>(mpCodeGen.get())) if (dynamic_cast<OpSum*>(mpCodeGen.get()) && NeedParallelReduction())
{ {
std::string name = Base::GetName(); std::string name = Base::GetName();
ss << "__kernel void "<<name; ss << "__kernel void "<<name;
...@@ -436,20 +436,23 @@ public: ...@@ -436,20 +436,23 @@ public:
"__global double *result,int arrayLength,int windowSize){\n"; "__global double *result,int arrayLength,int windowSize){\n";
ss << " double tmp, current_result = 0.0;\n"; ss << " double tmp, current_result = 0.0;\n";
ss << " int writePos = get_group_id(1);\n"; ss << " int writePos = get_group_id(1);\n";
ss << " int offset = get_group_id(1);\n";
ss << " int lidx = get_local_id(0);\n"; ss << " int lidx = get_local_id(0);\n";
ss << " __local double shm_buf[256];\n"; ss << " __local double shm_buf[256];\n";
ss << " if (arrayLength == windowSize)\n"; if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
ss << " offset = 0;\n"; ss << " int offset = 0;\n";
else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
ss << " int offset = get_group_id(1);\n";
else
throw Unhandled();
ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
ss << " int loop = arrayLength/512 + 1;\n"; ss << " int loop = arrayLength/512 + 1;\n";
ss << " for (int l=0; l<loop; l++){\n"; ss << " for (int l=0; l<loop; l++){\n";
ss << " tmp = 0.0;\n"; ss << " tmp = 0.0;\n";
ss << " int loopOffset = l*512;\n"; ss << " int loopOffset = l*512;\n";
ss << " if((loopOffset + lidx + offset + 256) < ( offset + windowSize))\n"; ss << " if((loopOffset + lidx + offset + 256) < min( offset + windowSize, arrayLength))\n";
ss << " tmp = A[loopOffset + lidx + offset] + " ss << " tmp = A[loopOffset + lidx + offset] + "
"A[loopOffset + lidx + offset + 256];\n"; "A[loopOffset + lidx + offset + 256];\n";
ss << " else if ((loopOffset + lidx + offset) < ( offset + windowSize))\n"; ss << " else if ((loopOffset + lidx + offset) < min(offset + windowSize, arrayLength))\n";
ss << " tmp = A[loopOffset + lidx + offset];\n"; ss << " tmp = A[loopOffset + lidx + offset];\n";
ss << " shm_buf[lidx] = tmp;\n"; ss << " shm_buf[lidx] = tmp;\n";
ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
...@@ -487,38 +490,35 @@ public: ...@@ -487,38 +490,35 @@ public:
{ {
if (!bIsStartFixed && !bIsEndFixed) if (!bIsStartFixed && !bIsEndFixed)
{ {
// set 100 as a threshold for invoking reduction kernel // set 100 as a temporary threshold for invoking reduction
// Ray: temporarily turn off parallel sum reduction // kernel in NeedParalleLReduction function
if (false /*nCurWindowSize > 100*/) if (NeedParallelReduction())
{ {
std::string temp = Base::GetName() + "[gid0]"; std::string temp = Base::GetName() + "[gid0]";
ss << "tmp = "; ss << "tmp = ";
ss << mpCodeGen->Gen2(temp, "tmp"); ss << mpCodeGen->Gen2(temp, "tmp");
ss << ";\n\t"; ss << ";\n\t";
needBody = false; needBody = false;
needReductionKernel = false;
return nCurWindowSize; return nCurWindowSize;
} }
} }
if (bIsStartFixed && bIsEndFixed) if (bIsStartFixed && bIsEndFixed)
{ {
// set 100 as a threshold for invoking reduction kernel // set 100 as a temporary threshold for invoking reduction
// Ray: temporarily turn off parallel sum reduction // kernel in NeedParalleLReduction function
if (false /* nCurWindowSize > 100 */) if (NeedParallelReduction())
{ {
std::string temp = Base::GetName() + "[0]"; std::string temp = Base::GetName() + "[0]";
ss << "tmp = "; ss << "tmp = ";
ss << mpCodeGen->Gen2(temp, "tmp"); ss << mpCodeGen->Gen2(temp, "tmp");
ss << ";\n\t"; ss << ";\n\t";
needBody = false; needBody = false;
needReductionKernel = false;
return nCurWindowSize; return nCurWindowSize;
} }
} }
} }
needBody = true; needBody = true;
needReductionKernel = true;
ss << "for (int i = "; ss << "for (int i = ";
if (!bIsStartFixed && bIsEndFixed) if (!bIsStartFixed && bIsEndFixed)
{ {
...@@ -561,7 +561,7 @@ public: ...@@ -561,7 +561,7 @@ public:
virtual size_t Marshal(cl_kernel k, int argno, int w, cl_program mpProgram) virtual size_t Marshal(cl_kernel k, int argno, int w, cl_program mpProgram)
{ {
if (needReductionKernel) if (!NeedParallelReduction())
return Base::Marshal(k, argno, w, mpProgram); return Base::Marshal(k, argno, w, mpProgram);
assert(Base::mpClmem == NULL); assert(Base::mpClmem == NULL);
...@@ -654,7 +654,6 @@ protected: ...@@ -654,7 +654,6 @@ protected:
// from parent nodes // from parent nodes
boost::shared_ptr<SlidingFunctionBase> mpCodeGen; boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
// controls whether to invoke the reduction kernel during marshaling or not // controls whether to invoke the reduction kernel during marshaling or not
bool needReductionKernel;
cl_mem mpClmem2; cl_mem mpClmem2;
}; };
...@@ -1096,7 +1095,7 @@ public: ...@@ -1096,7 +1095,7 @@ public:
if (CL_SUCCESS != err) if (CL_SUCCESS != err)
throw OpenCLError(err); throw OpenCLError(err);
std::string kernelName = "SumIfs_reduction"; std::string kernelName = mvSubArguments[0]->GetName() + "_SumIfs_reduction";
cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err); cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
if (err != CL_SUCCESS) if (err != CL_SUCCESS)
throw OpenCLError(err); throw OpenCLError(err);
......
...@@ -453,8 +453,10 @@ void OpSumIfs::GenSlidingWindowFunction(std::stringstream &ss, ...@@ -453,8 +453,10 @@ void OpSumIfs::GenSlidingWindowFunction(std::stringstream &ss,
if (mNeedReductionKernel) if (mNeedReductionKernel)
{ {
// generate reduction functions // generate reduction functions
ss << "__kernel void "; ss << "__kernel void ";
ss << "SumIfs_reduction( "; ss << vSubArguments[0]->GetName();
ss << "_SumIfs_reduction( ";
for (unsigned i = 0; i < vSubArguments.size(); i++) for (unsigned i = 0; i < vSubArguments.size(); i++)
{ {
if (i) if (i)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment