Kaydet (Commit) 69b1caa1 authored tarafından Haidong Lian's avatar Haidong Lian Kaydeden (comit) Kohei Yoshida

Add support for double in OpenCL kernel.

* modified coding style.
* merged arithmetic operators together.
* added support for double in OpenCL kernel.
* added an environment variable named SC_FLOAT, which, when set it to 1, will
  force to use float in OpenCL kernel. If not set, we will detect GPU, and if
  GPU supports double, use double for kernel, otherwise use float for kernel.

Conflicts:
	sc/source/core/opencl/openclwrapper.cxx
	sc/source/core/opencl/openclwrapper.hxx

Change-Id: I7cdec458d72837d3b22ba50c6d28f78797ee0d3b
üst f8129024
......@@ -12,175 +12,119 @@
#ifndef USE_EXTERNAL_KERNEL
#define KERNEL( ... )# __VA_ARGS__
// Double precision is a default of spreadsheets
// cl_khr_fp64: Khronos extension
// cl_amd_fp64: AMD extension
// use build option outside to define fp_t
/////////////////////////////////////////////
const char *kernel_src = KERNEL(
__kernel void hello(__global uint *buffer)
{
size_t idx = get_global_id(0);
buffer[idx]=idx;
}
__kernel void oclformula(__global float *data,
const uint type)
{
const unsigned int i = get_global_id(0);
switch (type)
{
case 0: //MAX
{
//printf("%i ?%f>%f\n",i,data[2*i],data[2*i+1]);
if(data[2*i]>data[2*i+1])
data[i] = data[2*i];
else
data[i] = data[2*i+1];
break;
}
case 1: //MIN
{
//printf("%d ?%d<%d\n",i,data[2*i],data[2*i+1]);
if(data[2*i]<data[2*i+1])
data[i] = data[2*i];
else
data[i] = data[2*i+1];
break;
}
case 2: //SUM
case 3: //AVG
{
//printf("%d %d+%d\n",i,data[2*i],data[2*i+1]);
data[i] = data[2*i] + data[2*i+1];
break;
}
default:
break;
\n#ifdef KHR_DP_EXTENSION\n
\n#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n
\n#elif AMD_DP_EXTENSION\n
\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n
\n#else\n
\n#endif\n
}
}
__kernel void oclSignedAdd(__global float *ltData,__global float *rtData,__global float *otData)
__kernel void oclSignedAdd(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
{
const unsigned int id = get_global_id(0);
otData[id] = ltData[id] + rtData[id];
}
__kernel void oclSignedSub(__global float *ltData,__global float *rtData,__global float *otData)
__kernel void oclSignedSub(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
{
const unsigned int id = get_global_id(0);
otData[id] = ltData[id] - rtData[id];
}
__kernel void oclSignedMul(__global float *ltData,__global float *rtData,__global float *otData)
__kernel void oclSignedMul(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
{
int id = get_global_id(0);
otData[id] =ltData[id] * rtData[id];
}
__kernel void oclSignedDiv(__global float *ltData,__global float *rtData,__global float *otData)
__kernel void oclSignedDiv(__global fp_t *ltData,__global fp_t *rtData,__global fp_t *otData)
{
const unsigned int id = get_global_id(0);
otData[id] = ltData[id] / rtData[id];
}
__kernel void oclFormulaMin(__global float *input,__global int *start,__global int *end,__global float *output)
__kernel void oclFormulaMin(__global fp_t *input,__global int *start,__global int *end,__global fp_t *output)
{
const unsigned int id = get_global_id(0);
int i=0;
unsigned int startFlag = start[id];
unsigned int endFlag = end[id];
float min = input[startFlag];
for(i=startFlag;i<=endFlag;i++)
fp_t min = input[startFlag];
for(int i=startFlag;i<=endFlag;i++)
{
if(input[i]<min)
min = input[i];
}
output[id] = min;
}
__kernel void oclFormulaMax(__global float *input,__global int *start,__global int *end,__global float *output)
__kernel void oclFormulaMax(__global fp_t *input,__global int *start,__global int *end,__global fp_t *output)
{
const unsigned int id = get_global_id(0);
int i=0;
unsigned int startFlag = start[id];
unsigned int endFlag = end[id];
float max = input[startFlag];
for(i=startFlag;i<=endFlag;i++)
fp_t max = input[startFlag];
for(int i=startFlag;i<=endFlag;i++)
{
if(input[i]>max)
max = input[i];
}
output[id] = max;
}
//Sum
__kernel void oclFormulaSum(__global float *input,__global int *start,__global int *end,__global float *output)
__kernel void oclFormulaSum(__global fp_t *input,__global int *start,__global int *end,__global fp_t *output)
{
const unsigned int nId = get_global_id(0);
float fSum = 0.0f;
fp_t fSum = 0.0;
for(int i = start[nId]; i<=end[nId]; i++)
fSum += input[i];
output[nId] = fSum ;
}
//Count
__kernel void oclFormulaCount(__global int *start,__global int *end,__global float *output)
__kernel void oclFormulaCount(__global int *start,__global int *end,__global fp_t *output)
{
const unsigned int nId = get_global_id(0);
output[nId] = end[nId] - start[nId] + 1;
}
__kernel void oclFormulaAverage(__global float *input,__global int *start,__global int *end,__global float *output)
__kernel void oclFormulaAverage(__global fp_t *input,__global int *start,__global int *end,__global fp_t *output)
{
const unsigned int id = get_global_id(0);
int i=0;
float sum=0;
for(i = start[id];i<=end[id];i++)
fp_t sum=0.0;
for(int i = start[id];i<=end[id];i++)
sum += input[i];
output[id] = sum / (end[id]-start[id]+1);
}
//Sumproduct
__kernel void oclFormulaSumproduct(__global float *firstCol,__global float *secondCol,__global int *start,__global int *end,__global float *output)
__kernel void oclFormulaSumproduct(__global fp_t *firstCol,__global int* npSumSize,__global fp_t *output,uint nMatixSize)
{
const int nId = get_global_id(0);
int nCount = start[nId] - end[nId] + 1;
int nStartA = start[nId*2];
int nStartB = start[nId*2+1];
for(int i = 0; i<nCount; i++)
output[nId] += firstCol[nStartA+i]*secondCol[nStartB+i];
const unsigned int id = get_global_id(0);
unsigned int nSumSize = npSumSize[id];
fp_t fSum = 0.0;
for(int i=0;i<nSumSize;i++)
fSum += firstCol[i + nMatixSize * id];
output[id] = fSum;
}
__kernel void oclFormulaMinverse(__global float *data,
const uint type)
__kernel void oclFormulaMinverse(__global fp_t *data, const uint type)
{
}
// Double precision is a requirement of spreadsheets
// cl_khr_fp64: Khronos extension
// cl_amd_fp64: AMD extension
\n#if 0 \n
\n#if defined(cl_khr_fp64) \n
\n#pragma OPENCL EXTENSION cl_khr_fp64 : enable \n
\n#elif defined(cl_amd_fp64) \n
\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable \n
\n#endif \n
\ntypedef double fp_t; \n
\n#else \n
\ntypedef float fp_t; \n
\n#endif \n
__kernel void oclAverageDelta(__global fp_t *values, __global fp_t *subtract, uint start, uint end, __global fp_t *output)
{
const unsigned int id = get_global_id(0);
// Average
fp_t fSum = 0.0f;
fp_t fSum = 0.0;
for(int i = start; i < end; i++)
fSum += values[i];
fp_t fVal = fSum/(end-start);
......@@ -194,7 +138,7 @@ __kernel void oclMaxDelta(__global fp_t *values, __global fp_t *subtract, uint s
const unsigned int id = get_global_id(0);
// Max
float fMaxVal = values[start];
fp_t fMaxVal = values[start];
for(int i=start+1;i < end;i++)
{
if(values[i]>fMaxVal)
......@@ -210,7 +154,7 @@ __kernel void oclMinDelta(__global fp_t *values, __global fp_t *subtract, uint s
const unsigned int id = get_global_id(0);
// Min
float fMinVal = values[start];
fp_t fMinVal = values[start];
for(int i=start+1;i < end;i++)
{
if(values[i]<fMinVal)
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment