11 #include "allheaders.h"
/* Platform selection for the IF_WINDOWS() and NOT_WINDOWS() helper macros:
 * each keeps its argument on the matching platform and expands to nothing
 * otherwise.
 * NOTE(review): this listing skips lines (the embedded numbering jumps), so
 * any further per-platform defines and the closing #endif are not visible
 * here. */
22 #if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || \
23 defined(__CYGWIN__) || defined(__MINGW32__)
/* Windows-family toolchains: keep IF_WINDOWS code, drop NOT_WINDOWS code. */
28 #define IF_WINDOWS(X) X
32 #define NOT_WINDOWS(X)
33 #elif defined( __linux__ )
42 #define NOT_WINDOWS(X) X
43 #elif defined( __APPLE__ )
52 #define NOT_WINDOWS(X) X
/* Presumably the fallback branch for any other platform; the directive that
 * opens it is not visible in this listing - confirm against the full file. */
62 #define NOT_WINDOWS(X) X
/* Verbosity of the PERF_COUNT_* timing macros. The conditionals visible in
 * this file enable START/END reporting at >= 2 and per-step SUB reporting at
 * >= 3, so the value 1 satisfies neither condition. */
77 #define PERF_COUNT_VERBOSE 1
/* printf format shared by every timing report: function name, step label,
 * elapsed seconds. */
78 #define PERF_COUNT_REPORT_STR "[%36s], %24s, %11.6f\n"
/* Timing macros built on QueryPerformanceFrequency / QueryPerformanceCounter.
 * NOTE(review): the enclosing platform conditional and the #else and #endif
 * lines that separate the active definitions from the empty ones are not
 * visible in this listing.
 * NOTE(review): the active macros expand to several bare statements (no
 * do/while wrapper), so they are not safe as the sole body of an unbraced
 * if/else. */
83 #if PERF_COUNT_VERBOSE >= 2
/* Declares the timing locals (funct_name, elapsed_time_sec, freq and four
 * counter readings) in the calling scope, reads the counter frequency and
 * records the start reading. Both sub-step readings start out equal to the
 * start reading. PERF_COUNT_END and PERF_COUNT_SUB refer to these locals by
 * name, so they only compile where PERF_COUNT_START was expanded. */
84 #define PERF_COUNT_START(FUNCT_NAME) \
85 char *funct_name = FUNCT_NAME; \
86 double elapsed_time_sec; \
87 LARGE_INTEGER freq, time_funct_start, time_funct_end, time_sub_start, time_sub_end; \
88 QueryPerformanceFrequency(&freq); \
89 QueryPerformanceCounter(&time_funct_start); \
90 time_sub_start = time_funct_start; \
91 time_sub_end = time_funct_start;
/* Reads the counter again, converts the tick difference since the start
 * reading to seconds using freq, and prints it with the label "total". */
93 #define PERF_COUNT_END \
94 QueryPerformanceCounter(&time_funct_end); \
95 elapsed_time_sec = (time_funct_end.QuadPart - time_funct_start.QuadPart) / \
96 (double)(freq.QuadPart); \
97 printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
/* Empty variants: both macros expand to nothing (presumably the lower
 * verbosity branch - confirm against the full file). */
99 #define PERF_COUNT_START(FUNCT_NAME)
100 #define PERF_COUNT_END
103 #if PERF_COUNT_VERBOSE >= 3
/* Prints the seconds elapsed since the previous checkpoint with the label
 * SUB, then makes the current reading the new checkpoint. */
104 #define PERF_COUNT_SUB(SUB) \
105 QueryPerformanceCounter(&time_sub_end); \
106 elapsed_time_sec = (time_sub_end.QuadPart - time_sub_start.QuadPart) / \
107 (double)(freq.QuadPart); \
108 printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \
109 time_sub_start = time_sub_end;
/* Empty variant: sub-step reporting expands to nothing. */
111 #define PERF_COUNT_SUB(SUB)
/* Timing macros built on clock_gettime(CLOCK_MONOTONIC, ...).
 * NOTE(review): the enclosing platform conditional and the #else and #endif
 * lines are not visible in this listing. In addition, the embedded numbering
 * skips one line inside PERF_COUNT_END and one inside PERF_COUNT_SUB; the
 * skipped line is presumably the `elapsed_time_sec =` assignment target for
 * the sum that follows - confirm against the full file.
 * NOTE(review): `timespec` is used without the `struct` keyword, which is
 * accepted by C++ but not by C. */
118 #if PERF_COUNT_VERBOSE >= 2
/* Declares the timing locals (funct_name, elapsed_time_sec and four timespec
 * readings) in the calling scope and records the start time; both sub-step
 * readings start out equal to the start reading. */
119 #define PERF_COUNT_START(FUNCT_NAME) \
120 char *funct_name = FUNCT_NAME; \
121 double elapsed_time_sec; \
122 timespec time_funct_start, time_funct_end, time_sub_start, time_sub_end; \
123 clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); \
124 time_sub_start = time_funct_start; \
125 time_sub_end = time_funct_start;
/* Reads the clock again, combines the tv_sec difference with the tv_nsec
 * difference scaled by 1e-9 into seconds, and prints the report with the
 * label "total". */
127 #define PERF_COUNT_END \
128 clock_gettime(CLOCK_MONOTONIC, &time_funct_end); \
130 (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + \
131 (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; \
132 printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec);
/* Empty variants: both macros expand to nothing (presumably the lower
 * verbosity branch - confirm against the full file). */
134 #define PERF_COUNT_START(FUNCT_NAME)
135 #define PERF_COUNT_END
138 #if PERF_COUNT_VERBOSE >= 3
/* Same seconds computation relative to the previous checkpoint, reported
 * with the label SUB; the current reading then becomes the new checkpoint. */
139 #define PERF_COUNT_SUB(SUB) \
140 clock_gettime(CLOCK_MONOTONIC, &time_sub_end); \
142 (time_sub_end.tv_sec - time_sub_start.tv_sec) * 1.0 + \
143 (time_sub_end.tv_nsec - time_sub_start.tv_nsec) / 1000000000.0; \
144 printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \
145 time_sub_start = time_sub_end;
/* Empty variant: sub-step reporting expands to nothing. */
147 #define PERF_COUNT_SUB(SUB)
/* Maps strcasecmp onto strcmp, so every comparison written with strcasecmp
 * becomes case-sensitive wherever this define is active.
 * NOTE(review): the conditional that guards this define is not visible in
 * this listing - confirm which platforms it applies to. */
159 #define strcasecmp strcmp
/* Capacity limits used by the GPUEnv tables declared in this file. */
/* Kernel-name rows in GPUEnv are sized MAX_KERNEL_STRING_LEN + 1. */
162 #define MAX_KERNEL_STRING_LEN 64
/* Number of slots in the GPUEnv kernel, program and source-file arrays. */
163 #define MAX_CLFILE_NUM 50
/* Number of slots in the GPUEnv kernel-name and kernel-function arrays. */
164 #define MAX_CLKERNEL_NUM 200
/* Not referenced in the visible part of this file. */
165 #define MAX_KERNEL_NAME_LEN 64
/* Presumably a vendor-specific OpenCL query token; its use is not visible
 * here - confirm against the code that references it. */
166 #define CL_QUEUE_THREAD_HANDLE_AMD 0x403E
/* Presumably work-group dimensions for the OpenCL kernels; the HMOR pair is
 * presumably for horizontal morphology - confirm at the launch sites. */
167 #define GROUPSIZE_X 16
168 #define GROUPSIZE_Y 16
169 #define GROUPSIZE_HMORX 256
170 #define GROUPSIZE_HMORY 1
/* Bundle of OpenCL handles plus a kernel name. A pointer to the KernelEnv
 * type is the second parameter of cl_kernel_function and the parameter of
 * SetKernelEnv in this file.
 * NOTE(review): the braces, the closing typedef name and at least one member
 * line are missing from this listing (the embedded numbering skips). */
172 typedef struct _KernelEnv
174 cl_context mpkContext;
175 cl_command_queue mpkCmdQueue;
176 cl_program mpkProgram;
/* Fixed 150-byte name buffer; note this size is independent of
 * MAX_KERNEL_NAME_LEN and MAX_KERNEL_STRING_LEN. */
178 char mckKernelName[150];
/* Groups one OpenCL platform id, context, device id and command queue.
 * NOTE(review): the braces and the closing typedef name are missing from
 * this listing (the embedded numbering skips). */
181 typedef struct _OpenCLEnv
183 cl_platform_id mpOclPlatformID;
184 cl_context mpOclContext;
185 cl_device_id mpOclDevsID;
186 cl_command_queue mpOclCmdQueue;
/* Pointer to a kernel callback: takes an array of opaque user-data pointers
 * and a KernelEnv pointer, and returns an int. GPUEnv stores an array of
 * these (mpArryKnelFuncs). The meaning of the return value is not visible
 * in this file. */
188 typedef int ( *cl_kernel_function )(
void **userdata, KernelEnv *kenv );
/* Error-check helper: when status differs from CL_SUCCESS, prints the
 * numeric status together with the caller-supplied description `name`.
 * NOTE(review): the macro continues past the lines shown here (the last
 * visible line ends in a continuation and the embedded numbering skips), so
 * what happens after the printf is not visible - confirm in the full file.
 * NOTE(review): the message text reads "at when"; fixing the wording would
 * change runtime output, so it is left untouched here. */
190 #define CHECK_OPENCL(status,name) \
191 if( status != CL_SUCCESS ) \
193 printf ("OpenCL error code is %d at when %s .\n", status, name); \
/* Aggregate OpenCL state: platform, device(s), context, command queue, and
 * fixed-size tables of kernels, programs, source-file names, kernel names
 * and kernel callbacks, plus counters. A file-scope static instance named
 * gpuEnv is declared later in this file.
 * NOTE(review): the braces are missing from this listing and the struct is
 * cut off after the counters line (it ends with a comma), so further members
 * and the closing typedef name are not visible. */
197 typedef struct _GPUEnv
200 cl_platform_id mpPlatformID;
201 cl_device_type mDevType;
202 cl_context mpContext;
/* Pointer to device ids (presumably an array, per the name - confirm where
 * it is allocated) and a single device id. */
203 cl_device_id *mpArryDevsID;
204 cl_device_id mpDevID;
205 cl_command_queue mpCmdQueue;
/* Kernel and program tables hold MAX_CLFILE_NUM entries each. */
206 cl_kernel mpArryKernels[MAX_CLFILE_NUM];
207 cl_program mpArryPrograms[MAX_CLFILE_NUM];
/* Source-file names: MAX_CLFILE_NUM rows of 256 bytes. Kernel names:
 * MAX_CLKERNEL_NUM rows of MAX_KERNEL_STRING_LEN + 1 bytes. */
208 char mArryKnelSrcFile[MAX_CLFILE_NUM][256],
209 mArrykernelNames[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN + 1];
/* One callback slot per kernel-name slot. */
210 cl_kernel_function mpArryKnelFuncs[MAX_CLKERNEL_NUM];
211 int mnKernelCount, mnFileCount,
/* Shared OpenCL state and declarations for environment setup, teardown and
 * kernel-binary handling. Only prototypes are visible here; behaviour and
 * return-value conventions must be checked against the definitions.
 * NOTE(review): these are presumably static members of a class whose header
 * is outside this listing - confirm. */
/* The single GPUEnv instance. */
223 static GPUEnv gpuEnv;
227 static int InitEnv();
228 static int InitOpenclRunEnv(
int argc );
229 static int InitOpenclRunEnv_DeviceSelection(
int argc );
230 static int RegistOpenclKernel();
231 static int ReleaseOpenclRunEnv();
232 static int ReleaseOpenclEnv( GPUEnv *gpuInfo );
/* Takes the environment to use and a build-option string. */
233 static int CompileKernelFile( GPUEnv *gpuInfo,
const char *buildOption );
/* Takes a read-only environment and a kernel source file name. */
234 static int CachedOfKernerPrg(
const GPUEnv *gpuEnvCached,
const char * clFileName );
235 static int GeneratBinFromKernelSource( cl_program program,
const char * clFileName );
/* NOTE(review): the parameter name `birary` looks like a typo of "binary";
 * it is harmless in a prototype and is left untouched here. */
236 static int WriteBinaryToFile(
const char* fileName,
const char* birary,
size_t numBytes );
/* Takes a kernel source file name and an out-parameter for a FILE handle;
 * whether the handle is opened by this call is not visible here. */
237 static int BinaryGenerated(
const char * clFileName, FILE ** fhandle );
/* Declarations for the TIFF-reading entry points. Only prototypes are
 * visible here; ownership of returned PIX/TIFF objects and failure values
 * must be checked against the definitions. */
/* Takes the raw tiff data, width, height, words-per-line (per the parameter
 * name wpl) and a destination line pointer. */
239 static l_uint32* pixReadFromTiffKernel(l_uint32 *tiffdata,l_int32 w,l_int32 h,l_int32 wpl, l_uint32 *line);
/* Read from a file name, an open stream, or a memory buffer; `n` is
 * presumably the page index - confirm against the definitions. */
240 static Pix* pixReadTiffCl(
const char *
filename, l_int32 n );
241 static PIX * pixReadStreamTiffCl ( FILE *fp, l_int32 n );
242 static PIX * pixReadMemTiffCl(
const l_uint8 *data,
size_t size, l_int32 n);
243 static PIX* pixReadFromTiffStreamCl(TIFF *tif);
/* NOTE(review): the height parameter precedes the width parameter here,
 * the opposite of the w-then-h order used by pixReadFromTiffKernel. */
244 static int composeRGBPixelCl(
int *tiffdata,
int *line,
int h,
int w);
/* Resolution is returned through the pxres and pyres out-parameters. */
245 static l_int32 getTiffStreamResolutionCl(TIFF *tif,l_int32 *pxres,l_int32 *pyres);
246 static TIFF* fopenTiffCl(FILE *fp,
const char *modestring);
/* Declarations for the morphology operations. Only prototypes are visible
 * here; whether pixd may be null or alias pixs, and what reqDataCopy
 * controls, must be checked against the definitions. */
/* Buffer setup takes words-per-line (per the name wpl), height and the
 * source image; the release call takes no arguments. */
251 static int initMorphCLAllocations(l_int32 wpl, l_int32 h, PIX* pixs);
252 static void releaseMorphCLBuffers();
/* The four brick operations share one signature: destination, source,
 * horizontal size, vertical size, and the reqDataCopy flag. */
255 static PIX* pixDilateBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize,
bool reqDataCopy);
258 static PIX* pixErodeBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize,
bool reqDataCopy);
261 static PIX* pixCloseBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize,
bool reqDataCopy);
264 static PIX* pixOpenBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize,
bool reqDataCopy);
/* Two-source operation with the same reqDataCopy flag. */
267 static PIX* pixSubtractCL(PIX *pixd, PIX *pixs1, PIX *pixs2,
bool reqDataCopy);
/* Takes separate horizontal/vertical sizes for a close step and an open
 * step. */
270 static PIX* pixHollowCL(PIX *pixd, PIX *pixs, l_int32 close_hsize, l_int32 close_vsize, l_int32 open_hsize, l_int32 open_vsize,
bool reqDataCopy);
/* Returns results through the pix_vline, pix_hline and pixClosed
 * out-parameters.
 * NOTE(review): this prototype is cut off in the listing (the last visible
 * line ends with a comma), so the remaining parameters are not shown. */
272 static void pixGetLinesCL(PIX *pixd, PIX *pixs, PIX **pix_vline,
273 PIX **pix_hline, PIX **pixClosed,
274 bool getpixClosed, l_int32 close_hsize,
275 l_int32 close_vsize, l_int32 open_hsize,
276 l_int32 open_vsize, l_int32 line_hsize,
/* Declarations for kernel-environment and library-loading helpers. Only
 * prototypes are visible here; confirm behaviour against the definitions. */
/* Takes the KernelEnv to fill or apply (direction not visible here). */
281 static int SetKernelEnv( KernelEnv *envInfo );
/* Presumably a load/unload pair for the OpenCL runtime library - confirm. */
292 static int LoadOpencl();
295 static void FreeOpenclDll();
/* Takes a kernel count/index and a kernel name.
 * NOTE(review): declared inline without a visible body; the definition must
 * be available to every translation unit that calls it. */
298 inline static int AddKernelConfig(
int kCount,
const char *kName );
/* Declarations for the image-analysis entry points. Only prototypes are
 * visible here; buffer sizes, ownership and return-value conventions must be
 * checked against the definitions. */
/* Takes raw image bytes with their per-pixel and per-line byte strides, a
 * rectangle origin (left, top) and an output histogram pointer.
 * NOTE(review): the embedded numbering skips a line between `top` and
 * `histogramAllChannels`, so parameters may be missing from this listing. */
301 static int HistogramRectOCL(
unsigned char *imagedata,
int bytes_per_pixel,
302 int bytes_per_line,
int left,
int top,
304 int *histogramAllChannels);
/* Takes raw image bytes with strides, threshold and hi_values arrays, a Pix
 * out-parameter and the rectangle given as height, width, top, left. */
306 static int ThresholdRectToPixOCL(
unsigned char *imagedata,
307 int bytes_per_pixel,
int bytes_per_line,
308 int *thresholds,
int *hi_values, Pix **pix,
309 int rect_height,
int rect_width,
310 int rect_top,
int rect_left);
/* Channel weights default to 0.3 (red), 0.5 (green) and 0.2 (blue). Default
 * arguments make this declaration C++-only. */
312 static Pix *pixConvertRGBToGrayOCL(Pix *pix,
float weightRed = 0.3,
313 float weightGreen = 0.5,
314 float weightBlue = 0.2);
/* Device-selection state and queries. Only declarations are visible here.
 * selectedDevice and deviceIsSelected are presumably a cached result and its
 * validity flag for getDeviceSelection - confirm against the definition. */
316 static ds_device getDeviceSelection();
317 static ds_device selectedDevice;
318 static bool deviceIsSelected;
/* Boolean queries about the kind of the selected device. */
319 static bool selectedDeviceIsOpenCL();
320 static bool selectedDeviceIsNativeCPU();