13 #include <sys/types.h>
26 #include <mach/mach_time.h>
33 #if defined(LIBLEPT_MAJOR_VERSION) && defined(LIBLEPT_MINOR_VERSION)
34 #define TESSERACT_LIBLEPT_PREREQ(maj, min) \
35 ((LIBLEPT_MAJOR_VERSION) > (maj) || \
36 ((LIBLEPT_MAJOR_VERSION) == (maj) && (LIBLEPT_MINOR_VERSION) >= (min)))
38 #define TESSERACT_LIBLEPT_PREREQ(maj, min) 0
41 #if TESSERACT_LIBLEPT_PREREQ(1, 73)
42 #define CALLOC LEPT_CALLOC
43 #define FREE LEPT_FREE
49 GPUEnv OpenclDevice::gpuEnv;
51 bool OpenclDevice::deviceIsSelected =
false;
52 ds_device OpenclDevice::selectedDevice;
54 int OpenclDevice::isInited = 0;
56 static l_int32 MORPH_BC = ASYMMETRIC_MORPH_BC;
58 static const l_uint32 lmask32[] = {
59 0x80000000, 0xc0000000, 0xe0000000, 0xf0000000, 0xf8000000, 0xfc000000,
60 0xfe000000, 0xff000000, 0xff800000, 0xffc00000, 0xffe00000, 0xfff00000,
61 0xfff80000, 0xfffc0000, 0xfffe0000, 0xffff0000, 0xffff8000, 0xffffc000,
62 0xffffe000, 0xfffff000, 0xfffff800, 0xfffffc00, 0xfffffe00, 0xffffff00,
63 0xffffff80, 0xffffffc0, 0xffffffe0, 0xfffffff0, 0xfffffff8, 0xfffffffc,
64 0xfffffffe, 0xffffffff};
66 static const l_uint32 rmask32[] = {
67 0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f,
68 0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
69 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, 0x0001ffff, 0x0003ffff,
70 0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
71 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff,
72 0x7fffffff, 0xffffffff};
74 struct tiff_transform {
82 static struct tiff_transform tiff_orientation_transforms[] = {
93 static const l_int32 MAX_PAGES_IN_TIFF_FILE = 3000;
95 cl_mem pixsCLBuffer, pixdCLBuffer, pixdCLIntermediate;
101 void legalizeFileName(
char *fileName) {
103 const char *invalidChars =
106 for (
int i = 0; i < strlen(invalidChars); i++) {
108 invalidStr[0] = invalidChars[i];
109 invalidStr[1] =
'\0';
115 for (
char *pos = strstr(fileName, invalidStr); pos != NULL;
116 pos = strstr(pos + 1, invalidStr)) {
124 void populateGPUEnvFromDevice( GPUEnv *gpuInfo, cl_device_id device ) {
127 gpuInfo->mnIsUserCreated = 1;
129 gpuInfo->mpDevID = device;
130 gpuInfo->mpArryDevsID =
new cl_device_id[1];
131 gpuInfo->mpArryDevsID[0] = gpuInfo->mpDevID;
133 clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_TYPE,
134 sizeof(cl_device_type), &gpuInfo->mDevType, &size);
135 CHECK_OPENCL( clStatus,
"populateGPUEnv::getDeviceInfo(TYPE)");
138 clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PLATFORM,
139 sizeof(cl_platform_id), &gpuInfo->mpPlatformID, &size);
140 CHECK_OPENCL( clStatus,
"populateGPUEnv::getDeviceInfo(PLATFORM)");
142 cl_context_properties props[3];
143 props[0] = CL_CONTEXT_PLATFORM;
144 props[1] = (cl_context_properties) gpuInfo->mpPlatformID;
146 gpuInfo->mpContext = clCreateContext(props, 1, &gpuInfo->mpDevID, NULL,
148 CHECK_OPENCL( clStatus,
"populateGPUEnv::createContext");
150 cl_command_queue_properties queueProperties = 0;
151 gpuInfo->mpCmdQueue = clCreateCommandQueue( gpuInfo->mpContext, gpuInfo->mpDevID, queueProperties, &clStatus );
152 CHECK_OPENCL( clStatus,
"populateGPUEnv::createCommandQueue");
155 int OpenclDevice::LoadOpencl()
158 HINSTANCE HOpenclDll = NULL;
159 void *OpenclDll = NULL;
161 OpenclDll =
static_cast<HINSTANCE
>(HOpenclDll);
162 OpenclDll = LoadLibrary(
"openCL.dll");
163 if (!static_cast<HINSTANCE>(OpenclDll)) {
164 fprintf(stderr,
"[OD] Load opencl.dll failed!\n");
165 FreeLibrary(static_cast<HINSTANCE>(OpenclDll));
168 fprintf(stderr,
"[OD] Load opencl.dll successful!\n");
172 int OpenclDevice::SetKernelEnv( KernelEnv *envInfo )
174 envInfo->mpkContext = gpuEnv.mpContext;
175 envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue;
176 envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
181 cl_mem allocateZeroCopyBuffer(KernelEnv rEnv, l_uint32 *hostbuffer,
size_t nElements, cl_mem_flags flags, cl_int *pStatus)
183 cl_mem membuffer = clCreateBuffer( rEnv.mpkContext, (cl_mem_flags) (flags),
184 nElements *
sizeof(l_uint32), hostbuffer, pStatus);
189 PIX *mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, PIX *pixd, PIX *pixs,
190 int elements, cl_mem_flags flags,
bool memcopy =
false,
192 PROCNAME(
"mapOutputCLBuffer");
195 if ((pixd = pixCreateTemplate(pixs)) == NULL)
196 (PIX *)ERROR_PTR(
"pixd not made", procName, NULL);
198 if ((pixd = pixCreateHeader(pixGetWidth(pixs), pixGetHeight(pixs),
199 pixGetDepth(pixs))) == NULL)
200 (PIX *)ERROR_PTR(
"pixd not made", procName, NULL);
203 l_uint32 *pValues = (l_uint32 *)clEnqueueMapBuffer(
204 rEnv.mpkCmdQueue, clbuffer, CL_TRUE, flags, 0,
205 elements *
sizeof(l_uint32), 0, NULL, NULL, NULL);
208 memcpy(pixGetData(pixd), pValues, elements *
sizeof(l_uint32));
210 pixSetData(pixd, pValues);
213 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, clbuffer, pValues, 0, NULL,
217 clFinish(rEnv.mpkCmdQueue);
223 cl_mem allocateIntBuffer( KernelEnv rEnv,
const l_uint32 *_pValues,
size_t nElements, cl_int *pStatus ,
bool sync =
false)
226 clCreateBuffer(rEnv.mpkContext, (cl_mem_flags)(CL_MEM_READ_WRITE),
227 nElements *
sizeof(l_int32), NULL, pStatus);
229 if (_pValues != NULL) {
230 l_int32 *pValues = (l_int32 *)clEnqueueMapBuffer(
231 rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0,
232 nElements *
sizeof(l_int32), 0, NULL, NULL, NULL);
234 memcpy(pValues, _pValues, nElements *
sizeof(l_int32));
236 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, xValues, pValues, 0, NULL,
239 if (sync) clFinish(rEnv.mpkCmdQueue);
246 void OpenclDevice::releaseMorphCLBuffers()
248 if (pixdCLIntermediate != NULL) clReleaseMemObject(pixdCLIntermediate);
249 if (pixsCLBuffer != NULL) clReleaseMemObject(pixsCLBuffer);
250 if (pixdCLBuffer != NULL) clReleaseMemObject(pixdCLBuffer);
251 if (pixThBuffer != NULL) clReleaseMemObject(pixThBuffer);
252 pixdCLIntermediate = pixsCLBuffer = pixdCLBuffer = pixThBuffer = NULL;
255 int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, PIX* pixs)
257 SetKernelEnv( &rEnv );
259 if (pixThBuffer != NULL) {
260 pixsCLBuffer = allocateZeroCopyBuffer(rEnv, NULL, wpl * h,
261 CL_MEM_ALLOC_HOST_PTR, &clStatus);
265 clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixThBuffer, pixsCLBuffer, 0, 0,
266 sizeof(l_uint32) * wpl * h, 0, NULL, NULL);
271 l_uint32* srcdata = (l_uint32*) malloc(wpl*h*
sizeof(l_uint32));
272 memcpy(srcdata, pixGetData(pixs), wpl*h*
sizeof(l_uint32));
274 pixsCLBuffer = allocateZeroCopyBuffer(rEnv, srcdata, wpl*h, CL_MEM_USE_HOST_PTR, &clStatus);
277 pixdCLBuffer = allocateZeroCopyBuffer(rEnv, NULL, wpl * h,
278 CL_MEM_ALLOC_HOST_PTR, &clStatus);
280 pixdCLIntermediate = allocateZeroCopyBuffer(
281 rEnv, NULL, wpl * h, CL_MEM_ALLOC_HOST_PTR, &clStatus);
283 return (
int)clStatus;
286 int OpenclDevice::InitEnv()
293 if( 1 == LoadOpencl() )
300 InitOpenclRunEnv_DeviceSelection( 0 );
306 int OpenclDevice::ReleaseOpenclRunEnv()
308 ReleaseOpenclEnv( &gpuEnv );
314 inline int OpenclDevice::AddKernelConfig(
int kCount,
const char *kName )
317 fprintf(stderr,
"Error: ( KCount < 1 ) AddKernelConfig\n" );
318 strcpy( gpuEnv.mArrykernelNames[kCount-1], kName );
319 gpuEnv.mnKernelCount++;
322 int OpenclDevice::RegistOpenclKernel()
324 if ( !gpuEnv.mnIsUserCreated )
325 memset( &gpuEnv, 0,
sizeof(gpuEnv) );
327 gpuEnv.mnFileCount = 0;
328 gpuEnv.mnKernelCount = 0UL;
330 AddKernelConfig( 1, (
const char*)
"oclAverageSub1" );
334 int OpenclDevice::InitOpenclRunEnv_DeviceSelection(
int argc ) {
339 ds_device bestDevice_DS = getDeviceSelection( );
341 cl_device_id bestDevice = bestDevice_DS.oclDeviceID;
343 if (selectedDeviceIsOpenCL() ) {
345 populateGPUEnvFromDevice( &gpuEnv, bestDevice );
346 gpuEnv.mnFileCount = 0;
347 gpuEnv.mnKernelCount = 0UL;
349 CompileKernelFile(&gpuEnv,
"");
361 OpenclDevice::OpenclDevice()
366 OpenclDevice::~OpenclDevice()
371 int OpenclDevice::ReleaseOpenclEnv( GPUEnv *gpuInfo )
381 for ( i = 0; i < gpuEnv.mnFileCount; i++ )
383 if ( gpuEnv.mpArryPrograms[i] )
385 clStatus = clReleaseProgram( gpuEnv.mpArryPrograms[i] );
386 CHECK_OPENCL( clStatus,
"clReleaseProgram" );
387 gpuEnv.mpArryPrograms[i] = NULL;
390 if ( gpuEnv.mpCmdQueue )
392 clReleaseCommandQueue( gpuEnv.mpCmdQueue );
393 gpuEnv.mpCmdQueue = NULL;
395 if ( gpuEnv.mpContext )
397 clReleaseContext( gpuEnv.mpContext );
398 gpuEnv.mpContext = NULL;
401 gpuInfo->mnIsUserCreated = 0;
402 delete[] gpuInfo->mpArryDevsID;
405 int OpenclDevice::BinaryGenerated(
const char * clFileName, FILE ** fhandle )
412 char fileName[256] = {0}, cl_name[128] = {0};
413 char deviceName[1024];
414 clStatus = clGetDeviceInfo(gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME,
415 sizeof(deviceName), deviceName, NULL);
416 CHECK_OPENCL(clStatus,
"clGetDeviceInfo");
417 str = (
char *)strstr(clFileName, (
char *)
".cl");
418 memcpy(cl_name, clFileName, str - clFileName);
419 cl_name[str - clFileName] =
'\0';
420 sprintf(fileName,
"%s-%s.bin", cl_name, deviceName);
421 legalizeFileName(fileName);
422 fd = fopen(fileName,
"rb");
423 status = (fd != NULL) ? 1 : 0;
430 int OpenclDevice::CachedOfKernerPrg(
const GPUEnv *gpuEnvCached,
const char * clFileName )
433 for ( i = 0; i < gpuEnvCached->mnFileCount; i++ )
435 if ( strcasecmp( gpuEnvCached->mArryKnelSrcFile[i], clFileName ) == 0 )
437 if (gpuEnvCached->mpArryPrograms[i] != NULL) {
445 int OpenclDevice::WriteBinaryToFile(
const char* fileName,
const char* birary,
size_t numBytes )
448 output = fopen(fileName,
"wb");
449 if (output == NULL) {
453 fwrite( birary,
sizeof(
char), numBytes, output );
459 int OpenclDevice::GeneratBinFromKernelSource( cl_program program,
const char * clFileName )
465 cl_device_id *mpArryDevsID;
466 char **binaries, *str = NULL;
468 clStatus = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES,
469 sizeof(numDevices), &numDevices, NULL);
470 CHECK_OPENCL( clStatus,
"clGetProgramInfo" );
472 mpArryDevsID = (cl_device_id*) malloc(
sizeof(cl_device_id) * numDevices );
473 if (mpArryDevsID == NULL) {
477 clStatus = clGetProgramInfo(program, CL_PROGRAM_DEVICES,
478 sizeof(cl_device_id) * numDevices, mpArryDevsID,
480 CHECK_OPENCL( clStatus,
"clGetProgramInfo" );
483 binarySizes = (
size_t*) malloc(
sizeof(
size_t) * numDevices );
486 clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
487 sizeof(
size_t) * numDevices, binarySizes, NULL);
488 CHECK_OPENCL( clStatus,
"clGetProgramInfo" );
491 binaries = (
char**) malloc(
sizeof(
char *) * numDevices );
492 if (binaries == NULL) {
496 for ( i = 0; i < numDevices; i++ )
498 if ( binarySizes[i] != 0 )
500 binaries[i] = (
char*) malloc(
sizeof(
char) * binarySizes[i] );
501 if (binaries[i] == NULL) {
511 clStatus = clGetProgramInfo(program, CL_PROGRAM_BINARIES,
512 sizeof(
char *) * numDevices, binaries, NULL);
513 CHECK_OPENCL(clStatus,
"clGetProgramInfo");
516 for ( i = 0; i < numDevices; i++ )
518 char fileName[256] = { 0 }, cl_name[128] = { 0 };
520 if ( binarySizes[i] != 0 )
522 char deviceName[1024];
523 clStatus = clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME,
524 sizeof(deviceName), deviceName, NULL);
525 CHECK_OPENCL( clStatus,
"clGetDeviceInfo" );
527 str = (
char*) strstr( clFileName, (
char*)
".cl" );
528 memcpy( cl_name, clFileName, str - clFileName );
529 cl_name[str - clFileName] =
'\0';
530 sprintf( fileName,
"%s-%s.bin", cl_name, deviceName );
531 legalizeFileName(fileName);
532 if ( !WriteBinaryToFile( fileName, binaries[i], binarySizes[i] ) )
534 printf(
"[OD] write binary[%s] failed\n", fileName);
537 printf(
"[OD] write binary[%s] successfully\n", fileName);
542 for ( i = 0; i < numDevices; i++ )
560 void copyIntBuffer( KernelEnv rEnv, cl_mem xValues,
const l_uint32 *_pValues,
size_t nElements, cl_int *pStatus )
562 l_int32 *pValues = (l_int32 *)clEnqueueMapBuffer(
563 rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0,
564 nElements *
sizeof(l_int32), 0, NULL, NULL, NULL);
565 clFinish(rEnv.mpkCmdQueue);
566 if (_pValues != NULL) {
567 for (
int i = 0; i < (int)nElements; i++) pValues[i] = (l_int32)_pValues[i];
570 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, xValues, pValues, 0, NULL,
576 int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo,
const char *buildOption )
581 char *buildLog = NULL, *binary;
583 size_t source_size[1];
584 int b_error, binary_status, binaryExisted, idx;
586 cl_device_id *mpArryDevsID;
590 if ( CachedOfKernerPrg(gpuInfo, filename) == 1 )
595 idx = gpuInfo->mnFileCount;
599 source_size[0] = strlen( source );
601 binaryExisted = BinaryGenerated( filename, &fd );
603 if ( binaryExisted == 1 )
605 clStatus = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES,
606 sizeof(numDevices), &numDevices, NULL);
607 CHECK_OPENCL(clStatus,
"clGetContextInfo");
609 mpArryDevsID = (cl_device_id *)malloc(
sizeof(cl_device_id) * numDevices);
610 if (mpArryDevsID == NULL) {
616 b_error |= fseek( fd, 0, SEEK_END ) < 0;
617 b_error |= ( length = ftell(fd) ) <= 0;
618 b_error |= fseek( fd, 0, SEEK_SET ) < 0;
624 binary = (
char*) malloc( length + 2 );
630 memset( binary, 0, length + 2 );
631 b_error |= fread( binary, 1, length, fd ) != length;
638 clStatus = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES,
639 sizeof(cl_device_id) * numDevices,
641 CHECK_OPENCL( clStatus,
"clGetContextInfo" );
644 gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
645 mpArryDevsID, &length, (
const unsigned char**) &binary,
646 &binary_status, &clStatus );
647 CHECK_OPENCL( clStatus,
"clCreateProgramWithBinary" );
650 free( mpArryDevsID );
658 gpuInfo->mpArryPrograms[idx] = clCreateProgramWithSource( gpuInfo->mpContext, 1, &source,
659 source_size, &clStatus);
660 CHECK_OPENCL( clStatus,
"clCreateProgramWithSource" );
664 if (gpuInfo->mpArryPrograms[idx] == (cl_program) NULL) {
672 if (!gpuInfo->mnIsUserCreated)
675 clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
676 buildOption, NULL, NULL);
682 clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
683 buildOption, NULL, NULL);
687 if ( clStatus != CL_SUCCESS )
689 printf (
"BuildProgram error!\n");
690 if ( !gpuInfo->mnIsUserCreated )
692 clStatus = clGetProgramBuildInfo(
693 gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
694 CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
698 clStatus = clGetProgramBuildInfo(
699 gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
700 CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
702 if ( clStatus != CL_SUCCESS )
704 printf(
"opencl create build log fail\n");
707 buildLog = (
char*) malloc( length );
708 if (buildLog == (
char *)NULL) {
711 if ( !gpuInfo->mnIsUserCreated )
713 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
714 CL_PROGRAM_BUILD_LOG, length, buildLog, &length );
718 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
719 CL_PROGRAM_BUILD_LOG, length, buildLog, &length );
721 if ( clStatus != CL_SUCCESS )
723 printf(
"opencl program build info fail\n");
727 fd1 = fopen(
"kernel-build.log",
"w+" );
729 fwrite(buildLog,
sizeof(
char), length, fd1);
738 strcpy( gpuInfo->mArryKnelSrcFile[idx], filename );
740 if ( binaryExisted == 0 ) {
741 GeneratBinFromKernelSource( gpuInfo->mpArryPrograms[idx], filename );
745 gpuInfo->mnFileCount += 1;
750 l_uint32* OpenclDevice::pixReadFromTiffKernel(l_uint32 *tiffdata,l_int32 w,l_int32 h,l_int32 wpl,l_uint32 *line)
755 size_t globalThreads[2];
756 size_t localThreads[2];
762 gsize = (w + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
763 globalThreads[0] = gsize;
764 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
765 globalThreads[1] = gsize;
766 localThreads[0] = GROUPSIZE_X;
767 localThreads[1] = GROUPSIZE_Y;
769 SetKernelEnv( &rEnv );
771 l_uint32 *pResult = (l_uint32 *)malloc(w*h * sizeof(l_uint32));
772 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "composeRGBPixel", &clStatus );
773 CHECK_OPENCL(clStatus, "clCreateKernel composeRGBPixel");
776 valuesCl = allocateZeroCopyBuffer(rEnv, tiffdata, w*h, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &clStatus);
777 outputCl = allocateZeroCopyBuffer(rEnv, pResult, w*h, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &clStatus);
780 clStatus = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &valuesCl);
781 CHECK_OPENCL( clStatus, "clSetKernelArg");
782 clStatus = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(w), &w);
783 CHECK_OPENCL( clStatus, "clSetKernelArg" );
784 clStatus = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(h), &h);
785 CHECK_OPENCL( clStatus, "clSetKernelArg" );
786 clStatus = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl);
787 CHECK_OPENCL( clStatus, "clSetKernelArg" );
788 clStatus = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), &outputCl);
789 CHECK_OPENCL( clStatus, "clSetKernelArg");
794 clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL,
795 globalThreads, localThreads, 0, NULL, NULL);
796 CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel");
799 void *ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, outputCl, CL_TRUE, CL_MAP_READ,
800 0, w * h * sizeof(l_uint32), 0, NULL, NULL,
802 CHECK_OPENCL(clStatus, "clEnqueueMapBuffer outputCl");
803 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, outputCl, ptr, 0, NULL, NULL);
806 clFinish(rEnv.mpkCmdQueue);
812 PIX * OpenclDevice::pixReadTiffCl ( const
char *filename, l_int32 n )
819 PROCNAME("pixReadTiff");
822 return (PIX *)ERROR_PTR("filename not defined", procName, NULL);
824 if ((fp = fopenReadStream(filename)) == NULL)
825 return (PIX *)ERROR_PTR("image file not found", procName, NULL);
826 if ((pix = pixReadStreamTiffCl(fp, n)) == NULL) {
828 return (PIX *)ERROR_PTR(
"pix not read", procName, NULL);
835 OpenclDevice::fopenTiffCl(FILE *fp,
836 const char *modestring)
840 PROCNAME(
"fopenTiff");
842 if (!fp)
return (TIFF *)ERROR_PTR(
"stream not opened", procName, NULL);
844 return (TIFF *)ERROR_PTR(
"modestring not defined", procName, NULL);
846 if ((fd = fileno(fp)) < 0)
847 return (TIFF *)ERROR_PTR(
"invalid file descriptor", procName, NULL);
848 lseek(fd, 0, SEEK_SET);
850 return TIFFFdOpen(fd,
"TIFFstream", modestring);
852 l_int32 OpenclDevice::getTiffStreamResolutionCl(TIFF *tif,
857 l_int32 foundxres, foundyres;
858 l_float32 fxres, fyres;
860 PROCNAME(
"getTiffStreamResolution");
863 return ERROR_INT(
"tif not opened", procName, 1);
864 if (!pxres || !pyres)
865 return ERROR_INT(
"&xres and &yres not both defined", procName, 1);
868 TIFFGetFieldDefaulted(tif, TIFFTAG_RESOLUTIONUNIT, &resunit);
869 foundxres = TIFFGetField(tif, TIFFTAG_XRESOLUTION, &fxres);
870 foundyres = TIFFGetField(tif, TIFFTAG_YRESOLUTION, &fyres);
871 if (!foundxres && !foundyres)
return 1;
872 if (!foundxres && foundyres)
874 else if (foundxres && !foundyres)
877 if (resunit == RESUNIT_CENTIMETER) {
878 *pxres = (l_int32)(2.54 * fxres + 0.5);
879 *pyres = (l_int32)(2.54 * fyres + 0.5);
882 *pxres = (l_int32)fxres;
883 *pyres = (l_int32)fyres;
900 typedef struct L_Memstream L_MEMSTREAM;
903 static L_MEMSTREAM *memstreamCreateForRead(l_uint8 *indata,
size_t pinsize);
904 static L_MEMSTREAM *memstreamCreateForWrite(l_uint8 **poutdata,
906 static tsize_t tiffReadCallback(thandle_t handle, tdata_t data, tsize_t length);
907 static tsize_t tiffWriteCallback(thandle_t handle, tdata_t data,
909 static toff_t tiffSeekCallback(thandle_t handle, toff_t offset, l_int32 whence);
910 static l_int32 tiffCloseCallback(thandle_t handle);
911 static toff_t tiffSizeCallback(thandle_t handle);
912 static l_int32 tiffMapCallback(thandle_t handle, tdata_t *data, toff_t *length);
913 static void tiffUnmapCallback(thandle_t handle, tdata_t data, toff_t length);
917 memstreamCreateForRead(l_uint8 *indata,
920 L_MEMSTREAM *mstream;
922 mstream = (L_MEMSTREAM *)CALLOC(1,
sizeof(L_MEMSTREAM));
923 mstream->buffer = indata;
924 mstream->bufsize = insize;
925 mstream->hw = insize;
932 memstreamCreateForWrite(l_uint8 **poutdata,
935 L_MEMSTREAM *mstream;
937 mstream = (L_MEMSTREAM *)CALLOC(1,
sizeof(L_MEMSTREAM));
938 mstream->buffer = (l_uint8 *)CALLOC(8 * 1024, 1);
939 mstream->bufsize = 8 * 1024;
940 mstream->poutdata = poutdata;
941 mstream->poutsize = poutsize;
942 mstream->hw = mstream->offset = 0;
948 tiffReadCallback(thandle_t handle,
952 L_MEMSTREAM *mstream;
955 mstream = (L_MEMSTREAM *)handle;
956 amount = L_MIN((
size_t)length, mstream->hw - mstream->offset);
957 memcpy(data, mstream->buffer + mstream->offset, amount);
958 mstream->offset += amount;
964 tiffWriteCallback(thandle_t handle,
968 L_MEMSTREAM *mstream;
975 mstream = (L_MEMSTREAM *)handle;
976 if (mstream->offset + length > mstream->bufsize) {
977 newsize = 2 * (mstream->offset + length);
978 mstream->buffer = (l_uint8 *)reallocNew((
void **)&mstream->buffer,
979 mstream->offset, newsize);
980 mstream->bufsize = newsize;
983 memcpy(mstream->buffer + mstream->offset, data, length);
984 mstream->offset += length;
985 mstream->hw = L_MAX(mstream->offset, mstream->hw);
991 tiffSeekCallback(thandle_t handle,
995 L_MEMSTREAM *mstream;
997 PROCNAME(
"tiffSeekCallback");
998 mstream = (L_MEMSTREAM *)handle;
1002 mstream->offset = offset;
1006 mstream->offset += offset;
1011 mstream->offset = mstream->hw - offset;
1014 return (toff_t)ERROR_INT(
"bad whence value", procName,
1018 return mstream->offset;
1023 tiffCloseCallback(thandle_t handle)
1025 L_MEMSTREAM *mstream;
1027 mstream = (L_MEMSTREAM *)handle;
1028 if (mstream->poutdata) {
1029 *mstream->poutdata = mstream->buffer;
1030 *mstream->poutsize = mstream->hw;
1038 tiffSizeCallback(thandle_t handle)
1040 L_MEMSTREAM *mstream;
1042 mstream = (L_MEMSTREAM *)handle;
1048 tiffMapCallback(thandle_t handle,
1052 L_MEMSTREAM *mstream;
1054 mstream = (L_MEMSTREAM *)handle;
1055 *data = mstream->buffer;
1056 *length = mstream->hw;
1062 tiffUnmapCallback(thandle_t handle,
1087 fopenTiffMemstream(
const char *filename,
1088 const char *operation,
1092 L_MEMSTREAM *mstream;
1094 PROCNAME(
"fopenTiffMemstream");
1097 return (TIFF *)ERROR_PTR(
"filename not defined", procName, NULL);
1099 return (TIFF *)ERROR_PTR(
"operation not defined", procName, NULL);
1101 return (TIFF *)ERROR_PTR(
"&data not defined", procName, NULL);
1103 return (TIFF *)ERROR_PTR(
"&datasize not defined", procName, NULL);
1104 if (!strcmp(operation,
"r") && !strcmp(operation,
"w"))
1105 return (TIFF *)ERROR_PTR(
"operation not 'r' or 'w'}", procName,
1108 if (!strcmp(operation,
"r"))
1109 mstream = memstreamCreateForRead(*pdata, *pdatasize);
1111 mstream = memstreamCreateForWrite(pdata, pdatasize);
1113 return TIFFClientOpen(filename, operation, mstream, tiffReadCallback,
1114 tiffWriteCallback, tiffSeekCallback,
1115 tiffCloseCallback, tiffSizeCallback,
1116 tiffMapCallback, tiffUnmapCallback);
1122 OpenclDevice::pixReadMemTiffCl(
const l_uint8 *data,
size_t size,l_int32 n)
1124 l_int32 i, pagefound;
1128 PROCNAME(
"pixReadMemTiffCl");
1131 return (PIX *)ERROR_PTR(
"data pointer is NULL", procName, NULL);
1133 if ((tif = fopenTiffMemstream(
"",
"r", (l_uint8 **)&data, &size)) ==
1135 return (PIX *)ERROR_PTR(
"tif not opened", procName, NULL);
1139 for (i = 0; i < MAX_PAGES_IN_TIFF_FILE; i++) {
1142 if ((pix = pixReadFromTiffStreamCl(tif)) == NULL) {
1144 return (PIX *)ERROR_PTR(
"pix not read", procName, NULL);
1148 if (TIFFReadDirectory(tif) == 0)
break;
1151 if (pagefound ==
FALSE) {
1152 L_WARNING(
"tiff page %d not found\n", procName, i);
1162 OpenclDevice::pixReadStreamTiffCl(FILE *fp,
1165 l_int32 i, pagefound;
1169 PROCNAME(
"pixReadStreamTiff");
1171 if (!fp)
return (PIX *)ERROR_PTR(
"stream not defined", procName, NULL);
1173 if ((tif = fopenTiffCl(fp,
"rb")) == NULL)
1174 return (PIX *)ERROR_PTR(
"tif not opened", procName, NULL);
1178 for (i = 0; i < MAX_PAGES_IN_TIFF_FILE; i++) {
1181 if ((pix = pixReadFromTiffStreamCl(tif)) == NULL) {
1183 return (PIX *)ERROR_PTR(
"pix not read", procName, NULL);
1187 if (TIFFReadDirectory(tif) == 0)
1191 if (pagefound ==
FALSE) {
1192 L_WARNING(
"tiff page %d not found", procName, n);
1202 getTiffCompressedFormat(l_uint16 tiffcomp)
1208 case COMPRESSION_CCITTFAX4:
1209 comptype = IFF_TIFF_G4;
1211 case COMPRESSION_CCITTFAX3:
1212 comptype = IFF_TIFF_G3;
1214 case COMPRESSION_CCITTRLE:
1215 comptype = IFF_TIFF_RLE;
1217 case COMPRESSION_PACKBITS:
1218 comptype = IFF_TIFF_PACKBITS;
1220 case COMPRESSION_LZW:
1221 comptype = IFF_TIFF_LZW;
1223 case COMPRESSION_ADOBE_DEFLATE:
1224 comptype = IFF_TIFF_ZIP;
1227 comptype = IFF_TIFF;
1233 void compare(l_uint32 *cpu, l_uint32 *gpu,
int size)
1235 for(
int i=0;i<size;i++)
1239 printf(
"\ndoesnot match\n");
1243 printf(
"\nit matches\n");
1249 OpenclDevice::pixReadFromTiffStreamCl(TIFF *tif)
1251 l_uint8 *linebuf, *data;
1252 l_uint16 spp, bps, bpp, tiffbpl, photometry, tiffcomp, orientation;
1253 l_uint16 *redmap, *greenmap, *bluemap;
1254 l_int32 d, wpl, bpl, comptype, i, ncolors;
1257 l_uint32 *line, *tiffdata;
1261 PROCNAME(
"pixReadFromTiffStream");
1263 if (!tif)
return (PIX *)ERROR_PTR(
"tif not defined", procName, NULL);
1265 TIFFGetFieldDefaulted(tif, TIFFTAG_BITSPERSAMPLE, &bps);
1266 TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLESPERPIXEL, &spp);
1269 return (PIX *)ERROR_PTR(
"can't handle bpp > 32", procName, NULL);
1272 else if (spp == 3 || spp == 4)
1275 return (PIX *)ERROR_PTR(
"spp not in set {1,3,4}", procName, NULL);
1277 TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &w);
1278 TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &h);
1279 tiffbpl = TIFFScanlineSize(tif);
1281 if ((pix = pixCreate(w, h, d)) == NULL)
1282 return (PIX *)ERROR_PTR(
"pix not made", procName, NULL);
1283 data = (l_uint8 *)pixGetData(pix);
1284 wpl = pixGetWpl(pix);
1288 if ((linebuf = (l_uint8 *)CALLOC(tiffbpl + 1,
sizeof(l_uint8))) ==
1290 return (PIX *)ERROR_PTR(
"calloc fail for linebuf", procName, NULL);
1292 for (i = 0; i < h; i++) {
1293 if (TIFFReadScanline(tif, linebuf, i, 0) < 0) {
1296 return (PIX *)ERROR_PTR(
"line read fail", procName, NULL);
1298 memcpy((
char *)data, (
char *)linebuf, tiffbpl);
1302 pixEndianByteSwap(pix);
1304 pixEndianTwoByteSwap(pix);
1307 if ((tiffdata = (l_uint32 *)CALLOC(w * h,
sizeof(l_uint32))) == NULL) {
1309 return (PIX *)ERROR_PTR(
"calloc fail for tiffdata", procName, NULL);
1311 if (!TIFFReadRGBAImageOriented(tif, w, h, (uint32 *)tiffdata,
1312 ORIENTATION_TOPLEFT, 0)) {
1315 return (PIX *)ERROR_PTR(
"failed to read tiffdata", procName, NULL);
1317 line = pixGetData(pix);
1320 l_uint32 *output_gpu = pixReadFromTiffKernel(tiffdata, w, h, wpl, line);
1322 pixSetData(pix, output_gpu);
1329 if (getTiffStreamResolutionCl(tif, &xres, &yres) == 0) {
1330 pixSetXRes(pix, xres);
1331 pixSetYRes(pix, yres);
1335 TIFFGetFieldDefaulted(tif, TIFFTAG_COMPRESSION, &tiffcomp);
1336 comptype = getTiffCompressedFormat(tiffcomp);
1337 pixSetInputFormat(pix, comptype);
1339 if (TIFFGetField(tif, TIFFTAG_COLORMAP, &redmap, &greenmap, &bluemap)) {
1340 if ((cmap = pixcmapCreate(bps)) == NULL) {
1342 return (PIX *)ERROR_PTR(
"cmap not made", procName, NULL);
1345 for (i = 0; i < ncolors; i++)
1346 pixcmapAddColor(cmap, redmap[i] >> 8, greenmap[i] >> 8,
1348 pixSetColormap(pix, cmap);
1350 if (!TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &photometry)) {
1351 if (tiffcomp == COMPRESSION_CCITTFAX3 ||
1352 tiffcomp == COMPRESSION_CCITTFAX4 ||
1353 tiffcomp == COMPRESSION_CCITTRLE ||
1354 tiffcomp == COMPRESSION_CCITTRLEW) {
1355 photometry = PHOTOMETRIC_MINISWHITE;
1357 photometry = PHOTOMETRIC_MINISBLACK;
1359 if ((d == 1 && photometry == PHOTOMETRIC_MINISBLACK) ||
1360 (d == 8 && photometry == PHOTOMETRIC_MINISWHITE))
1361 pixInvert(pix, pix);
1364 if (TIFFGetField(tif, TIFFTAG_ORIENTATION, &orientation)) {
1365 if (orientation >= 1 && orientation <= 8) {
1366 struct tiff_transform *transform =
1367 &tiff_orientation_transforms[orientation - 1];
1368 if (transform->vflip) pixFlipTB(pix, pix);
1369 if (transform->hflip) pixFlipLR(pix, pix);
1370 if (transform->rotate) {
1372 pix = pixRotate90(oldpix, transform->rotate);
1373 pixDestroy(&oldpix);
1383 pixDilateCL_55(l_int32 wpl, l_int32 h)
1385 size_t globalThreads[2];
1389 size_t localThreads[2];
1392 gsize = (wpl*h + GROUPSIZE_HMORX - 1)/ GROUPSIZE_HMORX * GROUPSIZE_HMORX;
1393 globalThreads[0] = gsize;
1394 globalThreads[1] = GROUPSIZE_HMORY;
1395 localThreads[0] = GROUPSIZE_HMORX;
1396 localThreads[1] = GROUPSIZE_HMORY;
1398 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoDilateHor_5x5", &status );
1399 CHECK_OPENCL(status,
"clCreateKernel morphoDilateHor_5x5");
1401 status = clSetKernelArg(rEnv.mpkKernel,
1405 status = clSetKernelArg(rEnv.mpkKernel,
1409 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(wpl), &wpl);
1410 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(h), &h);
1412 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1413 NULL, globalThreads, localThreads, 0,
1417 pixtemp = pixsCLBuffer;
1418 pixsCLBuffer = pixdCLBuffer;
1419 pixdCLBuffer = pixtemp;
1422 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
1423 globalThreads[0] = gsize;
1424 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
1425 globalThreads[1] = gsize;
1426 localThreads[0] = GROUPSIZE_X;
1427 localThreads[1] = GROUPSIZE_Y;
1429 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoDilateVer_5x5", &status );
1430 CHECK_OPENCL(status,
"clCreateKernel morphoDilateVer_5x5");
1432 status = clSetKernelArg(rEnv.mpkKernel,
1436 status = clSetKernelArg(rEnv.mpkKernel,
1440 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(wpl), &wpl);
1441 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(h), &h);
1442 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1443 NULL, globalThreads, localThreads, 0,
1451 pixErodeCL_55(l_int32 wpl, l_int32 h)
1453 size_t globalThreads[2];
1457 l_uint32 fwmask, lwmask;
1458 size_t localThreads[2];
1460 lwmask = lmask32[31 - 2];
1461 fwmask = rmask32[31 - 2];
1464 gsize = (wpl*h + GROUPSIZE_HMORX - 1)/ GROUPSIZE_HMORX * GROUPSIZE_HMORX;
1465 globalThreads[0] = gsize;
1466 globalThreads[1] = GROUPSIZE_HMORY;
1467 localThreads[0] = GROUPSIZE_HMORX;
1468 localThreads[1] = GROUPSIZE_HMORY;
1470 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoErodeHor_5x5", &status );
1471 CHECK_OPENCL(status,
"clCreateKernel morphoErodeHor_5x5");
1473 status = clSetKernelArg(rEnv.mpkKernel,
1477 status = clSetKernelArg(rEnv.mpkKernel,
1481 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(wpl), &wpl);
1482 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(h), &h);
1484 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1485 NULL, globalThreads, localThreads, 0,
1489 pixtemp = pixsCLBuffer;
1490 pixsCLBuffer = pixdCLBuffer;
1491 pixdCLBuffer = pixtemp;
1494 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
1495 globalThreads[0] = gsize;
1496 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
1497 globalThreads[1] = gsize;
1498 localThreads[0] = GROUPSIZE_X;
1499 localThreads[1] = GROUPSIZE_Y;
1501 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoErodeVer_5x5", &status );
1502 CHECK_OPENCL(status,
"clCreateKernel morphoErodeVer_5x5");
1504 status = clSetKernelArg(rEnv.mpkKernel,
1508 status = clSetKernelArg(rEnv.mpkKernel,
1512 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(wpl), &wpl);
1513 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(h), &h);
1514 status = clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(fwmask), &fwmask);
1515 status = clSetKernelArg(rEnv.mpkKernel, 5,
sizeof(lwmask), &lwmask);
1516 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1517 NULL, globalThreads, localThreads, 0,
1525 pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h)
1527 l_int32 xp, yp, xn, yn;
1529 size_t globalThreads[2];
1533 size_t localThreads[2];
1536 OpenclDevice::SetKernelEnv( &rEnv );
1538 if (hsize == 5 && vsize == 5)
1541 status = pixDilateCL_55(wpl, h);
1545 sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT);
1547 selFindMaxTranslations(sel, &xp, &yp, &xn, &yn);
1550 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
1551 globalThreads[0] = gsize;
1552 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
1553 globalThreads[1] = gsize;
1554 localThreads[0] = GROUPSIZE_X;
1555 localThreads[1] = GROUPSIZE_Y;
1557 if (xp > 31 || xn > 31)
1561 clCreateKernel(rEnv.mpkProgram,
"morphoDilateHor", &status);
1562 CHECK_OPENCL(status,
"clCreateKernel morphoDilateHor");
1564 status = clSetKernelArg(rEnv.mpkKernel, 0,
sizeof(cl_mem), &pixsCLBuffer);
1565 status = clSetKernelArg(rEnv.mpkKernel, 1,
sizeof(cl_mem), &pixdCLBuffer);
1566 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(xp), &xp);
1567 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(xn), &xn);
1568 status = clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(wpl), &wpl);
1569 status = clSetKernelArg(rEnv.mpkKernel, 5,
sizeof(h), &h);
1570 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1571 NULL, globalThreads, localThreads, 0,
1574 if (yp > 0 || yn > 0) {
1575 pixtemp = pixsCLBuffer;
1576 pixsCLBuffer = pixdCLBuffer;
1577 pixdCLBuffer = pixtemp;
1580 else if (xp > 0 || xn > 0 )
1584 clCreateKernel(rEnv.mpkProgram,
"morphoDilateHor_32word", &status);
1585 CHECK_OPENCL(status,
"clCreateKernel morphoDilateHor_32word");
1586 isEven = (xp != xn);
1588 status = clSetKernelArg(rEnv.mpkKernel, 0,
sizeof(cl_mem), &pixsCLBuffer);
1589 status = clSetKernelArg(rEnv.mpkKernel, 1,
sizeof(cl_mem), &pixdCLBuffer);
1590 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(xp), &xp);
1591 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(wpl), &wpl);
1592 status = clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(h), &h);
1593 status = clSetKernelArg(rEnv.mpkKernel, 5,
sizeof(isEven), &isEven);
1594 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1595 NULL, globalThreads, localThreads, 0,
1598 if (yp > 0 || yn > 0) {
1599 pixtemp = pixsCLBuffer;
1600 pixsCLBuffer = pixdCLBuffer;
1601 pixdCLBuffer = pixtemp;
1605 if (yp > 0 || yn > 0)
1607 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"morphoDilateVer", &status );
1608 CHECK_OPENCL(status,
"clCreateKernel morphoDilateVer");
1610 status = clSetKernelArg(rEnv.mpkKernel,
1614 status = clSetKernelArg(rEnv.mpkKernel,
1618 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(yp), &yp);
1619 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(wpl), &wpl);
1620 status = clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(h), &h);
1621 status = clSetKernelArg(rEnv.mpkKernel, 5,
sizeof(yn), &yn);
1622 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1623 NULL, globalThreads, localThreads, 0,
1631 cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h) {
1632 l_int32 xp, yp, xn, yn;
1634 size_t globalThreads[2];
1635 size_t localThreads[2];
1639 char isAsymmetric = (MORPH_BC == ASYMMETRIC_MORPH_BC);
1640 l_uint32 rwmask, lwmask;
1643 sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT);
1645 selFindMaxTranslations(sel, &xp, &yp, &xn, &yn);
1647 OpenclDevice::SetKernelEnv(&rEnv);
1649 if (hsize == 5 && vsize == 5 && isAsymmetric) {
1651 status = pixErodeCL_55(wpl, h);
1655 lwmask = lmask32[31 - (xn & 31)];
1656 rwmask = rmask32[31 - (xp & 31)];
1659 gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X;
1660 globalThreads[0] = gsize;
1661 gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y;
1662 globalThreads[1] = gsize;
1663 localThreads[0] = GROUPSIZE_X;
1664 localThreads[1] = GROUPSIZE_Y;
1667 if (xp > 31 || xn > 31) {
1669 rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram,
"morphoErodeHor", &status);
1671 status = clSetKernelArg(rEnv.mpkKernel, 0,
sizeof(cl_mem), &pixsCLBuffer);
1672 status = clSetKernelArg(rEnv.mpkKernel, 1,
sizeof(cl_mem), &pixdCLBuffer);
1673 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(xp), &xp);
1674 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(xn), &xn);
1675 status = clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(wpl), &wpl);
1676 status = clSetKernelArg(rEnv.mpkKernel, 5,
sizeof(h), &h);
1678 clSetKernelArg(rEnv.mpkKernel, 6,
sizeof(isAsymmetric), &isAsymmetric);
1679 status = clSetKernelArg(rEnv.mpkKernel, 7,
sizeof(rwmask), &rwmask);
1680 status = clSetKernelArg(rEnv.mpkKernel, 8,
sizeof(lwmask), &lwmask);
1681 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1682 NULL, globalThreads, localThreads, 0,
1685 if (yp > 0 || yn > 0) {
1686 pixtemp = pixsCLBuffer;
1687 pixsCLBuffer = pixdCLBuffer;
1688 pixdCLBuffer = pixtemp;
1690 }
else if (xp > 0 || xn > 0) {
1692 clCreateKernel(rEnv.mpkProgram,
"morphoErodeHor_32word", &status);
1693 isEven = (xp != xn);
1695 status = clSetKernelArg(rEnv.mpkKernel, 0,
sizeof(cl_mem), &pixsCLBuffer);
1696 status = clSetKernelArg(rEnv.mpkKernel, 1,
sizeof(cl_mem), &pixdCLBuffer);
1697 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(xp), &xp);
1698 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(wpl), &wpl);
1699 status = clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(h), &h);
1701 clSetKernelArg(rEnv.mpkKernel, 5,
sizeof(isAsymmetric), &isAsymmetric);
1702 status = clSetKernelArg(rEnv.mpkKernel, 6,
sizeof(rwmask), &rwmask);
1703 status = clSetKernelArg(rEnv.mpkKernel, 7,
sizeof(lwmask), &lwmask);
1704 status = clSetKernelArg(rEnv.mpkKernel, 8,
sizeof(isEven), &isEven);
1705 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1706 NULL, globalThreads, localThreads, 0,
1709 if (yp > 0 || yn > 0) {
1710 pixtemp = pixsCLBuffer;
1711 pixsCLBuffer = pixdCLBuffer;
1712 pixdCLBuffer = pixtemp;
1717 if (yp > 0 || yn > 0) {
1718 rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram,
"morphoErodeVer", &status);
1719 CHECK_OPENCL(status,
"clCreateKernel morphoErodeVer");
1721 status = clSetKernelArg(rEnv.mpkKernel, 0,
sizeof(cl_mem), &pixsCLBuffer);
1722 status = clSetKernelArg(rEnv.mpkKernel, 1,
sizeof(cl_mem), &pixdCLBuffer);
1723 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(yp), &yp);
1724 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(wpl), &wpl);
1725 status = clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(h), &h);
1727 clSetKernelArg(rEnv.mpkKernel, 5,
sizeof(isAsymmetric), &isAsymmetric);
1728 status = clSetKernelArg(rEnv.mpkKernel, 6,
sizeof(yn), &yn);
1729 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1730 NULL, globalThreads, localThreads, 0,
1739 PIX *OpenclDevice::pixDilateBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize,
1740 l_int32 vsize,
bool reqDataCopy =
false) {
1743 wpl = pixGetWpl(pixs);
1744 h = pixGetHeight(pixs);
1746 clStatus = pixDilateCL(hsize, vsize, wpl, h);
1749 pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h,
1750 CL_MAP_READ,
false);
1758 PIX *OpenclDevice::pixErodeBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize,
1759 l_int32 vsize,
bool reqDataCopy =
false) {
1762 wpl = pixGetWpl(pixs);
1763 h = pixGetHeight(pixs);
1765 clStatus = pixErodeCL(hsize, vsize, wpl, h);
1769 mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h, CL_MAP_READ);
1777 pixOpenCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h)
1783 status = pixErodeCL(hsize, vsize, wpl, h);
1785 pixtemp = pixsCLBuffer;
1786 pixsCLBuffer = pixdCLBuffer;
1787 pixdCLBuffer = pixtemp;
1789 status = pixDilateCL(hsize, vsize, wpl, h);
1796 pixCloseCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h)
1802 status = pixDilateCL(hsize, vsize, wpl, h);
1804 pixtemp = pixsCLBuffer;
1805 pixsCLBuffer = pixdCLBuffer;
1806 pixdCLBuffer = pixtemp;
1808 status = pixErodeCL(hsize, vsize, wpl, h);
1815 PIX *OpenclDevice::pixCloseBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize,
1816 l_int32 vsize,
bool reqDataCopy =
false) {
1819 wpl = pixGetWpl(pixs);
1820 h = pixGetHeight(pixs);
1822 clStatus = pixCloseCL(hsize, vsize, wpl, h);
1826 mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h, CL_MAP_READ);
1834 PIX *OpenclDevice::pixOpenBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize,
1835 l_int32 vsize,
bool reqDataCopy =
false) {
1838 wpl = pixGetWpl(pixs);
1839 h = pixGetHeight(pixs);
1841 clStatus = pixOpenCL(hsize, vsize, wpl, h);
1845 mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h, CL_MAP_READ);
1853 pixORCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem outbuffer)
1856 size_t globalThreads[2];
1858 size_t localThreads[] = {GROUPSIZE_X, GROUPSIZE_Y};
1860 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
1861 globalThreads[0] = gsize;
1862 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
1863 globalThreads[1] = gsize;
1865 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"pixOR", &status );
1866 CHECK_OPENCL(status,
"clCreateKernel pixOR");
1868 status = clSetKernelArg(rEnv.mpkKernel,
1872 status = clSetKernelArg(rEnv.mpkKernel,
1876 status = clSetKernelArg(rEnv.mpkKernel,
1880 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(wpl), &wpl);
1881 status = clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(h), &h);
1882 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1883 NULL, globalThreads, localThreads, 0,
1891 pixANDCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem outbuffer)
1894 size_t globalThreads[2];
1896 size_t localThreads[] = {GROUPSIZE_X, GROUPSIZE_Y};
1898 gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X;
1899 globalThreads[0] = gsize;
1900 gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y;
1901 globalThreads[1] = gsize;
1903 rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram,
"pixAND", &status );
1904 CHECK_OPENCL(status,
"clCreateKernel pixAND");
1907 status = clSetKernelArg(rEnv.mpkKernel,
1911 status = clSetKernelArg(rEnv.mpkKernel,
1915 status = clSetKernelArg(rEnv.mpkKernel,
1919 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(wpl), &wpl);
1920 status = clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(h), &h);
1921 status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2,
1922 NULL, globalThreads, localThreads, 0,
1929 cl_int pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1,
1930 cl_mem buffer2, cl_mem outBuffer = NULL) {
1932 size_t globalThreads[2];
1934 size_t localThreads[] = {GROUPSIZE_X, GROUPSIZE_Y};
1936 gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X;
1937 globalThreads[0] = gsize;
1938 gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y;
1939 globalThreads[1] = gsize;
1941 if (outBuffer != NULL) {
1942 rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram,
"pixSubtract", &status);
1943 CHECK_OPENCL(status,
"clCreateKernel pixSubtract");
1946 clCreateKernel(rEnv.mpkProgram,
"pixSubtract_inplace", &status);
1947 CHECK_OPENCL(status,
"clCreateKernel pixSubtract_inplace");
1951 status = clSetKernelArg(rEnv.mpkKernel, 0,
sizeof(cl_mem), &buffer1);
1952 status = clSetKernelArg(rEnv.mpkKernel, 1,
sizeof(cl_mem), &buffer2);
1953 status = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(wpl), &wpl);
1954 status = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(h), &h);
1955 if (outBuffer != NULL) {
1956 status = clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(cl_mem), &outBuffer);
1959 clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL,
1960 globalThreads, localThreads, 0, NULL, NULL);
1967 PIX *OpenclDevice::pixSubtractCL(PIX *pixd, PIX *pixs1, PIX *pixs2,
1968 bool reqDataCopy =
false) {
1971 PROCNAME(
"pixSubtractCL");
1973 if (!pixs1)
return (PIX *)ERROR_PTR(
"pixs1 not defined", procName, pixd);
1974 if (!pixs2)
return (PIX *)ERROR_PTR(
"pixs2 not defined", procName, pixd);
1975 if (pixGetDepth(pixs1) != pixGetDepth(pixs2))
1976 return (PIX *)ERROR_PTR(
"depths of pixs* unequal", procName, pixd);
1978 #if EQUAL_SIZE_WARNING
1979 if (!pixSizesEqual(pixs1, pixs2))
1980 L_WARNING(
"pixs1 and pixs2 not equal sizes", procName);
1983 wpl = pixGetWpl(pixs1);
1984 h = pixGetHeight(pixs1);
1986 clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer);
1991 pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs1, wpl*h, CL_MAP_READ);
1999 PIX *OpenclDevice::pixHollowCL(PIX *pixd, PIX *pixs, l_int32 close_hsize,
2000 l_int32 close_vsize, l_int32 open_hsize,
2001 l_int32 open_vsize,
bool reqDataCopy =
false) {
2005 wpl = pixGetWpl(pixs);
2006 h = pixGetHeight(pixs);
2009 clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h);
2014 clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0,
2015 0,
sizeof(
int) * wpl * h, 0, NULL, NULL);
2018 pixtemp = pixsCLBuffer;
2019 pixsCLBuffer = pixdCLBuffer;
2020 pixdCLBuffer = pixtemp;
2022 clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h);
2025 pixtemp = pixsCLBuffer;
2026 pixsCLBuffer = pixdCLBuffer;
2027 pixdCLBuffer = pixdCLIntermediate;
2028 pixdCLIntermediate = pixtemp;
2030 clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer);
2035 mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h, CL_MAP_READ);
2042 void OpenclDevice::pixGetLinesCL(PIX *pixd, PIX *pixs, PIX **pix_vline,
2043 PIX **pix_hline, PIX **pixClosed,
2044 bool getpixClosed, l_int32 close_hsize,
2045 l_int32 close_vsize, l_int32 open_hsize,
2046 l_int32 open_vsize, l_int32 line_hsize,
2047 l_int32 line_vsize) {
2051 wpl = pixGetWpl(pixs);
2052 h = pixGetHeight(pixs);
2055 clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h);
2059 *pixClosed = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pixClosed, pixs,
2060 wpl * h, CL_MAP_READ,
true,
false);
2066 clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0,
2067 0,
sizeof(
int) * wpl * h, 0, NULL, NULL);
2070 pixtemp = pixsCLBuffer;
2071 pixsCLBuffer = pixdCLBuffer;
2072 pixdCLBuffer = pixtemp;
2074 clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h);
2077 pixtemp = pixsCLBuffer;
2078 pixsCLBuffer = pixdCLBuffer;
2079 pixdCLBuffer = pixdCLIntermediate;
2080 pixdCLIntermediate = pixtemp;
2082 clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer);
2087 clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0,
2088 0,
sizeof(
int) * wpl * h, 0, NULL, NULL);
2090 pixtemp = pixsCLBuffer;
2091 pixsCLBuffer = pixdCLBuffer;
2092 pixdCLBuffer = pixtemp;
2096 clStatus = pixOpenCL(1, line_vsize, wpl, h);
2099 *pix_vline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_vline, pixs, wpl * h,
2100 CL_MAP_READ,
true,
false);
2102 pixtemp = pixsCLBuffer;
2103 pixsCLBuffer = pixdCLIntermediate;
2104 pixdCLIntermediate = pixtemp;
2108 clStatus = pixOpenCL(line_hsize, 1, wpl, h);
2111 *pix_hline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_hline, pixs, wpl * h,
2112 CL_MAP_READ,
true,
true);
2123 int OpenclDevice::HistogramRectOCL(
unsigned char *imageData,
2124 int bytes_per_pixel,
int bytes_per_line,
2128 int *histogramAllChannels) {
2133 SetKernelEnv(&histKern);
2134 KernelEnv histRedKern;
2135 SetKernelEnv(&histRedKern);
2141 cl_mem imageBuffer = clCreateBuffer(
2142 histKern.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
2143 width * height * bytes_per_pixel * sizeof(
char), imageData, &clStatus);
2144 CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer");
2147 int block_size = 256;
2149 clStatus = clGetDeviceInfo(gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS,
2150 sizeof(numCUs), &numCUs, NULL);
2151 CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer");
2153 int requestedOccupancy = 10;
2154 int numWorkGroups = numCUs * requestedOccupancy;
2155 int numThreads = block_size * numWorkGroups;
2156 size_t local_work_size[] = {
static_cast<size_t>(block_size)};
2157 size_t global_work_size[] = {
static_cast<size_t>(numThreads)};
2158 size_t red_global_work_size[] = {
2159 static_cast<size_t>(block_size * kHistogramSize * bytes_per_pixel)};
2162 int numBins = kHistogramSize * bytes_per_pixel * numWorkGroups;
2164 cl_mem histogramBuffer = clCreateBuffer(
2165 histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
2166 kHistogramSize * bytes_per_pixel *
sizeof(
int), histogramAllChannels,
2168 CHECK_OPENCL(clStatus,
"clCreateBuffer histogramBuffer");
2172 int tmpHistogramBins = kHistogramSize * bytes_per_pixel * histRed;
2174 cl_mem tmpHistogramBuffer =
2175 clCreateBuffer(histKern.mpkContext, CL_MEM_READ_WRITE,
2176 tmpHistogramBins *
sizeof(cl_uint), NULL, &clStatus);
2177 CHECK_OPENCL(clStatus,
"clCreateBuffer tmpHistogramBuffer");
2180 int *zeroBuffer =
new int[1];
2182 cl_mem atomicSyncBuffer = clCreateBuffer(
2183 histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
2184 sizeof(cl_int), zeroBuffer, &clStatus);
2185 CHECK_OPENCL(clStatus,
"clCreateBuffer atomicSyncBuffer");
2186 delete[] zeroBuffer;
2188 if (bytes_per_pixel == 1) {
2189 histKern.mpkKernel = clCreateKernel(
2190 histKern.mpkProgram,
"kernel_HistogramRectOneChannel", &clStatus);
2191 CHECK_OPENCL(clStatus,
"clCreateKernel kernel_HistogramRectOneChannel");
2193 histRedKern.mpkKernel =
2194 clCreateKernel(histRedKern.mpkProgram,
2195 "kernel_HistogramRectOneChannelReduction", &clStatus);
2196 CHECK_OPENCL(clStatus,
2197 "clCreateKernel kernel_HistogramRectOneChannelReduction");
2199 histKern.mpkKernel = clCreateKernel( histKern.mpkProgram,
"kernel_HistogramRectAllChannels", &clStatus );
2200 CHECK_OPENCL( clStatus,
"clCreateKernel kernel_HistogramRectAllChannels");
2202 histRedKern.mpkKernel = clCreateKernel( histRedKern.mpkProgram,
"kernel_HistogramRectAllChannelsReduction", &clStatus );
2203 CHECK_OPENCL( clStatus,
"clCreateKernel kernel_HistogramRectAllChannelsReduction");
2209 ptr = clEnqueueMapBuffer(
2210 histKern.mpkCmdQueue, tmpHistogramBuffer, CL_TRUE, CL_MAP_WRITE, 0,
2211 tmpHistogramBins *
sizeof(cl_uint), 0, NULL, NULL, &clStatus);
2212 CHECK_OPENCL( clStatus,
"clEnqueueMapBuffer tmpHistogramBuffer");
2214 memset(ptr, 0, tmpHistogramBins*
sizeof(cl_uint));
2215 clEnqueueUnmapMemObject(histKern.mpkCmdQueue, tmpHistogramBuffer, ptr, 0,
2220 clSetKernelArg(histKern.mpkKernel, 0,
sizeof(cl_mem), &imageBuffer);
2221 CHECK_OPENCL( clStatus,
"clSetKernelArg imageBuffer");
2222 cl_uint numPixels = width*height;
2224 clSetKernelArg(histKern.mpkKernel, 1,
sizeof(cl_uint), &numPixels);
2225 CHECK_OPENCL( clStatus,
"clSetKernelArg numPixels" );
2226 clStatus = clSetKernelArg(histKern.mpkKernel, 2,
sizeof(cl_mem),
2227 &tmpHistogramBuffer);
2228 CHECK_OPENCL( clStatus,
"clSetKernelArg tmpHistogramBuffer");
2231 int n = numThreads/bytes_per_pixel;
2232 clStatus = clSetKernelArg(histRedKern.mpkKernel, 0,
sizeof(cl_int), &n);
2233 CHECK_OPENCL( clStatus,
"clSetKernelArg imageBuffer");
2234 clStatus = clSetKernelArg(histRedKern.mpkKernel, 1,
sizeof(cl_mem),
2235 &tmpHistogramBuffer);
2236 CHECK_OPENCL( clStatus,
"clSetKernelArg tmpHistogramBuffer");
2237 clStatus = clSetKernelArg(histRedKern.mpkKernel, 2,
sizeof(cl_mem),
2239 CHECK_OPENCL( clStatus,
"clSetKernelArg histogramBuffer");
2243 clStatus = clEnqueueNDRangeKernel(histKern.mpkCmdQueue, histKern.mpkKernel, 1,
2244 NULL, global_work_size, local_work_size, 0,
2246 CHECK_OPENCL(clStatus,
2247 "clEnqueueNDRangeKernel kernel_HistogramRectAllChannels");
2248 clFinish(histKern.mpkCmdQueue);
2249 if (clStatus != 0) {
2253 clStatus = clEnqueueNDRangeKernel(
2254 histRedKern.mpkCmdQueue, histRedKern.mpkKernel, 1, NULL,
2255 red_global_work_size, local_work_size, 0, NULL, NULL);
2256 CHECK_OPENCL( clStatus,
"clEnqueueNDRangeKernel kernel_HistogramRectAllChannelsReduction" );
2257 clFinish( histRedKern.mpkCmdQueue );
2258 if (clStatus != 0) {
2264 ptr = clEnqueueMapBuffer(histRedKern.mpkCmdQueue, histogramBuffer, CL_TRUE,
2266 kHistogramSize * bytes_per_pixel * sizeof(
int), 0,
2267 NULL, NULL, &clStatus);
2268 CHECK_OPENCL( clStatus, "clEnqueueMapBuffer histogramBuffer");
2269 if (clStatus != 0) {
2272 clEnqueueUnmapMemObject(histRedKern.mpkCmdQueue, histogramBuffer, ptr, 0,
2275 clReleaseMemObject(histogramBuffer);
2276 clReleaseMemObject(imageBuffer);
2287 int OpenclDevice::ThresholdRectToPixOCL(
unsigned char *imageData,
2288 int bytes_per_pixel,
int bytes_per_line,
2289 int *thresholds,
int *hi_values,
2290 Pix **pix,
int height,
int width,
2291 int top,
int left) {
2295 *pix = pixCreate(width, height, 1);
2296 uinT32 *pixData = pixGetData(*pix);
2297 int wpl = pixGetWpl(*pix);
2298 int pixSize = wpl * height * sizeof(
uinT32);
2302 SetKernelEnv(&rEnv);
2305 int block_size = 256;
2307 clStatus = clGetDeviceInfo(gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS,
2308 sizeof(numCUs), &numCUs, NULL);
2309 CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer");
2311 int requestedOccupancy = 10;
2312 int numWorkGroups = numCUs * requestedOccupancy;
2313 int numThreads = block_size * numWorkGroups;
2314 size_t local_work_size[] = {(size_t)block_size};
2315 size_t global_work_size[] = {(size_t)numThreads};
2322 cl_mem imageBuffer = clCreateBuffer(
2323 rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
2324 width * height * bytes_per_pixel *
sizeof(
char), imageData, &clStatus);
2325 CHECK_OPENCL(clStatus,
"clCreateBuffer imageBuffer");
2329 clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
2330 pixSize, pixData, &clStatus);
2331 CHECK_OPENCL(clStatus,
"clCreateBuffer pix");
2334 cl_mem thresholdsBuffer =
2335 clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
2336 bytes_per_pixel *
sizeof(
int), thresholds, &clStatus);
2337 CHECK_OPENCL(clStatus,
"clCreateBuffer thresholdBuffer");
2338 cl_mem hiValuesBuffer =
2339 clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
2340 bytes_per_pixel *
sizeof(
int), hi_values, &clStatus);
2341 CHECK_OPENCL(clStatus,
"clCreateBuffer hiValuesBuffer");
2344 if (bytes_per_pixel == 4) {
2346 clCreateKernel(rEnv.mpkProgram,
"kernel_ThresholdRectToPix", &clStatus);
2347 CHECK_OPENCL(clStatus,
"clCreateKernel kernel_ThresholdRectToPix");
2349 rEnv.mpkKernel = clCreateKernel(
2350 rEnv.mpkProgram,
"kernel_ThresholdRectToPix_OneChan", &clStatus);
2351 CHECK_OPENCL(clStatus,
"clCreateKernel kernel_ThresholdRectToPix_OneChan");
2355 clStatus = clSetKernelArg(rEnv.mpkKernel, 0,
sizeof(cl_mem), &imageBuffer);
2356 CHECK_OPENCL(clStatus,
"clSetKernelArg imageBuffer");
2357 cl_uint numPixels = width * height;
2358 clStatus = clSetKernelArg(rEnv.mpkKernel, 1,
sizeof(
int), &height);
2359 CHECK_OPENCL(clStatus,
"clSetKernelArg height");
2360 clStatus = clSetKernelArg(rEnv.mpkKernel, 2,
sizeof(
int), &width);
2361 CHECK_OPENCL(clStatus,
"clSetKernelArg width");
2362 clStatus = clSetKernelArg(rEnv.mpkKernel, 3,
sizeof(
int), &wpl);
2363 CHECK_OPENCL(clStatus,
"clSetKernelArg wpl");
2365 clSetKernelArg(rEnv.mpkKernel, 4,
sizeof(cl_mem), &thresholdsBuffer);
2366 CHECK_OPENCL(clStatus,
"clSetKernelArg thresholdsBuffer");
2367 clStatus = clSetKernelArg(rEnv.mpkKernel, 5,
sizeof(cl_mem), &hiValuesBuffer);
2368 CHECK_OPENCL(clStatus,
"clSetKernelArg hiValuesBuffer");
2369 clStatus = clSetKernelArg(rEnv.mpkKernel, 6,
sizeof(cl_mem), &pixThBuffer);
2370 CHECK_OPENCL(clStatus,
"clSetKernelArg pixThBuffer");
2374 clStatus = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 1,
2375 NULL, global_work_size, local_work_size,
2377 CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel kernel_ThresholdRectToPix");
2378 clFinish(rEnv.mpkCmdQueue);
2380 if (clStatus != 0) {
2381 printf(
"Setting return value to -1\n");
2386 clEnqueueMapBuffer(rEnv.mpkCmdQueue, pixThBuffer, CL_TRUE, CL_MAP_READ, 0,
2387 pixSize, 0, NULL, NULL, &clStatus);
2388 CHECK_OPENCL(clStatus,
"clEnqueueMapBuffer histogramBuffer");
2389 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, pixThBuffer, ptr, 0, NULL,
2392 clReleaseMemObject(imageBuffer);
2393 clReleaseMemObject(thresholdsBuffer);
2394 clReleaseMemObject(hiValuesBuffer);
2407 typedef struct _TessScoreEvaluationInputData {
2411 unsigned char *imageData;
2413 } TessScoreEvaluationInputData;
2415 void populateTessScoreEvaluationInputData( TessScoreEvaluationInputData *input ) {
2420 int numChannels = 4;
2421 input->height = height;
2422 input->width = width;
2423 input->numChannels = numChannels;
2424 unsigned char (*imageData4)[4] = (
unsigned char (*)[4]) malloc(height*width*numChannels*
sizeof(
unsigned char));
2425 input->imageData = (
unsigned char *) &imageData4[0];
2428 unsigned char pixelWhite[4] = { 0, 0, 0, 255};
2429 unsigned char pixelBlack[4] = {255, 255, 255, 255};
2430 for (
int p = 0; p < height*width; p++) {
2432 imageData4[p][0] = pixelWhite[0];
2433 imageData4[p][1] = pixelWhite[1];
2434 imageData4[p][2] = pixelWhite[2];
2435 imageData4[p][3] = pixelWhite[3];
2438 int maxLineWidth = 64;
2441 for (
int i = 0; i < numLines; i++) {
2442 int lineWidth = rand()%maxLineWidth;
2443 int vertLinePos = lineWidth + rand()%(width-2*lineWidth);
2445 for (
int row = vertLinePos-lineWidth/2; row < vertLinePos+lineWidth/2; row++) {
2446 for (
int col = 0; col < height; col++) {
2448 imageData4[row*width+col][0] = pixelBlack[0];
2449 imageData4[row*width+col][1] = pixelBlack[1];
2450 imageData4[row*width+col][2] = pixelBlack[2];
2451 imageData4[row*width+col][3] = pixelBlack[3];
2456 for (
int i = 0; i < numLines; i++) {
2457 int lineWidth = rand()%maxLineWidth;
2458 int horLinePos = lineWidth + rand()%(height-2*lineWidth);
2460 for (
int row = 0; row < width; row++) {
2461 for (
int col = horLinePos-lineWidth/2; col < horLinePos+lineWidth/2; col++) {
2464 imageData4[row*width+col][0] = pixelBlack[0];
2465 imageData4[row*width+col][1] = pixelBlack[1];
2466 imageData4[row*width+col][2] = pixelBlack[2];
2467 imageData4[row*width+col][3] = pixelBlack[3];
2472 float fractionBlack = 0.1;
2473 int numSpots = (height*width)*fractionBlack/(maxLineWidth*maxLineWidth/2/2);
2474 for (
int i = 0; i < numSpots; i++) {
2475 int lineWidth = rand()%maxLineWidth;
2476 int col = lineWidth + rand()%(width-2*lineWidth);
2477 int row = lineWidth + rand()%(height-2*lineWidth);
2479 for (
int r = row-lineWidth/2; r < row+lineWidth/2; r++) {
2480 for (
int c = col-lineWidth/2; c < col+lineWidth/2; c++) {
2483 imageData4[r*width+c][0] = pixelBlack[0];
2484 imageData4[r*width+c][1] = pixelBlack[1];
2485 imageData4[r*width+c][2] = pixelBlack[2];
2486 imageData4[r*width+c][3] = pixelBlack[3];
2491 input->pix = pixCreate(input->width, input->height, 1);
2494 typedef struct _TessDeviceScore {
2504 double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) {
2507 LARGE_INTEGER freq, time_funct_start, time_funct_end;
2508 QueryPerformanceFrequency(&freq);
2510 mach_timebase_info_data_t info = {0, 0};
2511 mach_timebase_info(&info);
2512 long long start, stop;
2514 timespec time_funct_start, time_funct_end;
2517 l_uint32 *tiffdata = (l_uint32 *)input.imageData;
2520 if (type == DS_DEVICE_OPENCL_DEVICE) {
2522 QueryPerformanceCounter(&time_funct_start);
2524 start = mach_absolute_time();
2526 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2529 OpenclDevice::gpuEnv = *env;
2530 int wpl = pixGetWpl(input.pix);
2531 OpenclDevice::pixReadFromTiffKernel(tiffdata, input.width, input.height,
2534 QueryPerformanceCounter(&time_funct_end);
2535 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2537 stop = mach_absolute_time();
2538 time = ((stop - start) * (
double)info.numer / info.denom) / 1.0E9;
2540 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2541 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2546 QueryPerformanceCounter(&time_funct_start);
2548 start = mach_absolute_time();
2550 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2552 Pix *pix = pixCreate(input.width, input.height, 32);
2553 l_uint32 *pixData = pixGetData(pix);
2554 int wpl = pixGetWpl(pix);
2559 for (i = 0; i < input.height ; i++) {
2560 for (j = 0; j < input.width; j++) {
2561 l_uint32 tiffword = tiffdata[i * input.width + j];
2562 l_int32 rval = ((tiffword) & 0xff);
2563 l_int32 gval = (((tiffword) >> 8) & 0xff);
2564 l_int32 bval = (((tiffword) >> 16) & 0xff);
2565 l_uint32 value = (rval << 24) | (gval << 16) | (bval << 8);
2566 pixData[idx] = value;
2571 QueryPerformanceCounter(&time_funct_end);
2572 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2574 stop = mach_absolute_time();
2575 time = ((stop - start) * (
double)info.numer / info.denom) / 1.0E9;
2577 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2578 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2589 double histogramRectMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) {
2592 LARGE_INTEGER freq, time_funct_start, time_funct_end;
2593 QueryPerformanceFrequency(&freq);
2595 mach_timebase_info_data_t info = {0, 0};
2596 mach_timebase_info(&info);
2597 long long start, stop;
2599 timespec time_funct_start, time_funct_end;
2602 unsigned char pixelHi = (
unsigned char)255;
2606 int kHistogramSize = 256;
2607 int bytes_per_line = input.width*input.numChannels;
2608 int *histogramAllChannels =
new int[kHistogramSize*input.numChannels];
2611 if (type == DS_DEVICE_OPENCL_DEVICE) {
2613 QueryPerformanceCounter(&time_funct_start);
2615 start = mach_absolute_time();
2617 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2620 OpenclDevice::gpuEnv = *env;
2621 int wpl = pixGetWpl(input.pix);
2622 retVal = OpenclDevice::HistogramRectOCL(
2623 input.imageData, input.numChannels, bytes_per_line, top, left,
2624 input.width, input.height, kHistogramSize, histogramAllChannels);
2627 QueryPerformanceCounter(&time_funct_end);
2628 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2630 stop = mach_absolute_time();
2632 time = ((stop - start) * (
double)info.numer / info.denom) / 1.0E9;
2637 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2638 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2643 QueryPerformanceCounter(&time_funct_start);
2645 start = mach_absolute_time();
2647 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2649 for (
int ch = 0; ch < input.numChannels; ++ch) {
2651 input.width, input.height, histogram);
2654 QueryPerformanceCounter(&time_funct_end);
2655 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2657 stop = mach_absolute_time();
2658 time = ((stop - start) * (
double)info.numer / info.denom) / 1.0E9;
2660 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2661 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2667 delete[] histogramAllChannels;
2672 void ThresholdRectToPix_Native(
const unsigned char* imagedata,
2673 int bytes_per_pixel,
2675 const int* thresholds,
2676 const int* hi_values,
2680 int width = pixGetWidth(*pix);
2681 int height = pixGetHeight(*pix);
2683 *pix = pixCreate(width, height, 1);
2684 uinT32 *pixdata = pixGetData(*pix);
2685 int wpl = pixGetWpl(*pix);
2686 const unsigned char* srcdata = imagedata + top * bytes_per_line +
2687 left * bytes_per_pixel;
2688 for (
int y = 0; y < height; ++y) {
2689 const uinT8 *linedata = srcdata;
2690 uinT32 *pixline = pixdata + y * wpl;
2691 for (
int x = 0; x < width; ++x, linedata += bytes_per_pixel) {
2692 bool white_result =
true;
2693 for (
int ch = 0; ch < bytes_per_pixel; ++ch) {
2694 if (hi_values[ch] >= 0 &&
2695 (linedata[ch] > thresholds[ch]) == (hi_values[ch] == 0)) {
2696 white_result =
false;
2701 CLEAR_DATA_BIT(pixline, x);
2703 SET_DATA_BIT(pixline, x);
2705 srcdata += bytes_per_line;
2709 double thresholdRectToPixMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) {
2713 LARGE_INTEGER freq, time_funct_start, time_funct_end;
2714 QueryPerformanceFrequency(&freq);
2716 mach_timebase_info_data_t info = {0, 0};
2717 mach_timebase_info(&info);
2718 long long start, stop;
2720 timespec time_funct_start, time_funct_end;
2724 unsigned char pixelHi = (
unsigned char)255;
2725 int* thresholds =
new int[4];
2726 thresholds[0] = pixelHi/2;
2727 thresholds[1] = pixelHi/2;
2728 thresholds[2] = pixelHi/2;
2729 thresholds[3] = pixelHi/2;
2730 int *hi_values =
new int[4];
2731 thresholds[0] = pixelHi;
2732 thresholds[1] = pixelHi;
2733 thresholds[2] = pixelHi;
2734 thresholds[3] = pixelHi;
2738 int bytes_per_line = input.width*input.numChannels;
2741 if (type == DS_DEVICE_OPENCL_DEVICE) {
2743 QueryPerformanceCounter(&time_funct_start);
2745 start = mach_absolute_time();
2747 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2750 OpenclDevice::gpuEnv = *env;
2751 int wpl = pixGetWpl(input.pix);
2752 retVal = OpenclDevice::ThresholdRectToPixOCL(
2753 input.imageData, input.numChannels, bytes_per_line, thresholds,
2754 hi_values, &input.pix, input.height, input.width, top, left);
2757 QueryPerformanceCounter(&time_funct_end);
2758 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2760 stop = mach_absolute_time();
2762 time = ((stop - start) * (
double)info.numer / info.denom) / 1.0E9;
2769 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2770 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2778 QueryPerformanceCounter(&time_funct_start);
2780 start = mach_absolute_time();
2782 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2784 ThresholdRectToPix_Native( input.imageData, input.numChannels, bytes_per_line,
2785 thresholds, hi_values, &input.pix );
2788 QueryPerformanceCounter(&time_funct_end);
2789 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2791 stop = mach_absolute_time();
2792 time = ((stop - start) * (
double)info.numer / info.denom) / 1.0E9;
2794 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2795 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2800 delete[] thresholds;
2805 double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) {
2809 LARGE_INTEGER freq, time_funct_start, time_funct_end;
2810 QueryPerformanceFrequency(&freq);
2812 mach_timebase_info_data_t info = {0, 0};
2813 mach_timebase_info(&info);
2814 long long start, stop;
2816 timespec time_funct_start, time_funct_end;
2820 int resolution = 300;
2821 int wpl = pixGetWpl(input.pix);
2826 int closing_brick = max_line_width / 3;
2829 if (type == DS_DEVICE_OPENCL_DEVICE) {
2831 QueryPerformanceCounter(&time_funct_start);
2833 start = mach_absolute_time();
2835 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2837 Pix *src_pix = input.pix;
2838 OpenclDevice::gpuEnv = *env;
2839 OpenclDevice::initMorphCLAllocations(wpl, input.height, input.pix);
2840 Pix *pix_vline = NULL, *pix_hline = NULL, *pix_closed = NULL;
2841 OpenclDevice::pixGetLinesCL(
2842 NULL, input.pix, &pix_vline, &pix_hline, &pix_closed,
true,
2843 closing_brick, closing_brick, max_line_width, max_line_width,
2844 min_line_length, min_line_length);
2846 OpenclDevice::releaseMorphCLBuffers();
2849 QueryPerformanceCounter(&time_funct_end);
2850 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2852 stop = mach_absolute_time();
2853 time = ((stop - start) * (
double)info.numer / info.denom) / 1.0E9;
2855 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2856 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2860 QueryPerformanceCounter(&time_funct_start);
2862 start = mach_absolute_time();
2864 clock_gettime( CLOCK_MONOTONIC, &time_funct_start );
2868 Pix *src_pix = input.pix;
2870 pixCloseBrick(NULL, src_pix, closing_brick, closing_brick);
2872 pixOpenBrick(NULL, pix_closed, max_line_width, max_line_width);
2873 Pix *pix_hollow = pixSubtract(NULL, pix_closed, pix_solid);
2874 pixDestroy(&pix_solid);
2875 Pix *pix_vline = pixOpenBrick(NULL, pix_hollow, 1, min_line_length);
2876 Pix *pix_hline = pixOpenBrick(NULL, pix_hollow, min_line_length, 1);
2877 pixDestroy(&pix_hollow);
2880 QueryPerformanceCounter(&time_funct_end);
2881 time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(
double)(freq.QuadPart);
2883 stop = mach_absolute_time();
2884 time = ((stop - start) * (
double)info.numer / info.denom) / 1.0E9;
2886 clock_gettime( CLOCK_MONOTONIC, &time_funct_end );
2887 time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0;
2903 ds_status serializeScore( ds_device* device,
void **serializedScore,
unsigned int* serializedScoreSize ) {
2904 *serializedScoreSize =
sizeof(TessDeviceScore);
2905 *serializedScore =
new unsigned char[*serializedScoreSize];
2906 memcpy(*serializedScore, device->score, *serializedScoreSize);
2911 ds_status deserializeScore( ds_device* device,
const unsigned char* serializedScore,
unsigned int serializedScoreSize ) {
2913 device->score =
new TessDeviceScore;
2914 memcpy(device->score, serializedScore, serializedScoreSize);
2918 ds_status releaseScore(
void *score) {
2919 delete (TessDeviceScore *)score;
2924 ds_status evaluateScoreForDevice( ds_device *device,
void *inputData) {
2927 printf(
"\n[DS] Device: \"%s\" (%s) evaluation...\n", device->oclDeviceName, device->type==DS_DEVICE_OPENCL_DEVICE ?
"OpenCL" :
"Native" );
2929 if (device->type == DS_DEVICE_OPENCL_DEVICE) {
2932 populateGPUEnvFromDevice( env, device->oclDeviceID);
2933 env->mnFileCount = 0;
2934 env->mnKernelCount = 0UL;
2936 OpenclDevice::gpuEnv = *env;
2937 OpenclDevice::CompileKernelFile(env,
"");
2940 TessScoreEvaluationInputData *input = (TessScoreEvaluationInputData *)inputData;
2943 double composeRGBPixelTime = composeRGBPixelMicroBench( env, *input, device->type );
2946 double histogramRectTime = histogramRectMicroBench( env, *input, device->type );
2949 double thresholdRectToPixTime = thresholdRectToPixMicroBench( env, *input, device->type );
2952 double getLineMasksMorphTime = getLineMasksMorphMicroBench( env, *input, device->type );
2957 float composeRGBPixelWeight = 1.2f;
2958 float histogramRectWeight = 2.4f;
2959 float thresholdRectToPixWeight = 4.5f;
2960 float getLineMasksMorphWeight = 5.0f;
2962 float weightedTime = composeRGBPixelWeight * composeRGBPixelTime +
2963 histogramRectWeight * histogramRectTime +
2964 thresholdRectToPixWeight * thresholdRectToPixTime +
2965 getLineMasksMorphWeight * getLineMasksMorphTime;
2966 device->score =
new TessDeviceScore;
2967 ((TessDeviceScore *)device->score)->time = weightedTime;
2969 printf(
"[DS] Device: \"%s\" (%s) evaluated\n", device->oclDeviceName, device->type==DS_DEVICE_OPENCL_DEVICE ?
"OpenCL" :
"Native" );
2970 printf(
"[DS]%25s: %f (w=%.1f)\n",
"composeRGBPixel", composeRGBPixelTime, composeRGBPixelWeight );
2971 printf(
"[DS]%25s: %f (w=%.1f)\n",
"HistogramRect", histogramRectTime, histogramRectWeight );
2972 printf(
"[DS]%25s: %f (w=%.1f)\n",
"ThresholdRectToPix", thresholdRectToPixTime, thresholdRectToPixWeight );
2973 printf(
"[DS]%25s: %f (w=%.1f)\n",
"getLineMasksMorph", getLineMasksMorphTime, getLineMasksMorphWeight );
2974 printf(
"[DS]%25s: %f\n",
"Score", ((TessDeviceScore *)device->score)->time );
2979 ds_device OpenclDevice::getDeviceSelection( ) {
2980 if (!deviceIsSelected) {
2983 if (1 == LoadOpencl()) {
2988 ds_profile *profile;
2989 status = initDSProfile(&profile,
"v0.1");
2992 const
char *fileName = "tesseract_opencl_profile_devices.dat";
2993 status = readProfileFromFile(profile, deserializeScore, fileName);
2994 if (status != DS_SUCCESS) {
2996 printf(
"[DS] Profile file not available (%s); performing profiling.\n",
3000 TessScoreEvaluationInputData input;
3001 populateTessScoreEvaluationInputData(&input);
3004 unsigned int numUpdates;
3005 status = profileDevices(profile, DS_EVALUATE_ALL,
3006 evaluateScoreForDevice, &input, &numUpdates);
3009 if (status == DS_SUCCESS) {
3010 status = writeProfileToFile(profile, serializeScore, fileName);
3012 if (status == DS_SUCCESS) {
3013 printf(
"[DS] Scores written to file (%s).\n", fileName);
3016 "[DS] Error saving scores to file (%s); scores not written to "
3022 "[DS] Unable to evaluate performance; scores not written to "
3027 printf("[DS] Profile read from file (%s).\n", fileName);
3032 float bestTime = FLT_MAX;
3033 int bestDeviceIdx = -1;
3034 for (
int d = 0; d < profile->numDevices; d++) {
3035 ds_device device = profile->devices[d];
3036 TessDeviceScore score = *(TessDeviceScore *)device.score;
3038 float time = score.time;
3039 printf(
"[DS] Device[%i] %i:%s score is %f\n", d + 1, device.type,
3040 device.oclDeviceName, time);
3041 if (time < bestTime) {
3046 printf(
"[DS] Selected Device[%i]: \"%s\" (%s)\n", bestDeviceIdx + 1,
3047 profile->devices[bestDeviceIdx].oclDeviceName,
3048 profile->devices[bestDeviceIdx].type == DS_DEVICE_OPENCL_DEVICE
3054 bool overridden =
false;
3055 char *overrideDeviceStr = getenv(
"TESSERACT_OPENCL_DEVICE");
3056 if (overrideDeviceStr != NULL) {
3057 int overrideDeviceIdx = atoi(overrideDeviceStr);
3058 if (overrideDeviceIdx > 0 && overrideDeviceIdx <= profile->numDevices) {
3060 "[DS] Overriding Device Selection (TESSERACT_OPENCL_DEVICE=%s, "
3062 overrideDeviceStr, overrideDeviceIdx);
3063 bestDeviceIdx = overrideDeviceIdx - 1;
3067 "[DS] Ignoring invalid TESSERACT_OPENCL_DEVICE=%s ([1,%i] are "
3068 "valid devices).\n",
3069 overrideDeviceStr, profile->numDevices);
3074 printf(
"[DS] Overridden Device[%i]: \"%s\" (%s)\n", bestDeviceIdx + 1,
3075 profile->devices[bestDeviceIdx].oclDeviceName,
3076 profile->devices[bestDeviceIdx].type == DS_DEVICE_OPENCL_DEVICE
3080 selectedDevice = profile->devices[bestDeviceIdx];
3082 releaseDSProfile(profile, releaseScore);
3085 printf(
"[DS] OpenCL runtime not available.\n");
3086 selectedDevice.type = DS_DEVICE_NATIVE_CPU;
3087 selectedDevice.oclDeviceName =
"(null)";
3088 selectedDevice.score = NULL;
3089 selectedDevice.oclDeviceID = NULL;
3090 selectedDevice.oclDriverVersion = NULL;
3092 deviceIsSelected =
true;
3097 return selectedDevice;
3101 bool OpenclDevice::selectedDeviceIsOpenCL() {
3102 ds_device device = getDeviceSelection();
3103 return (device.type == DS_DEVICE_OPENCL_DEVICE);
3106 bool OpenclDevice::selectedDeviceIsNativeCPU() {
3107 ds_device device = getDeviceSelection();
3108 return (device.type == DS_DEVICE_NATIVE_CPU);
3122 #define SET_DATA_BYTE(pdata, n, val) \
3123 (*(l_uint8 *)((l_uintptr_t)((l_uint8 *)(pdata) + (n)) ^ 3) = (val))
3125 Pix *OpenclDevice::pixConvertRGBToGrayOCL(Pix *srcPix,
3126 float rwt,
float gwt,
float bwt) {
3130 if (rwt < 0.0 || gwt < 0.0 || bwt < 0.0) return NULL;
3132 if (rwt == 0.0 && gwt == 0.0 && bwt == 0.0) {
3139 float sum = rwt + gwt + bwt;
3146 pixGetDimensions(srcPix, &w, &h, NULL);
3148 unsigned int *srcData = pixGetData(srcPix);
3149 int srcWPL = pixGetWpl(srcPix);
3150 int srcSize = srcWPL * h *
sizeof(
unsigned int);
3153 if ((dstPix = pixCreate(w, h, 8)) == NULL)
return NULL;
3154 pixCopyResolution(dstPix, srcPix);
3155 unsigned int *dstData = pixGetData(dstPix);
3156 int dstWPL = pixGetWpl(dstPix);
3157 int dstWords = dstWPL * h;
3158 int dstSize = dstWords *
sizeof(
unsigned int);
3165 SetKernelEnv(&kEnv);
3169 clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
3170 srcSize, srcData, &clStatus);
3171 CHECK_OPENCL(clStatus, "clCreateBuffer srcBuffer");
3175 clCreateBuffer(kEnv.mpkContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
3176 dstSize, dstData, &clStatus);
3177 CHECK_OPENCL(clStatus, "clCreateBuffer dstBuffer");
3180 int block_size = 256;
3181 int numWorkGroups = ((h * w + block_size - 1) / block_size);
3182 int numThreads = block_size * numWorkGroups;
3183 size_t local_work_size[] = {
static_cast<size_t>(block_size)};
3184 size_t global_work_size[] = {
static_cast<size_t>(numThreads)};
3189 clCreateKernel(kEnv.mpkProgram,
"kernel_RGBToGray", &clStatus);
3190 CHECK_OPENCL(clStatus,
"clCreateKernel kernel_RGBToGray");
3193 clStatus = clSetKernelArg(kEnv.mpkKernel, 0,
sizeof(cl_mem), &srcBuffer);
3194 CHECK_OPENCL(clStatus,
"clSetKernelArg srcBuffer");
3195 clStatus = clSetKernelArg(kEnv.mpkKernel, 1,
sizeof(cl_mem), &dstBuffer);
3196 CHECK_OPENCL(clStatus,
"clSetKernelArg dstBuffer");
3197 clStatus = clSetKernelArg(kEnv.mpkKernel, 2,
sizeof(
int), &srcWPL);
3198 CHECK_OPENCL(clStatus,
"clSetKernelArg srcWPL");
3199 clStatus = clSetKernelArg(kEnv.mpkKernel, 3,
sizeof(
int), &dstWPL);
3200 CHECK_OPENCL(clStatus,
"clSetKernelArg dstWPL");
3201 clStatus = clSetKernelArg(kEnv.mpkKernel, 4,
sizeof(
int), &h);
3202 CHECK_OPENCL(clStatus,
"clSetKernelArg height");
3203 clStatus = clSetKernelArg(kEnv.mpkKernel, 5,
sizeof(
int), &w);
3204 CHECK_OPENCL(clStatus,
"clSetKernelArg width");
3205 clStatus = clSetKernelArg(kEnv.mpkKernel, 6,
sizeof(
float), &rwt);
3206 CHECK_OPENCL(clStatus,
"clSetKernelArg rwt");
3207 clStatus = clSetKernelArg(kEnv.mpkKernel, 7,
sizeof(
float), &gwt);
3208 CHECK_OPENCL(clStatus,
"clSetKernelArg gwt");
3209 clStatus = clSetKernelArg(kEnv.mpkKernel, 8,
sizeof(
float), &bwt);
3210 CHECK_OPENCL(clStatus,
"clSetKernelArg bwt");
3214 clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1,
3215 NULL, global_work_size, local_work_size,
3217 CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel kernel_RGBToGray");
3218 clFinish(kEnv.mpkCmdQueue);
3223 clEnqueueMapBuffer(kEnv.mpkCmdQueue, dstBuffer, CL_TRUE, CL_MAP_READ, 0,
3224 dstSize, 0, NULL, NULL, &clStatus);
3225 CHECK_OPENCL(clStatus, "clEnqueueMapBuffer dstBuffer");
3226 clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, dstBuffer, ptr, 0, NULL,
3231 Pix *cpuPix = pixCreate(w, h, 8);
3232 pixCopyResolution(cpuPix, srcPix);
3233 unsigned int *cpuData = pixGetData(cpuPix);
3234 int cpuWPL = pixGetWpl(cpuPix);
3235 unsigned int *cpuLine, *srcLine;
3237 for (i = 0, srcLine = srcData, cpuLine = cpuData; i < h; i++) {
3238 for (j = 0; j < w; j++) {
3239 unsigned int word = *(srcLine + j);
3240 int val = (l_int32)(rwt * ((word >> L_RED_SHIFT) & 0xff) +
3241 gwt * ((word >> L_GREEN_SHIFT) & 0xff) +
3242 bwt * ((word >> L_BLUE_SHIFT) & 0xff) + 0.5);
3243 SET_DATA_BYTE(cpuLine, j, val);
3250 printf(
"converted 32-bit -> 8-bit image\n");
3251 for (
int row = 0; row < h; row++) {
3252 for (
int col = 0; col < w; col++) {
3253 int idx = row*w + col;
3254 unsigned int srcVal = srcData[idx];
3255 unsigned char cpuVal = ((
unsigned char *)cpuData)[idx];
3256 unsigned char oclVal = ((
unsigned char *)dstData)[idx];
3258 printf(
"%4i,%4i: %u, %u, %u\n", row, col, srcVal, cpuVal, oclVal);
3265 clReleaseMemObject(srcBuffer);
3266 clReleaseMemObject(dstBuffer);
const int kMinLineLengthFraction
Denominator of resolution makes min pixels to demand line lengths to be.
#define PERF_COUNT_SUB(SUB)
void HistogramRect(Pix *src_pix, int channel, int left, int top, int width, int height, int *histogram)
const int kThinLineFraction
Denominator of resolution makes max pixel width to allow thin lines.
#define PERF_COUNT_START(FUNCT_NAME)
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)