10 #ifndef _OCL_KERNEL_H_
11 #define _OCL_KERNEL_H_
12 #ifndef USE_EXTERNAL_KERNEL
13 #define KERNEL( ... )# __VA_ARGS__ "\n"
20 \n#ifdef KHR_DP_EXTENSION\n
21 \n#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n
22 \n#elif AMD_DP_EXTENSION\n
23 \n#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n
26 __kernel
void composeRGBPixel(__global uint *tiffdata,
int w,
int h,
int wpl, __global uint *output)
28 int i = get_global_id(1);
29 int j = get_global_id(0);
30 int tiffword,rval,gval,bval;
33 if ((i >= h) || (j >= w))
36 tiffword = tiffdata[i * w + j];
37 rval = ((tiffword) & 0xff);
38 gval = (((tiffword) >> 8) & 0xff);
39 bval = (((tiffword) >> 16) & 0xff);
40 output[i*wpl+j] = (rval << (8 * (
sizeof(uint) - 1 - 0))) | (gval << (8 * (
sizeof(uint) - 1 - 1))) | (bval << (8 * (
sizeof(uint) - 1 - 2)));
45 \n__kernel
void pixSubtract_inplace(__global
int *dword, __global
int *sword,
46 const int wpl,
const int h)
48 const unsigned int row = get_global_id(1);
49 const unsigned int col = get_global_id(0);
50 const unsigned int pos = row * wpl + col;
53 if (row >= h || col >= wpl)
56 *(dword + pos) &= ~(*(sword + pos));
61 \n__kernel
void pixSubtract(__global
int *dword, __global
int *sword,
62 const int wpl,
const int h, __global
int *outword)
64 const unsigned int row = get_global_id(1);
65 const unsigned int col = get_global_id(0);
66 const unsigned int pos = row * wpl + col;
69 if (row >= h || col >= wpl)
72 *(outword + pos) = *(dword + pos) & ~(*(sword + pos));
77 \n__kernel
void pixAND(__global
int *dword, __global
int *sword, __global
int *outword,
78 const int wpl,
const int h)
80 const unsigned int row = get_global_id(1);
81 const unsigned int col = get_global_id(0);
82 const unsigned int pos = row * wpl + col;
85 if (row >= h || col >= wpl)
88 *(outword + pos) = *(dword + pos) & (*(sword + pos));
93 \n__kernel
void pixOR(__global
int *dword, __global
int *sword, __global
int *outword,
94 const int wpl,
const int h)
96 const unsigned int row = get_global_id(1);
97 const unsigned int col = get_global_id(0);
98 const unsigned int pos = row * wpl + col;
101 if (row >= h || col >= wpl)
104 *(outword + pos) = *(dword + pos) | (*(sword + pos));
109 \n__kernel
void morphoDilateHor_5x5(__global
int *sword,__global
int *dword,
110 const int wpl,
const int h)
112 const unsigned int pos = get_global_id(0);
113 unsigned int prevword, nextword, currword,tempword;
114 unsigned int destword;
115 const int col = pos % wpl;
118 if (pos >= (wpl * h))
122 currword = *(sword + pos);
129 prevword = *(sword + pos - 1);
134 nextword = *(sword + pos + 1);
140 tempword = (prevword << (31)) | ((currword >> 1));
141 destword |= tempword;
143 tempword = (currword << 1) | (nextword >> (31));
144 destword |= tempword;
148 tempword = (prevword << (30)) | ((currword >> 2));
149 destword |= tempword;
151 tempword = (currword << 2) | (nextword >> (30));
152 destword |= tempword;
155 *(dword + pos) = destword;
161 \n__kernel
void morphoDilateVer_5x5(__global
int *sword,__global
int *dword,
162 const int wpl,
const int h)
164 const int col = get_global_id(0);
165 const int row = get_global_id(1);
166 const unsigned int pos = row * wpl + col;
167 unsigned int tempword;
168 unsigned int destword;
172 if (row >= h || col >= wpl)
175 destword = *(sword + pos);
178 i = (row - 2) < 0 ? row : (row - 2);
179 tempword = *(sword + i*wpl + col);
180 destword |= tempword;
183 i = (row - 1) < 0 ? row : (row - 1);
184 tempword = *(sword + i*wpl + col);
185 destword |= tempword;
188 i = (row >= (h - 1)) ? row : (row + 1);
189 tempword = *(sword + i*wpl + col);
190 destword |= tempword;
193 i = (row >= (h - 2)) ? row : (row + 2);
194 tempword = *(sword + i*wpl + col);
195 destword |= tempword;
197 *(dword + pos) = destword;
202 \n__kernel
void morphoDilateHor(__global
int *sword,__global
int *dword,
const int xp,
const int xn,
const int wpl,
const int h)
204 const int col = get_global_id(0);
205 const int row = get_global_id(1);
206 const unsigned int pos = row * wpl + col;
207 unsigned int parbitsxp, parbitsxn, nwords;
208 unsigned int destword, tempword, lastword, currword;
209 unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword;
210 int i, j, siter, eiter;
213 if (pos >= (wpl*h) || (xn < 1 && xp < 1))
216 currword = *(sword + pos);
228 siter = (col - nwords);
229 eiter = (col + nwords);
235 firstword = *(sword + pos - 1);
238 if (col == (wpl - 1))
241 secondword = *(sword + pos + 1);
244 for (i = 1; i <= parbitsxp; i++)
247 tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0x0 : (firstword << (32-i)) | ((currword >> i));
249 destword |= tempword;
252 tempword = (currword << i) | (secondword >> (32 - i));
253 destword |= tempword;
261 destword |= firstword;
265 destword |= secondword;
268 *(dword + pos) = destword;
275 firstword = *(sword + row*wpl + siter);
280 lastword = *(sword + row*wpl + eiter);
282 for ( i = 1; i < nwords; i++)
288 secondword = *(sword + row*wpl + siter + i);
290 lprevword = firstword << (32 - parbitsxn) | secondword >> parbitsxn;
292 firstword = secondword;
294 if ((siter + i + 1) < 0)
297 secondword = *(sword + row*wpl + siter + i + 1);
299 lnextword = firstword << (32 - parbitsxn) | secondword >> parbitsxn;
302 if ((eiter - i) >= wpl)
305 firstword = *(sword + row*wpl + eiter - i);
307 rnextword = firstword << parbitsxp | lastword >> (32 - parbitsxp);
309 lastword = firstword;
310 if ((eiter - i - 1) >= wpl)
313 firstword = *(sword + row*wpl + eiter - i - 1);
315 rprevword = firstword << parbitsxp | lastword >> (32 - parbitsxp);
317 for (j = 1; j < 32; j++)
320 tempword = (lprevword << j) | (lnextword >> (32 - j));
321 destword |= tempword;
324 tempword = (rprevword << j) | (rnextword >> (32 - j));
325 destword |= tempword;
328 destword |= lprevword;
329 destword |= lnextword;
330 destword |= rprevword;
331 destword |= rnextword;
333 lastword = firstword;
334 firstword = secondword;
337 *(dword + pos) = destword;
342 \n__kernel
void morphoDilateHor_32word(__global
int *sword,__global
int *dword,
344 const int wpl,
const int h,
347 const int col = get_global_id(0);
348 const int row = get_global_id(1);
349 const unsigned int pos = row * wpl + col;
350 unsigned int prevword, nextword, currword,tempword;
351 unsigned int destword;
355 if (pos >= (wpl * h))
358 currword = *(sword + pos);
365 prevword = *(sword + pos - 1);
370 nextword = *(sword + pos + 1);
372 for (i = 1; i <= halfwidth; i++)
375 if (i == halfwidth && isEven)
381 tempword = (prevword << (32-i)) | ((currword >> i));
384 destword |= tempword;
387 tempword = (currword << i) | (nextword >> (32 - i));
389 destword |= tempword;
392 *(dword + pos) = destword;
397 \n__kernel
void morphoDilateVer(__global
int *sword,__global
int *dword,
399 const int wpl,
const int h,
402 const int col = get_global_id(0);
403 const int row = get_global_id(1);
404 const unsigned int pos = row * wpl + col;
405 unsigned int tempword;
406 unsigned int destword;
410 if (row >= h || col >= wpl)
413 destword = *(sword + pos);
416 siter = (row - yn) < 0 ? 0 : (row - yn);
417 eiter = (row >= (h - yp)) ? (h - 1) : (row + yp);
419 for (i = siter; i <= eiter; i++)
421 tempword = *(sword + i*wpl + col);
423 destword |= tempword;
426 *(dword + pos) = destword;
431 \n__kernel
void morphoErodeHor_5x5(__global
int *sword,__global
int *dword,
432 const int wpl,
const int h)
434 const unsigned int pos = get_global_id(0);
435 unsigned int prevword, nextword, currword,tempword;
436 unsigned int destword;
437 const int col = pos % wpl;
440 if (pos >= (wpl * h))
443 currword = *(sword + pos);
450 prevword = *(sword + pos - 1);
455 nextword = *(sword + pos + 1);
461 tempword = (prevword << (31)) | ((currword >> 1));
462 destword &= tempword;
464 tempword = (currword << 1) | (nextword >> (31));
465 destword &= tempword;
469 tempword = (prevword << (30)) | ((currword >> 2));
470 destword &= tempword;
472 tempword = (currword << 2) | (nextword >> (30));
473 destword &= tempword;
476 *(dword + pos) = destword;
482 \n__kernel
void morphoErodeVer_5x5(__global
int *sword,__global
int *dword,
483 const int wpl,
const int h,
484 const int fwmask,
const int lwmask)
486 const int col = get_global_id(0);
487 const int row = get_global_id(1);
488 const unsigned int pos = row * wpl + col;
489 unsigned int tempword;
490 unsigned int destword;
494 if (row >= h || col >= wpl)
497 destword = *(sword + pos);
499 if (row < 2 || row >= (h - 2))
508 tempword = *(sword + i*wpl + col);
509 destword &= tempword;
514 tempword = *(sword + i*wpl + col);
515 destword &= tempword;
520 tempword = *(sword + i*wpl + col);
521 destword &= tempword;
526 tempword = *(sword + i*wpl + col);
527 destword &= tempword;
533 if (col == (wpl - 1))
540 *(dword + pos) = destword;
545 \n__kernel
void morphoErodeHor(__global
int *sword,__global
int *dword,
const int xp,
const int xn,
const int wpl,
546 const int h,
const char isAsymmetric,
const int rwmask,
const int lwmask)
548 const int col = get_global_id(0);
549 const int row = get_global_id(1);
550 const unsigned int pos = row * wpl + col;
551 unsigned int parbitsxp, parbitsxn, nwords;
552 unsigned int destword, tempword, lastword, currword;
553 unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword;
554 int i, j, siter, eiter;
557 if (pos >= (wpl*h) || (xn < 1 && xp < 1))
560 currword = *(sword + pos);
572 siter = (col - nwords);
573 eiter = (col + nwords);
577 firstword = 0xffffffff;
579 firstword = *(sword + pos - 1);
582 if (col == (wpl - 1))
583 secondword = 0xffffffff;
585 secondword = *(sword + pos + 1);
588 for (i = 1; i <= parbitsxp; i++)
591 tempword = (firstword << (32-i)) | ((currword >> i));
592 destword &= tempword;
595 tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0xffffffff : (currword << i) | (secondword >> (32 - i));
598 destword &= tempword;
606 destword &= firstword;
610 destword &= secondword;
618 if (col == (wpl - 1))
622 *(dword + pos) = destword;
627 firstword = 0xffffffff;
629 firstword = *(sword + row*wpl + siter);
632 lastword = 0xffffffff;
634 lastword = *(sword + row*wpl + eiter);
637 for ( i = 1; i < nwords; i++)
641 secondword = 0xffffffff;
643 secondword = *(sword + row*wpl + siter + i);
645 lprevword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp);
647 firstword = secondword;
649 if ((siter + i + 1) < 0)
650 secondword = 0xffffffff;
652 secondword = *(sword + row*wpl + siter + i + 1);
654 lnextword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp);
657 if ((eiter - i) >= wpl)
658 firstword = 0xffffffff;
660 firstword = *(sword + row*wpl + eiter - i);
662 rnextword = firstword << parbitsxn | lastword >> (32 - parbitsxn);
664 lastword = firstword;
665 if ((eiter - i - 1) >= wpl)
666 firstword = 0xffffffff;
668 firstword = *(sword + row*wpl + eiter - i - 1);
670 rprevword = firstword << parbitsxn | lastword >> (32 - parbitsxn);
672 for (j = 0; j < 32; j++)
675 tempword = (lprevword << j) | (lnextword >> (32 - j));
676 destword &= tempword;
679 tempword = (rprevword << j) | (rnextword >> (32 - j));
680 destword &= tempword;
683 destword &= lprevword;
684 destword &= lnextword;
685 destword &= rprevword;
686 destword &= rnextword;
688 lastword = firstword;
689 firstword = secondword;
695 if (col < (nwords - 1))
697 else if (col == (nwords - 1))
699 else if (col > (wpl - nwords))
701 else if (col == (wpl - nwords))
705 *(dword + pos) = destword;
710 \n__kernel
void morphoErodeHor_32word(__global
int *sword,__global
int *dword,
711 const int halfwidth,
const int wpl,
712 const int h,
const char clearBoundPixH,
713 const int rwmask,
const int lwmask,
716 const int col = get_global_id(0);
717 const int row = get_global_id(1);
718 const unsigned int pos = row * wpl + col;
719 unsigned int prevword, nextword, currword,tempword, destword;
723 if (pos >= (wpl * h))
726 currword = *(sword + pos);
733 prevword = *(sword + pos - 1);
738 nextword = *(sword + pos + 1);
740 for (i = 1; i <= halfwidth; i++)
743 tempword = (prevword << (32-i)) | ((currword >> i));
745 destword &= tempword;
748 if (i == halfwidth && isEven)
750 tempword = 0xffffffff;
754 tempword = (currword << i) | (nextword >> (32 - i));
757 destword &= tempword;
766 else if (col == (wpl - 1))
772 *(dword + pos) = destword;
777 \n__kernel
void morphoErodeVer(__global
int *sword,__global
int *dword,
779 const int wpl,
const int h,
780 const char clearBoundPixV,
const int yn)
782 const int col = get_global_id(0);
783 const int row = get_global_id(1);
784 const unsigned int pos = row * wpl + col;
785 unsigned int tempword, destword;
789 if (row >= h || col >= wpl)
792 destword = *(sword + pos);
795 siter = (row - yp) < 0 ? 0 : (row - yp);
796 eiter = (row >= (h - yn)) ? (h - 1) : (row + yn);
798 for (i = siter; i <= eiter; i++)
800 tempword = *(sword + i*wpl + col);
802 destword &= tempword;
806 if (clearBoundPixV && ((row < yp) || ((h - row) <= yn)))
811 *(dword + pos) = destword;
823 \n#define HIST_REDUNDANCY 256\n
824 \n#define GROUP_SIZE 256\n
825 \n#define HIST_SIZE 256\n
826 \n#define NUM_CHANNELS 4\n
827 \n#define HR_UNROLL_SIZE 8 \n
828 \n#define HR_UNROLL_TYPE uchar8 \n
830 __attribute__((reqd_work_group_size(256, 1, 1)))
832 void kernel_HistogramRectAllChannels(
833 __global
const uchar8 *data,
835 __global uint *histBuffer) {
839 int threadOffset = get_global_id(0)%HIST_REDUNDANCY;
842 for ( uint pc = get_global_id(0); pc < numPixels*NUM_CHANNELS/HR_UNROLL_SIZE; pc += get_global_size(0) ) {
845 atomic_inc( &histBuffer[ 0*HIST_SIZE*HIST_REDUNDANCY + pixels.s0*HIST_REDUNDANCY + threadOffset ]);
846 atomic_inc( &histBuffer[ 0*HIST_SIZE*HIST_REDUNDANCY + pixels.s4*HIST_REDUNDANCY + threadOffset ]);
847 atomic_inc( &histBuffer[ 1*HIST_SIZE*HIST_REDUNDANCY + pixels.s1*HIST_REDUNDANCY + threadOffset ]);
848 atomic_inc( &histBuffer[ 1*HIST_SIZE*HIST_REDUNDANCY + pixels.s5*HIST_REDUNDANCY + threadOffset ]);
849 atomic_inc( &histBuffer[ 2*HIST_SIZE*HIST_REDUNDANCY + pixels.s2*HIST_REDUNDANCY + threadOffset ]);
850 atomic_inc( &histBuffer[ 2*HIST_SIZE*HIST_REDUNDANCY + pixels.s6*HIST_REDUNDANCY + threadOffset ]);
851 atomic_inc( &histBuffer[ 3*HIST_SIZE*HIST_REDUNDANCY + pixels.s3*HIST_REDUNDANCY + threadOffset ]);
852 atomic_inc( &histBuffer[ 3*HIST_SIZE*HIST_REDUNDANCY + pixels.s7*HIST_REDUNDANCY + threadOffset ]);
859 __attribute__((reqd_work_group_size(256, 1, 1)))
861 void kernel_HistogramRectOneChannel(
862 __global
const uchar8 *data,
864 __global uint *histBuffer) {
868 int threadOffset = get_global_id(0)%HIST_REDUNDANCY;
871 for ( uint pc = get_global_id(0); pc < numPixels/HR_UNROLL_SIZE; pc += get_global_size(0) ) {
874 atomic_inc( &histBuffer[ pixels.s0*HIST_REDUNDANCY + threadOffset ]);
875 atomic_inc( &histBuffer[ pixels.s1*HIST_REDUNDANCY + threadOffset ]);
876 atomic_inc( &histBuffer[ pixels.s2*HIST_REDUNDANCY + threadOffset ]);
877 atomic_inc( &histBuffer[ pixels.s3*HIST_REDUNDANCY + threadOffset ]);
878 atomic_inc( &histBuffer[ pixels.s4*HIST_REDUNDANCY + threadOffset ]);
879 atomic_inc( &histBuffer[ pixels.s5*HIST_REDUNDANCY + threadOffset ]);
880 atomic_inc( &histBuffer[ pixels.s6*HIST_REDUNDANCY + threadOffset ]);
881 atomic_inc( &histBuffer[ pixels.s7*HIST_REDUNDANCY + threadOffset ]);
889 \n __attribute__((reqd_work_group_size(256, 1, 1)))
891 \n
void kernel_HistogramRectAllChannels_Grey(
892 \n __global
const uchar* data,
894 \n __global uint *histBuffer) {
899 \n
size_t groupId = get_group_id(0);
900 \n
size_t localId = get_local_id(0);
901 \n
size_t globalId = get_global_id(0);
902 \n uint numThreads = get_global_size(0);
905 \n
for ( uint pc = get_global_id(0); pc < numPixels; pc += get_global_size(0) ) {
906 \n uchar value = data[ pc ];
907 \n
int idx = value * get_global_size(0) + get_global_id(0);
908 \n histBuffer[ idx ]++;
920 __attribute__((reqd_work_group_size(256, 1, 1)))
922 void kernel_HistogramRectAllChannelsReduction(
924 __global uint *histBuffer,
925 __global
int* histResult) {
928 int channel = get_group_id(0)/HIST_SIZE;
929 int bin = get_group_id(0)%HIST_SIZE;
933 for ( uint i = get_local_id(0); i < HIST_REDUNDANCY; i+=GROUP_SIZE) {
934 value += histBuffer[ channel*HIST_SIZE*HIST_REDUNDANCY+bin*HIST_REDUNDANCY+i];
938 __local
int localHist[GROUP_SIZE];
939 localHist[get_local_id(0)] = value;
940 barrier(CLK_LOCAL_MEM_FENCE);
941 for (
int stride = GROUP_SIZE/2; stride >= 1; stride /= 2) {
942 if (get_local_id(0) < stride) {
943 value = localHist[ get_local_id(0)+stride];
945 barrier(CLK_LOCAL_MEM_FENCE);
946 if (get_local_id(0) < stride) {
947 localHist[ get_local_id(0)] += value;
949 barrier(CLK_LOCAL_MEM_FENCE);
953 if (get_local_id(0) == 0) {
954 histResult[get_group_id(0)] = localHist[0];
962 __attribute__((reqd_work_group_size(256, 1, 1)))
964 void kernel_HistogramRectOneChannelReduction(
966 __global uint *histBuffer,
967 __global
int* histResult) {
971 int bin = get_group_id(0)%HIST_SIZE;
975 for (
int i = get_local_id(0); i < HIST_REDUNDANCY; i+=GROUP_SIZE) {
976 value += histBuffer[ bin*HIST_REDUNDANCY+i];
980 __local
int localHist[GROUP_SIZE];
981 localHist[get_local_id(0)] = value;
982 barrier(CLK_LOCAL_MEM_FENCE);
983 for (
int stride = GROUP_SIZE/2; stride >= 1; stride /= 2) {
984 if (get_local_id(0) < stride) {
985 value = localHist[ get_local_id(0)+stride];
987 barrier(CLK_LOCAL_MEM_FENCE);
988 if (get_local_id(0) < stride) {
989 localHist[ get_local_id(0)] += value;
991 barrier(CLK_LOCAL_MEM_FENCE);
995 if (get_local_id(0) == 0) {
996 histResult[get_group_id(0)] = localHist[0];
1005 \n __attribute__((reqd_work_group_size(256, 1, 1)))
1007 \n
void kernel_HistogramRectAllChannelsReduction_Grey(
1009 \n __global uint *histBuffer,
1010 \n __global uint* histResult) {
1015 \n
size_t groupId = get_group_id(0);
1016 \n
size_t localId = get_local_id(0);
1017 \n
size_t globalId = get_global_id(0);
1018 \n uint numThreads = get_global_size(0);
1019 \n
unsigned int hist = 0;
1022 \n
for ( uint p = 0; p < n; p+=GROUP_SIZE) {
1023 \n hist += histBuffer[ (get_group_id(0)*n + p)];
1028 \n __local
unsigned int localHist[GROUP_SIZE];
1030 \n localHist[localId] = hist;
1031 \n barrier(CLK_LOCAL_MEM_FENCE);
1033 \n
for (
int stride = GROUP_SIZE/2; stride >= 1; stride /= 2) {
1034 \n
if (localId < stride) {
1035 \n hist = localHist[ (localId+stride)];
1037 \n barrier(CLK_LOCAL_MEM_FENCE);
1038 \n
if (localId < stride) {
1039 \n localHist[ localId] += hist;
1041 \n barrier(CLK_LOCAL_MEM_FENCE);
1044 \n
if (localId == 0)
1045 \n histResult[get_group_id(0)] = localHist[0];
1055 \n#define CHAR_VEC_WIDTH 4 \n
1056 \n#define PIXELS_PER_WORD 32 \n
1057 \n#define PIXELS_PER_BURST 8 \n
1058 \n#define BURSTS_PER_WORD (PIXELS_PER_WORD/PIXELS_PER_BURST) \n
1060 uchar s[PIXELS_PER_BURST*NUM_CHANNELS];
1061 uchar4 v[(PIXELS_PER_BURST*NUM_CHANNELS)/CHAR_VEC_WIDTH];
1064 __attribute__((reqd_work_group_size(256, 1, 1)))
1066 void kernel_ThresholdRectToPix(
1067 __global const uchar4 *imageData,
1071 __global
int *thresholds,
1072 __global
int *hi_values,
1073 __global
int *pix) {
1076 int pThresholds[NUM_CHANNELS];
1077 int pHi_Values[NUM_CHANNELS];
1078 for (
int i = 0; i < NUM_CHANNELS; i++) {
1079 pThresholds[i] = thresholds[i];
1080 pHi_Values[i] = hi_values[i];
1084 for ( uint w = get_global_id(0); w < wpl*height; w += get_global_size(0) ) {
1085 unsigned int word = 0;
1087 for (
int b = 0; b < BURSTS_PER_WORD; b++) {
1090 int offset = (w / wpl) * width;
1091 offset += (w % wpl) * PIXELS_PER_WORD;
1092 offset += b * PIXELS_PER_BURST;
1094 for (
int i = 0; i < PIXELS_PER_BURST; ++i)
1095 pixels.v[i] = imageData[offset + i];
1098 for (
int p = 0; p < PIXELS_PER_BURST; p++) {
1099 for (
int c = 0; c < NUM_CHANNELS; c++) {
1100 unsigned char pixChan = pixels.s[p*NUM_CHANNELS + c];
1101 if (pHi_Values[c] >= 0 && (pixChan > pThresholds[c]) == (pHi_Values[c] == 0)) {
1102 const uint kTopBit = 0x80000000;
1103 word |= (kTopBit >> ((b*PIXELS_PER_BURST+p)&31));
1112 \n#define CHAR_VEC_WIDTH 8 \n
1113 \n#define PIXELS_PER_WORD 32 \n
1114 \n#define PIXELS_PER_BURST 8 \n
1115 \n#define BURSTS_PER_WORD (PIXELS_PER_WORD/PIXELS_PER_BURST) \n
1117 uchar s[PIXELS_PER_BURST*1];
1118 uchar8 v[(PIXELS_PER_BURST*1)/CHAR_VEC_WIDTH];
1121 __attribute__((reqd_work_group_size(256, 1, 1)))
1123 void kernel_ThresholdRectToPix_OneChan(
1124 __global const uchar8 *imageData,
1128 __global
int *thresholds,
1129 __global
int *hi_values,
1130 __global
int *pix) {
1135 for (
int i = 0; i < 1; i++) {
1136 pThresholds[i] = thresholds[i];
1137 pHi_Values[i] = hi_values[i];
1141 for ( uint w = get_global_id(0); w < wpl*height; w += get_global_size(0) ) {
1142 unsigned int word = 0;
1145 for (
int b = 0; b < BURSTS_PER_WORD; b++) {
1150 pixels.v[0] = imageData[
1156 for (
int p = 0; p < PIXELS_PER_BURST; p++) {
1161 \n#ifdef __ENDIAN_LITTLE__\n
1166 unsigned char pixChan = pixels.s[idx];
1167 if (pHi_Values[0] >= 0 && (pixChan > pThresholds[0]) == (pHi_Values[0] == 0)) {
1168 const uint kTopBit = 0x80000000;
1169 word |= (kTopBit >> ((b*PIXELS_PER_BURST+p)&31));
1179 \n#define RED_SHIFT 24\n
1180 \n#define GREEN_SHIFT 16\n
1181 \n#define BLUE_SHIFT 8\n
1182 \n#define SET_DATA_BYTE( pdata, n, val ) (*(l_uint8 *)((l_uintptr_t)((l_uint8 *)(pdata) + (n)) ^ 3) = (val))\n
1184 \n__attribute__((reqd_work_group_size(256, 1, 1)))\n
1186 \nvoid kernel_RGBToGray(
1187 __global
const unsigned int *srcData,
1188 __global
unsigned char *dstData,
1198 int pixelIdx = get_global_id(0);
1199 if (pixelIdx >= height*width)
return;
1201 unsigned int word = srcData[pixelIdx];
1202 int output = (rwt * ((word >> RED_SHIFT) & 0xff) +
1203 gwt * ((word >> GREEN_SHIFT) & 0xff) +
1204 bwt * ((word >> BLUE_SHIFT) & 0xff) + 0.5f);
1206 dstData[pixelIdx] = output;
1212 #endif // USE_EXTERNAL_KERNEL
1213 #endif //_OCL_KERNEL_H_