tesseract  3.05.00
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
oclkernels.h
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use this file except in compliance with the License.
3 // You may obtain a copy of the License at
4 // http://www.apache.org/licenses/LICENSE-2.0
5 // Unless required by applicable law or agreed to in writing, software
6 // distributed under the License is distributed on an "AS IS" BASIS,
7 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
8 // See the License for the specific language governing permissions and
9 // limitations under the License.
10 #ifndef _OCL_KERNEL_H_
11 #define _OCL_KERNEL_H_
12 #ifndef USE_EXTERNAL_KERNEL
13 #define KERNEL( ... )# __VA_ARGS__ "\n"
14 // Double precision is a default of spreadsheets
15 // cl_khr_fp64: Khronos extension
16 // cl_amd_fp64: AMD extension
17 // use build option outside to define fp_t
19 const char *kernel_src = KERNEL(
20 \n#ifdef KHR_DP_EXTENSION\n
21 \n#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n
22 \n#elif AMD_DP_EXTENSION\n
23 \n#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n
24 \n#else\n
25 \n#endif\n
26 __kernel void composeRGBPixel(__global uint *tiffdata, int w, int h,int wpl, __global uint *output)
27 {
28  int i = get_global_id(1);
29  int j = get_global_id(0);
30  int tiffword,rval,gval,bval;
31 
32  //Ignore the excess
33  if ((i >= h) || (j >= w))
34  return;
35 
36  tiffword = tiffdata[i * w + j];
37  rval = ((tiffword) & 0xff);
38  gval = (((tiffword) >> 8) & 0xff);
39  bval = (((tiffword) >> 16) & 0xff);
40  output[i*wpl+j] = (rval << (8 * (sizeof(uint) - 1 - 0))) | (gval << (8 * (sizeof(uint) - 1 - 1))) | (bval << (8 * (sizeof(uint) - 1 - 2)));
41 }
42 )
43 
44 KERNEL(
45 \n__kernel void pixSubtract_inplace(__global int *dword, __global int *sword,
46  const int wpl, const int h)
47 {
48  const unsigned int row = get_global_id(1);
49  const unsigned int col = get_global_id(0);
50  const unsigned int pos = row * wpl + col;
51 
52  //Ignore the execss
53  if (row >= h || col >= wpl)
54  return;
55 
56  *(dword + pos) &= ~(*(sword + pos));
57 }\n
58 )
59 
60 KERNEL(
61 \n__kernel void pixSubtract(__global int *dword, __global int *sword,
62  const int wpl, const int h, __global int *outword)
63 {
64  const unsigned int row = get_global_id(1);
65  const unsigned int col = get_global_id(0);
66  const unsigned int pos = row * wpl + col;
67 
68  //Ignore the execss
69  if (row >= h || col >= wpl)
70  return;
71 
72  *(outword + pos) = *(dword + pos) & ~(*(sword + pos));
73 }\n
74 )
75 
76 KERNEL(
77 \n__kernel void pixAND(__global int *dword, __global int *sword, __global int *outword,
78  const int wpl, const int h)
79 {
80  const unsigned int row = get_global_id(1);
81  const unsigned int col = get_global_id(0);
82  const unsigned int pos = row * wpl + col;
83 
84  //Ignore the execss
85  if (row >= h || col >= wpl)
86  return;
87 
88  *(outword + pos) = *(dword + pos) & (*(sword + pos));
89 }\n
90 )
91 
92 KERNEL(
93 \n__kernel void pixOR(__global int *dword, __global int *sword, __global int *outword,
94  const int wpl, const int h)
95 {
96  const unsigned int row = get_global_id(1);
97  const unsigned int col = get_global_id(0);
98  const unsigned int pos = row * wpl + col;
99 
100  //Ignore the execss
101  if (row >= h || col >= wpl)
102  return;
103 
104  *(outword + pos) = *(dword + pos) | (*(sword + pos));
105 }\n
106 )
107 
108 KERNEL(
109 \n__kernel void morphoDilateHor_5x5(__global int *sword,__global int *dword,
110  const int wpl, const int h)
111 {
112  const unsigned int pos = get_global_id(0);
113  unsigned int prevword, nextword, currword,tempword;
114  unsigned int destword;
115  const int col = pos % wpl;
116 
117  //Ignore the execss
118  if (pos >= (wpl * h))
119  return;
120 
121 
122  currword = *(sword + pos);
123  destword = currword;
124 
125  //Handle boundary conditions
126  if(col==0)
127  prevword=0;
128  else
129  prevword = *(sword + pos - 1);
130 
131  if(col==(wpl - 1))
132  nextword=0;
133  else
134  nextword = *(sword + pos + 1);
135 
136  //Loop unrolled
137 
138  //1 bit to left and 1 bit to right
139  //Get the max value on LHS of every pixel
140  tempword = (prevword << (31)) | ((currword >> 1));
141  destword |= tempword;
142  //Get max value on RHS of every pixel
143  tempword = (currword << 1) | (nextword >> (31));
144  destword |= tempword;
145 
146  //2 bit to left and 2 bit to right
147  //Get the max value on LHS of every pixel
148  tempword = (prevword << (30)) | ((currword >> 2));
149  destword |= tempword;
150  //Get max value on RHS of every pixel
151  tempword = (currword << 2) | (nextword >> (30));
152  destword |= tempword;
153 
154 
155  *(dword + pos) = destword;
156 
157 }\n
158 )
159 
160 KERNEL(
161 \n__kernel void morphoDilateVer_5x5(__global int *sword,__global int *dword,
162  const int wpl, const int h)
163 {
164  const int col = get_global_id(0);
165  const int row = get_global_id(1);
166  const unsigned int pos = row * wpl + col;
167  unsigned int tempword;
168  unsigned int destword;
169  int i;
170 
171  //Ignore the execss
172  if (row >= h || col >= wpl)
173  return;
174 
175  destword = *(sword + pos);
176 
177  //2 words above
178  i = (row - 2) < 0 ? row : (row - 2);
179  tempword = *(sword + i*wpl + col);
180  destword |= tempword;
181 
182  //1 word above
183  i = (row - 1) < 0 ? row : (row - 1);
184  tempword = *(sword + i*wpl + col);
185  destword |= tempword;
186 
187  //1 word below
188  i = (row >= (h - 1)) ? row : (row + 1);
189  tempword = *(sword + i*wpl + col);
190  destword |= tempword;
191 
192  //2 words below
193  i = (row >= (h - 2)) ? row : (row + 2);
194  tempword = *(sword + i*wpl + col);
195  destword |= tempword;
196 
197  *(dword + pos) = destword;
198 }\n
199 )
200 
201 KERNEL(
202 \n__kernel void morphoDilateHor(__global int *sword,__global int *dword,const int xp, const int xn, const int wpl, const int h)
203 {
204  const int col = get_global_id(0);
205  const int row = get_global_id(1);
206  const unsigned int pos = row * wpl + col;
207  unsigned int parbitsxp, parbitsxn, nwords;
208  unsigned int destword, tempword, lastword, currword;
209  unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword;
210  int i, j, siter, eiter;
211 
212  //Ignore the execss
213  if (pos >= (wpl*h) || (xn < 1 && xp < 1))
214  return;
215 
216  currword = *(sword + pos);
217  destword = currword;
218 
219  parbitsxp = xp & 31;
220  parbitsxn = xn & 31;
221  nwords = xp >> 5;
222 
223  if (parbitsxp > 0)
224  nwords += 1;
225  else
226  parbitsxp = 31;
227 
228  siter = (col - nwords);
229  eiter = (col + nwords);
230 
231  //Get prev word
232  if (col==0)
233  firstword = 0x0;
234  else
235  firstword = *(sword + pos - 1);
236 
237  //Get next word
238  if (col == (wpl - 1))
239  secondword = 0x0;
240  else
241  secondword = *(sword + pos + 1);
242 
243  //Last partial bits on either side
244  for (i = 1; i <= parbitsxp; i++)
245  {
246  //Get the max value on LHS of every pixel
247  tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0x0 : (firstword << (32-i)) | ((currword >> i));
248 
249  destword |= tempword;
250 
251  //Get max value on RHS of every pixel
252  tempword = (currword << i) | (secondword >> (32 - i));
253  destword |= tempword;
254  }
255 
256  //Return if halfwidth <= 1 word
257  if (nwords == 1)
258  {
259  if (xn == 32)
260  {
261  destword |= firstword;
262  }
263  if (xp == 32)
264  {
265  destword |= secondword;
266  }
267 
268  *(dword + pos) = destword;
269  return;
270  }
271 
272  if (siter < 0)
273  firstword = 0x0;
274  else
275  firstword = *(sword + row*wpl + siter);
276 
277  if (eiter >= wpl)
278  lastword = 0x0;
279  else
280  lastword = *(sword + row*wpl + eiter);
281 
282  for ( i = 1; i < nwords; i++)
283  {
284  //Gets LHS words
285  if ((siter + i) < 0)
286  secondword = 0x0;
287  else
288  secondword = *(sword + row*wpl + siter + i);
289 
290  lprevword = firstword << (32 - parbitsxn) | secondword >> parbitsxn;
291 
292  firstword = secondword;
293 
294  if ((siter + i + 1) < 0)
295  secondword = 0x0;
296  else
297  secondword = *(sword + row*wpl + siter + i + 1);
298 
299  lnextword = firstword << (32 - parbitsxn) | secondword >> parbitsxn;
300 
301  //Gets RHS words
302  if ((eiter - i) >= wpl)
303  firstword = 0x0;
304  else
305  firstword = *(sword + row*wpl + eiter - i);
306 
307  rnextword = firstword << parbitsxp | lastword >> (32 - parbitsxp);
308 
309  lastword = firstword;
310  if ((eiter - i - 1) >= wpl)
311  firstword = 0x0;
312  else
313  firstword = *(sword + row*wpl + eiter - i - 1);
314 
315  rprevword = firstword << parbitsxp | lastword >> (32 - parbitsxp);
316 
317  for (j = 1; j < 32; j++)
318  {
319  //OR LHS full words
320  tempword = (lprevword << j) | (lnextword >> (32 - j));
321  destword |= tempword;
322 
323  //OR RHS full words
324  tempword = (rprevword << j) | (rnextword >> (32 - j));
325  destword |= tempword;
326  }
327 
328  destword |= lprevword;
329  destword |= lnextword;
330  destword |= rprevword;
331  destword |= rnextword;
332 
333  lastword = firstword;
334  firstword = secondword;
335  }
336 
337  *(dword + pos) = destword;
338 }\n
339 )
340 
341 KERNEL(
342 \n__kernel void morphoDilateHor_32word(__global int *sword,__global int *dword,
343  const int halfwidth,
344  const int wpl, const int h,
345  const char isEven)
346 {
347  const int col = get_global_id(0);
348  const int row = get_global_id(1);
349  const unsigned int pos = row * wpl + col;
350  unsigned int prevword, nextword, currword,tempword;
351  unsigned int destword;
352  int i;
353 
354  //Ignore the execss
355  if (pos >= (wpl * h))
356  return;
357 
358  currword = *(sword + pos);
359  destword = currword;
360 
361  //Handle boundary conditions
362  if(col==0)
363  prevword=0;
364  else
365  prevword = *(sword + pos - 1);
366 
367  if(col==(wpl - 1))
368  nextword=0;
369  else
370  nextword = *(sword + pos + 1);
371 
372  for (i = 1; i <= halfwidth; i++)
373  {
374  //Get the max value on LHS of every pixel
375  if (i == halfwidth && isEven)
376  {
377  tempword = 0x0;
378  }
379  else
380  {
381  tempword = (prevword << (32-i)) | ((currword >> i));
382  }
383 
384  destword |= tempword;
385 
386  //Get max value on RHS of every pixel
387  tempword = (currword << i) | (nextword >> (32 - i));
388 
389  destword |= tempword;
390  }
391 
392  *(dword + pos) = destword;
393 }\n
394 )
395 
396 KERNEL(
397 \n__kernel void morphoDilateVer(__global int *sword,__global int *dword,
398  const int yp,
399  const int wpl, const int h,
400  const int yn)
401 {
402  const int col = get_global_id(0);
403  const int row = get_global_id(1);
404  const unsigned int pos = row * wpl + col;
405  unsigned int tempword;
406  unsigned int destword;
407  int i, siter, eiter;
408 
409  //Ignore the execss
410  if (row >= h || col >= wpl)
411  return;
412 
413  destword = *(sword + pos);
414 
415  //Set start position and end position considering the boundary conditions
416  siter = (row - yn) < 0 ? 0 : (row - yn);
417  eiter = (row >= (h - yp)) ? (h - 1) : (row + yp);
418 
419  for (i = siter; i <= eiter; i++)
420  {
421  tempword = *(sword + i*wpl + col);
422 
423  destword |= tempword;
424  }
425 
426  *(dword + pos) = destword;
427 }\n
428 )
429 
430 KERNEL(
431 \n__kernel void morphoErodeHor_5x5(__global int *sword,__global int *dword,
432  const int wpl, const int h)
433 {
434  const unsigned int pos = get_global_id(0);
435  unsigned int prevword, nextword, currword,tempword;
436  unsigned int destword;
437  const int col = pos % wpl;
438 
439  //Ignore the execss
440  if (pos >= (wpl * h))
441  return;
442 
443  currword = *(sword + pos);
444  destword = currword;
445 
446  //Handle boundary conditions
447  if(col==0)
448  prevword=0xffffffff;
449  else
450  prevword = *(sword + pos - 1);
451 
452  if(col==(wpl - 1))
453  nextword=0xffffffff;
454  else
455  nextword = *(sword + pos + 1);
456 
457  //Loop unrolled
458 
459  //1 bit to left and 1 bit to right
460  //Get the min value on LHS of every pixel
461  tempword = (prevword << (31)) | ((currword >> 1));
462  destword &= tempword;
463  //Get min value on RHS of every pixel
464  tempword = (currword << 1) | (nextword >> (31));
465  destword &= tempword;
466 
467  //2 bit to left and 2 bit to right
468  //Get the min value on LHS of every pixel
469  tempword = (prevword << (30)) | ((currword >> 2));
470  destword &= tempword;
471  //Get min value on RHS of every pixel
472  tempword = (currword << 2) | (nextword >> (30));
473  destword &= tempword;
474 
475 
476  *(dword + pos) = destword;
477 
478 }\n
479 )
480 
481 KERNEL(
482 \n__kernel void morphoErodeVer_5x5(__global int *sword,__global int *dword,
483  const int wpl, const int h,
484  const int fwmask, const int lwmask)
485 {
486  const int col = get_global_id(0);
487  const int row = get_global_id(1);
488  const unsigned int pos = row * wpl + col;
489  unsigned int tempword;
490  unsigned int destword;
491  int i;
492 
493  //Ignore the execss
494  if (row >= h || col >= wpl)
495  return;
496 
497  destword = *(sword + pos);
498 
499  if (row < 2 || row >= (h - 2))
500  {
501  destword = 0x0;
502  }
503  else
504  {
505  //2 words above
506  //i = (row - 2) < 0 ? row : (row - 2);
507  i = (row - 2);
508  tempword = *(sword + i*wpl + col);
509  destword &= tempword;
510 
511  //1 word above
512  //i = (row - 1) < 0 ? row : (row - 1);
513  i = (row - 1);
514  tempword = *(sword + i*wpl + col);
515  destword &= tempword;
516 
517  //1 word below
518  //i = (row >= (h - 1)) ? row : (row + 1);
519  i = (row + 1);
520  tempword = *(sword + i*wpl + col);
521  destword &= tempword;
522 
523  //2 words below
524  //i = (row >= (h - 2)) ? row : (row + 2);
525  i = (row + 2);
526  tempword = *(sword + i*wpl + col);
527  destword &= tempword;
528 
529  if (col == 0)
530  {
531  destword &= fwmask;
532  }
533  if (col == (wpl - 1))
534  {
535  destword &= lwmask;
536  }
537  }
538 
539 
540  *(dword + pos) = destword;
541 }\n
542 )
543 
544 KERNEL(
545 \n__kernel void morphoErodeHor(__global int *sword,__global int *dword, const int xp, const int xn, const int wpl,
546  const int h, const char isAsymmetric, const int rwmask, const int lwmask)
547 {
548  const int col = get_global_id(0);
549  const int row = get_global_id(1);
550  const unsigned int pos = row * wpl + col;
551  unsigned int parbitsxp, parbitsxn, nwords;
552  unsigned int destword, tempword, lastword, currword;
553  unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword;
554  int i, j, siter, eiter;
555 
556  //Ignore the execss
557  if (pos >= (wpl*h) || (xn < 1 && xp < 1))
558  return;
559 
560  currword = *(sword + pos);
561  destword = currword;
562 
563  parbitsxp = xp & 31;
564  parbitsxn = xn & 31;
565  nwords = xp >> 5;
566 
567  if (parbitsxp > 0)
568  nwords += 1;
569  else
570  parbitsxp = 31;
571 
572  siter = (col - nwords);
573  eiter = (col + nwords);
574 
575  //Get prev word
576  if (col==0)
577  firstword = 0xffffffff;
578  else
579  firstword = *(sword + pos - 1);
580 
581  //Get next word
582  if (col == (wpl - 1))
583  secondword = 0xffffffff;
584  else
585  secondword = *(sword + pos + 1);
586 
587  //Last partial bits on either side
588  for (i = 1; i <= parbitsxp; i++)
589  {
590  //Get the max value on LHS of every pixel
591  tempword = (firstword << (32-i)) | ((currword >> i));
592  destword &= tempword;
593 
594  //Get max value on RHS of every pixel
595  tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0xffffffff : (currword << i) | (secondword >> (32 - i));
596 
597  //tempword = (currword << i) | (secondword >> (32 - i));
598  destword &= tempword;
599  }
600 
601  //Return if halfwidth <= 1 word
602  if (nwords == 1)
603  {
604  if (xp == 32)
605  {
606  destword &= firstword;
607  }
608  if (xn == 32)
609  {
610  destword &= secondword;
611  }
612 
613  //Clear boundary pixels
614  if (isAsymmetric)
615  {
616  if (col == 0)
617  destword &= rwmask;
618  if (col == (wpl - 1))
619  destword &= lwmask;
620  }
621 
622  *(dword + pos) = destword;
623  return;
624  }
625 
626  if (siter < 0)
627  firstword = 0xffffffff;
628  else
629  firstword = *(sword + row*wpl + siter);
630 
631  if (eiter >= wpl)
632  lastword = 0xffffffff;
633  else
634  lastword = *(sword + row*wpl + eiter);
635 
636 
637  for ( i = 1; i < nwords; i++)
638  {
639  //Gets LHS words
640  if ((siter + i) < 0)
641  secondword = 0xffffffff;
642  else
643  secondword = *(sword + row*wpl + siter + i);
644 
645  lprevword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp);
646 
647  firstword = secondword;
648 
649  if ((siter + i + 1) < 0)
650  secondword = 0xffffffff;
651  else
652  secondword = *(sword + row*wpl + siter + i + 1);
653 
654  lnextword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp);
655 
656  //Gets RHS words
657  if ((eiter - i) >= wpl)
658  firstword = 0xffffffff;
659  else
660  firstword = *(sword + row*wpl + eiter - i);
661 
662  rnextword = firstword << parbitsxn | lastword >> (32 - parbitsxn);
663 
664  lastword = firstword;
665  if ((eiter - i - 1) >= wpl)
666  firstword = 0xffffffff;
667  else
668  firstword = *(sword + row*wpl + eiter - i - 1);
669 
670  rprevword = firstword << parbitsxn | lastword >> (32 - parbitsxn);
671 
672  for (j = 0; j < 32; j++)
673  {
674  //OR LHS full words
675  tempword = (lprevword << j) | (lnextword >> (32 - j));
676  destword &= tempword;
677 
678  //OR RHS full words
679  tempword = (rprevword << j) | (rnextword >> (32 - j));
680  destword &= tempword;
681  }
682 
683  destword &= lprevword;
684  destword &= lnextword;
685  destword &= rprevword;
686  destword &= rnextword;
687 
688  lastword = firstword;
689  firstword = secondword;
690  }
691 
692  if (isAsymmetric)
693  {
694  //Clear boundary pixels
695  if (col < (nwords - 1))
696  destword = 0x0;
697  else if (col == (nwords - 1))
698  destword &= rwmask;
699  else if (col > (wpl - nwords))
700  destword = 0x0;
701  else if (col == (wpl - nwords))
702  destword &= lwmask;
703  }
704 
705  *(dword + pos) = destword;
706 }\n
707 )
708 
709 KERNEL(
710 \n__kernel void morphoErodeHor_32word(__global int *sword,__global int *dword,
711  const int halfwidth, const int wpl,
712  const int h, const char clearBoundPixH,
713  const int rwmask, const int lwmask,
714  const char isEven)
715 {
716  const int col = get_global_id(0);
717  const int row = get_global_id(1);
718  const unsigned int pos = row * wpl + col;
719  unsigned int prevword, nextword, currword,tempword, destword;
720  int i;
721 
722  //Ignore the execss
723  if (pos >= (wpl * h))
724  return;
725 
726  currword = *(sword + pos);
727  destword = currword;
728 
729  //Handle boundary conditions
730  if(col==0)
731  prevword=0xffffffff;
732  else
733  prevword = *(sword + pos - 1);
734 
735  if(col==(wpl - 1))
736  nextword=0xffffffff;
737  else
738  nextword = *(sword + pos + 1);
739 
740  for (i = 1; i <= halfwidth; i++)
741  {
742  //Get the min value on LHS of every pixel
743  tempword = (prevword << (32-i)) | ((currword >> i));
744 
745  destword &= tempword;
746 
747  //Get min value on RHS of every pixel
748  if (i == halfwidth && isEven)
749  {
750  tempword = 0xffffffff;
751  }
752  else
753  {
754  tempword = (currword << i) | (nextword >> (32 - i));
755  }
756 
757  destword &= tempword;
758  }
759 
760  if (clearBoundPixH)
761  {
762  if (col == 0)
763  {
764  destword &= rwmask;
765  }
766  else if (col == (wpl - 1))
767  {
768  destword &= lwmask;
769  }
770  }
771 
772  *(dword + pos) = destword;
773 }\n
774 )
775 
776 KERNEL(
777 \n__kernel void morphoErodeVer(__global int *sword,__global int *dword,
778  const int yp,
779  const int wpl, const int h,
780  const char clearBoundPixV, const int yn)
781 {
782  const int col = get_global_id(0);
783  const int row = get_global_id(1);
784  const unsigned int pos = row * wpl + col;
785  unsigned int tempword, destword;
786  int i, siter, eiter;
787 
788  //Ignore the execss
789  if (row >= h || col >= wpl)
790  return;
791 
792  destword = *(sword + pos);
793 
794  //Set start position and end position considering the boundary conditions
795  siter = (row - yp) < 0 ? 0 : (row - yp);
796  eiter = (row >= (h - yn)) ? (h - 1) : (row + yn);
797 
798  for (i = siter; i <= eiter; i++)
799  {
800  tempword = *(sword + i*wpl + col);
801 
802  destword &= tempword;
803  }
804 
805  //Clear boundary pixels
806  if (clearBoundPixV && ((row < yp) || ((h - row) <= yn)))
807  {
808  destword = 0x0;
809  }
810 
811  *(dword + pos) = destword;
812 }\n
813 )
814 
815 // HistogramRect Kernel: Accumulate
816 // assumes 4 channels, i.e., bytes_per_pixel = 4
817 // assumes number of pixels is multiple of 8
818 // data is laid out as
819 // ch0 ch1 ...
820 // bin0 bin1 bin2... bin0...
821 // rpt0,1,2...256 rpt0,1,2...
822 KERNEL(
823 \n#define HIST_REDUNDANCY 256\n
824 \n#define GROUP_SIZE 256\n
825 \n#define HIST_SIZE 256\n
826 \n#define NUM_CHANNELS 4\n
827 \n#define HR_UNROLL_SIZE 8 \n
828 \n#define HR_UNROLL_TYPE uchar8 \n
829 
830 __attribute__((reqd_work_group_size(256, 1, 1)))
831 __kernel
832 void kernel_HistogramRectAllChannels(
833  __global const uchar8 *data,
834  uint numPixels,
835  __global uint *histBuffer) {
836 
837  // declare variables
838  uchar8 pixels;
839  int threadOffset = get_global_id(0)%HIST_REDUNDANCY;
840 
841  // for each pixel/channel, accumulate in global memory
842  for ( uint pc = get_global_id(0); pc < numPixels*NUM_CHANNELS/HR_UNROLL_SIZE; pc += get_global_size(0) ) {
843  pixels = data[pc];
844  // channel bin thread
845  atomic_inc( &histBuffer[ 0*HIST_SIZE*HIST_REDUNDANCY + pixels.s0*HIST_REDUNDANCY + threadOffset ]); // ch0
846  atomic_inc( &histBuffer[ 0*HIST_SIZE*HIST_REDUNDANCY + pixels.s4*HIST_REDUNDANCY + threadOffset ]); // ch0
847  atomic_inc( &histBuffer[ 1*HIST_SIZE*HIST_REDUNDANCY + pixels.s1*HIST_REDUNDANCY + threadOffset ]); // ch1
848  atomic_inc( &histBuffer[ 1*HIST_SIZE*HIST_REDUNDANCY + pixels.s5*HIST_REDUNDANCY + threadOffset ]); // ch1
849  atomic_inc( &histBuffer[ 2*HIST_SIZE*HIST_REDUNDANCY + pixels.s2*HIST_REDUNDANCY + threadOffset ]); // ch2
850  atomic_inc( &histBuffer[ 2*HIST_SIZE*HIST_REDUNDANCY + pixels.s6*HIST_REDUNDANCY + threadOffset ]); // ch2
851  atomic_inc( &histBuffer[ 3*HIST_SIZE*HIST_REDUNDANCY + pixels.s3*HIST_REDUNDANCY + threadOffset ]); // ch3
852  atomic_inc( &histBuffer[ 3*HIST_SIZE*HIST_REDUNDANCY + pixels.s7*HIST_REDUNDANCY + threadOffset ]); // ch3
853  }
854 }
855 )
856 
857 KERNEL(
858 // NUM_CHANNELS = 1
859 __attribute__((reqd_work_group_size(256, 1, 1)))
860 __kernel
861 void kernel_HistogramRectOneChannel(
862  __global const uchar8 *data,
863  uint numPixels,
864  __global uint *histBuffer) {
865 
866  // declare variables
867  uchar8 pixels;
868  int threadOffset = get_global_id(0)%HIST_REDUNDANCY;
869 
870  // for each pixel/channel, accumulate in global memory
871  for ( uint pc = get_global_id(0); pc < numPixels/HR_UNROLL_SIZE; pc += get_global_size(0) ) {
872  pixels = data[pc];
873  // bin thread
874  atomic_inc( &histBuffer[ pixels.s0*HIST_REDUNDANCY + threadOffset ]);
875  atomic_inc( &histBuffer[ pixels.s1*HIST_REDUNDANCY + threadOffset ]);
876  atomic_inc( &histBuffer[ pixels.s2*HIST_REDUNDANCY + threadOffset ]);
877  atomic_inc( &histBuffer[ pixels.s3*HIST_REDUNDANCY + threadOffset ]);
878  atomic_inc( &histBuffer[ pixels.s4*HIST_REDUNDANCY + threadOffset ]);
879  atomic_inc( &histBuffer[ pixels.s5*HIST_REDUNDANCY + threadOffset ]);
880  atomic_inc( &histBuffer[ pixels.s6*HIST_REDUNDANCY + threadOffset ]);
881  atomic_inc( &histBuffer[ pixels.s7*HIST_REDUNDANCY + threadOffset ]);
882  }
883 }
884 )
885 
886 
887 KERNEL(
888 // unused
889 \n __attribute__((reqd_work_group_size(256, 1, 1)))
890 \n __kernel
891 \n void kernel_HistogramRectAllChannels_Grey(
892 \n __global const uchar* data,
893 \n uint numPixels,
894 \n __global uint *histBuffer) { // each wg will write HIST_SIZE*NUM_CHANNELS into this result; cpu will accumulate across wg's
895 \n
896 \n /* declare variables */
897 \n
898 \n // work indices
899 \n size_t groupId = get_group_id(0);
900 \n size_t localId = get_local_id(0); // 0 -> 256-1
901 \n size_t globalId = get_global_id(0); // 0 -> 8*10*256-1=20480-1
902 \n uint numThreads = get_global_size(0);
903 \n
904 \n /* accumulate in global memory */
905 \n for ( uint pc = get_global_id(0); pc < numPixels; pc += get_global_size(0) ) {
906 \n uchar value = data[ pc ];
907 \n int idx = value * get_global_size(0) + get_global_id(0);
908 \n histBuffer[ idx ]++;
909 \n
910 \n }
911 \n
912 \n } // kernel_HistogramRectAllChannels_Grey
913 
914 )
915 
916 // HistogramRect Kernel: Reduction
917 // only supports 4 channels
918 // each work group handles a single channel of a single histogram bin
919 KERNEL(
920 __attribute__((reqd_work_group_size(256, 1, 1)))
921 __kernel
922 void kernel_HistogramRectAllChannelsReduction(
923  int n, // unused pixel redundancy
924  __global uint *histBuffer,
925  __global int* histResult) {
926 
927  // declare variables
928  int channel = get_group_id(0)/HIST_SIZE;
929  int bin = get_group_id(0)%HIST_SIZE;
930  int value = 0;
931 
932  // accumulate in register
933  for ( uint i = get_local_id(0); i < HIST_REDUNDANCY; i+=GROUP_SIZE) {
934  value += histBuffer[ channel*HIST_SIZE*HIST_REDUNDANCY+bin*HIST_REDUNDANCY+i];
935  }
936 
937  // reduction in local memory
938  __local int localHist[GROUP_SIZE];
939  localHist[get_local_id(0)] = value;
940  barrier(CLK_LOCAL_MEM_FENCE);
941  for (int stride = GROUP_SIZE/2; stride >= 1; stride /= 2) {
942  if (get_local_id(0) < stride) {
943  value = localHist[ get_local_id(0)+stride];
944  }
945  barrier(CLK_LOCAL_MEM_FENCE);
946  if (get_local_id(0) < stride) {
947  localHist[ get_local_id(0)] += value;
948  }
949  barrier(CLK_LOCAL_MEM_FENCE);
950  }
951 
952  // write reduction to final result
953  if (get_local_id(0) == 0) {
954  histResult[get_group_id(0)] = localHist[0];
955  }
956 } // kernel_HistogramRectAllChannels
957 )
958 
959 
960 KERNEL(
961 // NUM_CHANNELS = 1
962 __attribute__((reqd_work_group_size(256, 1, 1)))
963 __kernel
964 void kernel_HistogramRectOneChannelReduction(
965  int n, // unused pixel redundancy
966  __global uint *histBuffer,
967  __global int* histResult) {
968 
969  // declare variables
970  // int channel = get_group_id(0)/HIST_SIZE;
971  int bin = get_group_id(0)%HIST_SIZE;
972  int value = 0;
973 
974  // accumulate in register
975  for ( int i = get_local_id(0); i < HIST_REDUNDANCY; i+=GROUP_SIZE) {
976  value += histBuffer[ bin*HIST_REDUNDANCY+i];
977  }
978 
979  // reduction in local memory
980  __local int localHist[GROUP_SIZE];
981  localHist[get_local_id(0)] = value;
982  barrier(CLK_LOCAL_MEM_FENCE);
983  for (int stride = GROUP_SIZE/2; stride >= 1; stride /= 2) {
984  if (get_local_id(0) < stride) {
985  value = localHist[ get_local_id(0)+stride];
986  }
987  barrier(CLK_LOCAL_MEM_FENCE);
988  if (get_local_id(0) < stride) {
989  localHist[ get_local_id(0)] += value;
990  }
991  barrier(CLK_LOCAL_MEM_FENCE);
992  }
993 
994  // write reduction to final result
995  if (get_local_id(0) == 0) {
996  histResult[get_group_id(0)] = localHist[0];
997  }
998 } // kernel_HistogramRectOneChannelReduction
999 )
1000 
1001 
1002 KERNEL(
1003 // unused
1004  // each work group (x256) handles a histogram bin
1005 \n __attribute__((reqd_work_group_size(256, 1, 1)))
1006 \n __kernel
1007 \n void kernel_HistogramRectAllChannelsReduction_Grey(
1008 \n int n, // pixel redundancy that needs to be accumulated
1009 \n __global uint *histBuffer,
1010 \n __global uint* histResult) { // each wg accumulates 1 bin
1011 \n
1012 \n /* declare variables */
1013 \n
1014 \n // work indices
1015 \n size_t groupId = get_group_id(0);
1016 \n size_t localId = get_local_id(0); // 0 -> 256-1
1017 \n size_t globalId = get_global_id(0); // 0 -> 8*10*256-1=20480-1
1018 \n uint numThreads = get_global_size(0);
1019 \n unsigned int hist = 0;
1020 \n
1021 \n /* accumulate in global memory */
1022 \n for ( uint p = 0; p < n; p+=GROUP_SIZE) {
1023 \n hist += histBuffer[ (get_group_id(0)*n + p)];
1024 \n }
1025 \n
1026 \n /* reduction in local memory */
1027 \n // populate local memory
1028 \n __local unsigned int localHist[GROUP_SIZE];
1029 
1030 \n localHist[localId] = hist;
1031 \n barrier(CLK_LOCAL_MEM_FENCE);
1032 \n
1033 \n for (int stride = GROUP_SIZE/2; stride >= 1; stride /= 2) {
1034 \n if (localId < stride) {
1035 \n hist = localHist[ (localId+stride)];
1036 \n }
1037 \n barrier(CLK_LOCAL_MEM_FENCE);
1038 \n if (localId < stride) {
1039 \n localHist[ localId] += hist;
1040 \n }
1041 \n barrier(CLK_LOCAL_MEM_FENCE);
1042 \n }
1043 \n
1044 \n if (localId == 0)
1045 \n histResult[get_group_id(0)] = localHist[0];
1046 \n
1047 \n } // kernel_HistogramRectAllChannelsReduction_Grey
1048 )
1049 
1050 // ThresholdRectToPix Kernel
1051 // only supports 4 channels
1052 // imageData is input image (24-bits/pixel)
1053 // pix is output image (1-bit/pixel)
1054 KERNEL(
1055 \n#define CHAR_VEC_WIDTH 4 \n
1056 \n#define PIXELS_PER_WORD 32 \n
1057 \n#define PIXELS_PER_BURST 8 \n
1058 \n#define BURSTS_PER_WORD (PIXELS_PER_WORD/PIXELS_PER_BURST) \n
1059  typedef union {
1060  uchar s[PIXELS_PER_BURST*NUM_CHANNELS];
1061  uchar4 v[(PIXELS_PER_BURST*NUM_CHANNELS)/CHAR_VEC_WIDTH];
1062  } charVec;
1063 
1064 __attribute__((reqd_work_group_size(256, 1, 1)))
1065 __kernel
1066 void kernel_ThresholdRectToPix(
1067  __global const uchar4 *imageData,
1068  int height,
1069  int width,
1070  int wpl, // words per line
1071  __global int *thresholds,
1072  __global int *hi_values,
1073  __global int *pix) {
1074 
1075  // declare variables
1076  int pThresholds[NUM_CHANNELS];
1077  int pHi_Values[NUM_CHANNELS];
1078  for ( int i = 0; i < NUM_CHANNELS; i++) {
1079  pThresholds[i] = thresholds[i];
1080  pHi_Values[i] = hi_values[i];
1081  }
1082 
1083  // for each word (32 pixels) in output image
1084  for ( uint w = get_global_id(0); w < wpl*height; w += get_global_size(0) ) {
1085  unsigned int word = 0; // all bits start at zero
1086  // for each burst in word
1087  for ( int b = 0; b < BURSTS_PER_WORD; b++) {
1088  // load burst
1089  charVec pixels;
1090  int offset = (w / wpl) * width;
1091  offset += (w % wpl) * PIXELS_PER_WORD;
1092  offset += b * PIXELS_PER_BURST;
1093 
1094  for (int i = 0; i < PIXELS_PER_BURST; ++i)
1095  pixels.v[i] = imageData[offset + i];
1096 
1097  // for each pixel in burst
1098  for ( int p = 0; p < PIXELS_PER_BURST; p++) {
1099  for ( int c = 0; c < NUM_CHANNELS; c++) {
1100  unsigned char pixChan = pixels.s[p*NUM_CHANNELS + c];
1101  if (pHi_Values[c] >= 0 && (pixChan > pThresholds[c]) == (pHi_Values[c] == 0)) {
1102  const uint kTopBit = 0x80000000;
1103  word |= (kTopBit >> ((b*PIXELS_PER_BURST+p)&31));
1104  }
1105  }
1106  }
1107  }
1108  pix[w] = word;
1109  }
1110 }
1111 
1112 \n#define CHAR_VEC_WIDTH 8 \n
1113 \n#define PIXELS_PER_WORD 32 \n
1114 \n#define PIXELS_PER_BURST 8 \n
1115 \n#define BURSTS_PER_WORD (PIXELS_PER_WORD/PIXELS_PER_BURST) \n
1116  typedef union {
1117  uchar s[PIXELS_PER_BURST*1];
1118  uchar8 v[(PIXELS_PER_BURST*1)/CHAR_VEC_WIDTH];
1119  } charVec1;
1120 
1121 __attribute__((reqd_work_group_size(256, 1, 1)))
1122 __kernel
1123 void kernel_ThresholdRectToPix_OneChan(
1124  __global const uchar8 *imageData,
1125  int height,
1126  int width,
1127  int wpl, // words per line of output image
1128  __global int *thresholds,
1129  __global int *hi_values,
1130  __global int *pix) {
1131 
1132  // declare variables
1133  int pThresholds[1];
1134  int pHi_Values[1];
1135  for ( int i = 0; i < 1; i++) {
1136  pThresholds[i] = thresholds[i];
1137  pHi_Values[i] = hi_values[i];
1138  }
1139 
1140  // for each word (32 pixels) in output image
1141  for ( uint w = get_global_id(0); w < wpl*height; w += get_global_size(0) ) {
1142  unsigned int word = 0; // all bits start at zero
1143 
1144  // for each burst in word
1145  for ( int b = 0; b < BURSTS_PER_WORD; b++) {
1146 
1147  // load burst
1148  charVec1 pixels;
1149  // for each char8 in burst
1150  pixels.v[0] = imageData[
1151  w*BURSTS_PER_WORD
1152  + b
1153  + 0 ];
1154 
1155  // for each pixel in burst
1156  for ( int p = 0; p < PIXELS_PER_BURST; p++) {
1157 
1158  //int littleEndianIdx = p ^ 3;
1159  //int bigEndianIdx = p;
1160  int idx =
1161 \n#ifdef __ENDIAN_LITTLE__\n
1162  p ^ 3;
1163 \n#else\n
1164  p;
1165 \n#endif\n
1166  unsigned char pixChan = pixels.s[idx];
1167  if (pHi_Values[0] >= 0 && (pixChan > pThresholds[0]) == (pHi_Values[0] == 0)) {
1168  const uint kTopBit = 0x80000000;
1169  word |= (kTopBit >> ((b*PIXELS_PER_BURST+p)&31));
1170  }
1171  }
1172  }
1173  pix[w] = word;
1174  }
1175 }
1176 )
1177 
1178 KERNEL(
1179 \n#define RED_SHIFT 24\n
1180 \n#define GREEN_SHIFT 16\n
1181 \n#define BLUE_SHIFT 8\n
1182 \n#define SET_DATA_BYTE( pdata, n, val ) (*(l_uint8 *)((l_uintptr_t)((l_uint8 *)(pdata) + (n)) ^ 3) = (val))\n
1183 \n
1184 \n__attribute__((reqd_work_group_size(256, 1, 1)))\n
1185 \n__kernel\n
1186 \nvoid kernel_RGBToGray(
1187  __global const unsigned int *srcData,
1188  __global unsigned char *dstData,
1189  int srcWPL,
1190  int dstWPL,
1191  int height,
1192  int width,
1193  float rwt,
1194  float gwt,
1195  float bwt ) {
1196 
1197  // pixel index
1198  int pixelIdx = get_global_id(0);
1199  if (pixelIdx >= height*width) return;
1200 
1201  unsigned int word = srcData[pixelIdx];
1202  int output = (rwt * ((word >> RED_SHIFT) & 0xff) +
1203  gwt * ((word >> GREEN_SHIFT) & 0xff) +
1204  bwt * ((word >> BLUE_SHIFT) & 0xff) + 0.5f);
1205  // SET_DATA_BYTE
1206  dstData[pixelIdx] = output;
1207 }
1208 )
1209 
1210  ; // close char*
1211 
1212 #endif // USE_EXTERNAL_KERNEL
1213 #endif //_OCL_KERNEL_H_
1214 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
#define KERNEL(...)
Definition: oclkernels.h:13
const char * kernel_src
Definition: oclkernels.h:19