tesseract  3.05.00
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
baseapi.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: baseapi.cpp
3  * Description: Simple API for calling tesseract.
4  * Author: Ray Smith
5  * Created: Fri Oct 06 15:35:01 PDT 2006
6  *
7  * (C) Copyright 2006, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #ifdef __linux__
26 #include <signal.h>
27 #endif
28 
29 #if defined(_WIN32)
30 #ifdef _MSC_VER
31 #include "vcsversion.h"
32 #include "mathfix.h"
33 #elif MINGW
34 // workaround for stdlib.h with -std=c++11 for _splitpath and _MAX_FNAME
35 #undef __STRICT_ANSI__
36 #endif // _MSC_VER
37 #include <fcntl.h>
38 #include <io.h>
39 #else
40 #include <dirent.h>
41 #include <libgen.h>
42 #include <string.h>
43 #endif // _WIN32
44 
45 #include <iostream>
46 #include <string>
47 #include <iterator>
48 #include <fstream>
49 
50 #include "allheaders.h"
51 
52 #include "baseapi.h"
53 #include "blobclass.h"
54 #include "resultiterator.h"
55 #include "mutableiterator.h"
56 #include "thresholder.h"
57 #include "tesseractclass.h"
58 #include "pageres.h"
59 #include "paragraphs.h"
60 #include "tessvars.h"
61 #include "control.h"
62 #include "dict.h"
63 #include "pgedit.h"
64 #include "paramsd.h"
65 #include "output.h"
66 #include "globaloc.h"
67 #include "globals.h"
68 #include "edgblob.h"
69 #include "equationdetect.h"
70 #include "tessbox.h"
71 #include "makerow.h"
72 #include "otsuthr.h"
73 #include "osdetect.h"
74 #include "params.h"
75 #include "renderer.h"
76 #include "strngs.h"
77 #include "openclwrapper.h"
78 
79 BOOL_VAR(stream_filelist, FALSE, "Stream a filelist from stdin");
80 
81 namespace tesseract {
82 
// ---- Integer limits -------------------------------------------------------

// Minimum width and height, in pixels, of a rectangle that TesseractRect is
// willing to process.
const int kMinRectSize = 10;
// Size of the character buffer used to format a page number as text.
const int kMaxIntSize = 22;
// Bounds on a believable image resolution.
// NOTE(review): presumably in pixels per inch - confirm against the users of
// these constants.
const int kMinCredibleResolution = 70;
const int kMaxCredibleResolution = 2400;

// ---- Marker characters ----------------------------------------------------

// NOTE(review): judging by the names these mark rejected / suspect characters
// in Tesseract and UNLV output - confirm against their users.
const char kTesseractReject = '~';
const char kUNLVReject = '~';
const char kUNLVSuspect = '^';

// ---- File names -----------------------------------------------------------

// NOTE(review): presumably the fallback input name when none is set - confirm.
const char* kInputFile = "noname.tif";
// File to which the current variables are written before a retry with an
// alternate config.
const char* kOldVarsFile = "failed_vars.txt";
109 
111  : tesseract_(NULL),
112  osd_tesseract_(NULL),
113  equ_detect_(NULL),
114  // Thresholder is initialized to NULL here, but will be set before use by:
115  // A constructor of a derived API, SetThresholder(), or
116  // created implicitly when used in InternalSetImage.
117  thresholder_(NULL),
118  paragraph_models_(NULL),
119  block_list_(NULL),
120  page_res_(NULL),
121  input_file_(NULL),
122  output_file_(NULL),
123  datapath_(NULL),
124  language_(NULL),
125  last_oem_requested_(OEM_DEFAULT),
126  recognition_done_(false),
127  truth_cb_(NULL),
128  rect_left_(0), rect_top_(0), rect_width_(0), rect_height_(0),
129  image_width_(0), image_height_(0) {
130  unknown_title_ = "";
131 }
132 
134  End();
135 }
136 
140 const char* TessBaseAPI::Version() {
141 #if defined(GIT_REV) && (defined(DEBUG) || defined(_DEBUG))
142  return GIT_REV;
143 #else
144  return TESSERACT_VERSION_STR;
145 #endif
146 }
147 
155 #ifdef USE_OPENCL
156 #if USE_DEVICE_SELECTION
157 #include "opencl_device_selection.h"
158 #endif
159 #endif
160 size_t TessBaseAPI::getOpenCLDevice(void **data) {
161 #ifdef USE_OPENCL
162 #if USE_DEVICE_SELECTION
163  ds_device device = OpenclDevice::getDeviceSelection();
164  if (device.type == DS_DEVICE_OPENCL_DEVICE) {
165  *data = reinterpret_cast<void*>(new cl_device_id);
166  memcpy(*data, &device.oclDeviceID, sizeof(cl_device_id));
167  return sizeof(cl_device_id);
168  }
169 #endif
170 #endif
171 
172  *data = NULL;
173  return 0;
174 }
175 
181 #ifdef __linux__
182  struct sigaction action;
183  memset(&action, 0, sizeof(action));
184  action.sa_handler = &signal_exit;
185  action.sa_flags = SA_RESETHAND;
186  sigaction(SIGSEGV, &action, NULL);
187  sigaction(SIGFPE, &action, NULL);
188  sigaction(SIGBUS, &action, NULL);
189 #else
190  // Warn API users that an implementation is needed.
191  tprintf("CatchSignals has no non-linux implementation!\n");
192 #endif
193 }
194 
199 void TessBaseAPI::SetInputName(const char* name) {
200  if (input_file_ == NULL)
201  input_file_ = new STRING(name);
202  else
203  *input_file_ = name;
204 }
205 
207 void TessBaseAPI::SetOutputName(const char* name) {
208  if (output_file_ == NULL)
209  output_file_ = new STRING(name);
210  else
211  *output_file_ = name;
212 }
213 
214 bool TessBaseAPI::SetVariable(const char* name, const char* value) {
215  if (tesseract_ == NULL) tesseract_ = new Tesseract;
217  tesseract_->params());
218 }
219 
220 bool TessBaseAPI::SetDebugVariable(const char* name, const char* value) {
221  if (tesseract_ == NULL) tesseract_ = new Tesseract;
223  tesseract_->params());
224 }
225 
226 bool TessBaseAPI::GetIntVariable(const char *name, int *value) const {
227  IntParam *p = ParamUtils::FindParam<IntParam>(
229  if (p == NULL) return false;
230  *value = (inT32)(*p);
231  return true;
232 }
233 
234 bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const {
235  BoolParam *p = ParamUtils::FindParam<BoolParam>(
237  if (p == NULL) return false;
238  *value = (BOOL8)(*p);
239  return true;
240 }
241 
242 const char *TessBaseAPI::GetStringVariable(const char *name) const {
243  StringParam *p = ParamUtils::FindParam<StringParam>(
245  return (p != NULL) ? p->string() : NULL;
246 }
247 
248 bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const {
249  DoubleParam *p = ParamUtils::FindParam<DoubleParam>(
251  if (p == NULL) return false;
252  *value = (double)(*p);
253  return true;
254 }
255 
257 bool TessBaseAPI::GetVariableAsString(const char *name, STRING *val) {
258  return ParamUtils::GetParamAsString(name, tesseract_->params(), val);
259 }
260 
262 void TessBaseAPI::PrintVariables(FILE *fp) const {
264 }
265 
274 int TessBaseAPI::Init(const char* datapath, const char* language,
275  OcrEngineMode oem, char **configs, int configs_size,
276  const GenericVector<STRING> *vars_vec,
277  const GenericVector<STRING> *vars_values,
278  bool set_only_non_debug_params) {
279  PERF_COUNT_START("TessBaseAPI::Init")
280  // Default language is "eng".
281  if (language == NULL) language = "eng";
282  // If the datapath, OcrEngineMode or the language have changed - start again.
283  // Note that the language_ field stores the last requested language that was
284  // initialized successfully, while tesseract_->lang stores the language
285  // actually used. They differ only if the requested language was NULL, in
286  // which case tesseract_->lang is set to the Tesseract default ("eng").
287  if (tesseract_ != NULL &&
288  (datapath_ == NULL || language_ == NULL ||
289  *datapath_ != datapath || last_oem_requested_ != oem ||
290  (*language_ != language && tesseract_->lang != language))) {
291  delete tesseract_;
292  tesseract_ = NULL;
293  }
294  // PERF_COUNT_SUB("delete tesseract_")
295 #ifdef USE_OPENCL
296  OpenclDevice od;
297  od.InitEnv();
298 #endif
299  PERF_COUNT_SUB("OD::InitEnv()")
300  bool reset_classifier = true;
301  if (tesseract_ == NULL) {
302  reset_classifier = false;
303  tesseract_ = new Tesseract;
305  datapath, output_file_ != NULL ? output_file_->string() : NULL,
306  language, oem, configs, configs_size, vars_vec, vars_values,
307  set_only_non_debug_params) != 0) {
308  return -1;
309  }
310  }
311  PERF_COUNT_SUB("update tesseract_")
312  // Update datapath and language requested for the last valid initialization.
313  if (datapath_ == NULL)
314  datapath_ = new STRING(datapath);
315  else
316  *datapath_ = datapath;
317  if ((strcmp(datapath_->string(), "") == 0) &&
318  (strcmp(tesseract_->datadir.string(), "") != 0))
320 
321  if (language_ == NULL)
322  language_ = new STRING(language);
323  else
324  *language_ = language;
326  // PERF_COUNT_SUB("update last_oem_requested_")
327  // For same language and datapath, just reset the adaptive classifier.
328  if (reset_classifier) {
330  PERF_COUNT_SUB("tesseract_->ResetAdaptiveClassifier()")
331  }
333  return 0;
334 }
335 
345  return (language_ == NULL || language_->string() == NULL) ?
346  "" : language_->string();
347 }
348 
355  GenericVector<STRING>* langs) const {
356  langs->clear();
357  if (tesseract_ != NULL) {
358  langs->push_back(tesseract_->lang);
359  int num_subs = tesseract_->num_sub_langs();
360  for (int i = 0; i < num_subs; ++i)
361  langs->push_back(tesseract_->get_sub_lang(i)->lang);
362  }
363 }
364 
369  GenericVector<STRING>* langs) const {
370  langs->clear();
371  if (tesseract_ != NULL) {
372 #ifdef _WIN32
373  STRING pattern = tesseract_->datadir + "/*." + kTrainedDataSuffix;
374  char fname[_MAX_FNAME];
375  WIN32_FIND_DATA data;
376  BOOL result = TRUE;
377  HANDLE handle = FindFirstFile(pattern.string(), &data);
378  if (handle != INVALID_HANDLE_VALUE) {
379  for (; result; result = FindNextFile(handle, &data)) {
380  _splitpath(data.cFileName, NULL, NULL, fname, NULL);
381  langs->push_back(STRING(fname));
382  }
383  FindClose(handle);
384  }
385 #else // _WIN32
386  DIR *dir;
387  struct dirent *dirent;
388  char *dot;
389 
390  STRING extension = STRING(".") + kTrainedDataSuffix;
391 
392  dir = opendir(tesseract_->datadir.string());
393  if (dir != NULL) {
394  while ((dirent = readdir(dir))) {
395  // Skip '.', '..', and hidden files
396  if (dirent->d_name[0] != '.') {
397  if (strstr(dirent->d_name, extension.string()) != NULL) {
398  dot = strrchr(dirent->d_name, '.');
399  // This ensures that .traineddata is at the end of the file name
400  if (strncmp(dot, extension.string(),
401  strlen(extension.string())) == 0) {
402  *dot = '\0';
403  langs->push_back(STRING(dirent->d_name));
404  }
405  }
406  }
407  }
408  closedir(dir);
409  }
410 #endif
411  }
412 }
413 
420 int TessBaseAPI::InitLangMod(const char* datapath, const char* language) {
421  if (tesseract_ == NULL)
422  tesseract_ = new Tesseract;
423  else
425  return tesseract_->init_tesseract_lm(datapath, NULL, language);
426 }
427 
433  if (tesseract_ == NULL) {
434  tesseract_ = new Tesseract;
436  }
437 }
438 
446 }
447 
451 }
452 
459  if (tesseract_ == NULL)
460  tesseract_ = new Tesseract;
461  tesseract_->tessedit_pageseg_mode.set_value(mode);
462 }
463 
466  if (tesseract_ == NULL)
467  return PSM_SINGLE_BLOCK;
468  return static_cast<PageSegMode>(
469  static_cast<int>(tesseract_->tessedit_pageseg_mode));
470 }
471 
485 char* TessBaseAPI::TesseractRect(const unsigned char* imagedata,
486  int bytes_per_pixel,
487  int bytes_per_line,
488  int left, int top,
489  int width, int height) {
490  if (tesseract_ == NULL || width < kMinRectSize || height < kMinRectSize)
491  return NULL; // Nothing worth doing.
492 
493  // Since this original api didn't give the exact size of the image,
494  // we have to invent a reasonable value.
495  int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8;
496  SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top,
497  bytes_per_pixel, bytes_per_line);
498  SetRectangle(left, top, width, height);
499 
500  return GetUTF8Text();
501 }
502 
508  if (tesseract_ == NULL)
509  return;
512 }
513 
521 void TessBaseAPI::SetImage(const unsigned char* imagedata,
522  int width, int height,
523  int bytes_per_pixel, int bytes_per_line) {
524  if (InternalSetImage()) {
525  thresholder_->SetImage(imagedata, width, height,
526  bytes_per_pixel, bytes_per_line);
528  }
529 }
530 
532  if (thresholder_)
534  else
535  tprintf("Please call SetImage before SetSourceResolution.\n");
536 }
537 
546 void TessBaseAPI::SetImage(Pix* pix) {
547  if (InternalSetImage()) {
548  thresholder_->SetImage(pix);
550  }
551 }
552 
558 void TessBaseAPI::SetRectangle(int left, int top, int width, int height) {
559  if (thresholder_ == NULL)
560  return;
561  thresholder_->SetRectangle(left, top, width, height);
562  ClearResults();
563 }
564 
570  if (tesseract_ == NULL || thresholder_ == NULL)
571  return NULL;
572  if (tesseract_->pix_binary() == NULL)
574  return pixClone(tesseract_->pix_binary());
575 }
576 
582 Boxa* TessBaseAPI::GetRegions(Pixa** pixa) {
583  return GetComponentImages(RIL_BLOCK, false, pixa, NULL);
584 }
585 
594 Boxa* TessBaseAPI::GetTextlines(const bool raw_image, const int raw_padding,
595  Pixa** pixa, int** blockids, int** paraids) {
596  return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding,
597  pixa, blockids, paraids);
598 }
599 
608 Boxa* TessBaseAPI::GetStrips(Pixa** pixa, int** blockids) {
609  return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids);
610 }
611 
617 Boxa* TessBaseAPI::GetWords(Pixa** pixa) {
618  return GetComponentImages(RIL_WORD, true, pixa, NULL);
619 }
620 
628  return GetComponentImages(RIL_SYMBOL, true, pixa, NULL);
629 }
630 
640  bool text_only, bool raw_image,
641  const int raw_padding,
642  Pixa** pixa, int** blockids,
643  int** paraids) {
644  PageIterator* page_it = GetIterator();
645  if (page_it == NULL)
646  page_it = AnalyseLayout();
647  if (page_it == NULL)
648  return NULL; // Failed.
649 
650  // Count the components to get a size for the arrays.
651  int component_count = 0;
652  int left, top, right, bottom;
653 
654  TessResultCallback<bool>* get_bbox = NULL;
655  if (raw_image) {
656  // Get bounding box in original raw image with padding.
658  level, raw_padding,
659  &left, &top, &right, &bottom);
660  } else {
661  // Get bounding box from binarized image. Note that this could be
662  // differently scaled from the original image.
663  get_bbox = NewPermanentTessCallback(page_it,
665  level, &left, &top, &right, &bottom);
666  }
667  do {
668  if (get_bbox->Run() &&
669  (!text_only || PTIsTextType(page_it->BlockType())))
670  ++component_count;
671  } while (page_it->Next(level));
672 
673  Boxa* boxa = boxaCreate(component_count);
674  if (pixa != NULL)
675  *pixa = pixaCreate(component_count);
676  if (blockids != NULL)
677  *blockids = new int[component_count];
678  if (paraids != NULL)
679  *paraids = new int[component_count];
680 
681  int blockid = 0;
682  int paraid = 0;
683  int component_index = 0;
684  page_it->Begin();
685  do {
686  if (get_bbox->Run() &&
687  (!text_only || PTIsTextType(page_it->BlockType()))) {
688  Box* lbox = boxCreate(left, top, right - left, bottom - top);
689  boxaAddBox(boxa, lbox, L_INSERT);
690  if (pixa != NULL) {
691  Pix* pix = NULL;
692  if (raw_image) {
693  pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left,
694  &top);
695  } else {
696  pix = page_it->GetBinaryImage(level);
697  }
698  pixaAddPix(*pixa, pix, L_INSERT);
699  pixaAddBox(*pixa, lbox, L_CLONE);
700  }
701  if (paraids != NULL) {
702  (*paraids)[component_index] = paraid;
703  if (page_it->IsAtFinalElement(RIL_PARA, level))
704  ++paraid;
705  }
706  if (blockids != NULL) {
707  (*blockids)[component_index] = blockid;
708  if (page_it->IsAtFinalElement(RIL_BLOCK, level)) {
709  ++blockid;
710  paraid = 0;
711  }
712  }
713  ++component_index;
714  }
715  } while (page_it->Next(level));
716  delete page_it;
717  delete get_bbox;
718  return boxa;
719 }
720 
722  if (thresholder_ == NULL) {
723  return 0;
724  }
725  return thresholder_->GetScaleFactor();
726 }
727 
729 void TessBaseAPI::DumpPGM(const char* filename) {
730  if (tesseract_ == NULL)
731  return;
732  FILE *fp = fopen(filename, "wb");
733  Pix* pix = tesseract_->pix_binary();
734  int width = pixGetWidth(pix);
735  int height = pixGetHeight(pix);
736  l_uint32* data = pixGetData(pix);
737  fprintf(fp, "P5 %d %d 255\n", width, height);
738  for (int y = 0; y < height; ++y, data += pixGetWpl(pix)) {
739  for (int x = 0; x < width; ++x) {
740  uinT8 b = GET_DATA_BIT(data, x) ? 0 : 255;
741  fwrite(&b, 1, 1, fp);
742  }
743  }
744  fclose(fp);
745 }
746 
747 #ifndef NO_CUBE_BUILD
748 
754 int CubeAPITest(Boxa* boxa_blocks, Pixa* pixa_blocks,
755  Boxa* boxa_words, Pixa* pixa_words,
756  const FCOORD& reskew, Pix* page_pix,
757  PAGE_RES* page_res) {
758  int block_count = boxaGetCount(boxa_blocks);
759  ASSERT_HOST(block_count == pixaGetCount(pixa_blocks));
760  // Write each block to the current directory as junk_write_display.nnn.png.
761  for (int i = 0; i < block_count; ++i) {
762  Pix* pix = pixaGetPix(pixa_blocks, i, L_CLONE);
763  pixDisplayWrite(pix, 1);
764  }
765  int word_count = boxaGetCount(boxa_words);
766  ASSERT_HOST(word_count == pixaGetCount(pixa_words));
767  int pr_word = 0;
768  PAGE_RES_IT page_res_it(page_res);
769  for (page_res_it.restart_page(); page_res_it.word () != NULL;
770  page_res_it.forward(), ++pr_word) {
771  WERD_RES *word = page_res_it.word();
772  WERD_CHOICE* choice = word->best_choice;
773  // Write the first 100 words to files names wordims/<wordstring>.tif.
774  if (pr_word < 100) {
775  STRING filename("wordims/");
776  if (choice != NULL) {
777  filename += choice->unichar_string();
778  } else {
779  char numbuf[32];
780  filename += "unclassified";
781  snprintf(numbuf, 32, "%03d", pr_word);
782  filename += numbuf;
783  }
784  filename += ".tif";
785  Pix* pix = pixaGetPix(pixa_words, pr_word, L_CLONE);
786  pixWrite(filename.string(), pix, IFF_TIFF_G4);
787  }
788  }
789  ASSERT_HOST(pr_word == word_count);
790  return 0;
791 }
792 #endif // NO_CUBE_BUILD
793 
810 
811 PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) {
812  if (FindLines() == 0) {
813  if (block_list_->empty())
814  return NULL; // The page was empty.
815  page_res_ = new PAGE_RES(merge_similar_words, block_list_, NULL);
816  DetectParagraphs(false);
817  return new PageIterator(
821  }
822  return NULL;
823 }
824 
830  if (tesseract_ == NULL)
831  return -1;
832  if (FindLines() != 0)
833  return -1;
834  delete page_res_;
835  if (block_list_->empty()) {
836  page_res_ = new PAGE_RES(false, block_list_,
838  return 0; // Empty page.
839  }
840 
842  recognition_done_ = true;
847  } else {
848  // TODO(rays) LSTM here.
849  page_res_ = new PAGE_RES(false,
851  }
852  if (page_res_ == NULL) {
853  return -1;
854  }
857  return 0;
858  }
859 
860  if (truth_cb_ != NULL) {
861  tesseract_->wordrec_run_blamer.set_value(true);
862  PageIterator *page_it = new PageIterator(
867  image_height_, page_it, this->tesseract()->pix_grey());
868  delete page_it;
869  }
870 
871  int result = 0;
873  #ifndef GRAPHICS_DISABLED
875  #endif // GRAPHICS_DISABLED
876  // The page_res is invalid after an interactive session, so cleanup
877  // in a way that lets us continue to the next page without crashing.
878  delete page_res_;
879  page_res_ = NULL;
880  return -1;
882  STRING fontname;
883  ExtractFontName(*output_file_, &fontname);
885  } else if (tesseract_->tessedit_ambigs_training) {
886  FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
887  // OCR the page segmented into words by tesseract.
889  *input_file_, page_res_, monitor, training_output_file);
890  fclose(training_output_file);
891  } else {
892  // Now run the main recognition.
893  bool wait_for_text = true;
894  GetBoolVariable("paragraph_text_based", &wait_for_text);
895  if (!wait_for_text) DetectParagraphs(false);
896  if (tesseract_->recog_all_words(page_res_, monitor, NULL, NULL, 0)) {
897  if (wait_for_text) DetectParagraphs(true);
898  } else {
899  result = -1;
900  }
901  }
902  return result;
903 }
904 
907  if (tesseract_ == NULL)
908  return -1;
909  if (thresholder_ == NULL || thresholder_->IsEmpty()) {
910  tprintf("Please call SetImage before attempting recognition.");
911  return -1;
912  }
913  if (page_res_ != NULL)
914  ClearResults();
915  if (FindLines() != 0)
916  return -1;
917  // Additional conditions under which chopper test cannot be run
918  if (tesseract_->interactive_display_mode) return -1;
919 
920  recognition_done_ = true;
921 
922  page_res_ = new PAGE_RES(false, block_list_,
924 
925  PAGE_RES_IT page_res_it(page_res_);
926 
927  while (page_res_it.word() != NULL) {
928  WERD_RES *word_res = page_res_it.word();
929  GenericVector<TBOX> boxes;
930  tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
931  page_res_it.row()->row, word_res);
932  page_res_it.forward();
933  }
934  return 0;
935 }
936 
937 // Takes ownership of the input pix.
939 
941 
943  if (input_file_)
944  return input_file_->c_str();
945  return NULL;
946 }
947 
948 const char * TessBaseAPI::GetDatapath() {
949  return tesseract_->datadir.c_str();
950 }
951 
954 }
955 
956 // If flist exists, get data from there. Otherwise get data from buf.
957 // Seems convoluted, but is the easiest way I know of to meet multiple
958 // goals. Support streaming from stdin, and also work on platforms
959 // lacking fmemopen.
960 bool TessBaseAPI::ProcessPagesFileList(FILE *flist,
961  STRING *buf,
962  const char* retry_config,
963  int timeout_millisec,
964  TessResultRenderer* renderer,
965  int tessedit_page_number) {
966  if (!flist && !buf) return false;
967  int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
968  char pagename[MAX_PATH];
969 
970  GenericVector<STRING> lines;
971  if (!flist) {
972  buf->split('\n', &lines);
973  if (lines.empty()) return false;
974  }
975 
976  // Skip to the requested page number.
977  for (int i = 0; i < page; i++) {
978  if (flist) {
979  if (fgets(pagename, sizeof(pagename), flist) == NULL) break;
980  }
981  }
982 
983  // Begin producing output
984  if (renderer && !renderer->BeginDocument(unknown_title_)) {
985  return false;
986  }
987 
988  // Loop over all pages - or just the requested one
989  while (true) {
990  if (flist) {
991  if (fgets(pagename, sizeof(pagename), flist) == NULL) break;
992  } else {
993  if (page >= lines.size()) break;
994  snprintf(pagename, sizeof(pagename), "%s", lines[page].c_str());
995  }
996  chomp_string(pagename);
997  Pix *pix = pixRead(pagename);
998  if (pix == NULL) {
999  tprintf("Image file %s cannot be read!\n", pagename);
1000  return false;
1001  }
1002  tprintf("Page %d : %s\n", page, pagename);
1003  bool r = ProcessPage(pix, page, pagename, retry_config,
1004  timeout_millisec, renderer);
1005  pixDestroy(&pix);
1006  if (!r) return false;
1007  if (tessedit_page_number >= 0) break;
1008  ++page;
1009  }
1010 
1011  // Finish producing output
1012  if (renderer && !renderer->EndDocument()) {
1013  return false;
1014  }
1015  return true;
1016 }
1017 
1018 bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data,
1019  size_t size,
1020  const char* filename,
1021  const char* retry_config,
1022  int timeout_millisec,
1023  TessResultRenderer* renderer,
1024  int tessedit_page_number) {
1025 #ifndef ANDROID_BUILD
1026  Pix *pix = NULL;
1027 #ifdef USE_OPENCL
1028  OpenclDevice od;
1029 #endif // USE_OPENCL
1030  int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
1031  size_t offset = 0;
1032  for (; ; ++page) {
1033  if (tessedit_page_number >= 0)
1034  page = tessedit_page_number;
1035 #ifdef USE_OPENCL
1036  if ( od.selectedDeviceIsOpenCL() ) {
1037  pix = (data) ?
1038  od.pixReadMemTiffCl(data, size, page) :
1039  od.pixReadTiffCl(filename, page);
1040  } else {
1041 #endif // USE_OPENCL
1042  pix = (data) ?
1043  pixReadMemFromMultipageTiff(data, size, &offset) :
1044  pixReadFromMultipageTiff(filename, &offset);
1045 #ifdef USE_OPENCL
1046  }
1047 #endif // USE_OPENCL
1048  if (pix == NULL) break;
1049  tprintf("Page %d\n", page + 1);
1050  char page_str[kMaxIntSize];
1051  snprintf(page_str, kMaxIntSize - 1, "%d", page);
1052  SetVariable("applybox_page", page_str);
1053  bool r = ProcessPage(pix, page, filename, retry_config,
1054  timeout_millisec, renderer);
1055  pixDestroy(&pix);
1056  if (!r) return false;
1057  if (tessedit_page_number >= 0) break;
1058  if (!offset) break;
1059  }
1060  return true;
1061 #else
1062  return false;
1063 #endif
1064 }
1065 
1066 // Master ProcessPages calls ProcessPagesInternal and then does any post-
1067 // processing required due to being in a training mode.
1068 bool TessBaseAPI::ProcessPages(const char* filename, const char* retry_config,
1069  int timeout_millisec,
1070  TessResultRenderer* renderer) {
1071  bool result =
1072  ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
1073  if (result) {
1076  tprintf("Write of TR file failed: %s\n", output_file_->string());
1077  return false;
1078  }
1079  }
1080  return result;
1081 }
1082 
1083 // In the ideal scenario, Tesseract will start working on data as soon
1084 // as it can. For example, if you stream a filelist through stdin, we
1085 // should start the OCR process as soon as the first filename is
1086 // available. This is particularly useful when hooking Tesseract up to
1087 // slow hardware such as a book scanning machine.
1088 //
1089 // Unfortunately there are tradeoffs. You can't seek on stdin. That
1090 // makes automatic detection of datatype (TIFF? filelist? PNG?)
1091 // impractical. So we support a command line flag to explicitly
1092 // identify the scenario that really matters: filelists on
1093 // stdin. We'll still do our best if the user likes pipes.
1094 bool TessBaseAPI::ProcessPagesInternal(const char* filename,
1095  const char* retry_config,
1096  int timeout_millisec,
1097  TessResultRenderer* renderer) {
1098  PERF_COUNT_START("ProcessPages")
1099  bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
1100  if (stdInput) {
1101 #ifdef WIN32
1102  if (_setmode(_fileno(stdin), _O_BINARY) == -1)
1103  tprintf("ERROR: cin to binary: %s", strerror(errno));
1104 #endif // WIN32
1105  }
1106 
1107  if (stream_filelist) {
1108  return ProcessPagesFileList(stdin, NULL, retry_config,
1109  timeout_millisec, renderer,
1111  }
1112 
1113  // At this point we are officially in autodetection territory.
1114  // That means any data in stdin must be buffered, to make it
1115  // seekable.
1116  std::string buf;
1117  const l_uint8 *data = NULL;
1118  if (stdInput) {
1119  buf.assign((std::istreambuf_iterator<char>(std::cin)),
1120  (std::istreambuf_iterator<char>()));
1121  data = reinterpret_cast<const l_uint8 *>(buf.data());
1122  }
1123 
1124  // Here is our autodetection
1125  int format;
1126  int r = (stdInput) ?
1127  findFileFormatBuffer(data, &format) :
1128  findFileFormat(filename, &format);
1129 
1130  // Maybe we have a filelist
1131  if (r != 0 || format == IFF_UNKNOWN) {
1132  STRING s;
1133  if (stdInput) {
1134  s = buf.c_str();
1135  } else {
1136  std::ifstream t(filename);
1137  std::string u((std::istreambuf_iterator<char>(t)),
1138  std::istreambuf_iterator<char>());
1139  s = u.c_str();
1140  }
1141  return ProcessPagesFileList(NULL, &s, retry_config,
1142  timeout_millisec, renderer,
1144  }
1145 
1146  // Maybe we have a TIFF which is potentially multipage
1147  bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
1148  format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
1149  format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
1150  format == IFF_TIFF_ZIP);
1151 
1152  // Fail early if we can, before producing any output
1153  Pix *pix = NULL;
1154  if (!tiff) {
1155  pix = (stdInput) ? pixReadMem(data, buf.size()) : pixRead(filename);
1156  if (pix == NULL) {
1157  return false;
1158  }
1159  }
1160 
1161  // Begin the output
1162  if (renderer && !renderer->BeginDocument(unknown_title_)) {
1163  pixDestroy(&pix);
1164  return false;
1165  }
1166 
1167  // Produce output
1168  r = (tiff) ?
1169  ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config,
1170  timeout_millisec, renderer,
1172  ProcessPage(pix, 0, filename, retry_config,
1173  timeout_millisec, renderer);
1174 
1175  // Clean up memory as needed
1176  pixDestroy(&pix);
1177 
1178  // End the output
1179  if (!r || (renderer && !renderer->EndDocument())) {
1180  return false;
1181  }
1183  return true;
1184 }
1185 
1186 bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
1187  const char* retry_config, int timeout_millisec,
1188  TessResultRenderer* renderer) {
1189  PERF_COUNT_START("ProcessPage")
1190  SetInputName(filename);
1191  SetImage(pix);
1192  bool failed = false;
1193 
1195  // Disabled character recognition
1196  PageIterator* it = AnalyseLayout();
1197 
1198  if (it == NULL) {
1199  failed = true;
1200  } else {
1201  delete it;
1202  }
1204  failed = FindLines() != 0;
1205  } else if (timeout_millisec > 0) {
1206  // Running with a timeout.
1207  ETEXT_DESC monitor;
1208  monitor.cancel = NULL;
1209  monitor.cancel_this = NULL;
1210  monitor.set_deadline_msecs(timeout_millisec);
1211 
1212  // Now run the main recognition.
1213  failed = Recognize(&monitor) < 0;
1214  } else {
1215  // Normal layout and character recognition with no timeout.
1216  failed = Recognize(NULL) < 0;
1217  }
1218 
1220 #ifndef ANDROID_BUILD
1221  Pix* page_pix = GetThresholdedImage();
1222  pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
1223 #endif // ANDROID_BUILD
1224  }
1225 
1226  if (failed && retry_config != NULL && retry_config[0] != '\0') {
1227  // Save current config variables before switching modes.
1228  FILE* fp = fopen(kOldVarsFile, "wb");
1229  PrintVariables(fp);
1230  fclose(fp);
1231  // Switch to alternate mode for retry.
1232  ReadConfigFile(retry_config);
1233  SetImage(pix);
1234  Recognize(NULL);
1235  // Restore saved config variables.
1237  }
1238 
1239  if (renderer && !failed) {
1240  failed = !renderer->AddImage(this);
1241  }
1242 
1244  return !failed;
1245 }
1246 
1252  if (tesseract_ == NULL || page_res_ == NULL)
1253  return NULL;
1254  return new LTRResultIterator(
1258 }
1259 
1269  if (tesseract_ == NULL || page_res_ == NULL)
1270  return NULL;
1275 }
1276 
1286  if (tesseract_ == NULL || page_res_ == NULL)
1287  return NULL;
1288  return new MutableIterator(page_res_, tesseract_,
1292 }
1293 
1296  if (tesseract_ == NULL ||
1297  (!recognition_done_ && Recognize(NULL) < 0))
1298  return NULL;
1299  STRING text("");
1300  ResultIterator *it = GetIterator();
1301  do {
1302  if (it->Empty(RIL_PARA)) continue;
1303  char *para_text = it->GetUTF8Text(RIL_PARA);
1304  text += para_text;
1305  delete []para_text;
1306  } while (it->Next(RIL_PARA));
1307  char* result = new char[text.length() + 1];
1308  strncpy(result, text.string(), text.length() + 1);
1309  delete it;
1310  return result;
1311 }
1312 
1316 static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) {
1317  tesseract::Orientation orientation;
1318  tesseract::WritingDirection writing_direction;
1319  tesseract::TextlineOrder textline_order;
1320  float deskew_angle;
1321  it->Orientation(&orientation, &writing_direction, &textline_order,
1322  &deskew_angle);
1323  return orientation;
1324 }
1325 
1334 static void AddBaselineCoordsTohOCR(const PageIterator *it,
1335  PageIteratorLevel level,
1336  STRING* hocr_str) {
1337  tesseract::Orientation orientation = GetBlockTextOrientation(it);
1338  if (orientation != ORIENTATION_PAGE_UP) {
1339  hocr_str->add_str_int("; textangle ", 360 - orientation * 90);
1340  return;
1341  }
1342 
1343  int left, top, right, bottom;
1344  it->BoundingBox(level, &left, &top, &right, &bottom);
1345 
1346  // Try to get the baseline coordinates at this level.
1347  int x1, y1, x2, y2;
1348  if (!it->Baseline(level, &x1, &y1, &x2, &y2))
1349  return;
1350  // Following the description of this field of the hOCR spec, we convert the
1351  // baseline coordinates so that "the bottom left of the bounding box is the
1352  // origin".
1353  x1 -= left;
1354  x2 -= left;
1355  y1 -= bottom;
1356  y2 -= bottom;
1357 
1358  // Now fit a line through the points so we can extract coefficients for the
1359  // equation: y = p1 x + p0
1360  double p1 = 0;
1361  double p0 = 0;
1362  if (x1 == x2) {
1363  // Problem computing the polynomial coefficients.
1364  return;
1365  }
1366  p1 = (y2 - y1) / static_cast<double>(x2 - x1);
1367  p0 = y1 - static_cast<double>(p1 * x1);
1368 
1369  hocr_str->add_str_double("; baseline ", round(p1 * 1000.0) / 1000.0);
1370  hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0);
1371 }
1372 
1373 static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1,
1374  int num2) {
1375  const size_t BUFSIZE = 64;
1376  char id_buffer[BUFSIZE];
1377  if (num2 >= 0) {
1378  snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2);
1379  } else {
1380  snprintf(id_buffer, BUFSIZE - 1, "%s_%d", base.c_str(), num1);
1381  }
1382  id_buffer[BUFSIZE - 1] = '\0';
1383  *hocr_str += " id='";
1384  *hocr_str += id_buffer;
1385  *hocr_str += "'";
1386 }
1387 
1388 static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level,
1389  STRING* hocr_str) {
1390  int left, top, right, bottom;
1391  it->BoundingBox(level, &left, &top, &right, &bottom);
1392  // This is the only place we use double quotes instead of single quotes,
1393  // but it may too late to change for consistency
1394  hocr_str->add_str_int(" title=\"bbox ", left);
1395  hocr_str->add_str_int(" ", top);
1396  hocr_str->add_str_int(" ", right);
1397  hocr_str->add_str_int(" ", bottom);
1398  // Add baseline coordinates & heights for textlines only.
1399  if (level == RIL_TEXTLINE) {
1400  AddBaselineCoordsTohOCR(it, level, hocr_str);
1401  // add custom height measures
1402  float row_height, descenders, ascenders; // row attributes
1403  it->RowAttributes(&row_height, &descenders, &ascenders);
1404  // TODO(rays): Do we want to limit these to a single decimal place?
1405  hocr_str->add_str_double("; x_size ", row_height);
1406  hocr_str->add_str_double("; x_descenders ", descenders * -1);
1407  hocr_str->add_str_double("; x_ascenders ", ascenders);
1408  }
1409  *hocr_str += "\">";
1410 }
1411 
1412 static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level,
1413  STRING* hocr_str) {
1414  int left, top, right, bottom;
1415  it->BoundingBox(level, &left, &top, &right, &bottom);
1416  hocr_str->add_str_int("\t", left);
1417  hocr_str->add_str_int("\t", top);
1418  hocr_str->add_str_int("\t", right - left);
1419  hocr_str->add_str_int("\t", bottom - top);
1420 }
1421 
1430 char* TessBaseAPI::GetHOCRText(int page_number) {
1431  return GetHOCRText(NULL, page_number);
1432 }
1433 
1442 char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
1443  if (tesseract_ == NULL || (page_res_ == NULL && Recognize(monitor) < 0))
1444  return NULL;
1445 
1446  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1447  int page_id = page_number + 1; // hOCR uses 1-based page numbers.
1448  bool para_is_ltr = true; // Default direction is LTR
1449  const char* paragraph_lang = NULL;
1450  bool font_info = false;
1451  GetBoolVariable("hocr_font_info", &font_info);
1452 
1453  STRING hocr_str("");
1454 
1455  if (input_file_ == NULL)
1456  SetInputName(NULL);
1457 
1458 #ifdef _WIN32
1459  // convert input name from ANSI encoding to utf-8
1460  int str16_len =
1461  MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, NULL, 0);
1462  wchar_t *uni16_str = new WCHAR[str16_len];
1463  str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
1464  uni16_str, str16_len);
1465  int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL, 0,
1466  NULL, NULL);
1467  char *utf8_str = new char[utf8_len];
1468  WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str,
1469  utf8_len, NULL, NULL);
1470  *input_file_ = utf8_str;
1471  delete[] uni16_str;
1472  delete[] utf8_str;
1473 #endif
1474 
1475  hocr_str += " <div class='ocr_page'";
1476  AddIdTohOCR(&hocr_str, "page", page_id, -1);
1477  hocr_str += " title='image \"";
1478  if (input_file_) {
1479  hocr_str += HOcrEscape(input_file_->string());
1480  } else {
1481  hocr_str += "unknown";
1482  }
1483  hocr_str.add_str_int("\"; bbox ", rect_left_);
1484  hocr_str.add_str_int(" ", rect_top_);
1485  hocr_str.add_str_int(" ", rect_width_);
1486  hocr_str.add_str_int(" ", rect_height_);
1487  hocr_str.add_str_int("; ppageno ", page_number);
1488  hocr_str += "'>\n";
1489 
1490  ResultIterator *res_it = GetIterator();
1491  while (!res_it->Empty(RIL_BLOCK)) {
1492  if (res_it->Empty(RIL_WORD)) {
1493  res_it->Next(RIL_WORD);
1494  continue;
1495  }
1496 
1497  // Open any new block/paragraph/textline.
1498  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
1499  para_is_ltr = true; // reset to default direction
1500  hocr_str += " <div class='ocr_carea'";
1501  AddIdTohOCR(&hocr_str, "block", page_id, bcnt);
1502  AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str);
1503  }
1504  if (res_it->IsAtBeginningOf(RIL_PARA)) {
1505  hocr_str += "\n <p class='ocr_par'";
1506  para_is_ltr = res_it->ParagraphIsLtr();
1507  if (!para_is_ltr) {
1508  hocr_str += " dir='rtl'";
1509  }
1510  AddIdTohOCR(&hocr_str, "par", page_id, pcnt);
1511  paragraph_lang = res_it->WordRecognitionLanguage();
1512  if (paragraph_lang) {
1513  hocr_str += " lang='";
1514  hocr_str += paragraph_lang;
1515  hocr_str += "'";
1516  }
1517  AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
1518  }
1519  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
1520  hocr_str += "\n <span class='ocr_line'";
1521  AddIdTohOCR(&hocr_str, "line", page_id, lcnt);
1522  AddBoxTohOCR(res_it, RIL_TEXTLINE, &hocr_str);
1523  }
1524 
1525  // Now, process the word...
1526  hocr_str += "<span class='ocrx_word'";
1527  AddIdTohOCR(&hocr_str, "word", page_id, wcnt);
1528  int left, top, right, bottom;
1529  bool bold, italic, underlined, monospace, serif, smallcaps;
1530  int pointsize, font_id;
1531  const char *font_name;
1532  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
1533  font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
1534  &monospace, &serif, &smallcaps,
1535  &pointsize, &font_id);
1536  hocr_str.add_str_int(" title='bbox ", left);
1537  hocr_str.add_str_int(" ", top);
1538  hocr_str.add_str_int(" ", right);
1539  hocr_str.add_str_int(" ", bottom);
1540  hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD));
1541  if (font_info) {
1542  if (font_name) {
1543  hocr_str += "; x_font ";
1544  hocr_str += HOcrEscape(font_name);
1545  }
1546  hocr_str.add_str_int("; x_fsize ", pointsize);
1547  }
1548  hocr_str += "'";
1549  const char* lang = res_it->WordRecognitionLanguage();
1550  if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) {
1551  hocr_str += " lang='";
1552  hocr_str += lang;
1553  hocr_str += "'";
1554  }
1555  switch (res_it->WordDirection()) {
1556  // Only emit direction if different from current paragraph direction
1557  case DIR_LEFT_TO_RIGHT:
1558  if (!para_is_ltr) hocr_str += " dir='ltr'";
1559  break;
1560  case DIR_RIGHT_TO_LEFT:
1561  if (para_is_ltr) hocr_str += " dir='rtl'";
1562  break;
1563  case DIR_MIX:
1564  case DIR_NEUTRAL:
1565  default: // Do nothing.
1566  break;
1567  }
1568  hocr_str += ">";
1569  bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
1570  bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
1571  bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
1572  if (bold) hocr_str += "<strong>";
1573  if (italic) hocr_str += "<em>";
1574  do {
1575  const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
1576  if (grapheme && grapheme[0] != 0) {
1577  hocr_str += HOcrEscape(grapheme);
1578  }
1579  delete []grapheme;
1580  res_it->Next(RIL_SYMBOL);
1581  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
1582  if (italic) hocr_str += "</em>";
1583  if (bold) hocr_str += "</strong>";
1584  hocr_str += "</span> ";
1585  wcnt++;
1586  // Close any ending block/paragraph/textline.
1587  if (last_word_in_line) {
1588  hocr_str += "\n </span>";
1589  lcnt++;
1590  }
1591  if (last_word_in_para) {
1592  hocr_str += "\n </p>\n";
1593  pcnt++;
1594  para_is_ltr = true; // back to default direction
1595  }
1596  if (last_word_in_block) {
1597  hocr_str += " </div>\n";
1598  bcnt++;
1599  }
1600  }
1601  hocr_str += " </div>\n";
1602 
1603  char *ret = new char[hocr_str.length() + 1];
1604  strcpy(ret, hocr_str.string());
1605  delete res_it;
1606  return ret;
1607 }
1608 
1613 char* TessBaseAPI::GetTSVText(int page_number) {
1614  if (tesseract_ == NULL || (page_res_ == NULL && Recognize(NULL) < 0))
1615  return NULL;
1616 
1617  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1618  int page_id = page_number + 1; // we use 1-based page numbers.
1619 
1620  STRING tsv_str("");
1621 
1622  int page_num = page_id, block_num = 0, par_num = 0, line_num = 0,
1623  word_num = 0;
1624 
1625  tsv_str.add_str_int("1\t", page_num); // level 1 - page
1626  tsv_str.add_str_int("\t", block_num);
1627  tsv_str.add_str_int("\t", par_num);
1628  tsv_str.add_str_int("\t", line_num);
1629  tsv_str.add_str_int("\t", word_num);
1630  tsv_str.add_str_int("\t", rect_left_);
1631  tsv_str.add_str_int("\t", rect_top_);
1632  tsv_str.add_str_int("\t", rect_width_);
1633  tsv_str.add_str_int("\t", rect_height_);
1634  tsv_str += "\t-1\t\n";
1635 
1636  ResultIterator* res_it = GetIterator();
1637  while (!res_it->Empty(RIL_BLOCK)) {
1638  if (res_it->Empty(RIL_WORD)) {
1639  res_it->Next(RIL_WORD);
1640  continue;
1641  }
1642 
1643  // Add rows for any new block/paragraph/textline.
1644  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
1645  block_num++, par_num = 0, line_num = 0, word_num = 0;
1646  tsv_str.add_str_int("2\t", page_num); // level 2 - block
1647  tsv_str.add_str_int("\t", block_num);
1648  tsv_str.add_str_int("\t", par_num);
1649  tsv_str.add_str_int("\t", line_num);
1650  tsv_str.add_str_int("\t", word_num);
1651  AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str);
1652  tsv_str += "\t-1\t\n"; // end of row for block
1653  }
1654  if (res_it->IsAtBeginningOf(RIL_PARA)) {
1655  par_num++, line_num = 0, word_num = 0;
1656  tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph
1657  tsv_str.add_str_int("\t", block_num);
1658  tsv_str.add_str_int("\t", par_num);
1659  tsv_str.add_str_int("\t", line_num);
1660  tsv_str.add_str_int("\t", word_num);
1661  AddBoxToTSV(res_it, RIL_PARA, &tsv_str);
1662  tsv_str += "\t-1\t\n"; // end of row for para
1663  }
1664  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
1665  line_num++, word_num = 0;
1666  tsv_str.add_str_int("4\t", page_num); // level 4 - line
1667  tsv_str.add_str_int("\t", block_num);
1668  tsv_str.add_str_int("\t", par_num);
1669  tsv_str.add_str_int("\t", line_num);
1670  tsv_str.add_str_int("\t", word_num);
1671  AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str);
1672  tsv_str += "\t-1\t\n"; // end of row for line
1673  }
1674 
1675  // Now, process the word...
1676  int left, top, right, bottom;
1677  bool bold, italic, underlined, monospace, serif, smallcaps;
1678  int pointsize, font_id;
1679  const char* font_name;
1680  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
1681  font_name =
1682  res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace,
1683  &serif, &smallcaps, &pointsize, &font_id);
1684  word_num++;
1685  tsv_str.add_str_int("5\t", page_num); // level 5 - word
1686  tsv_str.add_str_int("\t", block_num);
1687  tsv_str.add_str_int("\t", par_num);
1688  tsv_str.add_str_int("\t", line_num);
1689  tsv_str.add_str_int("\t", word_num);
1690  tsv_str.add_str_int("\t", left);
1691  tsv_str.add_str_int("\t", top);
1692  tsv_str.add_str_int("\t", right - left);
1693  tsv_str.add_str_int("\t", bottom - top);
1694  tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD));
1695  tsv_str += "\t";
1696 
1697  // Increment counts if at end of block/paragraph/textline.
1698  if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++;
1699  if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++;
1700  if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++;
1701 
1702  do {
1703  tsv_str += res_it->GetUTF8Text(RIL_SYMBOL);
1704  res_it->Next(RIL_SYMBOL);
1705  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
1706  tsv_str += "\n"; // end of row
1707  wcnt++;
1708  }
1709 
1710  char* ret = new char[tsv_str.length() + 1];
1711  strcpy(ret, tsv_str.string());
1712  delete res_it;
1713  return ret;
1714 }
1715 
1717 const int kNumbersPerBlob = 5;
1722 const int kBytesPerNumber = 5;
1730 const int kBytesPer64BitNumber = 20;
1738  UNICHAR_LEN;
1739 
/**
 * Builds box-file text for the recognized page: one line per symbol of the
 * form "text left bottom right top page". The y coordinates are written as
 * image_height_ minus the iterator's value. Recognition is run first if it
 * has not been done yet.
 * Returns a buffer allocated with new[] (release with delete[]), or NULL if
 * the API is not initialised or recognition fails.
 * NOTE(review): two source lines are not visible in this listing - the end of
 * the total_length expression and the declaration of `it`.
 */
1745 char* TessBaseAPI::GetBoxText(int page_number) {
1746  if (tesseract_ == NULL ||
1747  (!recognition_done_ && Recognize(NULL) < 0))
1748  return NULL;
1749  int blob_count;
1750  int utf8_length = TextLength(&blob_count);
  // Upper bound for the output size, derived from the blob count and the
  // UTF-8 text length.
1751  int total_length = blob_count * kBytesPerBoxFileLine + utf8_length +
1753  char* result = new char[total_length];
1754  result[0] = '\0';
1755  int output_length = 0;
1757  do {
1758  int left, top, right, bottom;
  // Symbols for which no bounding box is reported are skipped.
1759  if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
  // The text buffer is owned here and released below with delete [].
1760  char* text = it->GetUTF8Text(RIL_SYMBOL);
1761  // Tesseract uses space for recognition failure. Fix to a reject
1762  // character, kTesseractReject so we don't create illegal box files.
1763  for (int i = 0; text[i] != '\0'; ++i) {
1764  if (text[i] == ' ')
1765  text[i] = kTesseractReject;
1766  }
  // Append the line at the current end of the buffer, bounded by the space
  // that is left.
1767  snprintf(result + output_length, total_length - output_length,
1768  "%s %d %d %d %d %d\n",
1769  text, left, image_height_ - bottom,
1770  right, image_height_ - top, page_number);
1771  output_length += strlen(result + output_length);
1772  delete [] text;
1773  // Just in case...
  // Stop early if another maximum-length line might not fit.
1774  if (output_length + kMaxBytesPerLine > total_length)
1775  break;
1776  }
1777  } while (it->Next(RIL_SYMBOL));
1778  delete it;
1779  return result;
1780 }
1781 
/**
 * Unicode code points that have a close single-byte stand-in. Entry j is
 * replaced by kLatinChs[j]; the list ends with a 0 sentinel.
 */
const int kUniChs[] = {
  0x20ac,  // euro sign
  0x201c,  // left double quotation mark
  0x201d,  // right double quotation mark
  0x2018,  // left single quotation mark
  0x2019,  // right single quotation mark
  0x2022,  // bullet
  0x2014,  // em dash
  0        // end of list
};
/**
 * Single-byte replacements, index-aligned with kUniChs and likewise ended by
 * a 0 sentinel.
 */
const int kLatinChs[] = {
  0x00a2,  // cent sign
  0x0022,  // quotation mark
  0x0022,  // quotation mark
  0x0027,  // apostrophe
  0x0027,  // apostrophe
  0x00b7,  // middle dot
  0x002d,  // hyphen-minus
  0        // end of list
};
1794 
1801  if (tesseract_ == NULL ||
1802  (!recognition_done_ && Recognize(NULL) < 0))
1803  return NULL;
1804  bool tilde_crunch_written = false;
1805  bool last_char_was_newline = true;
1806  bool last_char_was_tilde = false;
1807 
1808  int total_length = TextLength(NULL);
1809  PAGE_RES_IT page_res_it(page_res_);
1810  char* result = new char[total_length];
1811  char* ptr = result;
1812  for (page_res_it.restart_page(); page_res_it.word () != NULL;
1813  page_res_it.forward()) {
1814  WERD_RES *word = page_res_it.word();
1815  // Process the current word.
1816  if (word->unlv_crunch_mode != CR_NONE) {
1817  if (word->unlv_crunch_mode != CR_DELETE &&
1818  (!tilde_crunch_written ||
1819  (word->unlv_crunch_mode == CR_KEEP_SPACE &&
1820  word->word->space() > 0 &&
1821  !word->word->flag(W_FUZZY_NON) &&
1822  !word->word->flag(W_FUZZY_SP)))) {
1823  if (!word->word->flag(W_BOL) &&
1824  word->word->space() > 0 &&
1825  !word->word->flag(W_FUZZY_NON) &&
1826  !word->word->flag(W_FUZZY_SP)) {
1827  /* Write a space to separate from preceding good text */
1828  *ptr++ = ' ';
1829  last_char_was_tilde = false;
1830  }
1831  if (!last_char_was_tilde) {
1832  // Write a reject char.
1833  last_char_was_tilde = true;
1834  *ptr++ = kUNLVReject;
1835  tilde_crunch_written = true;
1836  last_char_was_newline = false;
1837  }
1838  }
1839  } else {
1840  // NORMAL PROCESSING of non tilde crunched words.
1841  tilde_crunch_written = false;
1843  const char* wordstr = word->best_choice->unichar_string().string();
1844  const STRING& lengths = word->best_choice->unichar_lengths();
1845  int length = lengths.length();
1846  int i = 0;
1847  int offset = 0;
1848 
1849  if (last_char_was_tilde &&
1850  word->word->space() == 0 && wordstr[offset] == ' ') {
1851  // Prevent adjacent tilde across words - we know that adjacent tildes
1852  // within words have been removed.
1853  // Skip the first character.
1854  offset = lengths[i++];
1855  }
1856  if (i < length && wordstr[offset] != 0) {
1857  if (!last_char_was_newline)
1858  *ptr++ = ' ';
1859  else
1860  last_char_was_newline = false;
1861  for (; i < length; offset += lengths[i++]) {
1862  if (wordstr[offset] == ' ' ||
1863  wordstr[offset] == kTesseractReject) {
1864  *ptr++ = kUNLVReject;
1865  last_char_was_tilde = true;
1866  } else {
1867  if (word->reject_map[i].rejected())
1868  *ptr++ = kUNLVSuspect;
1869  UNICHAR ch(wordstr + offset, lengths[i]);
1870  int uni_ch = ch.first_uni();
1871  for (int j = 0; kUniChs[j] != 0; ++j) {
1872  if (kUniChs[j] == uni_ch) {
1873  uni_ch = kLatinChs[j];
1874  break;
1875  }
1876  }
1877  if (uni_ch <= 0xff) {
1878  *ptr++ = static_cast<char>(uni_ch);
1879  last_char_was_tilde = false;
1880  } else {
1881  *ptr++ = kUNLVReject;
1882  last_char_was_tilde = true;
1883  }
1884  }
1885  }
1886  }
1887  }
1888  if (word->word->flag(W_EOL) && !last_char_was_newline) {
1889  /* Add a new line output */
1890  *ptr++ = '\n';
1891  tilde_crunch_written = false;
1892  last_char_was_newline = true;
1893  last_char_was_tilde = false;
1894  }
1895  }
1896  *ptr++ = '\n';
1897  *ptr = '\0';
1898  return result;
1899 }
1900 
1909 bool TessBaseAPI::DetectOrientationScript(int* orient_deg, float* orient_conf, const char** script_name, float* script_conf) {
1910  OSResults osr;
1911 
1912  bool osd = DetectOS(&osr);
1913  if (!osd) {
1914  return false;
1915  }
1916 
1917  int orient_id = osr.best_result.orientation_id;
1918  int script_id = osr.get_best_script(orient_id);
1919  if (orient_conf)
1920  *orient_conf = osr.best_result.oconfidence;
1921  if (orient_deg)
1922  *orient_deg = orient_id * 90; // convert quadrant to degrees
1923 
1924  if (script_name) {
1925  const char* script =
1926  osr.unicharset->get_script_from_script_id(script_id);
1927 
1928  *script_name = script;
1929  }
1930 
1931  if (script_conf)
1932  *script_conf = osr.best_result.sconfidence;
1933 
1934  return true;
1935 }
1936 
1942 char* TessBaseAPI::GetOsdText(int page_number) {
1943  int orient_deg;
1944  float orient_conf;
1945  const char* script_name;
1946  float script_conf;
1947 
1948  if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name, &script_conf))
1949  return NULL;
1950 
1951  // clockwise rotation needed to make the page upright
1952  int rotate = OrientationIdToValue(orient_deg / 90);
1953 
1954  const int kOsdBufsize = 255;
1955  char* osd_buf = new char[kOsdBufsize];
1956  snprintf(osd_buf, kOsdBufsize,
1957  "Page number: %d\n"
1958  "Orientation in degrees: %d\n"
1959  "Rotate: %d\n"
1960  "Orientation confidence: %.2f\n"
1961  "Script: %s\n"
1962  "Script confidence: %.2f\n",
1963  page_number, orient_deg, rotate, orient_conf, script_name,
1964  script_conf);
1965 
1966  return osd_buf;
1967 }
1968 
1971  int* conf = AllWordConfidences();
1972  if (!conf) return 0;
1973  int sum = 0;
1974  int *pt = conf;
1975  while (*pt >= 0) sum += *pt++;
1976  if (pt != conf) sum /= pt - conf;
1977  delete [] conf;
1978  return sum;
1979 }
1980 
1983  if (tesseract_ == NULL ||
1984  (!recognition_done_ && Recognize(NULL) < 0))
1985  return NULL;
1986  int n_word = 0;
1987  PAGE_RES_IT res_it(page_res_);
1988  for (res_it.restart_page(); res_it.word() != NULL; res_it.forward())
1989  n_word++;
1990 
1991  int* conf = new int[n_word+1];
1992  n_word = 0;
1993  for (res_it.restart_page(); res_it.word() != NULL; res_it.forward()) {
1994  WERD_RES *word = res_it.word();
1995  WERD_CHOICE* choice = word->best_choice;
1996  int w_conf = static_cast<int>(100 + 5 * choice->certainty());
1997  // This is the eq for converting Tesseract confidence to 1..100
1998  if (w_conf < 0) w_conf = 0;
1999  if (w_conf > 100) w_conf = 100;
2000  conf[n_word++] = w_conf;
2001  }
2002  conf[n_word] = -1;
2003  return conf;
2004 }
2005 
/**
 * Recognizes the current image with page segmentation mode |mode| and tries
 * to adapt the classifier to the ground-truth string |wordstr|.
 * classify_enable_learning is set to "0" before recognition; on success
 * learning is switched on and LearnWord() is called for the word. The
 * previous page segmentation mode is restored before returning.
 * Returns true on success, false if no text or no word result was obtained.
 * NOTE(review): three source lines of the "No match" branch are not visible
 * in this listing.
 */
2016 bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
2017  int debug = 0;
2018  GetIntVariable("applybox_debug", &debug);
2019  bool success = true;
2020  PageSegMode current_psm = GetPageSegMode();
2021  SetPageSegMode(mode);
2022  SetVariable("classify_enable_learning", "0");
2023  char* text = GetUTF8Text();
  // NOTE(review): text is printed here before the NULL check below.
2024  if (debug) {
2025  tprintf("Trying to adapt \"%s\" to \"%s\"\n", text, wordstr);
2026  }
2027  if (text != NULL) {
2028  PAGE_RES_IT it(page_res_);
2029  WERD_RES* word_res = it.word();
2030  if (word_res != NULL) {
2031  word_res->word->set_text(wordstr);
2032  } else {
2033  success = false;
2034  }
2035  // Check to see if text matches wordstr.
  // Spaces and newlines in text and spaces in wordstr are ignored by the
  // comparison.
2036  int w = 0;
2037  int t = 0;
2038  for (t = 0; text[t] != '\0'; ++t) {
2039  if (text[t] == '\n' || text[t] == ' ')
2040  continue;
2041  while (wordstr[w] != '\0' && wordstr[w] == ' ')
2042  ++w;
2043  if (text[t] != wordstr[w])
2044  break;
2045  ++w;
2046  }
  // Either string having characters left over means they differ.
2047  if (text[t] != '\0' || wordstr[w] != '\0') {
2048  // No match.
2049  delete page_res_;
2050  GenericVector<TBOX> boxes;
2054  PAGE_RES_IT pr_it(page_res_);
2055  if (pr_it.word() == NULL)
2056  success = false;
2057  else
2058  word_res = pr_it.word();
2059  } else {
  // NOTE(review): word_res can still be NULL here when the first lookup
  // above found no word - confirm that this path cannot be reached then.
2060  word_res->BestChoiceToCorrectText();
2061  }
2062  if (success) {
2063  tesseract_->EnableLearning = true;
2064  tesseract_->LearnWord(NULL, word_res);
2065  }
2066  delete [] text;
2067  } else {
2068  success = false;
2069  }
2070  SetPageSegMode(current_psm);
2071  return success;
2072 }
2073 
2081  if (thresholder_ != NULL)
2082  thresholder_->Clear();
2083  ClearResults();
2084  if (tesseract_ != NULL) SetInputImage(NULL);
2085 }
2086 
2094  Clear();
2095  if (thresholder_ != NULL) {
2096  delete thresholder_;
2097  thresholder_ = NULL;
2098  }
2099  if (page_res_ != NULL) {
2100  delete page_res_;
2101  page_res_ = NULL;
2102  }
2103  if (block_list_ != NULL) {
2104  delete block_list_;
2105  block_list_ = NULL;
2106  }
2107  if (paragraph_models_ != NULL) {
2109  delete paragraph_models_;
2110  paragraph_models_ = NULL;
2111  }
2112  if (tesseract_ != NULL) {
2113  delete tesseract_;
2114  if (osd_tesseract_ == tesseract_)
2115  osd_tesseract_ = NULL;
2116  tesseract_ = NULL;
2117  }
2118  if (osd_tesseract_ != NULL) {
2119  delete osd_tesseract_;
2120  osd_tesseract_ = NULL;
2121  }
2122  if (equ_detect_ != NULL) {
2123  delete equ_detect_;
2124  equ_detect_ = NULL;
2125  }
2126  if (input_file_ != NULL) {
2127  delete input_file_;
2128  input_file_ = NULL;
2129  }
2130  if (output_file_ != NULL) {
2131  delete output_file_;
2132  output_file_ = NULL;
2133  }
2134  if (datapath_ != NULL) {
2135  delete datapath_;
2136  datapath_ = NULL;
2137  }
2138  if (language_ != NULL) {
2139  delete language_;
2140  language_ = NULL;
2141  }
2142 }
2143 
2144 // Clear any library-level memory caches.
2145 // There are a variety of expensive-to-load constant data structures (mostly
2146 // language dictionaries) that are cached globally -- surviving the Init()
2147 // and End() of individual TessBaseAPI's. This function allows the clearing
2148 // of these caches.
2151 }
2152 
2157 int TessBaseAPI::IsValidWord(const char *word) {
2158  return tesseract_->getDict().valid_word(word);
2159 }
2160 // Returns true if utf8_character is defined in the UniCharset.
2161 bool TessBaseAPI::IsValidCharacter(const char *utf8_character) {
2162  return tesseract_->unicharset.contains_unichar(utf8_character);
2163 }
2164 
2165 
2166 // TODO(rays) Obsolete this function and replace with a more aptly named
2167 // function that returns image coordinates rather than tesseract coordinates.
2168 bool TessBaseAPI::GetTextDirection(int* out_offset, float* out_slope) {
2169  PageIterator* it = AnalyseLayout();
2170  if (it == NULL) {
2171  return false;
2172  }
2173  int x1, x2, y1, y2;
2174  it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
2175  // Calculate offset and slope (NOTE: Kind of ugly)
2176  if (x2 <= x1) x2 = x1 + 1;
2177  // Convert the point pair to slope/offset of the baseline (in image coords.)
2178  *out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
2179  *out_offset = static_cast<int>(y1 - *out_slope * x1);
2180  // Get the y-coord of the baseline at the left and right edges of the
2181  // textline's bounding box.
2182  int left, top, right, bottom;
2183  if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) {
2184  delete it;
2185  return false;
2186  }
2187  int left_y = IntCastRounded(*out_slope * left + *out_offset);
2188  int right_y = IntCastRounded(*out_slope * right + *out_offset);
2189  // Shift the baseline down so it passes through the nearest bottom-corner
2190  // of the textline's bounding box. This is the difference between the y
2191  // at the lowest (max) edge of the box and the actual box bottom.
2192  *out_offset += bottom - MAX(left_y, right_y);
2193  // Switch back to bottom-up tesseract coordinates. Requires negation of
2194  // the slope and height - offset for the offset.
2195  *out_slope = -*out_slope;
2196  *out_offset = rect_height_ - *out_offset;
2197  delete it;
2198 
2199  return true;
2200 }
2201 
2204  if (tesseract_ != NULL) {
2206  }
2207 }
2208 
2218  if (tesseract_ != NULL) {
2220  // Set it for the sublangs too.
2221  int num_subs = tesseract_->num_sub_langs();
2222  for (int i = 0; i < num_subs; ++i) {
2224  }
2225  }
2226 }
2227 
2230  if (tesseract_ != NULL) tesseract_->fill_lattice_ = f;
2231 }
2232 
2235  if (tesseract_ == NULL) {
2236  tprintf("Please call Init before attempting to set an image.");
2237  return false;
2238  }
2239  if (thresholder_ == NULL)
2241  ClearResults();
2242  return true;
2243 }
2244 
/**
 * Produces the thresholded image for the current input through
 * thresholder_ and stores it in *pix; any image already held in *pix is
 * destroyed first. Also sets the source resolution on tesseract_ from the
 * thresholder's estimated resolution.
 * NOTE(review): several source lines of this function are not visible in
 * this listing (after the resolution warning, around the IsBinary() test and
 * in the ClipToRange() call), so this description covers only what is shown.
 */
2251 void TessBaseAPI::Threshold(Pix** pix) {
2252  ASSERT_HOST(pix != NULL);
2253  if (*pix != NULL)
2254  pixDestroy(pix);
2255  // Zero resolution messes up the algorithms, so make sure it is credible.
2256  int y_res = thresholder_->GetScaledYResolution();
2257  if (y_res < kMinCredibleResolution || y_res > kMaxCredibleResolution) {
2258  // Use the minimum default resolution, as it is safer to under-estimate
2259  // than over-estimate resolution.
2260  tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n",
2261  y_res, kMinCredibleResolution);
2263  }
  // The page segmentation mode currently configured on tesseract_ is handed
  // to the thresholder.
2264  PageSegMode pageseg_mode =
2265  static_cast<PageSegMode>(
2266  static_cast<int>(tesseract_->tessedit_pageseg_mode));
2267  thresholder_->ThresholdToPix(pageseg_mode, pix);
2271  if (!thresholder_->IsBinary()) {
2274  } else {
  // Binary input: no grey image is kept on tesseract_.
2276  tesseract_->set_pix_grey(NULL);
2277  }
2278  // Set the internal resolution that is used for layout parameters from the
2279  // estimated resolution, rather than the image resolution, which may be
2280  // fabricated, but we will use the image resolution, if there is one, to
2281  // report output point sizes.
2282  int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(),
  // Report when clipping changed the estimate.
2285  if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
2286  tprintf("Estimated resolution %d out of range! Corrected to %d\n",
2287  thresholder_->GetScaledEstimatedResolution(), estimated_res);
2288  }
2289  tesseract_->set_source_resolution(estimated_res);
2290  SavePixForCrash(estimated_res, *pix);
2291 }
2292 
2295  if (thresholder_ == NULL || thresholder_->IsEmpty()) {
2296  tprintf("Please call SetImage before attempting recognition.");
2297  return -1;
2298  }
2299  if (recognition_done_)
2300  ClearResults();
2301  if (!block_list_->empty()) {
2302  return 0;
2303  }
2304  if (tesseract_ == NULL) {
2305  tesseract_ = new Tesseract;
2307  }
2308  if (tesseract_->pix_binary() == NULL)
2310  if (tesseract_->ImageWidth() > MAX_INT16 ||
2312  tprintf("Image too large: (%d, %d)\n",
2314  return -1;
2315  }
2316 
2318 
2320  if (equ_detect_ == NULL && datapath_ != NULL) {
2321  equ_detect_ = new EquationDetect(datapath_->string(), NULL);
2322  }
2324  }
2325 
2326  Tesseract* osd_tess = osd_tesseract_;
2327  OSResults osr;
2328  if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && osd_tess == NULL) {
2329  if (strcmp(language_->string(), "osd") == 0) {
2330  osd_tess = tesseract_;
2331  } else {
2332  osd_tesseract_ = new Tesseract;
2334  datapath_->string(), NULL, "osd", OEM_TESSERACT_ONLY,
2335  NULL, 0, NULL, NULL, false) == 0) {
2336  osd_tess = osd_tesseract_;
2339  } else {
2340  tprintf("Warning: Auto orientation and script detection requested,"
2341  " but osd language failed to load\n");
2342  delete osd_tesseract_;
2343  osd_tesseract_ = NULL;
2344  }
2345  }
2346  }
2347 
2348  if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0)
2349  return -1;
2350  // If Devanagari is being recognized, we use different images for page seg
2351  // and for OCR.
2352  tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr);
2353  return 0;
2354 }
2355 
2358  if (tesseract_ != NULL) {
2359  tesseract_->Clear();
2360  }
2361  if (page_res_ != NULL) {
2362  delete page_res_;
2363  page_res_ = NULL;
2364  }
2365  recognition_done_ = false;
2366  if (block_list_ == NULL)
2367  block_list_ = new BLOCK_LIST;
2368  else
2369  block_list_->clear();
2370  if (paragraph_models_ != NULL) {
2372  delete paragraph_models_;
2373  paragraph_models_ = NULL;
2374  }
2375  SavePixForCrash(0, NULL);
2376 }
2377 
2385 int TessBaseAPI::TextLength(int* blob_count) {
2386  if (tesseract_ == NULL || page_res_ == NULL)
2387  return 0;
2388 
2389  PAGE_RES_IT page_res_it(page_res_);
2390  int total_length = 2;
2391  int total_blobs = 0;
2392  // Iterate over the data structures to extract the recognition result.
2393  for (page_res_it.restart_page(); page_res_it.word () != NULL;
2394  page_res_it.forward()) {
2395  WERD_RES *word = page_res_it.word();
2396  WERD_CHOICE* choice = word->best_choice;
2397  if (choice != NULL) {
2398  total_blobs += choice->length() + 2;
2399  total_length += choice->unichar_string().length() + 2;
2400  for (int i = 0; i < word->reject_map.length(); ++i) {
2401  if (word->reject_map[i].rejected())
2402  ++total_length;
2403  }
2404  }
2405  }
2406  if (blob_count != NULL)
2407  *blob_count = total_blobs;
2408  return total_length;
2409 }
2410 
2416  if (tesseract_ == NULL)
2417  return false;
2418  ClearResults();
2419  if (tesseract_->pix_binary() == NULL)
2421  if (input_file_ == NULL)
2422  input_file_ = new STRING(kInputFile);
2424 }
2425 
2427  tesseract_->min_orientation_margin.set_value(margin);
2428 }
2429 
2444 void TessBaseAPI::GetBlockTextOrientations(int** block_orientation,
2445  bool** vertical_writing) {
2446  delete[] *block_orientation;
2447  *block_orientation = NULL;
2448  delete[] *vertical_writing;
2449  *vertical_writing = NULL;
2450  BLOCK_IT block_it(block_list_);
2451 
2452  block_it.move_to_first();
2453  int num_blocks = 0;
2454  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2455  if (!block_it.data()->poly_block()->IsText()) {
2456  continue;
2457  }
2458  ++num_blocks;
2459  }
2460  if (!num_blocks) {
2461  tprintf("WARNING: Found no blocks\n");
2462  return;
2463  }
2464  *block_orientation = new int[num_blocks];
2465  *vertical_writing = new bool[num_blocks];
2466  block_it.move_to_first();
2467  int i = 0;
2468  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
2469  block_it.forward()) {
2470  if (!block_it.data()->poly_block()->IsText()) {
2471  continue;
2472  }
2473  FCOORD re_rotation = block_it.data()->re_rotation();
2474  float re_theta = re_rotation.angle();
2475  FCOORD classify_rotation = block_it.data()->classify_rotation();
2476  float classify_theta = classify_rotation.angle();
2477  double rot_theta = - (re_theta - classify_theta) * 2.0 / PI;
2478  if (rot_theta < 0) rot_theta += 4;
2479  int num_rotations = static_cast<int>(rot_theta + 0.5);
2480  (*block_orientation)[i] = num_rotations;
2481  // The classify_rotation is non-zero only if the text has vertical
2482  // writing direction.
2483  (*vertical_writing)[i] = classify_rotation.y() != 0.0f;
2484  ++i;
2485  }
2486 }
2487 
2488 // ____________________________________________________________________________
2489 // Ocropus add-ons.
2490 
2493  FindLines();
2494  BLOCK_LIST* result = block_list_;
2495  block_list_ = NULL;
2496  return result;
2497 }
2498 
2504 void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) {
2505  delete block_list;
2506 }
2507 
2508 
2510  float xheight,
2511  float descender,
2512  float ascender) {
2513  inT32 xstarts[] = {-32000};
2514  double quad_coeffs[] = {0, 0, baseline};
2515  return new ROW(1,
2516  xstarts,
2517  quad_coeffs,
2518  xheight,
2519  ascender - (baseline + xheight),
2520  descender - baseline,
2521  0,
2522  0);
2523 }
2524 
2527  int width = pixGetWidth(pix);
2528  int height = pixGetHeight(pix);
2529  BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height);
2530 
2531  // Create C_BLOBs from the page
2532  extract_edges(pix, &block);
2533 
2534  // Merge all C_BLOBs
2535  C_BLOB_LIST *list = block.blob_list();
2536  C_BLOB_IT c_blob_it(list);
2537  if (c_blob_it.empty())
2538  return NULL;
2539  // Move all the outlines to the first blob.
2540  C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
2541  for (c_blob_it.forward();
2542  !c_blob_it.at_first();
2543  c_blob_it.forward()) {
2544  C_BLOB *c_blob = c_blob_it.data();
2545  ol_it.add_list_after(c_blob->out_list());
2546  }
2547  // Convert the first blob to the output TBLOB.
2548  return TBLOB::PolygonalCopy(false, c_blob_it.data());
2549 }
2550 
2556 void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) {
2557  TBOX box = tblob->bounding_box();
2558  float x_center = (box.left() + box.right()) / 2.0f;
2559  float baseline = row->base_line(x_center);
2560  float scale = kBlnXHeight / row->x_height();
2561  tblob->Normalize(NULL, NULL, NULL, x_center, baseline, scale, scale,
2562  0.0f, static_cast<float>(kBlnBaselineOffset), false, NULL);
2563 }
2564 
2569 TBLOB *make_tesseract_blob(float baseline, float xheight,
2570  float descender, float ascender,
2571  bool numeric_mode, Pix* pix) {
2572  TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix);
2573 
2574  // Normalize TBLOB
2575  ROW *row =
2576  TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender);
2577  TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode);
2578  delete row;
2579  return tblob;
2580 }
2581 
2587 void TessBaseAPI::AdaptToCharacter(const char *unichar_repr,
2588  int length,
2589  float baseline,
2590  float xheight,
2591  float descender,
2592  float ascender) {
2593  UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length);
2594  TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender,
2596  tesseract_->pix_binary());
2597  float threshold;
2598  float best_rating = -100;
2599 
2600 
2601  // Classify to get a raw choice.
2602  BLOB_CHOICE_LIST choices;
2603  tesseract_->AdaptiveClassifier(blob, &choices);
2604  BLOB_CHOICE_IT choice_it;
2605  choice_it.set_to_list(&choices);
2606  for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
2607  choice_it.forward()) {
2608  if (choice_it.data()->rating() > best_rating) {
2609  best_rating = choice_it.data()->rating();
2610  }
2611  }
2612 
2613  threshold = tesseract_->matcher_good_threshold;
2614 
2615  if (blob->outlines)
2616  tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold,
2618  delete blob;
2619 }
2620 
2621 
2622 PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) {
2623  PAGE_RES *page_res = new PAGE_RES(false, block_list,
2625  tesseract_->recog_all_words(page_res, NULL, NULL, NULL, 1);
2626  return page_res;
2627 }
2628 
2629 PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list,
2630  PAGE_RES* pass1_result) {
2631  if (!pass1_result)
2632  pass1_result = new PAGE_RES(false, block_list,
2634  tesseract_->recog_all_words(pass1_result, NULL, NULL, NULL, 2);
2635  return pass1_result;
2636 }
2637 
2638 void TessBaseAPI::DetectParagraphs(bool after_text_recognition) {
2639  int debug_level = 0;
2640  GetIntVariable("paragraph_debug_level", &debug_level);
2641  if (paragraph_models_ == NULL)
2643  MutableIterator *result_it = GetMutableIterator();
2644  do { // Detect paragraphs for this block
2646  ::tesseract::DetectParagraphs(debug_level, after_text_recognition,
2647  result_it, &models);
2648  *paragraph_models_ += models;
2649  } while (result_it->Next(RIL_BLOCK));
2650  delete result_it;
2651 }
2652 
2655  int length; // of unicode_repr
2656  float cost;
2658 
2659  TESS_CHAR(float _cost, const char *repr, int len = -1) : cost(_cost) {
2660  length = (len == -1 ? strlen(repr) : len);
2661  unicode_repr = new char[length + 1];
2662  strncpy(unicode_repr, repr, length);
2663  }
2664 
2665  TESS_CHAR() { // Satisfies ELISTIZE.
2666  }
2668  delete [] unicode_repr;
2669  }
2670 };
2671 
2672 ELISTIZEH(TESS_CHAR)
2673 ELISTIZE(TESS_CHAR)
2674 
2675 static void add_space(TESS_CHAR_IT* it) {
2676  TESS_CHAR *t = new TESS_CHAR(0, " ");
2677  it->add_after_then_move(t);
2678 }
2679 
2680 
/**
 * Converts a rating into a non-negative cost: the rating is shifted up by
 * 100 and anything that is still negative is clamped to zero.
 *
 * Ratings below -100 are not expected; the clamp is purely defensive.
 */
static float rating_to_cost(float rating) {
  const float shifted = 100 + rating;
  return shifted < 0 ? 0 : shifted;
}
2689 
2694 static void extract_result(TESS_CHAR_IT* out,
2695  PAGE_RES* page_res) {
2696  PAGE_RES_IT page_res_it(page_res);
2697  int word_count = 0;
2698  while (page_res_it.word() != NULL) {
2699  WERD_RES *word = page_res_it.word();
2700  const char *str = word->best_choice->unichar_string().string();
2701  const char *len = word->best_choice->unichar_lengths().string();
2702  TBOX real_rect = word->word->bounding_box();
2703 
2704  if (word_count)
2705  add_space(out);
2706  int n = strlen(len);
2707  for (int i = 0; i < n; i++) {
2708  TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()),
2709  str, *len);
2710  tc->box = real_rect.intersection(word->box_word->BlobBox(i));
2711  out->add_after_then_move(tc);
2712  str += *len;
2713  len++;
2714  }
2715  page_res_it.forward();
2716  word_count++;
2717  }
2718 }
2719 
2725  int** lengths,
2726  float** costs,
2727  int** x0,
2728  int** y0,
2729  int** x1,
2730  int** y1,
2731  PAGE_RES* page_res) {
2732  TESS_CHAR_LIST tess_chars;
2733  TESS_CHAR_IT tess_chars_it(&tess_chars);
2734  extract_result(&tess_chars_it, page_res);
2735  tess_chars_it.move_to_first();
2736  int n = tess_chars.length();
2737  int text_len = 0;
2738  *lengths = new int[n];
2739  *costs = new float[n];
2740  *x0 = new int[n];
2741  *y0 = new int[n];
2742  *x1 = new int[n];
2743  *y1 = new int[n];
2744  int i = 0;
2745  for (tess_chars_it.mark_cycle_pt();
2746  !tess_chars_it.cycled_list();
2747  tess_chars_it.forward(), i++) {
2748  TESS_CHAR *tc = tess_chars_it.data();
2749  text_len += (*lengths)[i] = tc->length;
2750  (*costs)[i] = tc->cost;
2751  (*x0)[i] = tc->box.left();
2752  (*y0)[i] = tc->box.bottom();
2753  (*x1)[i] = tc->box.right();
2754  (*y1)[i] = tc->box.top();
2755  }
2756  char *p = *text = new char[text_len];
2757 
2758  tess_chars_it.move_to_first();
2759  for (tess_chars_it.mark_cycle_pt();
2760  !tess_chars_it.cycled_list();
2761  tess_chars_it.forward()) {
2762  TESS_CHAR *tc = tess_chars_it.data();
2763  strncpy(p, tc->unicode_repr, tc->length);
2764  p += tc->length;
2765  }
2766  return n;
2767 }
2768 
2770 // The resulting features are returned in int_features, which must be
2771 // of size MAX_NUM_INT_FEATURES. The number of features is returned in
2772 // num_features (or 0 if there was a failure).
2773 // On return feature_outline_index is filled with an index of the outline
2774 // corresponding to each feature in int_features.
2775 // TODO(rays) Fix the caller to use outline_counts instead.
2777  INT_FEATURE_STRUCT* int_features,
2778  int* num_features,
2779  int* feature_outline_index) {
2780  GenericVector<int> outline_counts;
2783  INT_FX_RESULT_STRUCT fx_info;
2784  tesseract_->ExtractFeatures(*blob, false, &bl_features,
2785  &cn_features, &fx_info, &outline_counts);
2786  if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) {
2787  *num_features = 0;
2788  return; // Feature extraction failed.
2789  }
2790  *num_features = cn_features.size();
2791  memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0]));
2792  // TODO(rays) Pass outline_counts back and simplify the calling code.
2793  if (feature_outline_index != NULL) {
2794  int f = 0;
2795  for (int i = 0; i < outline_counts.size(); ++i) {
2796  while (f < outline_counts[i])
2797  feature_outline_index[f++] = i;
2798  }
2799  }
2800 }
2801 
2802 // This method returns the row to which a box of specified dimensions would
2803 // belong. If no good match is found, it returns NULL.
2804 ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks,
2805  int left, int top, int right, int bottom) {
2806  TBOX box(left, bottom, right, top);
2807  BLOCK_IT b_it(blocks);
2808  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
2809  BLOCK* block = b_it.data();
2810  if (!box.major_overlap(block->bounding_box()))
2811  continue;
2812  ROW_IT r_it(block->row_list());
2813  for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
2814  ROW* row = r_it.data();
2815  if (!box.major_overlap(row->bounding_box()))
2816  continue;
2817  WERD_IT w_it(row->word_list());
2818  for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
2819  WERD* word = w_it.data();
2820  if (box.major_overlap(word->bounding_box()))
2821  return row;
2822  }
2823  }
2824  }
2825  return NULL;
2826 }
2827 
2830  int num_max_matches,
2831  int* unichar_ids,
2832  float* ratings,
2833  int* num_matches_returned) {
2834  BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
2835  tesseract_->AdaptiveClassifier(blob, choices);
2836  BLOB_CHOICE_IT choices_it(choices);
2837  int& index = *num_matches_returned;
2838  index = 0;
2839  for (choices_it.mark_cycle_pt();
2840  !choices_it.cycled_list() && index < num_max_matches;
2841  choices_it.forward()) {
2842  BLOB_CHOICE* choice = choices_it.data();
2843  unichar_ids[index] = choice->unichar_id();
2844  ratings[index] = choice->rating();
2845  ++index;
2846  }
2847  *num_matches_returned = index;
2848  delete choices;
2849 }
2850 
2852 const char* TessBaseAPI::GetUnichar(int unichar_id) {
2853  return tesseract_->unicharset.id_to_unichar(unichar_id);
2854 }
2855 
2857 const Dawg *TessBaseAPI::GetDawg(int i) const {
2858  if (tesseract_ == NULL || i >= NumDawgs()) return NULL;
2859  return tesseract_->getDict().GetDawg(i);
2860 }
2861 
2864  return tesseract_ == NULL ? 0 : tesseract_->getDict().NumDawgs();
2865 }
2866 
2867 #ifndef NO_CUBE_BUILD
2868 
2870  return (tesseract_ == NULL) ? NULL : tesseract_->GetCubeRecoContext();
2871 }
2872 #endif // NO_CUBE_BUILD
2873 
2875 STRING HOcrEscape(const char* text) {
2876  STRING ret;
2877  const char *ptr;
2878  for (ptr = text; *ptr; ptr++) {
2879  switch (*ptr) {
2880  case '<': ret += "&lt;"; break;
2881  case '>': ret += "&gt;"; break;
2882  case '&': ret += "&amp;"; break;
2883  case '"': ret += "&quot;"; break;
2884  case '\'': ret += "&#39;"; break;
2885  default: ret += *ptr;
2886  }
2887  }
2888  return ret;
2889 }
2890 
2891 } // namespace tesseract.
Definition: werd.h:36
int SegmentPage(const STRING *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
const char * kOldVarsFile
Definition: baseapi.cpp:99
OcrEngineMode oem() const
Definition: baseapi.h:761
const int kMinRectSize
Definition: baseapi.cpp:84
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:128
void * cancel_this
called whenever progress increases
Definition: ocrclass.h:127
static ROW * MakeTessOCRRow(float baseline, float xheight, float descender, float ascender)
Definition: baseapi.cpp:2509
const char * GetInitLanguagesAsString() const
Definition: baseapi.cpp:344
void DeleteUnusedDawgs()
Definition: dawg_cache.h:46
virtual R Run()=0
ELISTIZEH(AmbigSpec)
const int kBytesPer64BitNumber
Definition: baseapi.cpp:1730
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:868
void SetSourceYResolution(int ppi)
Definition: thresholder.h:86
int IsValidWord(const char *word)
Definition: baseapi.cpp:2157
void SavePixForCrash(int resolution, Pix *pix)
Definition: globaloc.cpp:34
int UNICHAR_ID
Definition: unichar.h:33
const char * GetUnichar(int unichar_id)
Definition: baseapi.cpp:2852
CubeRecoContext * GetCubeRecoContext() const
Definition: baseapi.cpp:2869
Orientation and script detection only.
Definition: publictypes.h:152
virtual void Run(A1, A2, A3, A4)=0
TESS_LOCAL void AdaptToCharacter(const char *unichar_repr, int length, float baseline, float xheight, float descender, float ascender)
Definition: baseapi.cpp:2587
#define TRUE
Definition: capi.h:45
Tesseract * tesseract() const
Definition: baseapi.h:759
TESS_LOCAL bool InternalSetImage()
Definition: baseapi.cpp:2234
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
PageSegMode GetPageSegMode() const
Definition: baseapi.cpp:465
const char kUNLVReject
Definition: baseapi.cpp:88
static void PrintParams(FILE *fp, const ParamsVectors *member_params)
Definition: params.cpp:179
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:160
void set_unlv_suspects(WERD_RES *word)
Definition: output.cpp:305
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:938
Pix * pix_original() const
void set_pix_grey(Pix *grey_pix)
void SetEquationDetect(EquationDetect *detector)
STRING lang
Definition: ccutil.h:67
inT16 bottom() const
Definition: rect.h:61
float Confidence(PageIteratorLevel level) const
double(Dict::* probability_in_context_)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Probability in context function used by the ngram permuter.
Definition: dict.h:364
#define MAX(x, y)
Definition: ndminx.h:24
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:870
STRING * language_
Last initialized language.
Definition: baseapi.h:875
static bool SetParam(const char *name, const char *value, SetParamConstraint constraint, ParamsVectors *member_params)
Definition: params.cpp:97
void ClearAdaptiveClassifier()
Definition: baseapi.cpp:507
ResultIterator * GetIterator()
Definition: baseapi.cpp:1268
int GetSourceYResolution() const
Definition: thresholder.h:90
bool GetDoubleVariable(const char *name, double *value) const
Definition: baseapi.cpp:248
ELISTIZE(AmbigSpec)
Pix * GetImage(PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const
static ROW * FindRowForBox(BLOCK_LIST *blocks, int left, int top, int right, int bottom)
Definition: baseapi.cpp:2804
const int kMaxIntSize
Definition: baseapi.cpp:101
void CorrectClassifyWords(PAGE_RES *page_res)
Definition: applybox.cpp:772
int init_tesseract(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params)
Definition: tessedit.cpp:290
uinT8 space()
Definition: werd.h:104
Definition: strngs.h:44
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:877
int(Dict::* letter_is_okay_)(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const
Definition: dict.h:354
int InitLangMod(const char *datapath, const char *language)
Definition: baseapi.cpp:420
virtual Pix * GetPixRectThresholds()
bool GetIntVariable(const char *name, int *value) const
Definition: baseapi.cpp:226
TESSLINE * outlines
Definition: blobs.h:377
const int kLatinChs[]
Definition: baseapi.cpp:1791
static size_t getOpenCLDevice(void **device)
Definition: baseapi.cpp:160
Boxa * GetConnectedComponents(Pixa **cc)
Definition: baseapi.cpp:627
OSBestResult best_result
Definition: osdetect.h:79
float oconfidence
Definition: osdetect.h:44
void ExtractFontName(const STRING &filename, STRING *fontname)
Definition: blobclass.cpp:46
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:120
#define PERF_COUNT_END
float rating() const
Definition: ratngs.h:79
void BestChoiceToCorrectText()
Definition: pageres.cpp:917
bool BeginDocument(const char *title)
Definition: renderer.cpp:69
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:458
Boxa * GetTextlines(const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:594
TruthCallback * truth_cb_
Definition: baseapi.h:878
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
const STRING & unichar_lengths() const
Definition: ratngs.h:532
Definition: ocrrow.h:32
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
static void CatchSignals()
Definition: baseapi.cpp:180
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:639
Definition: werd.h:60
const int kNumbersPerBlob
Definition: baseapi.cpp:1717
int inT32
Definition: host.h:102
float x_height() const
Definition: ocrrow.h:61
const char * WordRecognitionLanguage() const
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:866
#define MAX_INT16
Definition: host.h:119
unsigned char uinT8
Definition: host.h:99
#define MAX_NUM_INT_FEATURES
Definition: intproto.h:132
Definition: points.h:189
bool AdaptToWordStr(PageSegMode mode, const char *wordstr)
Definition: baseapi.cpp:2016
void set_source_resolution(int ppi)
TESS_LOCAL LTRResultIterator * GetLTRIterator()
Definition: baseapi.cpp:1251
int GetScaledEstimatedResolution() const
Definition: thresholder.h:106
virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const
static TBLOB * PolygonalCopy(bool allow_detailed_fx, C_BLOB *src)
Definition: blobs.cpp:344
STRING HOcrEscape(const char *text)
Definition: baseapi.cpp:2875
bool empty() const
Definition: genericvector.h:84
const char * kInputFile
Definition: baseapi.cpp:95
static void DeleteBlockList(BLOCK_LIST *block_list)
Definition: baseapi.cpp:2504
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2294
bool GetVariableAsString(const char *name, STRING *val)
Definition: baseapi.cpp:257
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:294
Pix * pix_binary() const
WERD * word
Definition: pageres.h:175
tesseract::BoxWord * box_word
Definition: pageres.h:250
TBOX bounding_box() const
Definition: blobs.cpp:482
int length() const
Definition: ratngs.h:301
WERD_RES * word() const
Definition: pageres.h:736
const char * string() const
Definition: params.h:203
WERD_LIST * word_list()
Definition: ocrrow.h:52
const int kMaxCredibleResolution
Definition: baseapi.cpp:108
TESS_LOCAL int TextLength(int *blob_count)
Definition: baseapi.cpp:2385
void SetDictFunc(DictFunc f)
Definition: baseapi.cpp:2203
WERD_RES * forward()
Definition: pageres.h:716
bool wordrec_run_blamer
Definition: wordrec.h:168
TESS_API int get_best_script(int orientation_id) const
Definition: osdetect.cpp:114
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:185
Pix * pix_grey() const
TBOX bounding_box() const
Definition: ocrrow.h:85
const int kBytesPerNumber
Definition: baseapi.cpp:1722
void PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr)
int init_tesseract_lm(const char *arg0, const char *textbase, const char *language)
Definition: tessedit.cpp:465
virtual bool IsAtBeginningOf(PageIteratorLevel level) const
bool IsBinary() const
Returns true if the source image is binary.
Definition: thresholder.h:75
const char * GetStringVariable(const char *name) const
Definition: baseapi.cpp:242
const int kMinCredibleResolution
Definition: baseapi.cpp:106
char * TesseractRect(const unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height)
Definition: baseapi.cpp:485
virtual ~TessBaseAPI()
Definition: baseapi.cpp:133
static bool GetParamAsString(const char *name, const ParamsVectors *member_params, STRING *value)
Definition: params.cpp:141
#define tprintf(...)
Definition: tprintf.h:31
GenericVector< IntParam * > int_params
Definition: params.h:44
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:70
bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1186
int OrientationIdToValue(const int &id)
Definition: osdetect.cpp:565
REJMAP reject_map
Definition: pageres.h:271
const char * get_script_from_script_id(int id) const
Definition: unicharset.h:801
void SetSourceResolution(int ppi)
Definition: baseapi.cpp:531
double(Dict::* ProbabilityInContextFunc)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Definition: baseapi.h:85
void set_min_orientation_margin(double margin)
Definition: baseapi.cpp:2426
const TBOX & BlobBox(int index) const
Definition: boxword.h:86
void assign(const char *cstr, int len)
Definition: strngs.cpp:425
void set_pix_thresholds(Pix *thresholds)
int size() const
Definition: genericvector.h:72
ROW_RES * row() const
Definition: pageres.h:739
TBOX bounding_box() const
Definition: werd.cpp:160
int(Dict::* DictFunc)(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const
Definition: baseapi.h:83
float base_line(float xpos) const
Definition: ocrrow.h:56
Definition: rect.h:30
Dict & getDict()
Definition: classify.h:65
void SetProbabilityInContextFunc(ProbabilityInContextFunc f)
Definition: baseapi.cpp:2217
void SetRectangle(int left, int top, int width, int height)
MutableIterator * GetMutableIterator()
Definition: baseapi.cpp:1285
const UNICHARSET & getUnicharset() const
Definition: dict.h:97
bool classify_bln_numeric_mode
Definition: classify.h:500
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const
void LearnWord(const char *fontname, WERD_RES *word)
Definition: adaptmatch.cpp:244
const char * c_str() const
Definition: strngs.cpp:212
BLOCK_LIST * FindLinesCreateBlockList()
Definition: baseapi.cpp:2492
#define FALSE
Definition: capi.h:46
void SetInputName(const char *name)
Definition: baseapi.cpp:199
const STRING & unichar_string() const
Definition: ratngs.h:525
CMD_EVENTS mode
Definition: pgedit.cpp:116
TBLOB * make_tesseract_blob(float baseline, float xheight, float descender, float ascender, bool numeric_mode, Pix *pix)
Definition: baseapi.cpp:2569
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:521
static TBLOB * MakeTBLOB(Pix *pix)
Definition: baseapi.cpp:2526
C_BLOB_LIST * blob_list()
get blobs
Definition: ocrblock.h:132
TBOX intersection(const TBOX &box) const
Definition: rect.cpp:87
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59
inT16 right() const
Definition: rect.h:75
WERD_RES * restart_page()
Definition: pageres.h:683
bool DetectOS(OSResults *)
Definition: baseapi.cpp:2415
int CubeAPITest(Boxa *boxa_blocks, Pixa *pixa_blocks, Boxa *boxa_words, Pixa *pixa_words, const FCOORD &reskew, Pix *page_pix, PAGE_RES *page_res)
Definition: baseapi.cpp:754
const char kTesseractReject
Definition: baseapi.cpp:86
void DetectParagraphs(int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel * > *models)
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:115
#define TESSERACT_VERSION_STR
Definition: baseapi.h:23
PolyBlockType BlockType() const
void ReadConfigFile(const char *filename)
Definition: baseapi.cpp:444
int orientation_and_script_detection(STRING &filename, OSResults *osr, tesseract::Tesseract *tess)
Definition: osdetect.cpp:191
bool IsEmpty() const
Return true if no image has been set.
Definition: thresholder.cpp:50
#define PERF_COUNT_SUB(SUB)
inT32 length() const
Definition: strngs.cpp:196
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:809
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:420
Pix * GetThresholdedImage()
Definition: baseapi.cpp:569
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
void GetLoadedLanguagesAsVector(GenericVector< STRING > *langs) const
Definition: baseapi.cpp:354
int orientation_id
Definition: osdetect.h:41
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:416
TESS_CHAR(float _cost, const char *repr, int len=-1)
Definition: baseapi.cpp:2659
struct TessResultRenderer TessResultRenderer
Definition: capi.h:80
virtual void ThresholdToPix(PageSegMode pageseg_mode, Pix **pix)
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:867
virtual bool Next(PageIteratorLevel level)
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:871
inT16 left() const
Definition: rect.h:68
void add_str_int(const char *str, int number)
Definition: strngs.cpp:384
ROW * row
Definition: pageres.h:127
float y() const
Definition: points.h:212
const int kUniChs[]
Definition: baseapi.cpp:1787
inT32 length() const
Definition: rejctmap.h:236
static const char * Version()
Definition: baseapi.cpp:140
TESS_LOCAL PAGE_RES * RecognitionPass1(BLOCK_LIST *block_list)
Definition: baseapi.cpp:2622
UNICHARSET unicharset
Definition: ccutil.h:70
FILE * init_recog_training(const STRING &fname)
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift, bool inverse, Pix *pix)
Definition: blobs.cpp:413
float rating() const
Definition: ratngs.h:325
static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode)
Definition: baseapi.cpp:2556
static void ExtractFeatures(const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
Definition: intfx.cpp:445
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold, ADAPT_TEMPLATES adaptive_templates)
Definition: adaptmatch.cpp:886
STRING datadir
Definition: ccutil.h:65
void delete_data_pointers()
CANCEL_FUNC cancel
for errcode use
Definition: ocrclass.h:125
virtual Pix * GetPixRectGrey()
const char kUNLVSuspect
Definition: baseapi.cpp:90
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:64
Boxa * GetStrips(Pixa **pixa, int **blockids)
Definition: baseapi.cpp:608
static DawgCache * GlobalDawgCache()
Definition: dict.cpp:198
virtual bool Next(PageIteratorLevel level)
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
GenericVector< BoolParam * > bool_params
Definition: params.h:45
const int kMaxBytesPerLine
Definition: baseapi.cpp:1737
#define PERF_COUNT_START(FUNCT_NAME)
Definition: werd.h:35
void RunAdaptiveClassifier(TBLOB *blob, int num_max_matches, int *unichar_ids, float *ratings, int *num_matches_returned)
Definition: baseapi.cpp:2829
const char * GetDatapath()
Definition: baseapi.cpp:948
TESS_LOCAL void DetectParagraphs(bool after_text_recognition)
Definition: baseapi.cpp:2638
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
BLOCK_RES * block() const
Definition: pageres.h:742
static void ClearPersistentCache()
Definition: baseapi.cpp:2149
int RecognizeForChopTest(ETEXT_DESC *monitor)
Definition: baseapi.cpp:906
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:873
ParamsVectors * params()
Definition: ccutil.h:63
const int kBytesPerBoxFileLine
Definition: baseapi.cpp:1728
WERD_CHOICE * best_choice
Definition: pageres.h:219
StrongScriptDirection WordDirection() const
void TidyUp(PAGE_RES *page_res)
Definition: applybox.cpp:706
bool WriteTRFile(const STRING &filename)
Definition: blobclass.cpp:97
void split(const char c, GenericVector< STRING > *splited)
Definition: strngs.cpp:289
void GetFeaturesForBlob(TBLOB *blob, INT_FEATURE_STRUCT *int_features, int *num_features, int *feature_outline_index)
Definition: baseapi.cpp:2776
bool major_overlap(const TBOX &box) const
Definition: rect.h:358
void read_config_file(const char *filename, SetParamConstraint constraint)
Definition: tessedit.cpp:57
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:730
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:62
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
Definition: baseapi.h:876
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
Definition: control.cpp:293
TESS_LOCAL PAGE_RES * RecognitionPass2(BLOCK_LIST *block_list, PAGE_RES *pass1_result)
Definition: baseapi.cpp:2629
Tesseract * get_sub_lang(int index) const
void InitAdaptiveClassifier(bool load_pre_trained_templates)
Definition: adaptmatch.cpp:527
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:155
void add_str_double(const char *str, double number)
Definition: strngs.cpp:394
const Dawg * GetDawg(int index) const
Return i-th dawg pointer recorded in the dawgs_ vector.
Definition: dict.h:412
void chomp_string(char *str)
Definition: helpers.h:75
PAGE_RES * SetupApplyBoxes(const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list)
Definition: applybox.cpp:217
const int kBlnXHeight
Definition: normalis.h:28
void SetFillLatticeFunc(FillLatticeFunc f)
Definition: baseapi.cpp:2229
int push_back(T object)
int num_sub_langs() const
GenericVector< DoubleParam * > double_params
Definition: params.h:47
char * GetOsdText(int page_number)
Definition: baseapi.cpp:1942
inT16 top() const
Definition: rect.h:54
bool DetectOrientationScript(int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
Definition: baseapi.cpp:1909
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
Definition: thresholder.cpp:45
const char * WordFontAttributes(bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const
void GetAvailableLanguagesAsVector(GenericVector< STRING > *langs) const
Definition: baseapi.cpp:368
int IntCastRounded(double x)
Definition: helpers.h:172
void ApplyBoxTraining(const STRING &fontname, PAGE_RES *page_res)
Definition: applybox.cpp:796
bool IsValidCharacter(const char *utf8_character)
Definition: baseapi.cpp:2161
float sconfidence
Definition: osdetect.h:43
float angle() const
find angle
Definition: points.h:249
unsigned char BOOL8
Definition: host.h:113
char * GetTSVText(int page_number)
Definition: baseapi.cpp:1613
char * GetBoxText(int page_number)
Definition: baseapi.cpp:1745
const int kBlnBaselineOffset
Definition: normalis.h:29
bool ProcessPagesInternal(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1094
static TESS_LOCAL int TesseractExtractResult(char **text, int **lengths, float **costs, int **x0, int **y0, int **x1, int **y1, PAGE_RES *page_res)
Definition: baseapi.cpp:2724
PAGE_RES * ApplyBoxes(const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list)
Definition: applybox.cpp:117
bool GetTextDirection(int *out_offset, float *out_slope)
Definition: baseapi.cpp:2168
bool ProcessPages(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1068
static ResultIterator * StartOfParagraph(const LTRResultIterator &resit)
#define BOOL
Definition: capi.h:44
bool AddImage(TessBaseAPI *api)
Definition: renderer.cpp:80
#define ASSERT_HOST(x)
Definition: errcode.h:84
virtual char * GetUTF8Text(PageIteratorLevel level) const
int ImageWidth() const
const char * string() const
Definition: strngs.cpp:201
Definition: blobs.h:261
void SetRectangle(int left, int top, int width, int height)
Definition: baseapi.cpp:558
int NumDawgs() const
Definition: baseapi.cpp:2863
void SetOutputName(const char *name)
Definition: baseapi.cpp:207
void Orientation(tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
bool SetDebugVariable(const char *name, const char *value)
Definition: baseapi.cpp:220
GenericVector< StringParam * > string_params
Definition: params.h:46
virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, int *imageheight)
void ReSegmentByClassification(PAGE_RES *page_res)
Definition: applybox.cpp:509
#define UNICHAR_LEN
Definition: unichar.h:30
#define MAX_PATH
Definition: platform.h:47
void GetBlockTextOrientations(int **block_orientation, bool **vertical_writing)
Definition: baseapi.cpp:2444
const char * GetInputName()
Definition: baseapi.cpp:942
void recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
void PrintVariables(FILE *fp) const
Definition: baseapi.cpp:262
int NumDawgs() const
Return the number of dawgs in the dawgs_ vector.
Definition: dict.h:410
char * GetHOCRText(ETEXT_DESC *monitor, int page_number)
Definition: baseapi.cpp:1442
bool stream_filelist
Definition: baseapi.cpp:79
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:829
Boxa * GetWords(Pixa **pixa)
Definition: baseapi.cpp:617
void pgeditor_main(int width, int height, PAGE_RES *page_res)
Definition: pgedit.cpp:337
int first_uni() const
Definition: unichar.cpp:97
int GetThresholdedImageScaleFactor() const
Definition: baseapi.cpp:721
#define BOOL_VAR(name, val, comment)
Definition: params.h:280
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:869
Pix * GetBinaryImage(PageIteratorLevel level) const
virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const
bool PSM_OSD_ENABLED(int pageseg_mode)
Definition: publictypes.h:179
bool Empty(PageIteratorLevel level) const
Definition: ocrblock.h:30
void signal_exit(int signal_code)
Definition: globaloc.cpp:52
void ReadDebugConfigFile(const char *filename)
Definition: baseapi.cpp:449
const Dawg * GetDawg(int i) const
Definition: baseapi.cpp:2857
bool tessedit_resegment_from_line_boxes
Boxa * GetRegions(Pixa **pixa)
Definition: baseapi.cpp:582
UNICHARSET * unicharset
Definition: osdetect.h:78
void set_deadline_msecs(inT32 deadline_msecs)
Definition: ocrclass.h:146
void set_pix_original(Pix *original_pix)
tesseract::ParamsVectors * GlobalParams()
Definition: params.cpp:33
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:274
#define DIR
Definition: polyaprx.cpp:39
BLOCK * block
Definition: pageres.h:99
char * GetUTF8Text(PageIteratorLevel level) const
void MaximallyChopWord(const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res)
Definition: applybox.cpp:253
STRING * input_file_
Name used by training code.
Definition: baseapi.h:872
float certainty() const
Definition: ratngs.h:328
int GetScaledYResolution() const
Definition: thresholder.h:93
static void ResetToDefaults(ParamsVectors *member_params)
Definition: params.cpp:204
void extract_edges(Pix *pix, BLOCK *block)
Definition: edgblob.cpp:334
int ImageHeight() const
void DumpPGM(const char *filename)
Definition: baseapi.cpp:729
CubeRecoContext * GetCubeRecoContext()
double matcher_good_threshold
Definition: classify.h:420
virtual TESS_LOCAL void Threshold(Pix **pix)
Definition: baseapi.cpp:2251
void(Wordrec::* FillLatticeFunc)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: baseapi.h:92
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:214
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:874
UNICHAR_ID unichar_id() const
Definition: ratngs.h:76
void set_text(const char *new_text)
Definition: werd.h:126
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
#define PI
Definition: const.h:19
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:234