tesseract  3.05.00
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Advanced API

Functions

void tesseract::TessBaseAPI::SetImage (const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
 
void tesseract::TessBaseAPI::SetImage (Pix *pix)
 
void tesseract::TessBaseAPI::SetSourceResolution (int ppi)
 
void tesseract::TessBaseAPI::SetRectangle (int left, int top, int width, int height)
 
void tesseract::TessBaseAPI::SetThresholder (ImageThresholder *thresholder)
 
Pix * tesseract::TessBaseAPI::GetThresholdedImage ()
 
Boxa * tesseract::TessBaseAPI::GetRegions (Pixa **pixa)
 
Boxa * tesseract::TessBaseAPI::GetTextlines (const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * tesseract::TessBaseAPI::GetTextlines (Pixa **pixa, int **blockids)
 
Boxa * tesseract::TessBaseAPI::GetStrips (Pixa **pixa, int **blockids)
 
Boxa * tesseract::TessBaseAPI::GetWords (Pixa **pixa)
 
Boxa * tesseract::TessBaseAPI::GetConnectedComponents (Pixa **cc)
 
Boxa * tesseract::TessBaseAPI::GetComponentImages (const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * tesseract::TessBaseAPI::GetComponentImages (const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
 
int tesseract::TessBaseAPI::GetThresholdedImageScaleFactor () const
 
void tesseract::TessBaseAPI::DumpPGM (const char *filename)
 
PageIterator * tesseract::TessBaseAPI::AnalyseLayout ()
 
PageIterator * tesseract::TessBaseAPI::AnalyseLayout (bool merge_similar_words)
 
int tesseract::TessBaseAPI::Recognize (ETEXT_DESC *monitor)
 
int tesseract::TessBaseAPI::RecognizeForChopTest (ETEXT_DESC *monitor)
 
bool tesseract::TessBaseAPI::ProcessPages (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool tesseract::TessBaseAPI::ProcessPagesInternal (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool tesseract::TessBaseAPI::ProcessPage (Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
ResultIterator * tesseract::TessBaseAPI::GetIterator ()
 
MutableIterator * tesseract::TessBaseAPI::GetMutableIterator ()
 
char * tesseract::TessBaseAPI::GetUTF8Text ()
 
char * tesseract::TessBaseAPI::GetHOCRText (ETEXT_DESC *monitor, int page_number)
 
char * tesseract::TessBaseAPI::GetHOCRText (int page_number)
 
char * tesseract::TessBaseAPI::GetTSVText (int page_number)
 
char * tesseract::TessBaseAPI::GetBoxText (int page_number)
 
char * tesseract::TessBaseAPI::GetUNLVText ()
 
bool tesseract::TessBaseAPI::DetectOrientationScript (int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
 
char * tesseract::TessBaseAPI::GetOsdText (int page_number)
 
int tesseract::TessBaseAPI::MeanTextConf ()
 
int * tesseract::TessBaseAPI::AllWordConfidences ()
 
bool tesseract::TessBaseAPI::AdaptToWordStr (PageSegMode mode, const char *wordstr)
 
void tesseract::TessBaseAPI::Clear ()
 
void tesseract::TessBaseAPI::End ()
 
static void tesseract::TessBaseAPI::ClearPersistentCache ()
 
int tesseract::TessBaseAPI::IsValidWord (const char *word)
 
bool tesseract::TessBaseAPI::IsValidCharacter (const char *utf8_character)
 
bool tesseract::TessBaseAPI::GetTextDirection (int *out_offset, float *out_slope)
 
void tesseract::TessBaseAPI::SetDictFunc (DictFunc f)
 
void tesseract::TessBaseAPI::SetProbabilityInContextFunc (ProbabilityInContextFunc f)
 
void tesseract::TessBaseAPI::SetFillLatticeFunc (FillLatticeFunc f)
 
bool tesseract::TessBaseAPI::DetectOS (OSResults *)
 
void tesseract::TessBaseAPI::GetFeaturesForBlob (TBLOB *blob, INT_FEATURE_STRUCT *int_features, int *num_features, int *feature_outline_index)
 
static ROW * tesseract::TessBaseAPI::FindRowForBox (BLOCK_LIST *blocks, int left, int top, int right, int bottom)
 
void tesseract::TessBaseAPI::RunAdaptiveClassifier (TBLOB *blob, int num_max_matches, int *unichar_ids, float *ratings, int *num_matches_returned)
 
const char * tesseract::TessBaseAPI::GetUnichar (int unichar_id)
 
const Dawg * tesseract::TessBaseAPI::GetDawg (int i) const
 
int tesseract::TessBaseAPI::NumDawgs () const
 
static ROW * tesseract::TessBaseAPI::MakeTessOCRRow (float baseline, float xheight, float descender, float ascender)
 
static TBLOB * tesseract::TessBaseAPI::MakeTBLOB (Pix *pix)
 
static void tesseract::TessBaseAPI::NormalizeTBLOB (TBLOB *tblob, ROW *row, bool numeric_mode)
 
Tesseract * tesseract::TessBaseAPI::tesseract () const
 
OcrEngineMode tesseract::TessBaseAPI::oem () const
 
void tesseract::TessBaseAPI::InitTruthCallback (TruthCallback *cb)
 
CubeRecoContext * tesseract::TessBaseAPI::GetCubeRecoContext () const
 
void tesseract::TessBaseAPI::set_min_orientation_margin (double margin)
 
void tesseract::TessBaseAPI::GetBlockTextOrientations (int **block_orientation, bool **vertical_writing)
 
BLOCK_LIST * tesseract::TessBaseAPI::FindLinesCreateBlockList ()
 
static void tesseract::TessBaseAPI::DeleteBlockList (BLOCK_LIST *block_list)
 

Detailed Description

The following methods break TesseractRect into pieces, so you can get hold of the thresholded image, get the text in different formats, get bounding boxes, confidences etc.

Function Documentation

bool tesseract::TessBaseAPI::AdaptToWordStr ( PageSegMode  mode,
const char *  wordstr 
)

Applies the given word to the adaptive classifier if possible. The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can tell the boundaries of the graphemes. Assumes that SetImage/SetRectangle have been used to set the image to the given word. The mode arg should be PSM_SINGLE_WORD or PSM_CIRCLE_WORD, as that will be used to control layout analysis. The currently set PageSegMode is preserved. Returns false if adaption was not possible for some reason.

Definition at line 2016 of file baseapi.cpp.

2016  {
2017  int debug = 0;
2018  GetIntVariable("applybox_debug", &debug);
2019  bool success = true;
2020  PageSegMode current_psm = GetPageSegMode();
2022  SetVariable("classify_enable_learning", "0");
2023  char* text = GetUTF8Text();
2024  if (debug) {
2025  tprintf("Trying to adapt \"%s\" to \"%s\"\n", text, wordstr);
2026  }
2027  if (text != NULL) {
2028  PAGE_RES_IT it(page_res_);
2029  WERD_RES* word_res = it.word();
2030  if (word_res != NULL) {
2031  word_res->word->set_text(wordstr);
2032  } else {
2033  success = false;
2034  }
2035  // Check to see if text matches wordstr.
2036  int w = 0;
2037  int t = 0;
2038  for (t = 0; text[t] != '\0'; ++t) {
2039  if (text[t] == '\n' || text[t] == ' ')
2040  continue;
2041  while (wordstr[w] != '\0' && wordstr[w] == ' ')
2042  ++w;
2043  if (text[t] != wordstr[w])
2044  break;
2045  ++w;
2046  }
2047  if (text[t] != '\0' || wordstr[w] != '\0') {
2048  // No match.
2049  delete page_res_;
2050  GenericVector<TBOX> boxes;
2054  PAGE_RES_IT pr_it(page_res_);
2055  if (pr_it.word() == NULL)
2056  success = false;
2057  else
2058  word_res = pr_it.word();
2059  } else {
2060  word_res->BestChoiceToCorrectText();
2061  }
2062  if (success) {
2063  tesseract_->EnableLearning = true;
2064  tesseract_->LearnWord(NULL, word_res);
2065  }
2066  delete [] text;
2067  } else {
2068  success = false;
2069  }
2070  SetPageSegMode(current_psm);
2071  return success;
2072 }
PageSegMode GetPageSegMode() const
Definition: baseapi.cpp:465
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:870
bool GetIntVariable(const char *name, int *value) const
Definition: baseapi.cpp:226
void BestChoiceToCorrectText()
Definition: pageres.cpp:917
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:458
WERD * word
Definition: pageres.h:175
#define tprintf(...)
Definition: tprintf.h:31
void LearnWord(const char *fontname, WERD_RES *word)
Definition: adaptmatch.cpp:244
CMD_EVENTS mode
Definition: pgedit.cpp:116
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:871
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
void TidyUp(PAGE_RES *page_res)
Definition: applybox.cpp:706
PAGE_RES * SetupApplyBoxes(const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list)
Definition: applybox.cpp:217
void ReSegmentByClassification(PAGE_RES *page_res)
Definition: applybox.cpp:509
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:214
void set_text(const char *new_text)
Definition: werd.h:126
int * tesseract::TessBaseAPI::AllWordConfidences ( )

Returns all word confidences (between 0 and 100) in an array, terminated by -1. The calling function must delete [] after use. The number of confidences should correspond to the number of space-delimited words in GetUTF8Text.

Returns an array of all word confidences, terminated by -1.

Definition at line 1982 of file baseapi.cpp.

1982  {
1983  if (tesseract_ == NULL ||
1984  (!recognition_done_ && Recognize(NULL) < 0))
1985  return NULL;
1986  int n_word = 0;
1987  PAGE_RES_IT res_it(page_res_);
1988  for (res_it.restart_page(); res_it.word() != NULL; res_it.forward())
1989  n_word++;
1990 
1991  int* conf = new int[n_word+1];
1992  n_word = 0;
1993  for (res_it.restart_page(); res_it.word() != NULL; res_it.forward()) {
1994  WERD_RES *word = res_it.word();
1995  WERD_CHOICE* choice = word->best_choice;
1996  int w_conf = static_cast<int>(100 + 5 * choice->certainty());
1997  // This is the eq for converting Tesseract confidence to 1..100
1998  if (w_conf < 0) w_conf = 0;
1999  if (w_conf > 100) w_conf = 100;
2000  conf[n_word++] = w_conf;
2001  }
2002  conf[n_word] = -1;
2003  return conf;
2004 }
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:877
WERD * word
Definition: pageres.h:175
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:871
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
WERD_CHOICE * best_choice
Definition: pageres.h:219
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:829
float certainty() const
Definition: ratngs.h:328
PageIterator * tesseract::TessBaseAPI::AnalyseLayout ( )

Runs page layout analysis in the mode set by SetPageSegMode. May optionally be called prior to Recognize to get access to just the page layout results. Returns an iterator to the results. If merge_similar_words is true, words are combined where suitable for use with a line recognizer. Use this if you want to use AnalyseLayout to find the textlines, and then want to process textline fragments with an external line recognizer. Returns NULL on error or an empty page. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 809 of file baseapi.cpp.

809 { return AnalyseLayout(false); }
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:809
PageIterator * tesseract::TessBaseAPI::AnalyseLayout ( bool  merge_similar_words)

Definition at line 811 of file baseapi.cpp.

811  {
812  if (FindLines() == 0) {
813  if (block_list_->empty())
814  return NULL; // The page was empty.
815  page_res_ = new PAGE_RES(merge_similar_words, block_list_, NULL);
816  DetectParagraphs(false);
817  return new PageIterator(
821  }
822  return NULL;
823 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:868
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:870
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2294
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:871
TESS_LOCAL void DetectParagraphs(bool after_text_recognition)
Definition: baseapi.cpp:2638
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
int GetScaledYResolution() const
Definition: thresholder.h:93
void tesseract::TessBaseAPI::Clear ( )

Free up recognition results and any stored image data, without actually freeing any recognition data that would be time-consuming to reload. Afterwards, you must call SetImage or TesseractRect before doing any Recognize or Get* operation.

Definition at line 2080 of file baseapi.cpp.

2080  {
2081  if (thresholder_ != NULL)
2082  thresholder_->Clear();
2083  ClearResults();
2084  if (tesseract_ != NULL) SetInputImage(NULL);
2085 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:868
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:938
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
Definition: thresholder.cpp:45
void tesseract::TessBaseAPI::ClearPersistentCache ( )
static

Clear any library-level memory caches. There are a variety of expensive-to-load constant data structures (mostly language dictionaries) that are cached globally – surviving the Init() and End() of individual TessBaseAPI instances. This function allows the clearing of these caches.

Definition at line 2149 of file baseapi.cpp.

2149  {
2151 }
void DeleteUnusedDawgs()
Definition: dawg_cache.h:46
static DawgCache * GlobalDawgCache()
Definition: dict.cpp:198
void tesseract::TessBaseAPI::DeleteBlockList ( BLOCK_LIST *  block_list)
static

Delete a block list. This is to keep the BLOCK_LIST pointer opaque and avoid including the other headers.

Definition at line 2504 of file baseapi.cpp.

2504  {
2505  delete block_list;
2506 }
bool tesseract::TessBaseAPI::DetectOrientationScript ( int *  orient_deg,
float *  orient_conf,
const char **  script_name,
float *  script_conf 
)

Detect the orientation of the input image and apparent script (alphabet). orient_deg is the detected clockwise rotation of the input image in degrees (0, 90, 180, 270). orient_conf is the confidence (15.0 is reasonably confident). script_name is an ASCII string, the name of the script, e.g. "Latin". script_conf is the confidence level in the script. Returns true on success and writes a value to each non-NULL output parameter.

Definition at line 1909 of file baseapi.cpp.

1909  {
1910  OSResults osr;
1911 
1912  bool osd = DetectOS(&osr);
1913  if (!osd) {
1914  return false;
1915  }
1916 
1917  int orient_id = osr.best_result.orientation_id;
1918  int script_id = osr.get_best_script(orient_id);
1919  if (orient_conf)
1920  *orient_conf = osr.best_result.oconfidence;
1921  if (orient_deg)
1922  *orient_deg = orient_id * 90; // convert quadrant to degrees
1923 
1924  if (script_name) {
1925  const char* script =
1926  osr.unicharset->get_script_from_script_id(script_id);
1927 
1928  *script_name = script;
1929  }
1930 
1931  if (script_conf)
1932  *script_conf = osr.best_result.sconfidence;
1933 
1934  return true;
1935 }
OSBestResult best_result
Definition: osdetect.h:79
float oconfidence
Definition: osdetect.h:44
TESS_API int get_best_script(int orientation_id) const
Definition: osdetect.cpp:114
const char * get_script_from_script_id(int id) const
Definition: unicharset.h:801
bool DetectOS(OSResults *)
Definition: baseapi.cpp:2415
int orientation_id
Definition: osdetect.h:41
float sconfidence
Definition: osdetect.h:43
UNICHARSET * unicharset
Definition: osdetect.h:78
bool tesseract::TessBaseAPI::DetectOS ( OSResults *  osr)

Estimates the Orientation And Script of the image.

Returns
true if the image was processed successfully.

Estimates the Orientation And Script of the image. Returns true if the image was processed successfully.

Definition at line 2415 of file baseapi.cpp.

2415  {
2416  if (tesseract_ == NULL)
2417  return false;
2418  ClearResults();
2419  if (tesseract_->pix_binary() == NULL)
2421  if (input_file_ == NULL)
2422  input_file_ = new STRING(kInputFile);
2424 }
Definition: strngs.h:44
const char * kInputFile
Definition: baseapi.cpp:95
Pix * pix_binary() const
int orientation_and_script_detection(STRING &filename, OSResults *osr, tesseract::Tesseract *tess)
Definition: osdetect.cpp:191
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
STRING * input_file_
Name used by training code.
Definition: baseapi.h:872
virtual TESS_LOCAL void Threshold(Pix **pix)
Definition: baseapi.cpp:2251
void tesseract::TessBaseAPI::DumpPGM ( const char *  filename)

Dump the internal binary image to a PGM file.

Deprecated:
Use GetThresholdedImage and write the image using pixWrite instead if possible.

Dump the internal binary image to a PGM file.

Definition at line 729 of file baseapi.cpp.

729  {
730  if (tesseract_ == NULL)
731  return;
732  FILE *fp = fopen(filename, "wb");
733  Pix* pix = tesseract_->pix_binary();
734  int width = pixGetWidth(pix);
735  int height = pixGetHeight(pix);
736  l_uint32* data = pixGetData(pix);
737  fprintf(fp, "P5 %d %d 255\n", width, height);
738  for (int y = 0; y < height; ++y, data += pixGetWpl(pix)) {
739  for (int x = 0; x < width; ++x) {
740  uinT8 b = GET_DATA_BIT(data, x) ? 0 : 255;
741  fwrite(&b, 1, 1, fp);
742  }
743  }
744  fclose(fp);
745 }
unsigned char uinT8
Definition: host.h:99
Pix * pix_binary() const
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
void tesseract::TessBaseAPI::End ( )

Close down tesseract and free up all memory. End() is equivalent to destructing and reconstructing your TessBaseAPI. Once End() has been used, none of the other API functions may be used other than Init and anything declared above it in the class definition.

Definition at line 2093 of file baseapi.cpp.

2093  {
2094  Clear();
2095  if (thresholder_ != NULL) {
2096  delete thresholder_;
2097  thresholder_ = NULL;
2098  }
2099  if (page_res_ != NULL) {
2100  delete page_res_;
2101  page_res_ = NULL;
2102  }
2103  if (block_list_ != NULL) {
2104  delete block_list_;
2105  block_list_ = NULL;
2106  }
2107  if (paragraph_models_ != NULL) {
2109  delete paragraph_models_;
2110  paragraph_models_ = NULL;
2111  }
2112  if (tesseract_ != NULL) {
2113  delete tesseract_;
2114  if (osd_tesseract_ == tesseract_)
2115  osd_tesseract_ = NULL;
2116  tesseract_ = NULL;
2117  }
2118  if (osd_tesseract_ != NULL) {
2119  delete osd_tesseract_;
2120  osd_tesseract_ = NULL;
2121  }
2122  if (equ_detect_ != NULL) {
2123  delete equ_detect_;
2124  equ_detect_ = NULL;
2125  }
2126  if (input_file_ != NULL) {
2127  delete input_file_;
2128  input_file_ = NULL;
2129  }
2130  if (output_file_ != NULL) {
2131  delete output_file_;
2132  output_file_ = NULL;
2133  }
2134  if (datapath_ != NULL) {
2135  delete datapath_;
2136  datapath_ = NULL;
2137  }
2138  if (language_ != NULL) {
2139  delete language_;
2140  language_ = NULL;
2141  }
2142 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:868
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:870
STRING * language_
Last initialized language.
Definition: baseapi.h:875
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:866
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:867
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:871
void delete_data_pointers()
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:873
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:869
STRING * input_file_
Name used by training code.
Definition: baseapi.h:872
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:874
BLOCK_LIST * tesseract::TessBaseAPI::FindLinesCreateBlockList ( )

Find lines from the image making the BLOCK_LIST.

Definition at line 2492 of file baseapi.cpp.

2492  {
2493  FindLines();
2494  BLOCK_LIST* result = block_list_;
2495  block_list_ = NULL;
2496  return result;
2497 }
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:870
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2294
ROW * tesseract::TessBaseAPI::FindRowForBox ( BLOCK_LIST *  blocks,
int  left,
int  top,
int  right,
int  bottom 
)
static

This method returns the row to which a box of specified dimensions would belong. If no good match is found, it returns NULL.

Definition at line 2804 of file baseapi.cpp.

2805  {
2806  TBOX box(left, bottom, right, top);
2807  BLOCK_IT b_it(blocks);
2808  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
2809  BLOCK* block = b_it.data();
2810  if (!box.major_overlap(block->bounding_box()))
2811  continue;
2812  ROW_IT r_it(block->row_list());
2813  for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
2814  ROW* row = r_it.data();
2815  if (!box.major_overlap(row->bounding_box()))
2816  continue;
2817  WERD_IT w_it(row->word_list());
2818  for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
2819  WERD* word = w_it.data();
2820  if (box.major_overlap(word->bounding_box()))
2821  return row;
2822  }
2823  }
2824  }
2825  return NULL;
2826 }
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:120
Definition: ocrrow.h:32
Definition: werd.h:60
WERD_LIST * word_list()
Definition: ocrrow.h:52
TBOX bounding_box() const
Definition: ocrrow.h:85
TBOX bounding_box() const
Definition: werd.cpp:160
Definition: rect.h:30
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59
Definition: ocrblock.h:30
void tesseract::TessBaseAPI::GetBlockTextOrientations ( int **  block_orientation,
bool **  vertical_writing 
)

Return text orientation of each block as determined by an earlier run of layout analysis.

Return text orientation of each block as determined in an earlier page layout analysis operation. Orientation is returned as the number of ccw 90-degree rotations (in [0..3]) required to make the text in the block upright (readable). Note that this may not necessarily be the block orientation preferred for recognition (such as the case of vertical CJK text).

Also returns whether the text in the block is believed to have vertical writing direction (when in an upright page orientation).

The returned array is of length equal to the number of text blocks, which may be less than the total number of blocks. The ordering is intended to be consistent with GetTextlines().

Definition at line 2444 of file baseapi.cpp.

2445  {
2446  delete[] *block_orientation;
2447  *block_orientation = NULL;
2448  delete[] *vertical_writing;
2449  *vertical_writing = NULL;
2450  BLOCK_IT block_it(block_list_);
2451 
2452  block_it.move_to_first();
2453  int num_blocks = 0;
2454  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2455  if (!block_it.data()->poly_block()->IsText()) {
2456  continue;
2457  }
2458  ++num_blocks;
2459  }
2460  if (!num_blocks) {
2461  tprintf("WARNING: Found no blocks\n");
2462  return;
2463  }
2464  *block_orientation = new int[num_blocks];
2465  *vertical_writing = new bool[num_blocks];
2466  block_it.move_to_first();
2467  int i = 0;
2468  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
2469  block_it.forward()) {
2470  if (!block_it.data()->poly_block()->IsText()) {
2471  continue;
2472  }
2473  FCOORD re_rotation = block_it.data()->re_rotation();
2474  float re_theta = re_rotation.angle();
2475  FCOORD classify_rotation = block_it.data()->classify_rotation();
2476  float classify_theta = classify_rotation.angle();
2477  double rot_theta = - (re_theta - classify_theta) * 2.0 / PI;
2478  if (rot_theta < 0) rot_theta += 4;
2479  int num_rotations = static_cast<int>(rot_theta + 0.5);
2480  (*block_orientation)[i] = num_rotations;
2481  // The classify_rotation is non-zero only if the text has vertical
2482  // writing direction.
2483  (*vertical_writing)[i] = classify_rotation.y() != 0.0f;
2484  ++i;
2485  }
2486 }
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:870
Definition: points.h:189
#define tprintf(...)
Definition: tprintf.h:31
float y() const
Definition: points.h:212
float angle() const
find angle
Definition: points.h:249
#define PI
Definition: const.h:19
char * tesseract::TessBaseAPI::GetBoxText ( int  page_number)

The recognized text is returned as a char* which is coded in the same format as a box file used in training. Returned string must be freed with the delete [] operator. Constructs coordinates in the original image - not just the rectangle. page_number is a 0-based page index that will appear in the box file.

The recognized text is returned as a char* which is coded as a UTF8 box file and must be freed with the delete [] operator. page_number is a 0-based page index that will appear in the box file.

Definition at line 1745 of file baseapi.cpp.

1745  {
1746  if (tesseract_ == NULL ||
1747  (!recognition_done_ && Recognize(NULL) < 0))
1748  return NULL;
1749  int blob_count;
1750  int utf8_length = TextLength(&blob_count);
1751  int total_length = blob_count * kBytesPerBoxFileLine + utf8_length +
1753  char* result = new char[total_length];
1754  result[0] = '\0';
1755  int output_length = 0;
1756  LTRResultIterator* it = GetLTRIterator();
1757  do {
1758  int left, top, right, bottom;
1759  if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
1760  char* text = it->GetUTF8Text(RIL_SYMBOL);
1761  // Tesseract uses space for recognition failure. Fix to a reject
1762  // character, kTesseractReject so we don't create illegal box files.
1763  for (int i = 0; text[i] != '\0'; ++i) {
1764  if (text[i] == ' ')
1765  text[i] = kTesseractReject;
1766  }
1767  snprintf(result + output_length, total_length - output_length,
1768  "%s %d %d %d %d %d\n",
1769  text, left, image_height_ - bottom,
1770  right, image_height_ - top, page_number);
1771  output_length += strlen(result + output_length);
1772  delete [] text;
1773  // Just in case...
1774  if (output_length + kMaxBytesPerLine > total_length)
1775  break;
1776  }
1777  } while (it->Next(RIL_SYMBOL));
1778  delete it;
1779  return result;
1780 }
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:877
TESS_LOCAL LTRResultIterator * GetLTRIterator()
Definition: baseapi.cpp:1251
TESS_LOCAL int TextLength(int *blob_count)
Definition: baseapi.cpp:2385
const char kTesseractReject
Definition: baseapi.cpp:86
const int kMaxBytesPerLine
Definition: baseapi.cpp:1737
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
const int kBytesPerBoxFileLine
Definition: baseapi.cpp:1728
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:829
Boxa * tesseract::TessBaseAPI::GetComponentImages ( const PageIteratorLevel  level,
const bool  text_only,
const bool  raw_image,
const int  raw_padding,
Pixa **  pixa,
int **  blockids,
int **  paraids 
)

Get the given level kind of components (block, textline, word etc.) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not NULL, the block-id of each component is also returned as an array of one element per component. delete [] after use. If paraids is not NULL, the paragraph-id of each component within its block is also returned as an array of one element per component. delete [] after use. If raw_image is true, then portions of the original image are extracted instead of the thresholded image and padded with raw_padding. If text_only is true, then only text components are returned.

Get the given level kind of components (block, textline, word etc.) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not NULL, the block-id of each component is also returned as an array of one element per component. delete [] after use. If text_only is true, then only text components are returned.

Definition at line 639 of file baseapi.cpp.

643  {
644  PageIterator* page_it = GetIterator();
645  if (page_it == NULL)
646  page_it = AnalyseLayout();
647  if (page_it == NULL)
648  return NULL; // Failed.
649 
650  // Count the components to get a size for the arrays.
651  int component_count = 0;
652  int left, top, right, bottom;
653 
654  TessResultCallback<bool>* get_bbox = NULL;
655  if (raw_image) {
656  // Get bounding box in original raw image with padding.
658  level, raw_padding,
659  &left, &top, &right, &bottom);
660  } else {
661  // Get bounding box from binarized imaged. Note that this could be
662  // differently scaled from the original image.
663  get_bbox = NewPermanentTessCallback(page_it,
665  level, &left, &top, &right, &bottom);
666  }
667  do {
668  if (get_bbox->Run() &&
669  (!text_only || PTIsTextType(page_it->BlockType())))
670  ++component_count;
671  } while (page_it->Next(level));
672 
673  Boxa* boxa = boxaCreate(component_count);
674  if (pixa != NULL)
675  *pixa = pixaCreate(component_count);
676  if (blockids != NULL)
677  *blockids = new int[component_count];
678  if (paraids != NULL)
679  *paraids = new int[component_count];
680 
681  int blockid = 0;
682  int paraid = 0;
683  int component_index = 0;
684  page_it->Begin();
685  do {
686  if (get_bbox->Run() &&
687  (!text_only || PTIsTextType(page_it->BlockType()))) {
688  Box* lbox = boxCreate(left, top, right - left, bottom - top);
689  boxaAddBox(boxa, lbox, L_INSERT);
690  if (pixa != NULL) {
691  Pix* pix = NULL;
692  if (raw_image) {
693  pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left,
694  &top);
695  } else {
696  pix = page_it->GetBinaryImage(level);
697  }
698  pixaAddPix(*pixa, pix, L_INSERT);
699  pixaAddBox(*pixa, lbox, L_CLONE);
700  }
701  if (paraids != NULL) {
702  (*paraids)[component_index] = paraid;
703  if (page_it->IsAtFinalElement(RIL_PARA, level))
704  ++paraid;
705  }
706  if (blockids != NULL) {
707  (*blockids)[component_index] = blockid;
708  if (page_it->IsAtFinalElement(RIL_BLOCK, level)) {
709  ++blockid;
710  paraid = 0;
711  }
712  }
713  ++component_index;
714  }
715  } while (page_it->Next(level));
716  delete page_it;
717  delete get_bbox;
718  return boxa;
719 }
virtual R Run()=0
ResultIterator * GetIterator()
Definition: baseapi.cpp:1268
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:70
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:809
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
Boxa* tesseract::TessBaseAPI::GetComponentImages ( const PageIteratorLevel  level,
const bool  text_only,
Pixa **  pixa,
int **  blockids 
)
inline

Definition at line 460 of file baseapi.h.

462  {
463  return GetComponentImages(level, text_only, false, 0, pixa, blockids, NULL);
464  }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:639
Boxa * tesseract::TessBaseAPI::GetConnectedComponents ( Pixa **  pixa)

Gets the individual connected (text) components (created after the page segmentation step, but before recognition) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. Note: the caller is responsible for calling boxaDestroy() on the returned Boxa array and pixaDestroy() on the cc array.

Gets the individual connected (text) components (created after the page segmentation step, but before recognition) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 627 of file baseapi.cpp.

627  {
628  return GetComponentImages(RIL_SYMBOL, true, pixa, NULL);
629 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:639
CubeRecoContext * tesseract::TessBaseAPI::GetCubeRecoContext ( ) const

Return a pointer to underlying CubeRecoContext object if present.

Definition at line 2869 of file baseapi.cpp.

2869  {
2870  return (tesseract_ == NULL) ? NULL : tesseract_->GetCubeRecoContext();
2871 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
CubeRecoContext * GetCubeRecoContext()
const Dawg * tesseract::TessBaseAPI::GetDawg ( int  i) const

Return the pointer to the i-th dawg loaded into tesseract_ object.

Definition at line 2857 of file baseapi.cpp.

2857  {
2858  if (tesseract_ == NULL || i >= NumDawgs()) return NULL;
2859  return tesseract_->getDict().GetDawg(i);
2860 }
Dict & getDict()
Definition: classify.h:65
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
const Dawg * GetDawg(int index) const
Return i-th dawg pointer recorded in the dawgs_ vector.
Definition: dict.h:412
int NumDawgs() const
Definition: baseapi.cpp:2863
void tesseract::TessBaseAPI::GetFeaturesForBlob ( TBLOB blob,
INT_FEATURE_STRUCT int_features,
int *  num_features,
int *  feature_outline_index 
)

This method returns the features associated with the input blob.

This method returns the features associated with the input blob.

Definition at line 2776 of file baseapi.cpp.

2779  {
2780  GenericVector<int> outline_counts;
2783  INT_FX_RESULT_STRUCT fx_info;
2784  tesseract_->ExtractFeatures(*blob, false, &bl_features,
2785  &cn_features, &fx_info, &outline_counts);
2786  if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) {
2787  *num_features = 0;
2788  return; // Feature extraction failed.
2789  }
2790  *num_features = cn_features.size();
2791  memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0]));
2792  // TODO(rays) Pass outline_counts back and simplify the calling code.
2793  if (feature_outline_index != NULL) {
2794  int f = 0;
2795  for (int i = 0; i < outline_counts.size(); ++i) {
2796  while (f < outline_counts[i])
2797  feature_outline_index[f++] = i;
2798  }
2799  }
2800 }
#define MAX_NUM_INT_FEATURES
Definition: intproto.h:132
bool empty() const
Definition: genericvector.h:84
int size() const
Definition: genericvector.h:72
static void ExtractFeatures(const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
Definition: intfx.cpp:445
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
char * tesseract::TessBaseAPI::GetHOCRText ( ETEXT_DESC monitor,
int  page_number 
)

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. monitor can be used to cancel the recognition and to receive progress callbacks.

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Image name/input_file_ can be set by SetInputName before calling GetHOCRText. STL removed from original patch submission and refactored by rays.

Definition at line 1442 of file baseapi.cpp.

1442  {
1443  if (tesseract_ == NULL || (page_res_ == NULL && Recognize(monitor) < 0))
1444  return NULL;
1445 
1446  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1447  int page_id = page_number + 1; // hOCR uses 1-based page numbers.
1448  bool para_is_ltr = true; // Default direction is LTR
1449  const char* paragraph_lang = NULL;
1450  bool font_info = false;
1451  GetBoolVariable("hocr_font_info", &font_info);
1452 
1453  STRING hocr_str("");
1454 
1455  if (input_file_ == NULL)
1456  SetInputName(NULL);
1457 
1458 #ifdef _WIN32
1459  // convert input name from ANSI encoding to utf-8
1460  int str16_len =
1461  MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, NULL, 0);
1462  wchar_t *uni16_str = new WCHAR[str16_len];
1463  str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
1464  uni16_str, str16_len);
1465  int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL, 0,
1466  NULL, NULL);
1467  char *utf8_str = new char[utf8_len];
1468  WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str,
1469  utf8_len, NULL, NULL);
1470  *input_file_ = utf8_str;
1471  delete[] uni16_str;
1472  delete[] utf8_str;
1473 #endif
1474 
1475  hocr_str += " <div class='ocr_page'";
1476  AddIdTohOCR(&hocr_str, "page", page_id, -1);
1477  hocr_str += " title='image \"";
1478  if (input_file_) {
1479  hocr_str += HOcrEscape(input_file_->string());
1480  } else {
1481  hocr_str += "unknown";
1482  }
1483  hocr_str.add_str_int("\"; bbox ", rect_left_);
1484  hocr_str.add_str_int(" ", rect_top_);
1485  hocr_str.add_str_int(" ", rect_width_);
1486  hocr_str.add_str_int(" ", rect_height_);
1487  hocr_str.add_str_int("; ppageno ", page_number);
1488  hocr_str += "'>\n";
1489 
1490  ResultIterator *res_it = GetIterator();
1491  while (!res_it->Empty(RIL_BLOCK)) {
1492  if (res_it->Empty(RIL_WORD)) {
1493  res_it->Next(RIL_WORD);
1494  continue;
1495  }
1496 
1497  // Open any new block/paragraph/textline.
1498  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
1499  para_is_ltr = true; // reset to default direction
1500  hocr_str += " <div class='ocr_carea'";
1501  AddIdTohOCR(&hocr_str, "block", page_id, bcnt);
1502  AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str);
1503  }
1504  if (res_it->IsAtBeginningOf(RIL_PARA)) {
1505  hocr_str += "\n <p class='ocr_par'";
1506  para_is_ltr = res_it->ParagraphIsLtr();
1507  if (!para_is_ltr) {
1508  hocr_str += " dir='rtl'";
1509  }
1510  AddIdTohOCR(&hocr_str, "par", page_id, pcnt);
1511  paragraph_lang = res_it->WordRecognitionLanguage();
1512  if (paragraph_lang) {
1513  hocr_str += " lang='";
1514  hocr_str += paragraph_lang;
1515  hocr_str += "'";
1516  }
1517  AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
1518  }
1519  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
1520  hocr_str += "\n <span class='ocr_line'";
1521  AddIdTohOCR(&hocr_str, "line", page_id, lcnt);
1522  AddBoxTohOCR(res_it, RIL_TEXTLINE, &hocr_str);
1523  }
1524 
1525  // Now, process the word...
1526  hocr_str += "<span class='ocrx_word'";
1527  AddIdTohOCR(&hocr_str, "word", page_id, wcnt);
1528  int left, top, right, bottom;
1529  bool bold, italic, underlined, monospace, serif, smallcaps;
1530  int pointsize, font_id;
1531  const char *font_name;
1532  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
1533  font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
1534  &monospace, &serif, &smallcaps,
1535  &pointsize, &font_id);
1536  hocr_str.add_str_int(" title='bbox ", left);
1537  hocr_str.add_str_int(" ", top);
1538  hocr_str.add_str_int(" ", right);
1539  hocr_str.add_str_int(" ", bottom);
1540  hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD));
1541  if (font_info) {
1542  if (font_name) {
1543  hocr_str += "; x_font ";
1544  hocr_str += HOcrEscape(font_name);
1545  }
1546  hocr_str.add_str_int("; x_fsize ", pointsize);
1547  }
1548  hocr_str += "'";
1549  const char* lang = res_it->WordRecognitionLanguage();
1550  if (lang && (!paragraph_lang || strcmp(lang, paragraph_lang))) {
1551  hocr_str += " lang='";
1552  hocr_str += lang;
1553  hocr_str += "'";
1554  }
1555  switch (res_it->WordDirection()) {
1556  // Only emit direction if different from current paragraph direction
1557  case DIR_LEFT_TO_RIGHT:
1558  if (!para_is_ltr) hocr_str += " dir='ltr'";
1559  break;
1560  case DIR_RIGHT_TO_LEFT:
1561  if (para_is_ltr) hocr_str += " dir='rtl'";
1562  break;
1563  case DIR_MIX:
1564  case DIR_NEUTRAL:
1565  default: // Do nothing.
1566  break;
1567  }
1568  hocr_str += ">";
1569  bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
1570  bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
1571  bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
1572  if (bold) hocr_str += "<strong>";
1573  if (italic) hocr_str += "<em>";
1574  do {
1575  const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
1576  if (grapheme && grapheme[0] != 0) {
1577  hocr_str += HOcrEscape(grapheme);
1578  }
1579  delete []grapheme;
1580  res_it->Next(RIL_SYMBOL);
1581  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
1582  if (italic) hocr_str += "</em>";
1583  if (bold) hocr_str += "</strong>";
1584  hocr_str += "</span> ";
1585  wcnt++;
1586  // Close any ending block/paragraph/textline.
1587  if (last_word_in_line) {
1588  hocr_str += "\n </span>";
1589  lcnt++;
1590  }
1591  if (last_word_in_para) {
1592  hocr_str += "\n </p>\n";
1593  pcnt++;
1594  para_is_ltr = true; // back to default direction
1595  }
1596  if (last_word_in_block) {
1597  hocr_str += " </div>\n";
1598  bcnt++;
1599  }
1600  }
1601  hocr_str += " </div>\n";
1602 
1603  char *ret = new char[hocr_str.length() + 1];
1604  strcpy(ret, hocr_str.string());
1605  delete res_it;
1606  return ret;
1607 }
ResultIterator * GetIterator()
Definition: baseapi.cpp:1268
Definition: strngs.h:44
STRING HOcrEscape(const char *text)
Definition: baseapi.cpp:2875
void SetInputName(const char *name)
Definition: baseapi.cpp:199
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:871
void add_str_int(const char *str, int number)
Definition: strngs.cpp:384
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
const char * string() const
Definition: strngs.cpp:201
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:829
STRING * input_file_
Name used by training code.
Definition: baseapi.h:872
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:234
char * tesseract::TessBaseAPI::GetHOCRText ( int  page_number)

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based.

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Image name/input_file_ can be set by SetInputName before calling GetHOCRText. STL removed from original patch submission and refactored by rays.

Definition at line 1430 of file baseapi.cpp.

1430  {
1431  return GetHOCRText(NULL, page_number);
1432 }
char * GetHOCRText(ETEXT_DESC *monitor, int page_number)
Definition: baseapi.cpp:1442
ResultIterator * tesseract::TessBaseAPI::GetIterator ( )

Get a reading-order iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 1268 of file baseapi.cpp.

1268  {
1269  if (tesseract_ == NULL || page_res_ == NULL)
1270  return NULL;
1271  return ResultIterator::StartOfParagraph(LTRResultIterator(
1275 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:868
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:871
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
static ResultIterator * StartOfParagraph(const LTRResultIterator &resit)
int GetScaledYResolution() const
Definition: thresholder.h:93
MutableIterator * tesseract::TessBaseAPI::GetMutableIterator ( )

Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 1285 of file baseapi.cpp.

1285  {
1286  if (tesseract_ == NULL || page_res_ == NULL)
1287  return NULL;
1288  return new MutableIterator(page_res_, tesseract_,
1292 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:868
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:871
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
int GetScaledYResolution() const
Definition: thresholder.h:93
char * tesseract::TessBaseAPI::GetOsdText ( int  page_number)

The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator. page_number is a 0-based page index that will appear in the osd file.

Definition at line 1942 of file baseapi.cpp.

1942  {
1943  int orient_deg;
1944  float orient_conf;
1945  const char* script_name;
1946  float script_conf;
1947 
1948  if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name, &script_conf))
1949  return NULL;
1950 
1951  // clockwise rotation needed to make the page upright
1952  int rotate = OrientationIdToValue(orient_deg / 90);
1953 
1954  const int kOsdBufsize = 255;
1955  char* osd_buf = new char[kOsdBufsize];
1956  snprintf(osd_buf, kOsdBufsize,
1957  "Page number: %d\n"
1958  "Orientation in degrees: %d\n"
1959  "Rotate: %d\n"
1960  "Orientation confidence: %.2f\n"
1961  "Script: %s\n"
1962  "Script confidence: %.2f\n",
1963  page_number, orient_deg, rotate, orient_conf, script_name,
1964  script_conf);
1965 
1966  return osd_buf;
1967 }
int OrientationIdToValue(const int &id)
Definition: osdetect.cpp:565
bool DetectOrientationScript(int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
Definition: baseapi.cpp:1909
Boxa * tesseract::TessBaseAPI::GetRegions ( Pixa **  pixa)

Get the result of page layout analysis as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 582 of file baseapi.cpp.

582  {
583  return GetComponentImages(RIL_BLOCK, false, pixa, NULL);
584 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:639
Boxa * tesseract::TessBaseAPI::GetStrips ( Pixa **  pixa,
int **  blockids 
)

Get textlines and strips of image regions as a leptonica-style Boxa, Pixa pair, in reading order. Enables downstream handling of non-rectangular regions. Can be called before or after Recognize. If blockids is not NULL, the block-id of each line is also returned as an array of one element per line. delete [] after use.

Definition at line 608 of file baseapi.cpp.

608  {
609  return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids);
610 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:639
bool tesseract::TessBaseAPI::GetTextDirection ( int *  out_offset,
float *  out_slope 
)

Definition at line 2168 of file baseapi.cpp.

2168  {
2169  PageIterator* it = AnalyseLayout();
2170  if (it == NULL) {
2171  return false;
2172  }
2173  int x1, x2, y1, y2;
2174  it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
2175  // Calculate offset and slope (NOTE: Kind of ugly)
2176  if (x2 <= x1) x2 = x1 + 1;
2177  // Convert the point pair to slope/offset of the baseline (in image coords.)
2178  *out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
2179  *out_offset = static_cast<int>(y1 - *out_slope * x1);
2180  // Get the y-coord of the baseline at the left and right edges of the
2181  // textline's bounding box.
2182  int left, top, right, bottom;
2183  if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) {
2184  delete it;
2185  return false;
2186  }
2187  int left_y = IntCastRounded(*out_slope * left + *out_offset);
2188  int right_y = IntCastRounded(*out_slope * right + *out_offset);
2189  // Shift the baseline down so it passes through the nearest bottom-corner
2190  // of the textline's bounding box. This is the difference between the y
2191  // at the lowest (max) edge of the box and the actual box bottom.
2192  *out_offset += bottom - MAX(left_y, right_y);
2193  // Switch back to bottom-up tesseract coordinates. Requires negation of
2194  // the slope and height - offset for the offset.
2195  *out_slope = -*out_slope;
2196  *out_offset = rect_height_ - *out_offset;
2197  delete it;
2198 
2199  return true;
2200 }
#define MAX(x, y)
Definition: ndminx.h:24
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:809
int IntCastRounded(double x)
Definition: helpers.h:172
Boxa * tesseract::TessBaseAPI::GetTextlines ( const bool  raw_image,
const int  raw_padding,
Pixa **  pixa,
int **  blockids,
int **  paraids 
)

Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If raw_image is true, then extract from the original image instead of the thresholded image and pad by raw_padding pixels. If blockids is not NULL, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not NULL, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.

Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not NULL, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not NULL, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.

Definition at line 594 of file baseapi.cpp.

595  {
596  return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding,
597  pixa, blockids, paraids);
598 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:639
Boxa* tesseract::TessBaseAPI::GetTextlines ( Pixa **  pixa,
int **  blockids 
)
inline

Definition at line 411 of file baseapi.h.

411  {
412  return GetTextlines(false, 0, pixa, blockids, NULL);
413  }
Boxa * GetTextlines(const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:594
Pix * tesseract::TessBaseAPI::GetThresholdedImage ( )

Get a copy of the internal thresholded image from Tesseract. Caller takes ownership of the Pix and must pixDestroy it. May be called any time after SetImage, or after TesseractRect.

ONLY available after SetImage if you have Leptonica installed. Get a copy of the internal thresholded image from Tesseract.

Definition at line 569 of file baseapi.cpp.

569  {
570  if (tesseract_ == NULL || thresholder_ == NULL)
571  return NULL;
572  if (tesseract_->pix_binary() == NULL)
574  return pixClone(tesseract_->pix_binary());
575 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:868
Pix * pix_binary() const
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
virtual TESS_LOCAL void Threshold(Pix **pix)
Definition: baseapi.cpp:2251
int tesseract::TessBaseAPI::GetThresholdedImageScaleFactor ( ) const

Returns the scale factor of the thresholded image that would be returned by GetThresholdedImage() and the various GetX() methods that call GetComponentImages(). Returns 0 if no thresholder has been set.

Definition at line 721 of file baseapi.cpp.

721  {
722  if (thresholder_ == NULL) {
723  return 0;
724  }
725  return thresholder_->GetScaleFactor();
726 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:868
char * tesseract::TessBaseAPI::GetTSVText ( int  page_number)

Make a TSV-formatted string from the internal data structures. page_number is 0-based but will appear in the output as 1-based.

Definition at line 1613 of file baseapi.cpp.

1613  {
1614  if (tesseract_ == NULL || (page_res_ == NULL && Recognize(NULL) < 0))
1615  return NULL;
1616 
1617  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1618  int page_id = page_number + 1; // we use 1-based page numbers.
1619 
1620  STRING tsv_str("");
1621 
1622  int page_num = page_id, block_num = 0, par_num = 0, line_num = 0,
1623  word_num = 0;
1624 
1625  tsv_str.add_str_int("1\t", page_num); // level 1 - page
1626  tsv_str.add_str_int("\t", block_num);
1627  tsv_str.add_str_int("\t", par_num);
1628  tsv_str.add_str_int("\t", line_num);
1629  tsv_str.add_str_int("\t", word_num);
1630  tsv_str.add_str_int("\t", rect_left_);
1631  tsv_str.add_str_int("\t", rect_top_);
1632  tsv_str.add_str_int("\t", rect_width_);
1633  tsv_str.add_str_int("\t", rect_height_);
1634  tsv_str += "\t-1\t\n";
1635 
1636  ResultIterator* res_it = GetIterator();
1637  while (!res_it->Empty(RIL_BLOCK)) {
1638  if (res_it->Empty(RIL_WORD)) {
1639  res_it->Next(RIL_WORD);
1640  continue;
1641  }
1642 
1643  // Add rows for any new block/paragraph/textline.
1644  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
1645  block_num++, par_num = 0, line_num = 0, word_num = 0;
1646  tsv_str.add_str_int("2\t", page_num); // level 2 - block
1647  tsv_str.add_str_int("\t", block_num);
1648  tsv_str.add_str_int("\t", par_num);
1649  tsv_str.add_str_int("\t", line_num);
1650  tsv_str.add_str_int("\t", word_num);
1651  AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str);
1652  tsv_str += "\t-1\t\n"; // end of row for block
1653  }
1654  if (res_it->IsAtBeginningOf(RIL_PARA)) {
1655  par_num++, line_num = 0, word_num = 0;
1656  tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph
1657  tsv_str.add_str_int("\t", block_num);
1658  tsv_str.add_str_int("\t", par_num);
1659  tsv_str.add_str_int("\t", line_num);
1660  tsv_str.add_str_int("\t", word_num);
1661  AddBoxToTSV(res_it, RIL_PARA, &tsv_str);
1662  tsv_str += "\t-1\t\n"; // end of row for para
1663  }
1664  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
1665  line_num++, word_num = 0;
1666  tsv_str.add_str_int("4\t", page_num); // level 4 - line
1667  tsv_str.add_str_int("\t", block_num);
1668  tsv_str.add_str_int("\t", par_num);
1669  tsv_str.add_str_int("\t", line_num);
1670  tsv_str.add_str_int("\t", word_num);
1671  AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str);
1672  tsv_str += "\t-1\t\n"; // end of row for line
1673  }
1674 
1675  // Now, process the word...
1676  int left, top, right, bottom;
1677  bool bold, italic, underlined, monospace, serif, smallcaps;
1678  int pointsize, font_id;
1679  const char* font_name;
1680  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
1681  font_name =
1682  res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace,
1683  &serif, &smallcaps, &pointsize, &font_id);
1684  word_num++;
1685  tsv_str.add_str_int("5\t", page_num); // level 5 - word
1686  tsv_str.add_str_int("\t", block_num);
1687  tsv_str.add_str_int("\t", par_num);
1688  tsv_str.add_str_int("\t", line_num);
1689  tsv_str.add_str_int("\t", word_num);
1690  tsv_str.add_str_int("\t", left);
1691  tsv_str.add_str_int("\t", top);
1692  tsv_str.add_str_int("\t", right - left);
1693  tsv_str.add_str_int("\t", bottom - top);
1694  tsv_str.add_str_int("\t", res_it->Confidence(RIL_WORD));
1695  tsv_str += "\t";
1696 
1697  // Increment counts if at end of block/paragraph/textline.
1698  if (res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD)) lcnt++;
1699  if (res_it->IsAtFinalElement(RIL_PARA, RIL_WORD)) pcnt++;
1700  if (res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD)) bcnt++;
1701 
1702  do {
1703  tsv_str += res_it->GetUTF8Text(RIL_SYMBOL);
1704  res_it->Next(RIL_SYMBOL);
1705  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
1706  tsv_str += "\n"; // end of row
1707  wcnt++;
1708  }
1709 
1710  char* ret = new char[tsv_str.length() + 1];
1711  strcpy(ret, tsv_str.string());
1712  delete res_it;
1713  return ret;
1714 }
ResultIterator * GetIterator()
Definition: baseapi.cpp:1268
Definition: strngs.h:44
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:871
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:829
const char * tesseract::TessBaseAPI::GetUnichar ( int  unichar_id)

This method returns the string form of the specified unichar.

Definition at line 2852 of file baseapi.cpp.

2852  {
2853  return tesseract_->unicharset.id_to_unichar(unichar_id);
2854 }
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
UNICHARSET unicharset
Definition: ccutil.h:70
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
char * tesseract::TessBaseAPI::GetUNLVText ( )

The recognized text is returned as a char* which is coded as UNLV format Latin-1 with specific reject and suspect codes and must be freed with the delete [] operator.

Definition at line 1800 of file baseapi.cpp.

1800  {
1801  if (tesseract_ == NULL ||
1802  (!recognition_done_ && Recognize(NULL) < 0))
1803  return NULL;
1804  bool tilde_crunch_written = false;
1805  bool last_char_was_newline = true;
1806  bool last_char_was_tilde = false;
1807 
1808  int total_length = TextLength(NULL);
1809  PAGE_RES_IT page_res_it(page_res_);
1810  char* result = new char[total_length];
1811  char* ptr = result;
1812  for (page_res_it.restart_page(); page_res_it.word () != NULL;
1813  page_res_it.forward()) {
1814  WERD_RES *word = page_res_it.word();
1815  // Process the current word.
1816  if (word->unlv_crunch_mode != CR_NONE) {
1817  if (word->unlv_crunch_mode != CR_DELETE &&
1818  (!tilde_crunch_written ||
1819  (word->unlv_crunch_mode == CR_KEEP_SPACE &&
1820  word->word->space() > 0 &&
1821  !word->word->flag(W_FUZZY_NON) &&
1822  !word->word->flag(W_FUZZY_SP)))) {
1823  if (!word->word->flag(W_BOL) &&
1824  word->word->space() > 0 &&
1825  !word->word->flag(W_FUZZY_NON) &&
1826  !word->word->flag(W_FUZZY_SP)) {
1827  /* Write a space to separate from preceding good text */
1828  *ptr++ = ' ';
1829  last_char_was_tilde = false;
1830  }
1831  if (!last_char_was_tilde) {
1832  // Write a reject char.
1833  last_char_was_tilde = true;
1834  *ptr++ = kUNLVReject;
1835  tilde_crunch_written = true;
1836  last_char_was_newline = false;
1837  }
1838  }
1839  } else {
1840  // NORMAL PROCESSING of non tilde crunched words.
1841  tilde_crunch_written = false;
1843  const char* wordstr = word->best_choice->unichar_string().string();
1844  const STRING& lengths = word->best_choice->unichar_lengths();
1845  int length = lengths.length();
1846  int i = 0;
1847  int offset = 0;
1848 
1849  if (last_char_was_tilde &&
1850  word->word->space() == 0 && wordstr[offset] == ' ') {
1851  // Prevent adjacent tilde across words - we know that adjacent tildes
1852  // within words have been removed.
1853  // Skip the first character.
1854  offset = lengths[i++];
1855  }
1856  if (i < length && wordstr[offset] != 0) {
1857  if (!last_char_was_newline)
1858  *ptr++ = ' ';
1859  else
1860  last_char_was_newline = false;
1861  for (; i < length; offset += lengths[i++]) {
1862  if (wordstr[offset] == ' ' ||
1863  wordstr[offset] == kTesseractReject) {
1864  *ptr++ = kUNLVReject;
1865  last_char_was_tilde = true;
1866  } else {
1867  if (word->reject_map[i].rejected())
1868  *ptr++ = kUNLVSuspect;
1869  UNICHAR ch(wordstr + offset, lengths[i]);
1870  int uni_ch = ch.first_uni();
1871  for (int j = 0; kUniChs[j] != 0; ++j) {
1872  if (kUniChs[j] == uni_ch) {
1873  uni_ch = kLatinChs[j];
1874  break;
1875  }
1876  }
1877  if (uni_ch <= 0xff) {
1878  *ptr++ = static_cast<char>(uni_ch);
1879  last_char_was_tilde = false;
1880  } else {
1881  *ptr++ = kUNLVReject;
1882  last_char_was_tilde = true;
1883  }
1884  }
1885  }
1886  }
1887  }
1888  if (word->word->flag(W_EOL) && !last_char_was_newline) {
1889  /* Add a new line output */
1890  *ptr++ = '\n';
1891  tilde_crunch_written = false;
1892  last_char_was_newline = true;
1893  last_char_was_tilde = false;
1894  }
1895  }
1896  *ptr++ = '\n';
1897  *ptr = '\0';
1898  return result;
1899 }
Definition: werd.h:36
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:128
const char kUNLVReject
Definition: baseapi.cpp:88
void set_unlv_suspects(WERD_RES *word)
Definition: output.cpp:305
uinT8 space()
Definition: werd.h:104
Definition: strngs.h:44
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:877
const int kLatinChs[]
Definition: baseapi.cpp:1791
const STRING & unichar_lengths() const
Definition: ratngs.h:532
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:294
WERD * word
Definition: pageres.h:175
TESS_LOCAL int TextLength(int *blob_count)
Definition: baseapi.cpp:2385
REJMAP reject_map
Definition: pageres.h:271
const STRING & unichar_string() const
Definition: ratngs.h:525
const char kTesseractReject
Definition: baseapi.cpp:86
inT32 length() const
Definition: strngs.cpp:196
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:871
const int kUniChs[]
Definition: baseapi.cpp:1787
const char kUNLVSuspect
Definition: baseapi.cpp:90
Definition: werd.h:35
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
WERD_CHOICE * best_choice
Definition: pageres.h:219
const char * string() const
Definition: strngs.cpp:201
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:829
char * tesseract::TessBaseAPI::GetUTF8Text ( )

The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

Make a text string from the internal data structures.

Definition at line 1295 of file baseapi.cpp.

1295  {
1296  if (tesseract_ == NULL ||
1297  (!recognition_done_ && Recognize(NULL) < 0))
1298  return NULL;
1299  STRING text("");
1300  ResultIterator *it = GetIterator();
1301  do {
1302  if (it->Empty(RIL_PARA)) continue;
1303  char *para_text = it->GetUTF8Text(RIL_PARA);
1304  text += para_text;
1305  delete []para_text;
1306  } while (it->Next(RIL_PARA));
1307  char* result = new char[text.length() + 1];
1308  strncpy(result, text.string(), text.length() + 1);
1309  delete it;
1310  return result;
1311 }
ResultIterator * GetIterator()
Definition: baseapi.cpp:1268
Definition: strngs.h:44
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:877
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:829
Boxa * tesseract::TessBaseAPI::GetWords ( Pixa **  pixa)

Get the words as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 617 of file baseapi.cpp.

617  {
618  return GetComponentImages(RIL_WORD, true, pixa, NULL);
619 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:639
void tesseract::TessBaseAPI::InitTruthCallback ( TruthCallback cb)
inline

Definition at line 763 of file baseapi.h.

763 { truth_cb_ = cb; }
TruthCallback * truth_cb_
Definition: baseapi.h:878
bool tesseract::TessBaseAPI::IsValidCharacter ( const char *  utf8_character)

Definition at line 2161 of file baseapi.cpp.

2161  {
2162  return tesseract_->unicharset.contains_unichar(utf8_character);
2163 }
UNICHARSET unicharset
Definition: ccutil.h:70
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
int tesseract::TessBaseAPI::IsValidWord ( const char *  word)

Check whether a word is valid according to Tesseract's language model.

Returns
0 if the word is invalid, non-zero if valid.
Warning
temporary! This function will be removed from here and placed in a separate API at some future time.

Check whether a word is valid according to Tesseract's language model. Returns 0 if the word is invalid, non-zero if valid.

Definition at line 2157 of file baseapi.cpp.

2157  {
2158  return tesseract_->getDict().valid_word(word);
2159 }
Dict & getDict()
Definition: classify.h:65
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:730
TBLOB * tesseract::TessBaseAPI::MakeTBLOB ( Pix *  pix)
static

Returns a TBLOB corresponding to the entire input image.

Creates a TBLOB* from the whole pix.

Definition at line 2526 of file baseapi.cpp.

2526  {
2527  int width = pixGetWidth(pix);
2528  int height = pixGetHeight(pix);
2529  BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height);
2530 
2531  // Create C_BLOBs from the page
2532  extract_edges(pix, &block);
2533 
2534  // Merge all C_BLOBs
2535  C_BLOB_LIST *list = block.blob_list();
2536  C_BLOB_IT c_blob_it(list);
2537  if (c_blob_it.empty())
2538  return NULL;
2539  // Move all the outlines to the first blob.
2540  C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
2541  for (c_blob_it.forward();
2542  !c_blob_it.at_first();
2543  c_blob_it.forward()) {
2544  C_BLOB *c_blob = c_blob_it.data();
2545  ol_it.add_list_after(c_blob->out_list());
2546  }
2547  // Convert the first blob to the output TBLOB.
2548  return TBLOB::PolygonalCopy(false, c_blob_it.data());
2549 }
#define TRUE
Definition: capi.h:45
static TBLOB * PolygonalCopy(bool allow_detailed_fx, C_BLOB *src)
Definition: blobs.cpp:344
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:64
Definition: ocrblock.h:30
void extract_edges(Pix *pix, BLOCK *block)
Definition: edgblob.cpp:334
ROW * tesseract::TessBaseAPI::MakeTessOCRRow ( float  baseline,
float  xheight,
float  descender,
float  ascender 
)
static

Returns a ROW object created from the input row specification.

Definition at line 2509 of file baseapi.cpp.

2512  {
2513  inT32 xstarts[] = {-32000};
2514  double quad_coeffs[] = {0, 0, baseline};
2515  return new ROW(1,
2516  xstarts,
2517  quad_coeffs,
2518  xheight,
2519  ascender - (baseline + xheight),
2520  descender - baseline,
2521  0,
2522  0);
2523 }
Definition: ocrrow.h:32
int inT32
Definition: host.h:102
int tesseract::TessBaseAPI::MeanTextConf ( )

Returns the (average) confidence value between 0 and 100.

Returns the average word confidence for Tesseract page result.

Definition at line 1970 of file baseapi.cpp.

1970  {
1971  int* conf = AllWordConfidences();
1972  if (!conf) return 0;
1973  int sum = 0;
1974  int *pt = conf;
1975  while (*pt >= 0) sum += *pt++;
1976  if (pt != conf) sum /= pt - conf;
1977  delete [] conf;
1978  return sum;
1979 }
void tesseract::TessBaseAPI::NormalizeTBLOB ( TBLOB tblob,
ROW row,
bool  numeric_mode 
)
static

This method baseline normalizes a TBLOB in-place. The input row is used for normalization. The denorm is an optional parameter in which the normalization-antidote is returned.

Definition at line 2556 of file baseapi.cpp.

2556  {
2557  TBOX box = tblob->bounding_box();
2558  float x_center = (box.left() + box.right()) / 2.0f;
2559  float baseline = row->base_line(x_center);
2560  float scale = kBlnXHeight / row->x_height();
2561  tblob->Normalize(NULL, NULL, NULL, x_center, baseline, scale, scale,
2562  0.0f, static_cast<float>(kBlnBaselineOffset), false, NULL);
2563 }
float x_height() const
Definition: ocrrow.h:61
TBOX bounding_box() const
Definition: blobs.cpp:482
float base_line(float xpos) const
Definition: ocrrow.h:56
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift, bool inverse, Pix *pix)
Definition: blobs.cpp:413
const int kBlnXHeight
Definition: normalis.h:28
const int kBlnBaselineOffset
Definition: normalis.h:29
int tesseract::TessBaseAPI::NumDawgs ( ) const

Return the number of dawgs loaded into tesseract_ object.

Definition at line 2863 of file baseapi.cpp.

2863  {
2864  return tesseract_ == NULL ? 0 : tesseract_->getDict().NumDawgs();
2865 }
Dict & getDict()
Definition: classify.h:65
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
int NumDawgs() const
Return the number of dawgs in the dawgs_ vector.
Definition: dict.h:410
OcrEngineMode tesseract::TessBaseAPI::oem ( ) const
inline

Definition at line 761 of file baseapi.h.

761 { return last_oem_requested_; }
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
Definition: baseapi.h:876
bool tesseract::TessBaseAPI::ProcessPage ( Pix *  pix,
int  page_index,
const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer renderer 
)

Turn a single image into symbolic text.

The pix is the image processed. filename and page_index are metadata used by side-effect processes, such as reading a box file or formatting as hOCR.

See ProcessPages for descriptions of other parameters.

Definition at line 1186 of file baseapi.cpp.

1188  {
1189  PERF_COUNT_START("ProcessPage")
1191  SetImage(pix);
1192  bool failed = false;
1193 
1194  if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
1195  // Disabled character recognition
1196  PageIterator* it = AnalyseLayout();
1197 
1198  if (it == NULL) {
1199  failed = true;
1200  } else {
1201  delete it;
1202  }
1204  failed = FindLines() != 0;
1205  } else if (timeout_millisec > 0) {
1206  // Running with a timeout.
1207  ETEXT_DESC monitor;
1208  monitor.cancel = NULL;
1209  monitor.cancel_this = NULL;
1210  monitor.set_deadline_msecs(timeout_millisec);
1211 
1212  // Now run the main recognition.
1213  failed = Recognize(&monitor) < 0;
1214  } else {
1215  // Normal layout and character recognition with no timeout.
1216  failed = Recognize(NULL) < 0;
1217  }
1218 
1220 #ifndef ANDROID_BUILD
1221  Pix* page_pix = GetThresholdedImage();
1222  pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
1223 #endif // ANDROID_BUILD
1224  }
1225 
1226  if (failed && retry_config != NULL && retry_config[0] != '\0') {
1227  // Save current config variables before switching modes.
1228  FILE* fp = fopen(kOldVarsFile, "wb");
1229  PrintVariables(fp);
1230  fclose(fp);
1231  // Switch to alternate mode for retry.
1232  ReadConfigFile(retry_config);
1233  SetImage(pix);
1234  Recognize(NULL);
1235  // Restore saved config variables.
1237  }
1238 
1239  if (renderer && !failed) {
1240  failed = !renderer->AddImage(this);
1241  }
1242 
1244  return !failed;
1245 }
const char * kOldVarsFile
Definition: baseapi.cpp:99
void * cancel_this
called whenever progress increases
Definition: ocrclass.h:127
Orientation and script detection only.
Definition: publictypes.h:152
#define PERF_COUNT_END
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2294
void SetInputName(const char *name)
Definition: baseapi.cpp:199
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:521
void ReadConfigFile(const char *filename)
Definition: baseapi.cpp:444
PageIterator * AnalyseLayout()
Definition: baseapi.cpp:809
Pix * GetThresholdedImage()
Definition: baseapi.cpp:569
CANCEL_FUNC cancel
for errcode use
Definition: ocrclass.h:125
#define PERF_COUNT_START(FUNCT_NAME)
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:155
void PrintVariables(FILE *fp) const
Definition: baseapi.cpp:262
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:829
void set_deadline_msecs(inT32 deadline_msecs)
Definition: ocrclass.h:146
bool tesseract::TessBaseAPI::ProcessPages ( const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer renderer 
)

Turns images into symbolic text.

filename can point to a single image, a multi-page TIFF, or a plain text list of image filenames.

retry_config is useful for debugging. If not NULL, you can fall back to an alternate configuration if a page fails for some reason.

timeout_millisec terminates processing if any single page takes too long. Set to 0 for unlimited time.

renderer is responsible for creating the output. For example, use the TessTextRenderer if you want plaintext output, or the TessPDFRenderer to produce searchable PDF.

If tessedit_page_number is non-negative, will only process that single page. Works for multi-page tiff file, or filelist.

Returns true if successful, false on error.

Definition at line 1068 of file baseapi.cpp.

1070  {
1071  bool result =
1072  ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
1073  if (result) {
1076  tprintf("Write of TR file failed: %s\n", output_file_->string());
1077  return false;
1078  }
1079  }
1080  return result;
1081 }
#define tprintf(...)
Definition: tprintf.h:31
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:873
bool WriteTRFile(const STRING &filename)
Definition: blobclass.cpp:97
bool ProcessPagesInternal(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1094
const char * string() const
Definition: strngs.cpp:201
bool tesseract::TessBaseAPI::ProcessPagesInternal ( const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer renderer 
)

Definition at line 1094 of file baseapi.cpp.

1097  {
1098  PERF_COUNT_START("ProcessPages")
1099  bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
1100  if (stdInput) {
1101 #ifdef WIN32
1102  if (_setmode(_fileno(stdin), _O_BINARY) == -1)
1103  tprintf("ERROR: cin to binary: %s", strerror(errno));
1104 #endif // WIN32
1105  }
1106 
1107  if (stream_filelist) {
1108  return ProcessPagesFileList(stdin, NULL, retry_config,
1109  timeout_millisec, renderer,
1111  }
1112 
1113  // At this point we are officially in autodection territory.
1114  // That means any data in stdin must be buffered, to make it
1115  // seekable.
1116  std::string buf;
1117  const l_uint8 *data = NULL;
1118  if (stdInput) {
1119  buf.assign((std::istreambuf_iterator<char>(std::cin)),
1120  (std::istreambuf_iterator<char>()));
1121  data = reinterpret_cast<const l_uint8 *>(buf.data());
1122  }
1123 
1124  // Here is our autodetection
1125  int format;
1126  int r = (stdInput) ?
1127  findFileFormatBuffer(data, &format) :
1128  findFileFormat(filename, &format);
1129 
1130  // Maybe we have a filelist
1131  if (r != 0 || format == IFF_UNKNOWN) {
1132  STRING s;
1133  if (stdInput) {
1134  s = buf.c_str();
1135  } else {
1136  std::ifstream t(filename);
1137  std::string u((std::istreambuf_iterator<char>(t)),
1138  std::istreambuf_iterator<char>());
1139  s = u.c_str();
1140  }
1141  return ProcessPagesFileList(NULL, &s, retry_config,
1142  timeout_millisec, renderer,
1144  }
1145 
1146  // Maybe we have a TIFF which is potentially multipage
1147  bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
1148  format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
1149  format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
1150  format == IFF_TIFF_ZIP);
1151 
1152  // Fail early if we can, before producing any output
1153  Pix *pix = NULL;
1154  if (!tiff) {
1155  pix = (stdInput) ? pixReadMem(data, buf.size()) : pixRead(filename);
1156  if (pix == NULL) {
1157  return false;
1158  }
1159  }
1160 
1161  // Begin the output
1162  if (renderer && !renderer->BeginDocument(unknown_title_)) {
1163  pixDestroy(&pix);
1164  return false;
1165  }
1166 
1167  // Produce output
1168  r = (tiff) ?
1169  ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config,
1170  timeout_millisec, renderer,
1172  ProcessPage(pix, 0, filename, retry_config,
1173  timeout_millisec, renderer);
1174 
1175  // Clean up memory as needed
1176  pixDestroy(&pix);
1177 
1178  // End the output
1179  if (!r || (renderer && !renderer->EndDocument())) {
1180  return false;
1181  }
1183  return true;
1184 }
Definition: strngs.h:44
#define PERF_COUNT_END
#define tprintf(...)
Definition: tprintf.h:31
bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1186
const char * c_str() const
Definition: strngs.cpp:212
#define PERF_COUNT_START(FUNCT_NAME)
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
bool stream_filelist
Definition: baseapi.cpp:79
int tesseract::TessBaseAPI::Recognize ( ETEXT_DESC monitor)

Recognize the image from SetAndThresholdImage, generating Tesseract internal structures. Returns 0 on success. Optional. The Get*Text functions below will call Recognize if needed. After Recognize, the output is kept internally until the next SetImage.

Recognize the tesseract global image and return the result as Tesseract internal structures.

Definition at line 829 of file baseapi.cpp.

829  {
830  if (tesseract_ == NULL)
831  return -1;
832  if (FindLines() != 0)
833  return -1;
834  delete page_res_;
835  if (block_list_->empty()) {
836  page_res_ = new PAGE_RES(false, block_list_,
838  return 0; // Empty page.
839  }
840 
842  recognition_done_ = true;
847  } else {
848  // TODO(rays) LSTM here.
849  page_res_ = new PAGE_RES(false,
851  }
852  if (page_res_ == NULL) {
853  return -1;
854  }
857  return 0;
858  }
859 
860  if (truth_cb_ != NULL) {
861  tesseract_->wordrec_run_blamer.set_value(true);
862  PageIterator *page_it = new PageIterator(
867  image_height_, page_it, this->tesseract()->pix_grey());
868  delete page_it;
869  }
870 
871  int result = 0;
873  #ifndef GRAPHICS_DISABLED
875  #endif // GRAPHICS_DISABLED
876  // The page_res is invalid after an interactive session, so cleanup
877  // in a way that lets us continue to the next page without crashing.
878  delete page_res_;
879  page_res_ = NULL;
880  return -1;
882  STRING fontname;
883  ExtractFontName(*output_file_, &fontname);
885  } else if (tesseract_->tessedit_ambigs_training) {
886  FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
887  // OCR the page segmented into words by tesseract.
889  *input_file_, page_res_, monitor, training_output_file);
890  fclose(training_output_file);
891  } else {
892  // Now run the main recognition.
893  bool wait_for_text = true;
894  GetBoolVariable("paragraph_text_based", &wait_for_text);
895  if (!wait_for_text) DetectParagraphs(false);
896  if (tesseract_->recog_all_words(page_res_, monitor, NULL, NULL, 0)) {
897  if (wait_for_text) DetectParagraphs(true);
898  } else {
899  result = -1;
900  }
901  }
902  return result;
903 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:868
virtual void Run(A1, A2, A3, A4)=0
Tesseract * tesseract() const
Definition: baseapi.h:759
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:870
void CorrectClassifyWords(PAGE_RES *page_res)
Definition: applybox.cpp:772
Definition: strngs.h:44
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:877
void ExtractFontName(const STRING &filename, STRING *fontname)
Definition: blobclass.cpp:46
TruthCallback * truth_cb_
Definition: baseapi.h:878
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2294
bool wordrec_run_blamer
Definition: wordrec.h:168
Pix * pix_grey() const
Dict & getDict()
Definition: classify.h:65
const UNICHARSET & getUnicharset() const
Definition: dict.h:97
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:416
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:871
FILE * init_recog_training(const STRING &fname)
TESS_LOCAL void DetectParagraphs(bool after_text_recognition)
Definition: baseapi.cpp:2638
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:873
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
Definition: control.cpp:293
void ApplyBoxTraining(const STRING &fontname, PAGE_RES *page_res)
Definition: applybox.cpp:796
PAGE_RES * ApplyBoxes(const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list)
Definition: applybox.cpp:117
void recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
void pgeditor_main(int width, int height, PAGE_RES *page_res)
Definition: pgedit.cpp:337
bool tessedit_resegment_from_line_boxes
STRING * input_file_
Name used by training code.
Definition: baseapi.h:872
int GetScaledYResolution() const
Definition: thresholder.h:93
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:234
int tesseract::TessBaseAPI::RecognizeForChopTest ( ETEXT_DESC monitor)

Methods to retrieve information after SetAndThresholdImage(), Recognize() or TesseractRect(). (Recognize is called implicitly if needed.) Variant on Recognize used for testing the chopper.

Tests the chopper by exhaustively running chop_one_blob.

Definition at line 906 of file baseapi.cpp.

906  {
907  if (tesseract_ == NULL)
908  return -1;
909  if (thresholder_ == NULL || thresholder_->IsEmpty()) {
910  tprintf("Please call SetImage before attempting recognition.");
911  return -1;
912  }
913  if (page_res_ != NULL)
914  ClearResults();
915  if (FindLines() != 0)
916  return -1;
917  // Additional conditions under which chopper test cannot be run
918  if (tesseract_->interactive_display_mode) return -1;
919 
920  recognition_done_ = true;
921 
922  page_res_ = new PAGE_RES(false, block_list_,
924 
925  PAGE_RES_IT page_res_it(page_res_);
926 
927  while (page_res_it.word() != NULL) {
928  WERD_RES *word_res = page_res_it.word();
929  GenericVector<TBOX> boxes;
930  tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
931  page_res_it.row()->row, word_res);
932  page_res_it.forward();
933  }
934  return 0;
935 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:868
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:870
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:877
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2294
WERD * word
Definition: pageres.h:175
#define tprintf(...)
Definition: tprintf.h:31
bool IsEmpty() const
Return true if no image has been set.
Definition: thresholder.cpp:50
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:416
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:871
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
void MaximallyChopWord(const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res)
Definition: applybox.cpp:253
void tesseract::TessBaseAPI::RunAdaptiveClassifier ( TBLOB blob,
int  num_max_matches,
int *  unichar_ids,
float *  ratings,
int *  num_matches_returned 
)

Method to run adaptive classifier on a blob. It returns at most num_max_matches results.

Method to run adaptive classifier on a blob.

Definition at line 2829 of file baseapi.cpp.

2833  {
2834  BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
2835  tesseract_->AdaptiveClassifier(blob, choices);
2836  BLOB_CHOICE_IT choices_it(choices);
2837  int& index = *num_matches_returned;
2838  index = 0;
2839  for (choices_it.mark_cycle_pt();
2840  !choices_it.cycled_list() && index < num_max_matches;
2841  choices_it.forward()) {
2842  BLOB_CHOICE* choice = choices_it.data();
2843  unichar_ids[index] = choice->unichar_id();
2844  ratings[index] = choice->rating();
2845  ++index;
2846  }
2847  *num_matches_returned = index;
2848  delete choices;
2849 }
float rating() const
Definition: ratngs.h:79
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:185
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
UNICHAR_ID unichar_id() const
Definition: ratngs.h:76
void tesseract::TessBaseAPI::set_min_orientation_margin ( double  margin)

Definition at line 2426 of file baseapi.cpp.

2426  {
2427  tesseract_->min_orientation_margin.set_value(margin);
2428 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
void tesseract::TessBaseAPI::SetDictFunc ( DictFunc  f)

Sets Dict::letter_is_okay_ function to point to the given function.

Definition at line 2203 of file baseapi.cpp.

2203  {
2204  if (tesseract_ != NULL) {
2206  }
2207 }
int(Dict::* letter_is_okay_)(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const
Definition: dict.h:354
Dict & getDict()
Definition: classify.h:65
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
void tesseract::TessBaseAPI::SetFillLatticeFunc ( FillLatticeFunc  f)

Sets Wordrec::fill_lattice_ function to point to the given function.

Definition at line 2229 of file baseapi.cpp.

2229  {
2230  if (tesseract_ != NULL) tesseract_->fill_lattice_ = f;
2231 }
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:420
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
void tesseract::TessBaseAPI::SetImage ( const unsigned char *  imagedata,
int  width,
int  height,
int  bytes_per_pixel,
int  bytes_per_line 
)

Provide an image for Tesseract to recognize. Format is as TesseractRect above. Copies the image buffer and converts to Pix. SetImage clears all recognition results, and sets the rectangle to the full image, so it may be followed immediately by a GetUTF8Text, and it will automatically perform recognition.

Definition at line 521 of file baseapi.cpp.

523  {
524  if (InternalSetImage()) {
525  thresholder_->SetImage(imagedata, width, height,
526  bytes_per_pixel, bytes_per_line);
528  }
529 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:868
TESS_LOCAL bool InternalSetImage()
Definition: baseapi.cpp:2234
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:938
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:62
void tesseract::TessBaseAPI::SetImage ( Pix *  pix)

Provide an image for Tesseract to recognize. As with SetImage above, Tesseract takes its own copy of the image, so it need not persist until after Recognize. Pix vs raw, which to use? Use Pix where possible. Tesseract uses Pix as its internal representation and it is therefore more efficient to provide a Pix directly.

Definition at line 546 of file baseapi.cpp.

546  {
547  if (InternalSetImage()) {
548  thresholder_->SetImage(pix);
550  }
551 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:868
TESS_LOCAL bool InternalSetImage()
Definition: baseapi.cpp:2234
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:938
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:62
void tesseract::TessBaseAPI::SetProbabilityInContextFunc ( ProbabilityInContextFunc  f)

Sets Dict::probability_in_context_ function to point to the given function.

Sets Dict::probability_in_context_ function to point to the given function.

Parameters
f: A single function that returns the probability of the current "character" (in general a utf-8 string), given the context of a previous utf-8 string.

Definition at line 2217 of file baseapi.cpp.

2217  {
2218  if (tesseract_ != NULL) {
2220  // Set it for the sublangs too.
2221  int num_subs = tesseract_->num_sub_langs();
2222  for (int i = 0; i < num_subs; ++i) {
2224  }
2225  }
2226 }
double(Dict::* probability_in_context_)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Probability in context function used by the ngram permuter.
Definition: dict.h:364
Dict & getDict()
Definition: classify.h:65
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865
Tesseract * get_sub_lang(int index) const
int num_sub_langs() const
void tesseract::TessBaseAPI::SetRectangle ( int  left,
int  top,
int  width,
int  height 
)

Restrict recognition to a sub-rectangle of the image. Call after SetImage. Each SetRectangle clears the recognition results so multiple rectangles can be recognized with the same image.

Definition at line 558 of file baseapi.cpp.

558  {
559  if (thresholder_ == NULL)
560  return;
561  thresholder_->SetRectangle(left, top, width, height);
562  ClearResults();
563 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:868
void SetRectangle(int left, int top, int width, int height)
void tesseract::TessBaseAPI::SetSourceResolution ( int  ppi)

Set the resolution of the source image in pixels per inch so font size information can be calculated in results. Call this after SetImage().

Definition at line 531 of file baseapi.cpp.

531  {
532  if (thresholder_)
534  else
535  tprintf("Please call SetImage before SetSourceResolution.\n");
536 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:868
void SetSourceYResolution(int ppi)
Definition: thresholder.h:86
#define tprintf(...)
Definition: tprintf.h:31
void tesseract::TessBaseAPI::SetThresholder ( ImageThresholder thresholder)
inline

In extreme cases only, usually with a subclass of Thresholder, it is possible to provide a different Thresholder. The Thresholder may be preloaded with an image, settings etc, or they may be set after. Note that Tesseract takes ownership of the Thresholder and will delete it when it is replaced or the API is destructed.

Definition at line 375 of file baseapi.h.

375  {
376  delete thresholder_;
377  thresholder_ = thresholder;
378  ClearResults();
379  }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:868
Tesseract* tesseract::TessBaseAPI::tesseract ( ) const
inline

Definition at line 759 of file baseapi.h.

759 { return tesseract_; }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:865