tesseract  3.04.01
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Advanced API

Functions

void tesseract::TessBaseAPI::SetImage (const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
 
void tesseract::TessBaseAPI::SetImage (Pix *pix)
 
void tesseract::TessBaseAPI::SetSourceResolution (int ppi)
 
void tesseract::TessBaseAPI::SetRectangle (int left, int top, int width, int height)
 
void tesseract::TessBaseAPI::SetThresholder (ImageThresholder *thresholder)
 
Pix * tesseract::TessBaseAPI::GetThresholdedImage ()
 
Boxa * tesseract::TessBaseAPI::GetRegions (Pixa **pixa)
 
Boxa * tesseract::TessBaseAPI::GetTextlines (const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * tesseract::TessBaseAPI::GetTextlines (Pixa **pixa, int **blockids)
 
Boxa * tesseract::TessBaseAPI::GetStrips (Pixa **pixa, int **blockids)
 
Boxa * tesseract::TessBaseAPI::GetWords (Pixa **pixa)
 
Boxa * tesseract::TessBaseAPI::GetConnectedComponents (Pixa **cc)
 
Boxa * tesseract::TessBaseAPI::GetComponentImages (const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
 
Boxa * tesseract::TessBaseAPI::GetComponentImages (const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
 
int tesseract::TessBaseAPI::GetThresholdedImageScaleFactor () const
 
void tesseract::TessBaseAPI::DumpPGM (const char *filename)
 
PageIterator * tesseract::TessBaseAPI::AnalyseLayout ()
 
PageIterator * tesseract::TessBaseAPI::AnalyseLayout (bool merge_similar_words)
 
int tesseract::TessBaseAPI::Recognize (ETEXT_DESC *monitor)
 
int tesseract::TessBaseAPI::RecognizeForChopTest (ETEXT_DESC *monitor)
 
bool tesseract::TessBaseAPI::ProcessPages (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool tesseract::TessBaseAPI::ProcessPagesInternal (const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
bool tesseract::TessBaseAPI::ProcessPage (Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
 
ResultIterator * tesseract::TessBaseAPI::GetIterator ()
 
MutableIterator * tesseract::TessBaseAPI::GetMutableIterator ()
 
char * tesseract::TessBaseAPI::GetUTF8Text ()
 
char * tesseract::TessBaseAPI::GetHOCRText (int page_number)
 
char * tesseract::TessBaseAPI::GetBoxText (int page_number)
 
char * tesseract::TessBaseAPI::GetUNLVText ()
 
char * tesseract::TessBaseAPI::GetOsdText (int page_number)
 
int tesseract::TessBaseAPI::MeanTextConf ()
 
int * tesseract::TessBaseAPI::AllWordConfidences ()
 
bool tesseract::TessBaseAPI::AdaptToWordStr (PageSegMode mode, const char *wordstr)
 
void tesseract::TessBaseAPI::Clear ()
 
void tesseract::TessBaseAPI::End ()
 
static void tesseract::TessBaseAPI::ClearPersistentCache ()
 
int tesseract::TessBaseAPI::IsValidWord (const char *word)
 
bool tesseract::TessBaseAPI::IsValidCharacter (const char *utf8_character)
 
bool tesseract::TessBaseAPI::GetTextDirection (int *out_offset, float *out_slope)
 
void tesseract::TessBaseAPI::SetDictFunc (DictFunc f)
 
void tesseract::TessBaseAPI::SetProbabilityInContextFunc (ProbabilityInContextFunc f)
 
void tesseract::TessBaseAPI::SetFillLatticeFunc (FillLatticeFunc f)
 
bool tesseract::TessBaseAPI::DetectOS (OSResults *)
 
void tesseract::TessBaseAPI::GetFeaturesForBlob (TBLOB *blob, INT_FEATURE_STRUCT *int_features, int *num_features, int *feature_outline_index)
 
static ROW * tesseract::TessBaseAPI::FindRowForBox (BLOCK_LIST *blocks, int left, int top, int right, int bottom)
 
void tesseract::TessBaseAPI::RunAdaptiveClassifier (TBLOB *blob, int num_max_matches, int *unichar_ids, float *ratings, int *num_matches_returned)
 
const char * tesseract::TessBaseAPI::GetUnichar (int unichar_id)
 
const Dawg * tesseract::TessBaseAPI::GetDawg (int i) const
 
int tesseract::TessBaseAPI::NumDawgs () const
 
static ROW * tesseract::TessBaseAPI::MakeTessOCRRow (float baseline, float xheight, float descender, float ascender)
 
static TBLOB * tesseract::TessBaseAPI::MakeTBLOB (Pix *pix)
 
static void tesseract::TessBaseAPI::NormalizeTBLOB (TBLOB *tblob, ROW *row, bool numeric_mode)
 
Tesseract * tesseract::TessBaseAPI::tesseract () const
 
OcrEngineMode tesseract::TessBaseAPI::oem () const
 
void tesseract::TessBaseAPI::InitTruthCallback (TruthCallback *cb)
 
CubeRecoContext * tesseract::TessBaseAPI::GetCubeRecoContext () const
 
void tesseract::TessBaseAPI::set_min_orientation_margin (double margin)
 
void tesseract::TessBaseAPI::GetBlockTextOrientations (int **block_orientation, bool **vertical_writing)
 
BLOCK_LIST * tesseract::TessBaseAPI::FindLinesCreateBlockList ()
 
static void tesseract::TessBaseAPI::DeleteBlockList (BLOCK_LIST *block_list)
 

Detailed Description

The following methods break TesseractRect into pieces, so you can get hold of the thresholded image, get the text in different formats, get bounding boxes, confidences etc.

Function Documentation

bool tesseract::TessBaseAPI::AdaptToWordStr ( PageSegMode  mode,
const char *  wordstr 
)

Applies the given word to the adaptive classifier if possible. The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can tell the boundaries of the graphemes. Assumes that SetImage/SetRectangle have been used to set the image to the given word. The mode arg should be PSM_SINGLE_WORD or PSM_CIRCLE_WORD, as that will be used to control layout analysis. The currently set PageSegMode is preserved. Returns false if adaption was not possible for some reason.

Definition at line 1853 of file baseapi.cpp.

1853  {
1854  int debug = 0;
1855  GetIntVariable("applybox_debug", &debug);
1856  bool success = true;
1857  PageSegMode current_psm = GetPageSegMode();
1859  SetVariable("classify_enable_learning", "0");
1860  char* text = GetUTF8Text();
1861  if (debug) {
1862  tprintf("Trying to adapt \"%s\" to \"%s\"\n", text, wordstr);
1863  }
1864  if (text != NULL) {
1865  PAGE_RES_IT it(page_res_);
1866  WERD_RES* word_res = it.word();
1867  if (word_res != NULL) {
1868  word_res->word->set_text(wordstr);
1869  } else {
1870  success = false;
1871  }
1872  // Check to see if text matches wordstr.
1873  int w = 0;
1874  int t = 0;
1875  for (t = 0; text[t] != '\0'; ++t) {
1876  if (text[t] == '\n' || text[t] == ' ')
1877  continue;
1878  while (wordstr[w] != '\0' && wordstr[w] == ' ')
1879  ++w;
1880  if (text[t] != wordstr[w])
1881  break;
1882  ++w;
1883  }
1884  if (text[t] != '\0' || wordstr[w] != '\0') {
1885  // No match.
1886  delete page_res_;
1887  GenericVector<TBOX> boxes;
1891  PAGE_RES_IT pr_it(page_res_);
1892  if (pr_it.word() == NULL)
1893  success = false;
1894  else
1895  word_res = pr_it.word();
1896  } else {
1897  word_res->BestChoiceToCorrectText();
1898  }
1899  if (success) {
1900  tesseract_->EnableLearning = true;
1901  tesseract_->LearnWord(NULL, word_res);
1902  }
1903  delete [] text;
1904  } else {
1905  success = false;
1906  }
1907  SetPageSegMode(current_psm);
1908  return success;
1909 }
bool GetIntVariable(const char *name, int *value) const
Definition: baseapi.cpp:228
void set_text(const char *new_text)
Definition: werd.h:126
#define tprintf(...)
Definition: tprintf.h:31
bool SetVariable(const char *name, const char *value)
Definition: baseapi.cpp:216
PageSegMode GetPageSegMode() const
Definition: baseapi.cpp:467
void TidyUp(PAGE_RES *page_res)
Definition: applybox.cpp:706
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:857
CMD_EVENTS mode
Definition: pgedit.cpp:116
void BestChoiceToCorrectText()
Definition: pageres.cpp:917
WERD * word
Definition: pageres.h:175
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:856
void LearnWord(const char *fontname, WERD_RES *word)
Definition: adaptmatch.cpp:244
PAGE_RES * SetupApplyBoxes(const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list)
Definition: applybox.cpp:217
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
void SetPageSegMode(PageSegMode mode)
Definition: baseapi.cpp:460
void ReSegmentByClassification(PAGE_RES *page_res)
Definition: applybox.cpp:509
int * tesseract::TessBaseAPI::AllWordConfidences ( )

Returns all word confidences (between 0 and 100) in an array, terminated by -1. The calling function must delete [] after use. The number of confidences should correspond to the number of space-delimited words in GetUTF8Text.

Returns an array of all word confidences, terminated by -1.

Definition at line 1819 of file baseapi.cpp.

1819  {
1820  if (tesseract_ == NULL ||
1821  (!recognition_done_ && Recognize(NULL) < 0))
1822  return NULL;
1823  int n_word = 0;
1824  PAGE_RES_IT res_it(page_res_);
1825  for (res_it.restart_page(); res_it.word() != NULL; res_it.forward())
1826  n_word++;
1827 
1828  int* conf = new int[n_word+1];
1829  n_word = 0;
1830  for (res_it.restart_page(); res_it.word() != NULL; res_it.forward()) {
1831  WERD_RES *word = res_it.word();
1832  WERD_CHOICE* choice = word->best_choice;
1833  int w_conf = static_cast<int>(100 + 5 * choice->certainty());
1834  // This is the eq for converting Tesseract confidence to 1..100
1835  if (w_conf < 0) w_conf = 0;
1836  if (w_conf > 100) w_conf = 100;
1837  conf[n_word++] = w_conf;
1838  }
1839  conf[n_word] = -1;
1840  return conf;
1841 }
float certainty() const
Definition: ratngs.h:327
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:857
WERD * word
Definition: pageres.h:175
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
WERD_CHOICE * best_choice
Definition: pageres.h:219
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:864
PageIterator* tesseract::TessBaseAPI::AnalyseLayout ( )
inline

Runs page layout analysis in the mode set by SetPageSegMode. May optionally be called prior to Recognize to get access to just the page layout results. Returns an iterator to the results. If merge_similar_words is true, words are combined where suitable for use with a line recognizer. Use if you want to use AnalyseLayout to find the textlines, and then want to process textline fragments with an external line recognizer. Returns NULL on error or an empty page. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 500 of file baseapi.h.

500  {
501  return AnalyseLayout(false);
502  }
PageIterator * AnalyseLayout()
Definition: baseapi.h:500
PageIterator * tesseract::TessBaseAPI::AnalyseLayout ( bool  merge_similar_words)

Runs page layout analysis in the mode set by SetPageSegMode. May optionally be called prior to Recognize to get access to just the page layout results. Returns an iterator to the results. If merge_similar_words is true, words are combined where suitable for use with a line recognizer. Use if you want to use AnalyseLayout to find the textlines, and then want to process textline fragments with an external line recognizer. Returns NULL on error or an empty page. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 812 of file baseapi.cpp.

812  {
813  if (FindLines() == 0) {
814  if (block_list_->empty())
815  return NULL; // The page was empty.
816  page_res_ = new PAGE_RES(merge_similar_words, block_list_, NULL);
817  DetectParagraphs(false);
818  return new PageIterator(
822  }
823  return NULL;
824 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:854
int GetScaledYResolution() const
Definition: thresholder.h:93
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:857
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2132
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:856
TESS_LOCAL void DetectParagraphs(bool after_text_recognition)
Definition: baseapi.cpp:2476
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
void tesseract::TessBaseAPI::Clear ( )

Free up recognition results and any stored image data, without actually freeing any recognition data that would be time-consuming to reload. Afterwards, you must call SetImage or TesseractRect before doing any Recognize or Get* operation.

Definition at line 1917 of file baseapi.cpp.

1917  {
1918  if (thresholder_ != NULL)
1919  thresholder_->Clear();
1920  ClearResults();
1921  SetInputImage(NULL);
1922 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:854
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:936
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
Definition: thresholder.cpp:45
void tesseract::TessBaseAPI::ClearPersistentCache ( )
static

Clear any library-level memory caches. There are a variety of expensive-to-load constant data structures (mostly language dictionaries) that are cached globally – surviving the Init() and End() of individual TessBaseAPI's. This function allows the clearing of these caches.

Definition at line 1989 of file baseapi.cpp.

1989  {
1991 }
static DawgCache * GlobalDawgCache()
Definition: dict.cpp:186
void DeleteUnusedDawgs()
Definition: dawg_cache.h:46
void tesseract::TessBaseAPI::DeleteBlockList ( BLOCK_LIST *  block_list)
static

Delete a block list. This is to keep BLOCK_LIST pointer opaque and let go of including the other headers.

Definition at line 2342 of file baseapi.cpp.

2342  {
2343  delete block_list;
2344 }
bool tesseract::TessBaseAPI::DetectOS ( OSResults *  osr)

Estimates the Orientation And Script of the image.

Returns
true if the image was processed successfully.

Estimates the Orientation And Script of the image. Returns true if the image was processed successfully.

Definition at line 2253 of file baseapi.cpp.

2253  {
2254  if (tesseract_ == NULL)
2255  return false;
2256  ClearResults();
2257  if (tesseract_->pix_binary() == NULL)
2259  if (input_file_ == NULL)
2260  input_file_ = new STRING(kInputFile);
2262 }
int orientation_and_script_detection(STRING &filename, OSResults *osr, tesseract::Tesseract *tess)
Definition: osdetect.cpp:189
const char * kInputFile
Definition: baseapi.cpp:97
virtual TESS_LOCAL void Threshold(Pix **pix)
Definition: baseapi.cpp:2091
Definition: strngs.h:44
Pix * pix_binary() const
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
STRING * input_file_
Name used by training code.
Definition: baseapi.h:858
void tesseract::TessBaseAPI::DumpPGM ( const char *  filename)

Dump the internal binary image to a PGM file.

Deprecated:
Use GetThresholdedImage and write the image using pixWrite instead if possible.

Dump the internal binary image to a PGM file.

Definition at line 732 of file baseapi.cpp.

732  {
733  if (tesseract_ == NULL)
734  return;
735  FILE *fp = fopen(filename, "wb");
736  Pix* pix = tesseract_->pix_binary();
737  int width = pixGetWidth(pix);
738  int height = pixGetHeight(pix);
739  l_uint32* data = pixGetData(pix);
740  fprintf(fp, "P5 %d %d 255\n", width, height);
741  for (int y = 0; y < height; ++y, data += pixGetWpl(pix)) {
742  for (int x = 0; x < width; ++x) {
743  uinT8 b = GET_DATA_BIT(data, x) ? 0 : 255;
744  fwrite(&b, 1, 1, fp);
745  }
746  }
747  fclose(fp);
748 }
unsigned char uinT8
Definition: host.h:99
Pix * pix_binary() const
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
void tesseract::TessBaseAPI::End ( )

Close down tesseract and free up all memory. End() is equivalent to destructing and reconstructing your TessBaseAPI. Once End() has been used, none of the other API functions may be used other than Init and anything declared above it in the class definition.

Definition at line 1930 of file baseapi.cpp.

1930  {
1931  if (thresholder_ != NULL) {
1932  delete thresholder_;
1933  thresholder_ = NULL;
1934  }
1935  if (page_res_ != NULL) {
1936  delete page_res_;
1937  page_res_ = NULL;
1938  }
1939  if (block_list_ != NULL) {
1940  delete block_list_;
1941  block_list_ = NULL;
1942  }
1943  if (paragraph_models_ != NULL) {
1945  delete paragraph_models_;
1946  paragraph_models_ = NULL;
1947  }
1948  if (tesseract_ != NULL) {
1949  delete tesseract_;
1950  if (osd_tesseract_ == tesseract_)
1951  osd_tesseract_ = NULL;
1952  tesseract_ = NULL;
1953  }
1954  if (osd_tesseract_ != NULL) {
1955  delete osd_tesseract_;
1956  osd_tesseract_ = NULL;
1957  }
1958  if (equ_detect_ != NULL) {
1959  delete equ_detect_;
1960  equ_detect_ = NULL;
1961  }
1962  if (input_file_ != NULL) {
1963  delete input_file_;
1964  input_file_ = NULL;
1965  }
1966  if (input_image_ != NULL) {
1967  pixDestroy(&input_image_);
1968  input_image_ = NULL;
1969  }
1970  if (output_file_ != NULL) {
1971  delete output_file_;
1972  output_file_ = NULL;
1973  }
1974  if (datapath_ != NULL) {
1975  delete datapath_;
1976  datapath_ = NULL;
1977  }
1978  if (language_ != NULL) {
1979  delete language_;
1980  language_ = NULL;
1981  }
1982 }
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:853
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:855
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:854
Pix * input_image_
Image used for searchable PDF.
Definition: baseapi.h:859
STRING * language_
Last initialized language.
Definition: baseapi.h:862
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:860
void delete_data_pointers()
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:857
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:861
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:852
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:856
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
STRING * input_file_
Name used by training code.
Definition: baseapi.h:858
BLOCK_LIST * tesseract::TessBaseAPI::FindLinesCreateBlockList ( )

Find lines from the image making the BLOCK_LIST.

Definition at line 2330 of file baseapi.cpp.

2330  {
2331  FindLines();
2332  BLOCK_LIST* result = block_list_;
2333  block_list_ = NULL;
2334  return result;
2335 }
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2132
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:856
ROW * tesseract::TessBaseAPI::FindRowForBox ( BLOCK_LIST *  blocks,
int  left,
int  top,
int  right,
int  bottom 
)
static

This method returns the row to which a box of specified dimensions would belong. If no good match is found, it returns NULL.

Definition at line 2642 of file baseapi.cpp.

2643  {
2644  TBOX box(left, bottom, right, top);
2645  BLOCK_IT b_it(blocks);
2646  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
2647  BLOCK* block = b_it.data();
2648  if (!box.major_overlap(block->bounding_box()))
2649  continue;
2650  ROW_IT r_it(block->row_list());
2651  for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
2652  ROW* row = r_it.data();
2653  if (!box.major_overlap(row->bounding_box()))
2654  continue;
2655  WERD_IT w_it(row->word_list());
2656  for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
2657  WERD* word = w_it.data();
2658  if (box.major_overlap(word->bounding_box()))
2659  return row;
2660  }
2661  }
2662  }
2663  return NULL;
2664 }
TBOX bounding_box() const
Definition: werd.cpp:160
Definition: werd.h:60
Definition: ocrrow.h:32
Definition: ocrblock.h:30
TBOX bounding_box() const
Definition: ocrrow.h:85
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:67
Definition: rect.h:30
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:120
WERD_LIST * word_list()
Definition: ocrrow.h:52
void tesseract::TessBaseAPI::GetBlockTextOrientations ( int **  block_orientation,
bool **  vertical_writing 
)

Return text orientation of each block as determined by an earlier run of layout analysis.

Return text orientation of each block as determined in an earlier page layout analysis operation. Orientation is returned as the number of ccw 90-degree rotations (in [0..3]) required to make the text in the block upright (readable). Note that this may not necessarily be the block orientation preferred for recognition (such as the case of vertical CJK text).

Also returns whether the text in the block is believed to have vertical writing direction (when in an upright page orientation).

The returned array is of length equal to the number of text blocks, which may be less than the total number of blocks. The ordering is intended to be consistent with GetTextlines().

Definition at line 2282 of file baseapi.cpp.

2283  {
2284  delete[] *block_orientation;
2285  *block_orientation = NULL;
2286  delete[] *vertical_writing;
2287  *vertical_writing = NULL;
2288  BLOCK_IT block_it(block_list_);
2289 
2290  block_it.move_to_first();
2291  int num_blocks = 0;
2292  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2293  if (!block_it.data()->poly_block()->IsText()) {
2294  continue;
2295  }
2296  ++num_blocks;
2297  }
2298  if (!num_blocks) {
2299  tprintf("WARNING: Found no blocks\n");
2300  return;
2301  }
2302  *block_orientation = new int[num_blocks];
2303  *vertical_writing = new bool[num_blocks];
2304  block_it.move_to_first();
2305  int i = 0;
2306  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
2307  block_it.forward()) {
2308  if (!block_it.data()->poly_block()->IsText()) {
2309  continue;
2310  }
2311  FCOORD re_rotation = block_it.data()->re_rotation();
2312  float re_theta = re_rotation.angle();
2313  FCOORD classify_rotation = block_it.data()->classify_rotation();
2314  float classify_theta = classify_rotation.angle();
2315  double rot_theta = - (re_theta - classify_theta) * 2.0 / PI;
2316  if (rot_theta < 0) rot_theta += 4;
2317  int num_rotations = static_cast<int>(rot_theta + 0.5);
2318  (*block_orientation)[i] = num_rotations;
2319  // The classify_rotation is non-zero only if the text has vertical
2320  // writing direction.
2321  (*vertical_writing)[i] = classify_rotation.y() != 0.0f;
2322  ++i;
2323  }
2324 }
#define PI
Definition: const.h:19
#define tprintf(...)
Definition: tprintf.h:31
float angle() const
find angle
Definition: points.h:249
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:856
float y() const
Definition: points.h:212
Definition: points.h:189
char * tesseract::TessBaseAPI::GetBoxText ( int  page_number)

The recognized text is returned as a char* which is coded in the same format as a box file used in training. Returned string must be freed with the delete [] operator. Constructs coordinates in the original image - not just the rectangle. page_number is a 0-based page index that will appear in the box file.

The recognized text is returned as a char* which is coded as a UTF8 box file and must be freed with the delete [] operator. page_number is a 0-based page index that will appear in the box file.

Definition at line 1609 of file baseapi.cpp.

1609  {
1610  if (tesseract_ == NULL ||
1611  (!recognition_done_ && Recognize(NULL) < 0))
1612  return NULL;
1613  int blob_count;
1614  int utf8_length = TextLength(&blob_count);
1615  int total_length = blob_count * kBytesPerBoxFileLine + utf8_length +
1617  char* result = new char[total_length];
1618  strcpy(result, "\0");
1619  int output_length = 0;
1620  LTRResultIterator* it = GetLTRIterator();
1621  do {
1622  int left, top, right, bottom;
1623  if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) {
1624  char* text = it->GetUTF8Text(RIL_SYMBOL);
1625  // Tesseract uses space for recognition failure. Fix to a reject
1626  // character, kTesseractReject so we don't create illegal box files.
1627  for (int i = 0; text[i] != '\0'; ++i) {
1628  if (text[i] == ' ')
1629  text[i] = kTesseractReject;
1630  }
1631  snprintf(result + output_length, total_length - output_length,
1632  "%s %d %d %d %d %d\n",
1633  text, left, image_height_ - bottom,
1634  right, image_height_ - top, page_number);
1635  output_length += strlen(result + output_length);
1636  delete [] text;
1637  // Just in case...
1638  if (output_length + kMaxBytesPerLine > total_length)
1639  break;
1640  }
1641  } while (it->Next(RIL_SYMBOL));
1642  delete it;
1643  return result;
1644 }
TESS_LOCAL int TextLength(int *blob_count)
Definition: baseapi.cpp:2223
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
const char kTesseractReject
Definition: baseapi.cpp:88
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
const int kMaxBytesPerLine
Definition: baseapi.cpp:1601
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:864
TESS_LOCAL LTRResultIterator * GetLTRIterator()
Definition: baseapi.cpp:1259
const int kBytesPerBoxFileLine
Definition: baseapi.cpp:1592
Boxa * tesseract::TessBaseAPI::GetComponentImages ( const PageIteratorLevel  level,
const bool  text_only,
const bool  raw_image,
const int  raw_padding,
Pixa **  pixa,
int **  blockids,
int **  paraids 
)

Get the components of the given level (block, textline, word etc.) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not NULL, the block-id of each component is also returned as an array of one element per component. delete [] after use. If paraids is not NULL, the paragraph-id of each component within its block is also returned as an array of one element per component. delete [] after use. If raw_image is true, then portions of the original image are extracted instead of the thresholded image and padded with raw_padding. If text_only is true, then only text components are returned.

Get the given level kind of components (block, textline, word etc.) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not NULL, the block-id of each component is also returned as an array of one element per component. delete [] after use. If text_only is true, then only text components are returned.

Definition at line 642 of file baseapi.cpp.

646  {
647  PageIterator* page_it = GetIterator();
648  if (page_it == NULL)
649  page_it = AnalyseLayout();
650  if (page_it == NULL)
651  return NULL; // Failed.
652 
653  // Count the components to get a size for the arrays.
654  int component_count = 0;
655  int left, top, right, bottom;
656 
657  TessResultCallback<bool>* get_bbox = NULL;
658  if (raw_image) {
659  // Get bounding box in original raw image with padding.
661  level, raw_padding,
662  &left, &top, &right, &bottom);
663  } else {
664  // Get bounding box from binarized imaged. Note that this could be
665  // differently scaled from the original image.
666  get_bbox = NewPermanentTessCallback(page_it,
668  level, &left, &top, &right, &bottom);
669  }
670  do {
671  if (get_bbox->Run() &&
672  (!text_only || PTIsTextType(page_it->BlockType())))
673  ++component_count;
674  } while (page_it->Next(level));
675 
676  Boxa* boxa = boxaCreate(component_count);
677  if (pixa != NULL)
678  *pixa = pixaCreate(component_count);
679  if (blockids != NULL)
680  *blockids = new int[component_count];
681  if (paraids != NULL)
682  *paraids = new int[component_count];
683 
684  int blockid = 0;
685  int paraid = 0;
686  int component_index = 0;
687  page_it->Begin();
688  do {
689  if (get_bbox->Run() &&
690  (!text_only || PTIsTextType(page_it->BlockType()))) {
691  Box* lbox = boxCreate(left, top, right - left, bottom - top);
692  boxaAddBox(boxa, lbox, L_INSERT);
693  if (pixa != NULL) {
694  Pix* pix = NULL;
695  if (raw_image) {
696  pix = page_it->GetImage(level, raw_padding, input_image_,
697  &left, &top);
698  } else {
699  pix = page_it->GetBinaryImage(level);
700  }
701  pixaAddPix(*pixa, pix, L_INSERT);
702  pixaAddBox(*pixa, lbox, L_CLONE);
703  }
704  if (paraids != NULL) {
705  (*paraids)[component_index] = paraid;
706  if (page_it->IsAtFinalElement(RIL_PARA, level))
707  ++paraid;
708  }
709  if (blockids != NULL) {
710  (*blockids)[component_index] = blockid;
711  if (page_it->IsAtFinalElement(RIL_BLOCK, level)) {
712  ++blockid;
713  paraid = 0;
714  }
715  }
716  ++component_index;
717  }
718  } while (page_it->Next(level));
719  delete page_it;
720  delete get_bbox;
721  return boxa;
722 }
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:70
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
PageIterator * AnalyseLayout()
Definition: baseapi.h:500
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
Pix * input_image_
Image used for searchable PDF.
Definition: baseapi.h:859
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
ResultIterator * GetIterator()
Definition: baseapi.cpp:1276
virtual R Run()=0
Boxa* tesseract::TessBaseAPI::GetComponentImages ( const PageIteratorLevel  level,
const bool  text_only,
Pixa **  pixa,
int **  blockids 
)
inline

Definition at line 464 of file baseapi.h.

466  {
467  return GetComponentImages(level, text_only, false, 0, pixa, blockids, NULL);
468  }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:642
Boxa * tesseract::TessBaseAPI::GetConnectedComponents ( Pixa **  pixa)

Gets the individual connected (text) components (created after the page segmentation step, but before recognition) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. Note: the caller is responsible for calling boxaDestroy() on the returned Boxa array and pixaDestroy() on the cc array.

Gets the individual connected (text) components (created after pages segmentation step, but before recognition) as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 630 of file baseapi.cpp.

630  {
631  return GetComponentImages(RIL_SYMBOL, true, pixa, NULL);
632 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:642
CubeRecoContext * tesseract::TessBaseAPI::GetCubeRecoContext ( ) const

Return a pointer to underlying CubeRecoContext object if present.

Definition at line 2707 of file baseapi.cpp.

2707  {
2708  return (tesseract_ == NULL) ? NULL : tesseract_->GetCubeRecoContext();
2709 }
CubeRecoContext * GetCubeRecoContext()
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
const Dawg * tesseract::TessBaseAPI::GetDawg ( int  i) const

Return the pointer to the i-th dawg loaded into tesseract_ object.

Definition at line 2695 of file baseapi.cpp.

2695  {
2696  if (tesseract_ == NULL || i >= NumDawgs()) return NULL;
2697  return tesseract_->getDict().GetDawg(i);
2698 }
int NumDawgs() const
Definition: baseapi.cpp:2701
Dict & getDict()
Definition: classify.h:65
const Dawg * GetDawg(int index) const
Return i-th dawg pointer recorded in the dawgs_ vector.
Definition: dict.h:406
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
void tesseract::TessBaseAPI::GetFeaturesForBlob ( TBLOB blob,
INT_FEATURE_STRUCT int_features,
int *  num_features,
int *  feature_outline_index 
)

This method returns the features associated with the input blob.

This method returns the features associated with the input blob.

Definition at line 2614 of file baseapi.cpp.

2617  {
2618  GenericVector<int> outline_counts;
2621  INT_FX_RESULT_STRUCT fx_info;
2622  tesseract_->ExtractFeatures(*blob, false, &bl_features,
2623  &cn_features, &fx_info, &outline_counts);
2624  if (cn_features.size() == 0 || cn_features.size() > MAX_NUM_INT_FEATURES) {
2625  *num_features = 0;
2626  return; // Feature extraction failed.
2627  }
2628  *num_features = cn_features.size();
2629  memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0]));
2630  // TODO(rays) Pass outline_counts back and simplify the calling code.
2631  if (feature_outline_index != NULL) {
2632  int f = 0;
2633  for (int i = 0; i < outline_counts.size(); ++i) {
2634  while (f < outline_counts[i])
2635  feature_outline_index[f++] = i;
2636  }
2637  }
2638 }
int size() const
Definition: genericvector.h:72
#define MAX_NUM_INT_FEATURES
Definition: intproto.h:132
static void ExtractFeatures(const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
Definition: intfx.cpp:445
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
char * tesseract::TessBaseAPI::GetHOCRText ( int  page_number)

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based.

Make an HTML-formatted string with hOCR markup from the internal data structures. page_number is 0-based but will appear in the output as 1-based. Image name/input_file_ can be set by SetInputName before calling GetHOCRText. STL removed from original patch submission and refactored by rays.

Definition at line 1428 of file baseapi.cpp.

1428  {
1429  if (tesseract_ == NULL ||
1430  (page_res_ == NULL && Recognize(NULL) < 0))
1431  return NULL;
1432 
1433  int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1434  int page_id = page_number + 1; // hOCR uses 1-based page numbers.
1435  bool font_info = false;
1436  GetBoolVariable("hocr_font_info", &font_info);
1437 
1438  STRING hocr_str("");
1439 
1440  if (input_file_ == NULL)
1441  SetInputName(NULL);
1442 
1443 #ifdef _WIN32
1444  // convert input name from ANSI encoding to utf-8
1445  int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
1446  NULL, 0);
1447  wchar_t *uni16_str = new WCHAR[str16_len];
1448  str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1,
1449  uni16_str, str16_len);
1450  int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL,
1451  0, NULL, NULL);
1452  char *utf8_str = new char[utf8_len];
1453  WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str,
1454  utf8_len, NULL, NULL);
1455  *input_file_ = utf8_str;
1456  delete[] uni16_str;
1457  delete[] utf8_str;
1458 #endif
1459 
1460  hocr_str += " <div class='ocr_page'";
1461  AddIdTohOCR(&hocr_str, "page", page_id, -1);
1462  hocr_str += " title='image \"";
1463  if (input_file_) {
1464  hocr_str += HOcrEscape(input_file_->string());
1465  } else {
1466  hocr_str += "unknown";
1467  }
1468  hocr_str.add_str_int("\"; bbox ", rect_left_);
1469  hocr_str.add_str_int(" ", rect_top_);
1470  hocr_str.add_str_int(" ", rect_width_);
1471  hocr_str.add_str_int(" ", rect_height_);
1472  hocr_str.add_str_int("; ppageno ", page_number);
1473  hocr_str += "'>\n";
1474 
1475  ResultIterator *res_it = GetIterator();
1476  while (!res_it->Empty(RIL_BLOCK)) {
1477  if (res_it->Empty(RIL_WORD)) {
1478  res_it->Next(RIL_WORD);
1479  continue;
1480  }
1481 
1482  // Open any new block/paragraph/textline.
1483  if (res_it->IsAtBeginningOf(RIL_BLOCK)) {
1484  hocr_str += " <div class='ocr_carea'";
1485  AddIdTohOCR(&hocr_str, "block", page_id, bcnt);
1486  AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str);
1487  }
1488  if (res_it->IsAtBeginningOf(RIL_PARA)) {
1489  hocr_str += "\n <p class='ocr_par'";
1490  if (res_it->ParagraphIsLtr()) {
1491  hocr_str += " dir='ltr'";
1492  } else {
1493  hocr_str += " dir='rtl'";
1494  }
1495  AddIdTohOCR(&hocr_str, "par", page_id, pcnt);
1496  AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
1497  }
1498  if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
1499  hocr_str += "\n <span class='ocr_line'";
1500  AddIdTohOCR(&hocr_str, "line", page_id, lcnt);
1501  AddBoxTohOCR(res_it, RIL_TEXTLINE, &hocr_str);
1502  }
1503 
1504  // Now, process the word...
1505  hocr_str += "<span class='ocrx_word'";
1506  AddIdTohOCR(&hocr_str, "word", page_id, wcnt);
1507  int left, top, right, bottom;
1508  bool bold, italic, underlined, monospace, serif, smallcaps;
1509  int pointsize, font_id;
1510  const char *font_name;
1511  res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
1512  font_name = res_it->WordFontAttributes(&bold, &italic, &underlined,
1513  &monospace, &serif, &smallcaps,
1514  &pointsize, &font_id);
1515  hocr_str.add_str_int(" title='bbox ", left);
1516  hocr_str.add_str_int(" ", top);
1517  hocr_str.add_str_int(" ", right);
1518  hocr_str.add_str_int(" ", bottom);
1519  hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD));
1520  if (font_info) {
1521  if (font_name) {
1522  hocr_str += "; x_font ";
1523  hocr_str += HOcrEscape(font_name);
1524  }
1525  hocr_str.add_str_int("; x_fsize ", pointsize);
1526  }
1527  hocr_str += "'";
1528  if (res_it->WordRecognitionLanguage()) {
1529  hocr_str += " lang='";
1530  hocr_str += res_it->WordRecognitionLanguage();
1531  hocr_str += "'";
1532  }
1533  switch (res_it->WordDirection()) {
1534  case DIR_LEFT_TO_RIGHT: hocr_str += " dir='ltr'"; break;
1535  case DIR_RIGHT_TO_LEFT: hocr_str += " dir='rtl'"; break;
1536  default: // Do nothing.
1537  break;
1538  }
1539  hocr_str += ">";
1540  bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD);
1541  bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD);
1542  bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD);
1543  if (bold) hocr_str += "<strong>";
1544  if (italic) hocr_str += "<em>";
1545  do {
1546  const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL);
1547  if (grapheme && grapheme[0] != 0) {
1548  hocr_str += HOcrEscape(grapheme);
1549  }
1550  delete []grapheme;
1551  res_it->Next(RIL_SYMBOL);
1552  } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
1553  if (italic) hocr_str += "</em>";
1554  if (bold) hocr_str += "</strong>";
1555  hocr_str += "</span> ";
1556  wcnt++;
1557  // Close any ending block/paragraph/textline.
1558  if (last_word_in_line) {
1559  hocr_str += "\n </span>";
1560  lcnt++;
1561  }
1562  if (last_word_in_para) {
1563  hocr_str += "\n </p>\n";
1564  pcnt++;
1565  }
1566  if (last_word_in_block) {
1567  hocr_str += " </div>\n";
1568  bcnt++;
1569  }
1570  }
1571  hocr_str += " </div>\n";
1572 
1573  char *ret = new char[hocr_str.length() + 1];
1574  strcpy(ret, hocr_str.string());
1575  delete res_it;
1576  return ret;
1577 }
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:236
void SetInputName(const char *name)
Definition: baseapi.cpp:201
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:857
void add_str_int(const char *str, int number)
Definition: strngs.cpp:376
ResultIterator * GetIterator()
Definition: baseapi.cpp:1276
STRING HOcrEscape(const char *text)
Definition: baseapi.cpp:2713
Definition: strngs.h:44
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
const char * string() const
Definition: strngs.cpp:193
STRING * input_file_
Name used by training code.
Definition: baseapi.h:858
ResultIterator * tesseract::TessBaseAPI::GetIterator ( )

Get a reading-order iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 1276 of file baseapi.cpp.

1276  {
1277  if (tesseract_ == NULL || page_res_ == NULL)
1278  return NULL;
1279  return ResultIterator::StartOfParagraph(LTRResultIterator(
1283 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:854
static ResultIterator * StartOfParagraph(const LTRResultIterator &resit)
int GetScaledYResolution() const
Definition: thresholder.h:93
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:857
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
MutableIterator * tesseract::TessBaseAPI::GetMutableIterator ( )

Get a mutable iterator to the results of LayoutAnalysis and/or Recognize. The returned iterator must be deleted after use. WARNING! This class points to data held within the TessBaseAPI class, and therefore can only be used while the TessBaseAPI class still exists and has not been subjected to a call of Init, SetImage, Recognize, Clear, End, DetectOS, or anything else that changes the internal PAGE_RES.

Definition at line 1293 of file baseapi.cpp.

1293  {
1294  if (tesseract_ == NULL || page_res_ == NULL)
1295  return NULL;
1296  return new MutableIterator(page_res_, tesseract_,
1300 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:854
int GetScaledYResolution() const
Definition: thresholder.h:93
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:857
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
char * tesseract::TessBaseAPI::GetOsdText ( int  page_number)

The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator. page_number is a 0-based page index that will appear in the osd file.

Definition at line 1770 of file baseapi.cpp.

1770  {
1771  OSResults osr;
1772 
1773  bool osd = DetectOS(&osr);
1774  if (!osd) {
1775  return NULL;
1776  }
1777 
1778  int orient_id = osr.best_result.orientation_id;
1779  int script_id = osr.get_best_script(orient_id);
1780  float orient_conf = osr.best_result.oconfidence;
1781  float script_conf = osr.best_result.sconfidence;
1782  const char* script_name =
1783  osr.unicharset->get_script_from_script_id(script_id);
1784 
1785  // clockwise orientation of the input image, in degrees
1786  int orient_deg = orient_id * 90;
1787 
1788  // clockwise rotation needed to make the page upright
1789  int rotate = OrientationIdToValue(orient_id);
1790 
1791  char* osd_buf = new char[255];
1792  snprintf(osd_buf, 255,
1793  "Page number: %d\n"
1794  "Orientation in degrees: %d\n"
1795  "Rotate: %d\n"
1796  "Orientation confidence: %.2f\n"
1797  "Script: %s\n"
1798  "Script confidence: %.2f\n",
1799  page_number,
1800  orient_deg, rotate, orient_conf,
1801  script_name, script_conf);
1802 
1803  return osd_buf;
1804 }
float sconfidence
Definition: osdetect.h:43
int OrientationIdToValue(const int &id)
Definition: osdetect.cpp:563
int orientation_id
Definition: osdetect.h:41
float oconfidence
Definition: osdetect.h:44
const char * get_script_from_script_id(int id) const
Definition: unicharset.h:802
bool DetectOS(OSResults *)
Definition: baseapi.cpp:2253
UNICHARSET * unicharset
Definition: osdetect.h:78
TESS_API int get_best_script(int orientation_id) const
Definition: osdetect.cpp:117
OSBestResult best_result
Definition: osdetect.h:79
Boxa * tesseract::TessBaseAPI::GetRegions ( Pixa **  pixa)

Get the result of page layout analysis as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 585 of file baseapi.cpp.

585  {
586  return GetComponentImages(RIL_BLOCK, false, pixa, NULL);
587 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:642
Boxa * tesseract::TessBaseAPI::GetStrips ( Pixa **  pixa,
int **  blockids 
)

Get textlines and strips of image regions as a leptonica-style Boxa, Pixa pair, in reading order. Enables downstream handling of non-rectangular regions. Can be called before or after Recognize. If blockids is not NULL, the block-id of each line is also returned as an array of one element per line. delete [] after use.

Definition at line 611 of file baseapi.cpp.

611  {
612  return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids);
613 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:642
bool tesseract::TessBaseAPI::GetTextDirection ( int *  out_offset,
float *  out_slope 
)

Definition at line 2008 of file baseapi.cpp.

2008  {
2009  PageIterator* it = AnalyseLayout();
2010  if (it == NULL) {
2011  return false;
2012  }
2013  int x1, x2, y1, y2;
2014  it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2);
2015  // Calculate offset and slope (NOTE: Kind of ugly)
2016  if (x2 <= x1) x2 = x1 + 1;
2017  // Convert the point pair to slope/offset of the baseline (in image coords.)
2018  *out_slope = static_cast<float>(y2 - y1) / (x2 - x1);
2019  *out_offset = static_cast<int>(y1 - *out_slope * x1);
2020  // Get the y-coord of the baseline at the left and right edges of the
2021  // textline's bounding box.
2022  int left, top, right, bottom;
2023  if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) {
2024  delete it;
2025  return false;
2026  }
2027  int left_y = IntCastRounded(*out_slope * left + *out_offset);
2028  int right_y = IntCastRounded(*out_slope * right + *out_offset);
2029  // Shift the baseline down so it passes through the nearest bottom-corner
2030  // of the textline's bounding box. This is the difference between the y
2031  // at the lowest (max) edge of the box and the actual box bottom.
2032  *out_offset += bottom - MAX(left_y, right_y);
2033  // Switch back to bottom-up tesseract coordinates. Requires negation of
2034  // the slope and height - offset for the offset.
2035  *out_slope = -*out_slope;
2036  *out_offset = rect_height_ - *out_offset;
2037  delete it;
2038 
2039  return true;
2040 }
int IntCastRounded(double x)
Definition: helpers.h:172
PageIterator * AnalyseLayout()
Definition: baseapi.h:500
#define MAX(x, y)
Definition: ndminx.h:24
Boxa * tesseract::TessBaseAPI::GetTextlines ( const bool  raw_image,
const int  raw_padding,
Pixa **  pixa,
int **  blockids,
int **  paraids 
)

Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If raw_image is true, then extract from the original image instead of the thresholded image and pad by raw_padding pixels. If blockids is not NULL, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not NULL, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.

Get the textlines as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize. If blockids is not NULL, the block-id of each line is also returned as an array of one element per line. delete [] after use. If paraids is not NULL, the paragraph-id of each line within its block is also returned as an array of one element per line. delete [] after use.

Definition at line 597 of file baseapi.cpp.

598  {
599  return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding,
600  pixa, blockids, paraids);
601 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:642
Boxa* tesseract::TessBaseAPI::GetTextlines ( Pixa **  pixa,
int **  blockids 
)
inline

Definition at line 415 of file baseapi.h.

415  {
416  return GetTextlines(false, 0, pixa, blockids, NULL);
417  }
Boxa * GetTextlines(const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:597
Pix * tesseract::TessBaseAPI::GetThresholdedImage ( )

Get a copy of the internal thresholded image from Tesseract. Caller takes ownership of the Pix and must pixDestroy it. May be called any time after SetImage, or after TesseractRect.

ONLY available after SetImage if you have Leptonica installed. Get a copy of the internal thresholded image from Tesseract.

Definition at line 572 of file baseapi.cpp.

572  {
573  if (tesseract_ == NULL || thresholder_ == NULL)
574  return NULL;
575  if (tesseract_->pix_binary() == NULL)
577  return pixClone(tesseract_->pix_binary());
578 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:854
virtual TESS_LOCAL void Threshold(Pix **pix)
Definition: baseapi.cpp:2091
Pix * pix_binary() const
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
int tesseract::TessBaseAPI::GetThresholdedImageScaleFactor ( ) const

Returns the scale factor of the thresholded image that would be returned by GetThresholdedImage() and the various GetX() methods that call GetComponentImages(). Returns 0 if no thresholder has been set.

Definition at line 724 of file baseapi.cpp.

724  {
725  if (thresholder_ == NULL) {
726  return 0;
727  }
728  return thresholder_->GetScaleFactor();
729 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:854
const char * tesseract::TessBaseAPI::GetUnichar ( int  unichar_id)

This method returns the string form of the specified unichar.

Definition at line 2690 of file baseapi.cpp.

2690  {
2691  return tesseract_->unicharset.id_to_unichar(unichar_id);
2692 }
UNICHARSET unicharset
Definition: ccutil.h:72
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
char * tesseract::TessBaseAPI::GetUNLVText ( )

The recognized text is returned as a char* which is coded as UNLV format Latin-1 with specific reject and suspect codes and must be freed with the delete [] operator.

Definition at line 1664 of file baseapi.cpp.

1664  {
1665  if (tesseract_ == NULL ||
1666  (!recognition_done_ && Recognize(NULL) < 0))
1667  return NULL;
1668  bool tilde_crunch_written = false;
1669  bool last_char_was_newline = true;
1670  bool last_char_was_tilde = false;
1671 
1672  int total_length = TextLength(NULL);
1673  PAGE_RES_IT page_res_it(page_res_);
1674  char* result = new char[total_length];
1675  char* ptr = result;
1676  for (page_res_it.restart_page(); page_res_it.word () != NULL;
1677  page_res_it.forward()) {
1678  WERD_RES *word = page_res_it.word();
1679  // Process the current word.
1680  if (word->unlv_crunch_mode != CR_NONE) {
1681  if (word->unlv_crunch_mode != CR_DELETE &&
1682  (!tilde_crunch_written ||
1683  (word->unlv_crunch_mode == CR_KEEP_SPACE &&
1684  word->word->space() > 0 &&
1685  !word->word->flag(W_FUZZY_NON) &&
1686  !word->word->flag(W_FUZZY_SP)))) {
1687  if (!word->word->flag(W_BOL) &&
1688  word->word->space() > 0 &&
1689  !word->word->flag(W_FUZZY_NON) &&
1690  !word->word->flag(W_FUZZY_SP)) {
1691  /* Write a space to separate from preceding good text */
1692  *ptr++ = ' ';
1693  last_char_was_tilde = false;
1694  }
1695  if (!last_char_was_tilde) {
1696  // Write a reject char.
1697  last_char_was_tilde = true;
1698  *ptr++ = kUNLVReject;
1699  tilde_crunch_written = true;
1700  last_char_was_newline = false;
1701  }
1702  }
1703  } else {
1704  // NORMAL PROCESSING of non tilde crunched words.
1705  tilde_crunch_written = false;
1707  const char* wordstr = word->best_choice->unichar_string().string();
1708  const STRING& lengths = word->best_choice->unichar_lengths();
1709  int length = lengths.length();
1710  int i = 0;
1711  int offset = 0;
1712 
1713  if (last_char_was_tilde &&
1714  word->word->space() == 0 && wordstr[offset] == ' ') {
1715  // Prevent adjacent tilde across words - we know that adjacent tildes
1716  // within words have been removed.
1717  // Skip the first character.
1718  offset = lengths[i++];
1719  }
1720  if (i < length && wordstr[offset] != 0) {
1721  if (!last_char_was_newline)
1722  *ptr++ = ' ';
1723  else
1724  last_char_was_newline = false;
1725  for (; i < length; offset += lengths[i++]) {
1726  if (wordstr[offset] == ' ' ||
1727  wordstr[offset] == kTesseractReject) {
1728  *ptr++ = kUNLVReject;
1729  last_char_was_tilde = true;
1730  } else {
1731  if (word->reject_map[i].rejected())
1732  *ptr++ = kUNLVSuspect;
1733  UNICHAR ch(wordstr + offset, lengths[i]);
1734  int uni_ch = ch.first_uni();
1735  for (int j = 0; kUniChs[j] != 0; ++j) {
1736  if (kUniChs[j] == uni_ch) {
1737  uni_ch = kLatinChs[j];
1738  break;
1739  }
1740  }
1741  if (uni_ch <= 0xff) {
1742  *ptr++ = static_cast<char>(uni_ch);
1743  last_char_was_tilde = false;
1744  } else {
1745  *ptr++ = kUNLVReject;
1746  last_char_was_tilde = true;
1747  }
1748  }
1749  }
1750  }
1751  }
1752  if (word->word->flag(W_EOL) && !last_char_was_newline) {
1753  /* Add a new line output */
1754  *ptr++ = '\n';
1755  tilde_crunch_written = false;
1756  last_char_was_newline = true;
1757  last_char_was_tilde = false;
1758  }
1759  }
1760  *ptr++ = '\n';
1761  *ptr = '\0';
1762  return result;
1763 }
Definition: werd.h:35
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:128
Definition: werd.h:36
inT32 length() const
Definition: strngs.cpp:188
TESS_LOCAL int TextLength(int *blob_count)
Definition: baseapi.cpp:2223
const int kUniChs[]
Definition: baseapi.cpp:1651
const char kUNLVSuspect
Definition: baseapi.cpp:92
const char kUNLVReject
Definition: baseapi.cpp:90
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
const STRING & unichar_string() const
Definition: ratngs.h:524
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:857
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:294
void set_unlv_suspects(WERD_RES *word)
Definition: output.cpp:307
WERD * word
Definition: pageres.h:175
uinT8 space()
Definition: werd.h:104
const char kTesseractReject
Definition: baseapi.cpp:88
const STRING & unichar_lengths() const
Definition: ratngs.h:531
Definition: strngs.h:44
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
const char * string() const
Definition: strngs.cpp:193
WERD_CHOICE * best_choice
Definition: pageres.h:219
const int kLatinChs[]
Definition: baseapi.cpp:1655
REJMAP reject_map
Definition: pageres.h:271
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:864
char * tesseract::TessBaseAPI::GetUTF8Text ( )

The recognized text is returned as a char* which is coded as UTF8 and must be freed with the delete [] operator.

Make a text string from the internal data structures.

Definition at line 1303 of file baseapi.cpp.

1303  {
1304  if (tesseract_ == NULL ||
1305  (!recognition_done_ && Recognize(NULL) < 0))
1306  return NULL;
1307  STRING text("");
1308  ResultIterator *it = GetIterator();
1309  do {
1310  if (it->Empty(RIL_PARA)) continue;
1311  char *para_text = it->GetUTF8Text(RIL_PARA);
1312  text += para_text;
1313  delete []para_text;
1314  } while (it->Next(RIL_PARA));
1315  char* result = new char[text.length() + 1];
1316  strncpy(result, text.string(), text.length() + 1);
1317  delete it;
1318  return result;
1319 }
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
ResultIterator * GetIterator()
Definition: baseapi.cpp:1276
Definition: strngs.h:44
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:864
Boxa * tesseract::TessBaseAPI::GetWords ( Pixa **  pixa)

Get the words as a leptonica-style Boxa, Pixa pair, in reading order. Can be called before or after Recognize.

Definition at line 620 of file baseapi.cpp.

620  {
621  return GetComponentImages(RIL_WORD, true, pixa, NULL);
622 }
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, const bool raw_image, const int raw_padding, Pixa **pixa, int **blockids, int **paraids)
Definition: baseapi.cpp:642
void tesseract::TessBaseAPI::InitTruthCallback ( TruthCallback cb)
inline

Definition at line 747 of file baseapi.h.

747 { truth_cb_ = cb; }
TruthCallback * truth_cb_
Definition: baseapi.h:865
bool tesseract::TessBaseAPI::IsValidCharacter ( const char *  utf8_character)

Definition at line 2001 of file baseapi.cpp.

2001  {
2002  return tesseract_->unicharset.contains_unichar(utf8_character);
2003 }
UNICHARSET unicharset
Definition: ccutil.h:72
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
int tesseract::TessBaseAPI::IsValidWord ( const char *  word)

Check whether a word is valid according to Tesseract's language model.

Returns
0 if the word is invalid, non-zero if valid.
Warning
temporary! This function will be removed from here and placed in a separate API at some future time.

Check whether a word is valid according to Tesseract's language model. Returns 0 if the word is invalid, non-zero if valid.

Definition at line 1997 of file baseapi.cpp.

1997  {
1998  return tesseract_->getDict().valid_word(word);
1999 }
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:705
Dict & getDict()
Definition: classify.h:65
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
TBLOB * tesseract::TessBaseAPI::MakeTBLOB ( Pix *  pix)
static

Returns a TBLOB corresponding to the entire input image.

Creates a TBLOB* from the whole pix.

Definition at line 2364 of file baseapi.cpp.

2364  {
2365  int width = pixGetWidth(pix);
2366  int height = pixGetHeight(pix);
2367  BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height);
2368 
2369  // Create C_BLOBs from the page
2370  extract_edges(pix, &block);
2371 
2372  // Merge all C_BLOBs
2373  C_BLOB_LIST *list = block.blob_list();
2374  C_BLOB_IT c_blob_it(list);
2375  if (c_blob_it.empty())
2376  return NULL;
2377  // Move all the outlines to the first blob.
2378  C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
2379  for (c_blob_it.forward();
2380  !c_blob_it.at_first();
2381  c_blob_it.forward()) {
2382  C_BLOB *c_blob = c_blob_it.data();
2383  ol_it.add_list_after(c_blob->out_list());
2384  }
2385  // Convert the first blob to the output TBLOB.
2386  return TBLOB::PolygonalCopy(false, c_blob_it.data());
2387 }
static TBLOB * PolygonalCopy(bool allow_detailed_fx, C_BLOB *src)
Definition: blobs.cpp:344
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:64
Definition: ocrblock.h:30
#define TRUE
Definition: capi.h:28
void extract_edges(Pix *pix, BLOCK *block)
Definition: edgblob.cpp:334
ROW * tesseract::TessBaseAPI::MakeTessOCRRow ( float  baseline,
float  xheight,
float  descender,
float  ascender 
)
static

Returns a ROW object created from the input row specification.

Definition at line 2347 of file baseapi.cpp.

2350  {
2351  inT32 xstarts[] = {-32000};
2352  double quad_coeffs[] = {0, 0, baseline};
2353  return new ROW(1,
2354  xstarts,
2355  quad_coeffs,
2356  xheight,
2357  ascender - (baseline + xheight),
2358  descender - baseline,
2359  0,
2360  0);
2361 }
int inT32
Definition: host.h:102
Definition: ocrrow.h:32
int tesseract::TessBaseAPI::MeanTextConf ( )

Returns the (average) confidence value between 0 and 100.

Returns the average word confidence for Tesseract page result.

Definition at line 1807 of file baseapi.cpp.

1807  {
1808  int* conf = AllWordConfidences();
1809  if (!conf) return 0;
1810  int sum = 0;
1811  int *pt = conf;
1812  while (*pt >= 0) sum += *pt++;
1813  if (pt != conf) sum /= pt - conf;
1814  delete [] conf;
1815  return sum;
1816 }
void tesseract::TessBaseAPI::NormalizeTBLOB ( TBLOB tblob,
ROW row,
bool  numeric_mode 
)
static

This method baseline normalizes a TBLOB in-place. The input row is used for normalization. The denorm is an optional parameter in which the normalization-antidote is returned.

Definition at line 2394 of file baseapi.cpp.

2394  {
2395  TBOX box = tblob->bounding_box();
2396  float x_center = (box.left() + box.right()) / 2.0f;
2397  float baseline = row->base_line(x_center);
2398  float scale = kBlnXHeight / row->x_height();
2399  tblob->Normalize(NULL, NULL, NULL, x_center, baseline, scale, scale,
2400  0.0f, static_cast<float>(kBlnBaselineOffset), false, NULL);
2401 }
const int kBlnBaselineOffset
Definition: normalis.h:29
const int kBlnXHeight
Definition: normalis.h:28
float x_height() const
Definition: ocrrow.h:61
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift, bool inverse, Pix *pix)
Definition: blobs.cpp:413
inT16 right() const
Definition: rect.h:75
float base_line(float xpos) const
Definition: ocrrow.h:56
inT16 left() const
Definition: rect.h:68
Definition: rect.h:30
TBOX bounding_box() const
Definition: blobs.cpp:482
int tesseract::TessBaseAPI::NumDawgs ( ) const

Return the number of dawgs loaded into tesseract_ object.

Definition at line 2701 of file baseapi.cpp.

2701  {
2702  return tesseract_ == NULL ? 0 : tesseract_->getDict().NumDawgs();
2703 }
int NumDawgs() const
Return the number of dawgs in the dawgs_ vector.
Definition: dict.h:404
Dict & getDict()
Definition: classify.h:65
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
OcrEngineMode tesseract::TessBaseAPI::oem ( ) const
inline

Definition at line 743 of file baseapi.h.

743  {
744  return last_oem_requested_;
745  }
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
Definition: baseapi.h:863
bool tesseract::TessBaseAPI::ProcessPage ( Pix *  pix,
int  page_index,
const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer renderer 
)

Turn a single image into symbolic text.

The pix is the image processed. filename and page_index are metadata used by side-effect processes, such as reading a box file or formatting as hOCR.

See ProcessPages for descriptions of other parameters.

Definition at line 1194 of file baseapi.cpp.

1196  {
1197  PERF_COUNT_START("ProcessPage")
1199  SetImage(pix);
1200  bool failed = false;
1201 
1202  if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
1203  // Disabled character recognition
1204  PageIterator* it = AnalyseLayout();
1205 
1206  if (it == NULL) {
1207  failed = true;
1208  } else {
1209  delete it;
1210  }
1212  failed = FindLines() != 0;
1213  } else if (timeout_millisec > 0) {
1214  // Running with a timeout.
1215  ETEXT_DESC monitor;
1216  monitor.cancel = NULL;
1217  monitor.cancel_this = NULL;
1218  monitor.set_deadline_msecs(timeout_millisec);
1219 
1220  // Now run the main recognition.
1221  failed = Recognize(&monitor) < 0;
1222  } else {
1223  // Normal layout and character recognition with no timeout.
1224  failed = Recognize(NULL) < 0;
1225  }
1226 
1228 #ifndef ANDROID_BUILD
1229  Pix* page_pix = GetThresholdedImage();
1230  pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
1231 #endif // ANDROID_BUILD
1232  }
1233 
1234  if (failed && retry_config != NULL && retry_config[0] != '\0') {
1235  // Save current config variables before switching modes.
1236  FILE* fp = fopen(kOldVarsFile, "wb");
1237  PrintVariables(fp);
1238  fclose(fp);
1239  // Switch to alternate mode for retry.
1240  ReadConfigFile(retry_config);
1241  SetImage(pix);
1242  Recognize(NULL);
1243  // Restore saved config variables.
1245  }
1246 
1247  if (renderer && !failed) {
1248  failed = !renderer->AddImage(this);
1249  }
1250 
1252  return !failed;
1253 }
void set_deadline_msecs(inT32 deadline_msecs)
Definition: ocrclass.h:132
void * cancel_this
Definition: ocrclass.h:120
PageIterator * AnalyseLayout()
Definition: baseapi.h:500
void ReadConfigFile(const char *filename)
Definition: baseapi.cpp:446
CANCEL_FUNC cancel
Definition: ocrclass.h:119
Pix * GetThresholdedImage()
Definition: baseapi.cpp:572
const char * kOldVarsFile
Definition: baseapi.cpp:101
void SetInputName(const char *name)
Definition: baseapi.cpp:201
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
Orientation and script detection only.
Definition: publictypes.h:152
#define PERF_COUNT_END
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:155
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2132
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: baseapi.cpp:525
#define PERF_COUNT_START(FUNCT_NAME)
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
void PrintVariables(FILE *fp) const
Definition: baseapi.cpp:264
bool tesseract::TessBaseAPI::ProcessPages ( const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer renderer 
)

Turns images into symbolic text.

filename can point to a single image, a multi-page TIFF, or a plain text list of image filenames.

retry_config is useful for debugging. If not NULL, you can fall back to an alternate configuration if a page fails for some reason.

timeout_millisec terminates processing if any single page takes too long. Set to 0 for unlimited time.

renderer is responsible for creating the output. For example, use the TessTextRenderer if you want plaintext output, or the TessPDFRenderer to produce searchable PDF.

If tessedit_page_number is non-negative, will only process that single page. Works for multi-page tiff file, or filelist.

Returns true if successful, false on error.

Definition at line 1070 of file baseapi.cpp.

1072  {
1073  bool result =
1074  ProcessPagesInternal(filename, retry_config, timeout_millisec, renderer);
1075  if (result) {
1078  tprintf("Write of TR file failed: %s\n", output_file_->string());
1079  return false;
1080  }
1081  }
1082  return result;
1083 }
#define tprintf(...)
Definition: tprintf.h:31
bool ProcessPagesInternal(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1097
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:860
bool WriteTRFile(const STRING &filename)
Definition: blobclass.cpp:97
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
const char * string() const
Definition: strngs.cpp:193
bool tesseract::TessBaseAPI::ProcessPagesInternal ( const char *  filename,
const char *  retry_config,
int  timeout_millisec,
TessResultRenderer renderer 
)

Definition at line 1097 of file baseapi.cpp.

1100  {
1101 #ifndef ANDROID_BUILD
1102  PERF_COUNT_START("ProcessPages")
1103  bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-");
1104  if (stdInput) {
1105 #ifdef WIN32
1106  if (_setmode(_fileno(stdin), _O_BINARY) == -1)
1107  tprintf("ERROR: cin to binary: %s", strerror(errno));
1108 #endif // WIN32
1109  }
1110 
1111  if (stream_filelist) {
1112  return ProcessPagesFileList(stdin, NULL, retry_config,
1113  timeout_millisec, renderer,
1115  }
1116 
1117  // At this point we are officially in autodection territory.
1118  // That means we are going to buffer stdin so that it is
1119  // seekable. To keep code simple we will also buffer data
1120  // coming from a file.
1121  std::string buf;
1122  if (stdInput) {
1123  buf.assign((std::istreambuf_iterator<char>(std::cin)),
1124  (std::istreambuf_iterator<char>()));
1125  } else {
1126  std::ifstream ifs(filename, std::ios::binary);
1127  if (ifs) {
1128  buf.assign((std::istreambuf_iterator<char>(ifs)),
1129  (std::istreambuf_iterator<char>()));
1130  } else {
1131  tprintf("ERROR: Can not open input file %s\n", filename);
1132  return false;
1133  }
1134  }
1135 
1136  // Here is our autodetection
1137  int format;
1138  const l_uint8 * data = reinterpret_cast<const l_uint8 *>(buf.c_str());
1139  findFileFormatBuffer(data, &format);
1140 
1141  // Maybe we have a filelist
1142  if (format == IFF_UNKNOWN) {
1143  STRING s(buf.c_str());
1144  return ProcessPagesFileList(NULL, &s, retry_config,
1145  timeout_millisec, renderer,
1147  }
1148 
1149  // Maybe we have a TIFF which is potentially multipage
1150  bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
1151  format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
1152  format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
1153  format == IFF_TIFF_ZIP);
1154 
1155  // Fail early if we can, before producing any output
1156  Pix *pix = NULL;
1157  if (!tiff) {
1158  pix = pixReadMem(data, buf.size());
1159  if (pix == NULL) {
1160  return false;
1161  }
1162  }
1163 
1164  // Begin the output
1165  const char* kUnknownTitle = "";
1166  if (renderer && !renderer->BeginDocument(kUnknownTitle)) {
1167  pixDestroy(&pix);
1168  return false;
1169  }
1170 
1171  // Produce output
1172  bool r = false;
1173  if (tiff) {
1174  r = ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config,
1175  timeout_millisec, renderer,
1177  } else {
1178  r = ProcessPage(pix, 0, filename, retry_config,
1179  timeout_millisec, renderer);
1180  pixDestroy(&pix);
1181  }
1182 
1183  // End the output
1184  if (!r || (renderer && !renderer->EndDocument())) {
1185  return false;
1186  }
1188  return true;
1189 #else
1190  return false;
1191 #endif
1192 }
#define tprintf(...)
Definition: tprintf.h:31
bool stream_filelist
Definition: baseapi.cpp:81
bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
Definition: baseapi.cpp:1194
#define PERF_COUNT_END
#define PERF_COUNT_START(FUNCT_NAME)
Definition: strngs.h:44
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
int tesseract::TessBaseAPI::Recognize ( ETEXT_DESC monitor)

Recognize the image from SetAndThresholdImage, generating Tesseract internal structures. Returns 0 on success. Optional. The Get*Text functions below will call Recognize if needed. After Recognize, the output is kept internally until the next SetImage.

Recognize the tesseract global image and return the result as Tesseract internal structures.

Definition at line 830 of file baseapi.cpp.

830  {
831  if (tesseract_ == NULL)
832  return -1;
833  if (FindLines() != 0)
834  return -1;
835  if (page_res_ != NULL)
836  delete page_res_;
837  if (block_list_->empty()) {
838  page_res_ = new PAGE_RES(false, block_list_,
840  return 0; // Empty page.
841  }
842 
844  recognition_done_ = true;
849  } else {
850  // TODO(rays) LSTM here.
851  page_res_ = new PAGE_RES(false,
853  }
856  return 0;
857  }
858 
859  if (truth_cb_ != NULL) {
860  tesseract_->wordrec_run_blamer.set_value(true);
861  PageIterator *page_it = new PageIterator(
866  image_height_, page_it, this->tesseract()->pix_grey());
867  delete page_it;
868  }
869 
870  int result = 0;
872  #ifndef GRAPHICS_DISABLED
874  #endif // GRAPHICS_DISABLED
875  // The page_res is invalid after an interactive session, so cleanup
876  // in a way that lets us continue to the next page without crashing.
877  delete page_res_;
878  page_res_ = NULL;
879  return -1;
881  STRING fontname;
882  ExtractFontName(*output_file_, &fontname);
884  } else if (tesseract_->tessedit_ambigs_training) {
885  FILE *training_output_file = tesseract_->init_recog_training(*input_file_);
886  // OCR the page segmented into words by tesseract.
888  *input_file_, page_res_, monitor, training_output_file);
889  fclose(training_output_file);
890  } else {
891  // Now run the main recognition.
892  bool wait_for_text = true;
893  GetBoolVariable("paragraph_text_based", &wait_for_text);
894  if (!wait_for_text) DetectParagraphs(false);
895  if (tesseract_->recog_all_words(page_res_, monitor, NULL, NULL, 0)) {
896  if (wait_for_text) DetectParagraphs(true);
897  } else {
898  result = -1;
899  }
900  }
901  return result;
902 }
void CorrectClassifyWords(PAGE_RES *page_res)
Definition: applybox.cpp:772
bool wordrec_run_blamer
Definition: wordrec.h:168
void recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
TruthCallback * truth_cb_
Definition: baseapi.h:865
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:854
FILE * init_recog_training(const STRING &fname)
PAGE_RES * ApplyBoxes(const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list)
Definition: applybox.cpp:117
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
Definition: control.cpp:287
Tesseract * tesseract() const
Definition: baseapi.h:739
int GetScaledYResolution() const
Definition: thresholder.h:93
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:860
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:236
bool tessedit_resegment_from_line_boxes
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:857
void pgeditor_main(int width, int height, PAGE_RES *page_res)
Definition: pgedit.cpp:337
Pix * pix_grey() const
Dict & getDict()
Definition: classify.h:65
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2132
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:416
virtual void Run(A1, A2, A3, A4)=0
void ApplyBoxTraining(const STRING &fontname, PAGE_RES *page_res)
Definition: applybox.cpp:796
void ExtractFontName(const STRING &filename, STRING *fontname)
Definition: blobclass.cpp:46
const UNICHARSET & getUnicharset() const
Definition: dict.h:96
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:856
Definition: strngs.h:44
TESS_LOCAL void DetectParagraphs(bool after_text_recognition)
Definition: baseapi.cpp:2476
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
STRING * input_file_
Name used by training code.
Definition: baseapi.h:858
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:864
int tesseract::TessBaseAPI::RecognizeForChopTest ( ETEXT_DESC monitor)

Methods to retrieve information after SetAndThresholdImage(), Recognize() or TesseractRect(). (Recognize is called implicitly if needed.) Variant on Recognize used for testing chopper.

Tests the chopper by exhaustively running chop_one_blob.

Definition at line 905 of file baseapi.cpp.

905  {
906  if (tesseract_ == NULL)
907  return -1;
908  if (thresholder_ == NULL || thresholder_->IsEmpty()) {
909  tprintf("Please call SetImage before attempting recognition.");
910  return -1;
911  }
912  if (page_res_ != NULL)
913  ClearResults();
914  if (FindLines() != 0)
915  return -1;
916  // Additional conditions under which chopper test cannot be run
917  if (tesseract_->interactive_display_mode) return -1;
918 
919  recognition_done_ = true;
920 
921  page_res_ = new PAGE_RES(false, block_list_,
923 
924  PAGE_RES_IT page_res_it(page_res_);
925 
926  while (page_res_it.word() != NULL) {
927  WERD_RES *word_res = page_res_it.word();
928  GenericVector<TBOX> boxes;
929  tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block,
930  page_res_it.row()->row, word_res);
931  page_res_it.forward();
932  }
933  return 0;
934 }
#define tprintf(...)
Definition: tprintf.h:31
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:854
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:857
TESS_LOCAL int FindLines()
Definition: baseapi.cpp:2132
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:416
bool IsEmpty() const
Return true if no image has been set.
Definition: thresholder.cpp:50
WERD * word
Definition: pageres.h:175
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:856
void MaximallyChopWord(const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res)
Definition: applybox.cpp:253
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:864
void tesseract::TessBaseAPI::RunAdaptiveClassifier ( TBLOB blob,
int  num_max_matches,
int *  unichar_ids,
float *  ratings,
int *  num_matches_returned 
)

Method to run adaptive classifier on a blob. It returns at most num_max_matches results.

Method to run adaptive classifier on a blob.

Definition at line 2667 of file baseapi.cpp.

2671  {
2672  BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
2673  tesseract_->AdaptiveClassifier(blob, choices);
2674  BLOB_CHOICE_IT choices_it(choices);
2675  int& index = *num_matches_returned;
2676  index = 0;
2677  for (choices_it.mark_cycle_pt();
2678  !choices_it.cycled_list() && index < num_max_matches;
2679  choices_it.forward()) {
2680  BLOB_CHOICE* choice = choices_it.data();
2681  unichar_ids[index] = choice->unichar_id();
2682  ratings[index] = choice->rating();
2683  ++index;
2684  }
2685  *num_matches_returned = index;
2686  delete choices;
2687 }
float rating() const
Definition: ratngs.h:79
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:185
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
UNICHAR_ID unichar_id() const
Definition: ratngs.h:76
void tesseract::TessBaseAPI::set_min_orientation_margin ( double  margin)

Definition at line 2264 of file baseapi.cpp.

2264  {
2265  tesseract_->min_orientation_margin.set_value(margin);
2266 }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
void tesseract::TessBaseAPI::SetDictFunc ( DictFunc  f)

Sets Dict::letter_is_okay_ function to point to the given function.

Definition at line 2043 of file baseapi.cpp.

2043  {
2044  if (tesseract_ != NULL) {
2046  }
2047 }
Dict & getDict()
Definition: classify.h:65
int(Dict::* letter_is_okay_)(void *void_dawg_args, UNICHAR_ID unichar_id, bool word_end) const
Definition: dict.h:347
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
void tesseract::TessBaseAPI::SetFillLatticeFunc ( FillLatticeFunc  f)

Sets Wordrec::fill_lattice_ function to point to the given function.

Definition at line 2069 of file baseapi.cpp.

2069  {
2070  if (tesseract_ != NULL) tesseract_->fill_lattice_ = f;
2071 }
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:420
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
void tesseract::TessBaseAPI::SetImage ( const unsigned char *  imagedata,
int  width,
int  height,
int  bytes_per_pixel,
int  bytes_per_line 
)

Provide an image for Tesseract to recognize. Format is as TesseractRect above. Does not copy the image buffer, or take ownership. The source image may be destroyed after Recognize is called, either explicitly or implicitly via one of the Get*Text functions. SetImage clears all recognition results, and sets the rectangle to the full image, so it may be followed immediately by a GetUTF8Text, and it will automatically perform recognition.

Definition at line 525 of file baseapi.cpp.

527  {
528  if (InternalSetImage())
529  thresholder_->SetImage(imagedata, width, height,
530  bytes_per_pixel, bytes_per_line);
531 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:854
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:62
TESS_LOCAL bool InternalSetImage()
Definition: baseapi.cpp:2074
void tesseract::TessBaseAPI::SetImage ( Pix *  pix)

Provide an image for Tesseract to recognize. As with SetImage above, Tesseract doesn't take a copy or ownership or pixDestroy the image, so it must persist until after Recognize. Pix vs raw, which to use? Use Pix where possible. A future version of Tesseract may choose to use Pix as its internal representation and discard IMAGE altogether. Because of that, an implementation that sources and targets Pix may end up with fewer copies than an implementation that does not.

Definition at line 550 of file baseapi.cpp.

550  {
551  if (InternalSetImage())
552  thresholder_->SetImage(pix);
553  SetInputImage(pix);
554 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:854
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:62
TESS_LOCAL bool InternalSetImage()
Definition: baseapi.cpp:2074
void SetInputImage(Pix *pix)
Definition: baseapi.cpp:936
void tesseract::TessBaseAPI::SetProbabilityInContextFunc ( ProbabilityInContextFunc  f)

Sets Dict::probability_in_context_ function to point to the given function.

Sets Dict::probability_in_context_ function to point to the given function.

Parameters
f: A single function that returns the probability of the current "character" (in general a utf-8 string), given the context of a previous utf-8 string.

Definition at line 2057 of file baseapi.cpp.

2057  {
2058  if (tesseract_ != NULL) {
2060  // Set it for the sublangs too.
2061  int num_subs = tesseract_->num_sub_langs();
2062  for (int i = 0; i < num_subs; ++i) {
2064  }
2065  }
2066 }
double(Dict::* probability_in_context_)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Probability in context function used by the ngram permuter.
Definition: dict.h:357
Tesseract * get_sub_lang(int index) const
int num_sub_langs() const
Dict & getDict()
Definition: classify.h:65
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846
void tesseract::TessBaseAPI::SetRectangle ( int  left,
int  top,
int  width,
int  height 
)

Restrict recognition to a sub-rectangle of the image. Call after SetImage. Each SetRectangle clears the recognition results so multiple rectangles can be recognized with the same image.

Definition at line 561 of file baseapi.cpp.

561  {
562  if (thresholder_ == NULL)
563  return;
564  thresholder_->SetRectangle(left, top, width, height);
565  ClearResults();
566 }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:854
void SetRectangle(int left, int top, int width, int height)
void tesseract::TessBaseAPI::SetSourceResolution ( int  ppi)

Set the resolution of the source image in pixels per inch so font size information can be calculated in results. Call this after SetImage().

Definition at line 533 of file baseapi.cpp.

533  {
534  if (thresholder_)
536  else
537  tprintf("Please call SetImage before SetSourceResolution.\n");
538 }
#define tprintf(...)
Definition: tprintf.h:31
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:854
void SetSourceYResolution(int ppi)
Definition: thresholder.h:86
void tesseract::TessBaseAPI::SetThresholder ( ImageThresholder thresholder)
inline

In extreme cases only, usually with a subclass of Thresholder, it is possible to provide a different Thresholder. The Thresholder may be preloaded with an image, settings etc, or they may be set after. Note that Tesseract takes ownership of the Thresholder and will delete it when it is replaced or the API is destructed.

Definition at line 378 of file baseapi.h.

378  {
379  if (thresholder_ != NULL)
380  delete thresholder_;
381  thresholder_ = thresholder;
382  ClearResults();
383  }
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:854
Tesseract* tesseract::TessBaseAPI::tesseract ( ) const
inline

Definition at line 739 of file baseapi.h.

739  {
740  return tesseract_;
741  }
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:846