tesseract  3.05.00
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
text2image.cpp File Reference
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>
#include "allheaders.h"
#include "boxchar.h"
#include "commandlineflags.h"
#include "degradeimage.h"
#include "errcode.h"
#include "fileio.h"
#include "helpers.h"
#include "normstrngs.h"
#include "stringrenderer.h"
#include "tlog.h"
#include "unicharset.h"
#include "util.h"

Go to the source code of this file.

Classes

struct  tesseract::SpacingProperties
 

Namespaces

 tesseract
 

Functions

 STRING_PARAM_FLAG (text,"","File name of text input to process")
 
 STRING_PARAM_FLAG (outputbase,"","Basename for output image/box file")
 
 BOOL_PARAM_FLAG (degrade_image, true,"Degrade rendered image with speckle noise, dilation/erosion ""and rotation")
 
 BOOL_PARAM_FLAG (rotate_image, true,"Rotate the image in a random way.")
 
 INT_PARAM_FLAG (exposure, 0,"Exposure level in photocopier")
 
 INT_PARAM_FLAG (resolution, 300,"Pixels per inch")
 
 INT_PARAM_FLAG (xsize, 3600,"Width of output image")
 
 INT_PARAM_FLAG (ysize, 4800,"Height of output image")
 
 INT_PARAM_FLAG (margin, 100,"Margin round edges of image")
 
 INT_PARAM_FLAG (ptsize, 12,"Size of printed text")
 
 DOUBLE_PARAM_FLAG (char_spacing, 0,"Inter-character space in ems")
 
 DOUBLE_PARAM_FLAG (underline_start_prob, 0,"Fraction of words to underline (value in [0,1])")
 
 DOUBLE_PARAM_FLAG (underline_continuation_prob, 0,"Fraction of words to underline (value in [0,1])")
 
 INT_PARAM_FLAG (leading, 12,"Inter-line space (in pixels)")
 
 STRING_PARAM_FLAG (writing_mode,"horizontal","Specify one of the following writing"" modes.\n""'horizontal' : Render regular horizontal text. (default)\n""'vertical' : Render vertical text. Glyph orientation is"" selected by Pango.\n""'vertical-upright' : Render vertical text. Glyph "" orientation is set to be upright.")
 
 INT_PARAM_FLAG (box_padding, 0,"Padding around produced bounding boxes")
 
 BOOL_PARAM_FLAG (strip_unrenderable_words, true,"Remove unrenderable words from source text")
 
 STRING_PARAM_FLAG (font,"Arial","Font description name to use")
 
 BOOL_PARAM_FLAG (ligatures, false,"Rebuild and render ligatures")
 
 BOOL_PARAM_FLAG (find_fonts, false,"Search for all fonts that can render the text")
 
 BOOL_PARAM_FLAG (render_per_font, true,"If find_fonts==true, render each font to its own image. ""Image filenames are of the form output_name.font_name.tif")
 
 DOUBLE_PARAM_FLAG (min_coverage, 1.0,"If find_fonts==true, the minimum coverage the font has of ""the characters in the text file to include it, between ""0 and 1.")
 
 BOOL_PARAM_FLAG (list_available_fonts, false,"List available fonts and quit.")
 
 BOOL_PARAM_FLAG (render_ngrams, false,"Put each space-separated entity from the"" input file into one bounding box. The ngrams in the input"" file will be randomly permuted before rendering (so that"" there is sufficient variety of characters on each line).")
 
 BOOL_PARAM_FLAG (output_word_boxes, false,"Output word bounding boxes instead of character boxes. ""This is used for Cube training, and implied by ""--render_ngrams.")
 
 STRING_PARAM_FLAG (unicharset_file,"","File with characters in the unicharset. If --render_ngrams"" is true and --unicharset_file is specified, ngrams with"" characters that are not in unicharset will be omitted")
 
 BOOL_PARAM_FLAG (bidirectional_rotation, false,"Rotate the generated characters both ways.")
 
 BOOL_PARAM_FLAG (only_extract_font_properties, false,"Assumes that the input file contains a list of ngrams. Renders"" each ngram, extracts spacing properties and records them in"" output_base/[font_name].fontinfo file.")
 
 BOOL_PARAM_FLAG (output_individual_glyph_images, false,"If true also outputs individual character images")
 
 INT_PARAM_FLAG (glyph_resized_size, 0,"Each glyph is square with this side length in pixels")
 
 INT_PARAM_FLAG (glyph_num_border_pixels_to_pad, 0,"Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad")
 
void tesseract::ExtractFontProperties (const string &utf8_text, StringRenderer *render, const string &output_base)
 
bool tesseract::MakeIndividualGlyphs (Pix *pix, const vector< BoxChar * > &vbox, const int input_tiff_page)
 
int main (int argc, char **argv)
 

Variables

const int kRandomSeed = 0x18273645
 

Function Documentation

BOOL_PARAM_FLAG ( degrade_image  ,
true  ,
"Degrade rendered image with speckle noise, dilation/erosion ""and rotation"   
)
BOOL_PARAM_FLAG ( rotate_image  ,
true  ,
"Rotate the image in a random way."   
)
BOOL_PARAM_FLAG ( strip_unrenderable_words  ,
true  ,
"Remove unrenderable words from source text"   
)
BOOL_PARAM_FLAG ( ligatures  ,
false  ,
"Rebuild and render ligatures"   
)
BOOL_PARAM_FLAG ( find_fonts  ,
false  ,
"Search for all fonts that can render the text"   
)
BOOL_PARAM_FLAG ( render_per_font  ,
true  ,
"If find_fonts==true, render each font to its own image. ""Image filenames are of the form output_name.font_name.tif"   
)
BOOL_PARAM_FLAG ( list_available_fonts  ,
false  ,
"List available fonts and quit."   
)
BOOL_PARAM_FLAG ( render_ngrams  ,
false  ,
"Put each space-separated entity from the"" input file into one bounding box. The ngrams in the input"" file will be randomly permuted before rendering (so that"" there is sufficient variety of characters on each line)."   
)
BOOL_PARAM_FLAG ( output_word_boxes  ,
false  ,
"Output word bounding boxes instead of character boxes. ""This is used for Cube training, and implied by ""--render_ngrams."   
)
BOOL_PARAM_FLAG ( bidirectional_rotation  ,
false  ,
"Rotate the generated characters both ways."   
)
BOOL_PARAM_FLAG ( only_extract_font_properties  ,
false  ,
"Assumes that the input file contains a list of ngrams. Renders"" each ngram, extracts spacing properties and records them in"" output_base/[font_name].fontinfo file."   
)
BOOL_PARAM_FLAG ( output_individual_glyph_images  ,
false  ,
"If true also outputs individual character images"   
)
DOUBLE_PARAM_FLAG ( char_spacing  ,
0  ,
"Inter-character space in ems"   
)
DOUBLE_PARAM_FLAG ( underline_start_prob  ,
0  ,
"Fraction of words to underline (value in [0,1])"   
)
DOUBLE_PARAM_FLAG ( underline_continuation_prob  ,
0  ,
"Fraction of words to underline (value in [0,1])"   
)
DOUBLE_PARAM_FLAG ( min_coverage  ,
1.0  ,
"If find_fonts==true, the minimum coverage the font has of ""the characters in the text file to include it, between ""0 and 1."   
)
INT_PARAM_FLAG ( exposure  ,
0  ,
"Exposure level in photocopier"   
)
INT_PARAM_FLAG ( resolution  ,
300  ,
"Pixels per inch"   
)
INT_PARAM_FLAG ( xsize  ,
3600  ,
"Width of output image"   
)
INT_PARAM_FLAG ( ysize  ,
4800  ,
"Height of output image"   
)
INT_PARAM_FLAG ( margin  ,
100  ,
"Margin round edges of image"   
)
INT_PARAM_FLAG ( ptsize  ,
12  ,
"Size of printed text"   
)
INT_PARAM_FLAG ( leading  ,
12  ,
"Inter-line space (in pixels)"   
)
INT_PARAM_FLAG ( box_padding  ,
0  ,
"Padding around produced bounding boxes"   
)
INT_PARAM_FLAG ( glyph_resized_size  ,
0  ,
"Each glyph is square with this side length in pixels"   
)
INT_PARAM_FLAG ( glyph_num_border_pixels_to_pad  ,
0  ,
"Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad"   
)
int main ( int  argc,
char **  argv 
)

This program reads in a text file consisting of feature samples from a training page in the following format:

   FontName UTF8-char-str xmin ymin xmax ymax page-number
    NumberOfFeatureTypes(N)
      FeatureTypeName1 NumberOfFeatures(M)
         Feature1
         ...
         FeatureM
      FeatureTypeName2 NumberOfFeatures(M)
         Feature1
         ...
         FeatureM
      ...
      FeatureTypeNameN NumberOfFeatures(M)
         Feature1
         ...
         FeatureM
   FontName CharName ...

The result of this program is a binary inttemp file used by the OCR engine.

Parameters
argc  number of command line arguments
argv  array of command line arguments
Returns
none
Note
Exceptions: none
History: Fri Aug 18 08:56:17 1989, DSJ, Created.
History: Mon May 18 1998, Christy Russson, Revision started.

Definition at line 419 of file text2image.cpp.

419  {
420  tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true);
421 
422  if (FLAGS_list_available_fonts) {
423  const vector<string>& all_fonts = FontUtils::ListAvailableFonts();
424  for (int i = 0; i < all_fonts.size(); ++i) {
425  printf("%3d: %s\n", i, all_fonts[i].c_str());
426  ASSERT_HOST_MSG(FontUtils::IsAvailableFont(all_fonts[i].c_str()),
427  "Font %s is unrecognized.\n", all_fonts[i].c_str());
428  }
429  return EXIT_SUCCESS;
430  }
431 
432  // Check validity of input flags.
433  if (FLAGS_text.empty()) {
434  tprintf("'--text' option is missing!\n");
435  exit(1);
436  }
437  if (FLAGS_outputbase.empty()) {
438  tprintf("'--outputbase' option is missing!\n");
439  exit(1);
440  }
441  if (!FLAGS_unicharset_file.empty() && FLAGS_render_ngrams) {
442  tprintf("Use '--unicharset_file' only if '--render_ngrams' is set.\n");
443  exit(1);
444  }
445 
446  if (!FLAGS_find_fonts && !FontUtils::IsAvailableFont(FLAGS_font.c_str())) {
447  string pango_name;
448  if (!FontUtils::IsAvailableFont(FLAGS_font.c_str(), &pango_name)) {
449  tprintf("Could not find font named %s.\n", FLAGS_font.c_str());
450  if (!pango_name.empty()) {
451  tprintf("Pango suggested font %s.\n", pango_name.c_str());
452  }
453  tprintf("Please correct --font arg.\n");
454  exit(1);
455  }
456  }
457 
458  if (FLAGS_render_ngrams)
459  FLAGS_output_word_boxes = true;
460 
461  char font_desc_name[1024];
462  snprintf(font_desc_name, 1024, "%s %d", FLAGS_font.c_str(),
463  static_cast<int>(FLAGS_ptsize));
464  StringRenderer render(font_desc_name, FLAGS_xsize, FLAGS_ysize);
465  render.set_add_ligatures(FLAGS_ligatures);
466  render.set_leading(FLAGS_leading);
467  render.set_resolution(FLAGS_resolution);
468  render.set_char_spacing(FLAGS_char_spacing * FLAGS_ptsize);
469  render.set_h_margin(FLAGS_margin);
470  render.set_v_margin(FLAGS_margin);
471  render.set_output_word_boxes(FLAGS_output_word_boxes);
472  render.set_box_padding(FLAGS_box_padding);
473  render.set_strip_unrenderable_words(FLAGS_strip_unrenderable_words);
474  render.set_underline_start_prob(FLAGS_underline_start_prob);
475  render.set_underline_continuation_prob(FLAGS_underline_continuation_prob);
476 
477  // Set text rendering orientation and their forms.
478  if (FLAGS_writing_mode == "horizontal") {
479  // Render regular horizontal text (default).
480  render.set_vertical_text(false);
481  render.set_gravity_hint_strong(false);
482  render.set_render_fullwidth_latin(false);
483  } else if (FLAGS_writing_mode == "vertical") {
484  // Render vertical text. Glyph orientation is selected by Pango.
485  render.set_vertical_text(true);
486  render.set_gravity_hint_strong(false);
487  render.set_render_fullwidth_latin(false);
488  } else if (FLAGS_writing_mode == "vertical-upright") {
489  // Render vertical text. Glyph orientation is set to be upright.
490  // Also Basic Latin characters are converted to their fullwidth forms
491  // on rendering, since fullwidth Latin characters are well designed to fit
492  // vertical text lines, while .box files store halfwidth Basic Latin
493  // unichars.
494  render.set_vertical_text(true);
495  render.set_gravity_hint_strong(true);
496  render.set_render_fullwidth_latin(true);
497  } else {
498  tprintf("Invalid writing mode: %s\n", FLAGS_writing_mode.c_str());
499  exit(1);
500  }
501 
502  string src_utf8;
503  // This c_str is NOT redundant!
504  if (!File::ReadFileToString(FLAGS_text.c_str(), &src_utf8)) {
505  tprintf("Failed to read file: %s\n", FLAGS_text.c_str());
506  exit(1);
507  }
508 
509  // Remove the unicode mark if present.
510  if (strncmp(src_utf8.c_str(), "\xef\xbb\xbf", 3) == 0) {
511  src_utf8.erase(0, 3);
512  }
513  tlog(1, "Render string of size %d\n", src_utf8.length());
514 
515  if (FLAGS_render_ngrams || FLAGS_only_extract_font_properties) {
516  // Try to preserve behavior of old text2image by expanding inter-word
517  // spaces by a factor of 4.
518  const string kSeparator = FLAGS_render_ngrams ? " " : " ";
519  // Also restrict the number of charactes per line to try and avoid
520  // line-breaking in the middle of words like "-A", "R$" etc. which are
521  // otherwise allowed by the standard unicode line-breaking rules.
522  const int kCharsPerLine = (FLAGS_ptsize > 20) ? 50 : 100;
523  string rand_utf8;
524  UNICHARSET unicharset;
525  if (FLAGS_render_ngrams && !FLAGS_unicharset_file.empty() &&
526  !unicharset.load_from_file(FLAGS_unicharset_file.c_str())) {
527  tprintf("Failed to load unicharset from file %s\n",
528  FLAGS_unicharset_file.c_str());
529  exit(1);
530  }
531 
532  // If we are rendering ngrams that will be OCRed later, shuffle them so that
533  // tesseract does not have difficulties finding correct baseline, word
534  // spaces, etc.
535  const char *str8 = src_utf8.c_str();
536  int len = src_utf8.length();
537  int step;
538  vector<pair<int, int> > offsets;
539  int offset = SpanUTF8Whitespace(str8);
540  while (offset < len) {
541  step = SpanUTF8NotWhitespace(str8 + offset);
542  offsets.push_back(make_pair(offset, step));
543  offset += step;
544  offset += SpanUTF8Whitespace(str8 + offset);
545  }
546  if (FLAGS_render_ngrams)
547  std::random_shuffle(offsets.begin(), offsets.end());
548 
549  for (int i = 0, line = 1; i < offsets.size(); ++i) {
550  const char *curr_pos = str8 + offsets[i].first;
551  int ngram_len = offsets[i].second;
552  // Skip words that contain characters not in found in unicharset.
553  if (!FLAGS_unicharset_file.empty() &&
554  !unicharset.encodable_string(curr_pos, NULL)) {
555  continue;
556  }
557  rand_utf8.append(curr_pos, ngram_len);
558  if (rand_utf8.length() > line * kCharsPerLine) {
559  rand_utf8.append(" \n");
560  ++line;
561  if (line & 0x1) rand_utf8.append(kSeparator);
562  } else {
563  rand_utf8.append(kSeparator);
564  }
565  }
566  tlog(1, "Rendered ngram string of size %d\n", rand_utf8.length());
567  src_utf8.swap(rand_utf8);
568  }
569  if (FLAGS_only_extract_font_properties) {
570  tprintf("Extracting font properties only\n");
571  ExtractFontProperties(src_utf8, &render, FLAGS_outputbase.c_str());
572  tprintf("Done!\n");
573  return 0;
574  }
575 
576  int im = 0;
577  vector<float> page_rotation;
578  const char* to_render_utf8 = src_utf8.c_str();
579 
580  tesseract::TRand randomizer;
581  randomizer.set_seed(kRandomSeed);
582  vector<string> font_names;
583  // We use a two pass mechanism to rotate images in both direction.
584  // The first pass(0) will rotate the images in random directions and
585  // the second pass(1) will mirror those rotations.
586  int num_pass = FLAGS_bidirectional_rotation ? 2 : 1;
587  for (int pass = 0; pass < num_pass; ++pass) {
588  int page_num = 0;
589  string font_used;
590  for (int offset = 0; offset < strlen(to_render_utf8); ++im, ++page_num) {
591  tlog(1, "Starting page %d\n", im);
592  Pix* pix = NULL;
593  if (FLAGS_find_fonts) {
594  offset += render.RenderAllFontsToImage(FLAGS_min_coverage,
595  to_render_utf8 + offset,
596  strlen(to_render_utf8 + offset),
597  &font_used, &pix);
598  } else {
599  offset += render.RenderToImage(to_render_utf8 + offset,
600  strlen(to_render_utf8 + offset), &pix);
601  }
602  if (pix != NULL) {
603  float rotation = 0;
604  if (pass == 1) {
605  // Pass 2, do mirror rotation.
606  rotation = -1 * page_rotation[page_num];
607  }
608  if (FLAGS_degrade_image) {
609  pix = DegradeImage(pix, FLAGS_exposure, &randomizer,
610  FLAGS_rotate_image ? &rotation : NULL);
611  }
612  render.RotatePageBoxes(rotation);
613 
614  if (pass == 0) {
615  // Pass 1, rotate randomly and store the rotation..
616  page_rotation.push_back(rotation);
617  }
618 
619  Pix* gray_pix = pixConvertTo8(pix, false);
620  pixDestroy(&pix);
621  Pix* binary = pixThresholdToBinary(gray_pix, 128);
622  pixDestroy(&gray_pix);
623  char tiff_name[1024];
624  if (FLAGS_find_fonts) {
625  if (FLAGS_render_per_font) {
626  string fontname_for_file = tesseract::StringReplace(
627  font_used, " ", "_");
628  snprintf(tiff_name, 1024, "%s.%s.tif", FLAGS_outputbase.c_str(),
629  fontname_for_file.c_str());
630  pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, "w");
631  tprintf("Rendered page %d to file %s\n", im, tiff_name);
632  } else {
633  font_names.push_back(font_used);
634  }
635  } else {
636  snprintf(tiff_name, 1024, "%s.tif", FLAGS_outputbase.c_str());
637  pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, im == 0 ? "w" : "a");
638  tprintf("Rendered page %d to file %s\n", im, tiff_name);
639  }
640  // Make individual glyphs
641  if (FLAGS_output_individual_glyph_images) {
642  if (!MakeIndividualGlyphs(binary, render.GetBoxes(), im)) {
643  tprintf("ERROR: Individual glyphs not saved\n");
644  }
645  }
646  pixDestroy(&binary);
647  }
648  if (FLAGS_find_fonts && offset != 0) {
649  // We just want a list of names, or some sample images so we don't need
650  // to render more than the first page of the text.
651  break;
652  }
653  }
654  }
655  if (!FLAGS_find_fonts) {
656  string box_name = FLAGS_outputbase.c_str();
657  box_name += ".box";
658  render.WriteAllBoxes(box_name);
659  } else if (!FLAGS_render_per_font && !font_names.empty()) {
660  string filename = FLAGS_outputbase.c_str();
661  filename += ".fontlist.txt";
662  FILE* fp = fopen(filename.c_str(), "wb");
663  if (fp == NULL) {
664  tprintf("Failed to create output font list %s\n", filename.c_str());
665  } else {
666  for (int i = 0; i < font_names.size(); ++i) {
667  fprintf(fp, "%s\n", font_names[i].c_str());
668  }
669  fclose(fp);
670  }
671  }
672 
673  return 0;
674 }
void set_seed(uinT64 seed)
Definition: helpers.h:43
void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const bool remove_flags)
Pix * DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation)
bool load_from_file(const char *const filename, bool skip_fragments)
Definition: unicharset.h:345
const int kRandomSeed
Definition: text2image.cpp:60
int SpanUTF8Whitespace(const char *text)
Definition: normstrngs.cpp:186
#define ASSERT_HOST_MSG(x,...)
Definition: errcode.h:90
int SpanUTF8NotWhitespace(const char *text)
Definition: normstrngs.cpp:197
#define tprintf(...)
Definition: tprintf.h:31
bool encodable_string(const char *str, int *first_bad_position) const
Definition: unicharset.cpp:222
#define tlog(level,...)
Definition: tlog.h:33
void ExtractFontProperties(const string &utf8_text, StringRenderer *render, const string &output_base)
Definition: text2image.cpp:215
bool MakeIndividualGlyphs(Pix *pix, const vector< BoxChar * > &vbox, const int input_tiff_page)
Definition: text2image.cpp:314
STRING_PARAM_FLAG ( text  ,
""  ,
"File name of text input to process"   
)
STRING_PARAM_FLAG ( outputbase  ,
""  ,
"Basename for output image/box file"   
)
STRING_PARAM_FLAG ( writing_mode  ,
"horizontal"  ,
"Specify one of the following writing"" modes.\n""'horizontal' : Render regular horizontal text. (default)\n""'vertical' : Render vertical text. Glyph orientation is"" selected by Pango.\n""'vertical-upright' : Render vertical text. Glyph "" orientation is set to be upright."   
)
STRING_PARAM_FLAG ( font  ,
"Arial"  ,
"Font description name to use"   
)
STRING_PARAM_FLAG ( unicharset_file  ,
""  ,
"File with characters in the unicharset. If --render_ngrams"" is true and --unicharset_file is specified, ngrams with"" characters that are not in unicharset will be omitted"   
)

Variable Documentation

const int kRandomSeed = 0x18273645

Definition at line 60 of file text2image.cpp.