tesseract  3.05.00
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
makerow.cpp File Reference
#include "stderr.h"
#include "blobbox.h"
#include "ccstruct.h"
#include "detlinefit.h"
#include "statistc.h"
#include "drawtord.h"
#include "blkocc.h"
#include "sortflts.h"
#include "oldbasel.h"
#include "textord.h"
#include "tordmain.h"
#include "underlin.h"
#include "makerow.h"
#include "tprintf.h"
#include "tovars.h"

Go to the source code of this file.

Namespaces

 tesseract
 

Macros

#define MAX_HEIGHT_MODES   12
 

Functions

float MakeRowFromSubBlobs (TO_BLOCK *block, C_BLOB *blob, TO_ROW_IT *row_it)
 
make_single_row

Arrange the blobs into a single row... well actually, if there is only a single blob, it makes 2 rows, in case the top-level blob is a container of the real blobs to recognize.

float make_single_row (ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
 
make_rows

Arrange the blobs into rows.

float make_rows (ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
 
make_initial_textrows

Arrange the good blobs into rows of text.

void make_initial_textrows (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
 
fit_lms_line

Fit an LMS line to a row.

void fit_lms_line (TO_ROW *row)
 
find_best_dropout_row

Delete this row if it has a neighbour with better dropout characteristics. TRUE is returned if the row should be deleted.

BOOL8 find_best_dropout_row (TO_ROW *row, inT32 distance, float dist_limit, inT32 line_index, TO_ROW_IT *row_it, BOOL8 testing_on)
 
deskew_block_coords

Compute the bounding box of all the blobs in the block if they were deskewed without actually doing it.

TBOX deskew_block_coords (TO_BLOCK *block, float gradient)
 
compute_line_occupation

Compute the pixel projection back on the y axis given the global skew. Also compute the 1st derivative.

void compute_line_occupation (TO_BLOCK *block, float gradient, inT32 min_y, inT32 max_y, inT32 *occupation, inT32 *deltas)
 
void compute_occupation_threshold (inT32 low_window, inT32 high_window, inT32 line_count, inT32 *occupation, inT32 *thresholds)
 
compute_dropout_distances

Compute the distance from each coordinate to the nearest dropout.

void compute_dropout_distances (inT32 *occupation, inT32 *thresholds, inT32 line_count)
 
expand_rows

Expand each row to the least of its allowed size and touching its neighbours. If the expansion would entirely swallow a neighbouring row then do so.

void expand_rows (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
 
void adjust_row_limits (TO_BLOCK *block)
 
compute_row_stats

Compute the linespacing and offset.

void compute_row_stats (TO_BLOCK *block, BOOL8 testing_on)
 
fill_heights

Fill the given heights with heights of the blobs that are legal candidates for estimating xheight.

void fill_heights (TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
 
compute_xheight_from_modes

Given a STATS object heights, looks for two most frequently occurring heights that look like xheight and xheight + ascrise. If found, sets the values of *xheight and *ascrise accordingly, otherwise sets xheight to any most frequently occurring height and sets *ascrise to 0. Returns the number of times xheight occurred in heights. For each mode that is considered for being an xheight the count of floating blobs (stored in floating_heights) is subtracted from the total count of the blobs of this height. This is done because blobs that sit far above the baseline could represent valid ascenders, but it is highly unlikely that such a character's height will be an xheight (e.g. -, ', =, ^, `, ", ', etc) If cap_only, then force finding of only the top mode.

int compute_xheight_from_modes (STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
 
compute_row_descdrop

Estimates the descdrop of this row. This function looks for "significant" descenders of lowercase letters (those that could not just be the small descenders of upper case letters like Q,J). The function also takes into account how many potential ascenders this row might contain. If the number of potential ascenders along with descenders is close to the expected fraction of the total number of blobs in the row, the function returns the descender height, returns 0 otherwise.

inT32 compute_row_descdrop (TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights)
 
compute_height_modes

Find the top maxmodes values in the input array and put their indices in the output in the order in which they occurred.

inT32 compute_height_modes (STATS *heights, inT32 min_height, inT32 max_height, inT32 *modes, inT32 maxmodes)
 
correct_row_xheight

Adjust the xheight etc of this row if not within reasonable limits of the average for the block.

void correct_row_xheight (TO_ROW *row, float xheight, float ascrise, float descdrop)
 
separate_underlines

Test wide objects for being potential underlines. If they are then put them in a separate list in the block.

void separate_underlines (TO_BLOCK *block, float gradient, FCOORD rotation, BOOL8 testing_on)
 
pre_associate_blobs

Associate overlapping blobs and fake chop wide blobs.

void pre_associate_blobs (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
 
fit_parallel_rows

Re-fit the rows in the block to the given gradient.

void fit_parallel_rows (TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
 
fit_parallel_lms

Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.

void fit_parallel_lms (float gradient, TO_ROW *row)
 
make_baseline_spline

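Fit a baseline spline to a row. (The description carried by the source at this point, "Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.", is identical to the one for fit_parallel_lms and refers to a gradient, which this function does not take as a parameter.)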

void make_baseline_spline (TO_ROW *row, TO_BLOCK *block)
 
segment_baseline

Divide the baseline up into segments which require a different quadratic fitted to them. Return TRUE if enough blobs were far enough away to need a quadratic.

BOOL8 segment_baseline (TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
 
linear_spline_baseline

Divide the baseline up into segments which require a different quadratic fitted to them.

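Returns
a double * (per the signature below). The note carried by the source at this point, "TRUE if enough blobs were far enough away to need a quadratic.", matches the segment_baseline description and does not fit this return type.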
double * linear_spline_baseline (TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
 
assign_blobs_to_rows

Make enough rows to allocate all the given blobs to one. If a block skew is given, use that, else attempt to track it.

void assign_blobs_to_rows (TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew)
 
most_overlapping_row

Return the row which most overlaps the blob.

OVERLAP_STATE most_overlapping_row (TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, BOOL8 testing_blob)
 
blob_x_order

Sort function to sort blobs in x from page left.

int blob_x_order (const void *item1, const void *item2)
 
row_y_order

Sort function to sort rows in y from page top.

int row_y_order (const void *item1, const void *item2)
 
row_spacing_order

Qsort style function to compare 2 TO_ROWS based on their spacing value.

int row_spacing_order (const void *item1, const void *item2)
 
mark_repeated_chars

Mark blobs marked with BTFT_LEADER in repeated sets using the repeated_set member of BLOBNBOX.

void mark_repeated_chars (TO_ROW *row)
 

Variables

bool textord_heavy_nr = FALSE
 
bool textord_show_initial_rows = FALSE
 
bool textord_show_parallel_rows = FALSE
 
bool textord_show_expanded_rows = FALSE
 
bool textord_show_final_rows = FALSE
 
bool textord_show_final_blobs = FALSE
 
bool textord_test_landscape = FALSE
 
bool textord_parallel_baselines = TRUE
 
bool textord_straight_baselines = FALSE
 
bool textord_old_baselines = TRUE
 
bool textord_old_xheight = FALSE
 
bool textord_fix_xheight_bug = TRUE
 
bool textord_fix_makerow_bug = TRUE
 
bool textord_debug_xheights = FALSE
 
bool textord_biased_skewcalc = TRUE
 
bool textord_interpolating_skew = TRUE
 
int textord_skewsmooth_offset = 4
 
int textord_skewsmooth_offset2 = 1
 
int textord_test_x = -MAX_INT32
 
int textord_test_y = -MAX_INT32
 
int textord_min_blobs_in_row = 4
 
int textord_spline_minblobs = 8
 
int textord_spline_medianwin = 6
 
int textord_max_blob_overlaps = 4
 
int textord_min_xheight = 10
 
double textord_spline_shift_fraction = 0.02
 
double textord_spline_outlier_fraction = 0.1
 
double textord_skew_ile = 0.5
 
double textord_skew_lag = 0.02
 
double textord_linespace_iqrlimit = 0.2
 
double textord_width_limit = 8
 
double textord_chop_width = 1.5
 
double textord_expansion_factor = 1.0
 
double textord_overlap_x = 0.375
 
double textord_minxh = 0.25
 
double textord_min_linesize = 1.25
 
double textord_excess_blobsize = 1.3
 
double textord_occupancy_threshold = 0.4
 
double textord_underline_width = 2.0
 
double textord_min_blob_height_fraction = 0.75
 
double textord_xheight_mode_fraction = 0.4
 
double textord_ascheight_mode_fraction = 0.08
 
double textord_descheight_mode_fraction = 0.08
 
double textord_ascx_ratio_min = 1.25
 
double textord_ascx_ratio_max = 1.8
 
double textord_descx_ratio_min = 0.25
 
double textord_descx_ratio_max = 0.6
 
double textord_xheight_error_margin = 0.1
 
int textord_lms_line_trials = 12
 
bool textord_new_initial_xheight = TRUE
 
bool textord_debug_blob = FALSE
 
const int kMinLeaderCount = 5
 

compute_page_skew

Compute the skew over a full page by averaging the gradients over all the lines. Get the error of the same row.

const double kNoiseSize = 0.5
 
const int kMinSize = 8
 
void compute_page_skew (TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
 
void vigorous_noise_removal (TO_BLOCK *block)
 
void cleanup_rows_making (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
 
void delete_non_dropout_rows (TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
 

Macro Definition Documentation

#define MAX_HEIGHT_MODES   12

Definition at line 105 of file makerow.cpp.

Function Documentation

void adjust_row_limits ( TO_BLOCK * block)

adjust_row_limits

Change the limits of rows to suit the default fractions.

Definition at line 1133 of file makerow.cpp.

// Body of adjust_row_limits (definition at makerow.cpp:1133).
// For every row of the block it measures the current row size, derives new
// lower/upper offsets, resets the row limits relative to the row intercept
// and clears the row's merged flag.
// NOTE(review): listing lines 1142, 1149, 1152-1154 and 1156-1157 are absent
// from this rendering, so the guards on the two tprintf calls, the rest of
// the ymax expression and the assignment of ymin are not visible here. The
// reference list after the listing names textord_show_expanded_rows,
// kAscenderFraction and kDescenderFraction, which presumably appear on the
// missing lines - confirm against makerow.cpp.
1135  {
1136  TO_ROW *row; //current row
1137  float size; //size of row
1138  float ymax; //top of row
1139  float ymin; //bottom of row
1140  TO_ROW_IT row_it = block->get_rows ();
1141 
      // Debug trace identifying the block by the left/top of its bounding box.
1143  tprintf("Adjusting row limits for block(%d,%d)\n",
1144  block->block->bounding_box().left(),
1145  block->block->bounding_box().top());
1146  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1147  row = row_it.data ();
      // Current extent of the row between its existing limits.
1148  size = row->max_y () - row->min_y ();
1150  tprintf("Row at %f has min %f, max %f, size %f\n",
1151  row->intercept(), row->min_y(), row->max_y(), size);
      // Upper offset from the intercept; the expression continues on a listing
      // line that is missing here.
1155  ymax = size * (tesseract::CCStruct::kXHeightFraction +
      // New limits are expressed relative to the row intercept.
1158  row->set_limits (row->intercept () + ymin, row->intercept () + ymax);
1159  row->merged = FALSE;
1160  }
1161 }
bool textord_show_expanded_rows
Definition: makerow.cpp:47
float min_y() const
Definition: blobbox.h:557
void set_limits(float new_min, float new_max)
Definition: blobbox.h:618
float max_y() const
Definition: blobbox.h:554
static const double kAscenderFraction
Definition: ccstruct.h:35
#define tprintf(...)
Definition: tprintf.h:31
static const double kDescenderFraction
Definition: ccstruct.h:33
#define FALSE
Definition: capi.h:46
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59
BLOCK * block
Definition: blobbox.h:773
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
float intercept() const
Definition: blobbox.h:584
static const double kXHeightFraction
Definition: ccstruct.h:34
BOOL8 merged
Definition: blobbox.h:641
void assign_blobs_to_rows ( TO_BLOCK * block,
float *  gradient,
int  pass,
BOOL8  reject_misses,
BOOL8  make_new_rows,
BOOL8  drawing_skew 
)

Definition at line 2309 of file makerow.cpp.

// Body of assign_blobs_to_rows (definition at makerow.cpp:2309).
// Sorts the block's blobs by left edge, then for each blob removes the
// current skew estimate from its top/bottom, picks a destination row
// (existing row, newly created row, or none) and, when the blob was placed,
// keeps the row list ordered and updates the running skew estimate.
// Rows left without blobs are deleted at the end.
// NOTE(review): listing lines 2364, 2401, 2413, 2422, 2446 and 2459 are
// absent from this rendering, so several conditions/expressions below are
// visibly truncated. The reference list after the listing names
// textord_interpolating_skew, kAscenderFraction, kDescenderFraction,
// textord_skewsmooth_offset and textord_skewsmooth_offset2, which presumably
// appear on the missing lines - confirm against makerow.cpp.
2316  {
2317  OVERLAP_STATE overlap_result; //what to do with it
2318  float ycoord; //current y
2319  float top, bottom; //of blob
2320  float g_length = 1.0f; //from gradient
2321  inT16 row_count; //no of rows
2322  inT16 left_x; //left edge
2323  inT16 last_x; //previous edge
2324  float block_skew; //y delta
2325  float smooth_factor; //for new coords
2326  float near_dist; //dist to nearest row
2327  ICOORD testpt; //testing only
2328  BLOBNBOX *blob; //current blob
2329  TO_ROW *row; //current row
2330  TO_ROW *dest_row = NULL; //row to put blob in
2331  //iterators
2332  BLOBNBOX_IT blob_it = &block->blobs;
2333  TO_ROW_IT row_it = block->get_rows ();
2334 
      // Vertical middle of the block box; only used by the drawing calls below.
2335  ycoord =
2336  (block->block->bounding_box ().bottom () +
2337  block->block->bounding_box ().top ()) / 2.0f;
      // Length of the vector (1, *gradient), used to normalise the skew terms.
2338  if (gradient != NULL)
2339  g_length = sqrt (1 + *gradient * *gradient);
2340 #ifndef GRAPHICS_DISABLED
2341  if (drawing_skew)
2342  to_win->SetCursor(block->block->bounding_box ().left (), ycoord);
2343 #endif
2344  testpt = ICOORD (textord_test_x, textord_test_y);
      // Process blobs in order of their left edge (see blob_x_order).
2345  blob_it.sort (blob_x_order);
2346  smooth_factor = 1.0;
2347  block_skew = 0.0f;
2348  row_count = row_it.length (); //might have rows
      // left_x is the left edge of the first blob, or of the block if there
      // are no blobs.
2349  if (!blob_it.empty ()) {
2350  left_x = blob_it.data ()->bounding_box ().left ();
2351  }
2352  else {
2353  left_x = block->block->bounding_box ().left ();
2354  }
2355  last_x = left_x;
2356  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
2357  blob = blob_it.data ();
      // With a supplied gradient the skew is computed directly from the blob
      // position; otherwise the running estimate may be rescaled (the rest of
      // that condition is on a listing line missing from this rendering).
2358  if (gradient != NULL) {
2359  block_skew = (1 - 1 / g_length) * blob->bounding_box ().bottom ()
2360  + *gradient / g_length * blob->bounding_box ().left ();
2361  }
2362  else if (blob->bounding_box ().left () - last_x > block->line_size / 2
2363  && last_x - left_x > block->line_size * 2
2365  // tprintf("Interpolating skew from %g",block_skew);
2366  block_skew *= (float) (blob->bounding_box ().left () - left_x)
2367  / (last_x - left_x);
2368  // tprintf("to %g\n",block_skew);
2369  }
2370  last_x = blob->bounding_box ().left ();
      // Blob extent with the current skew estimate removed.
2371  top = blob->bounding_box ().top () - block_skew;
2372  bottom = blob->bounding_box ().bottom () - block_skew;
2373 #ifndef GRAPHICS_DISABLED
2374  if (drawing_skew)
2375  to_win->DrawTo(blob->bounding_box ().left (), ycoord + block_skew);
2376 #endif
2377  if (!row_it.empty ()) {
      // Empty-bodied loop: advance from the first row until a row whose min_y
      // is not above the blob top, or the last row, is reached.
2378  for (row_it.move_to_first ();
2379  !row_it.at_last () && row_it.data ()->min_y () > top;
2380  row_it.forward ());
2381  row = row_it.data ();
2382  if (row->min_y () <= top && row->max_y () >= bottom) {
2383  //any overlap
2384  dest_row = row;
2385  overlap_result = most_overlapping_row (&row_it, dest_row,
2386  top, bottom,
2387  block->line_size,
2388  blob->bounding_box ().
2389  contains (testpt));
      // When misses are not being rejected, a NEW_ROW verdict is turned into
      // an assignment to dest_row.
2390  if (overlap_result == NEW_ROW && !reject_misses)
2391  overlap_result = ASSIGN;
2392  }
2393  else {
      // No overlap with the row found: default to NEW_ROW, but when new rows
      // are not allowed try to attach the blob to a nearby row instead.
2394  overlap_result = NEW_ROW;
2395  if (!make_new_rows) {
2396  near_dist = row_it.data_relative (-1)->min_y () - top;
2397  //below bottom
2398  if (bottom < row->min_y ()) {
2399  if (row->min_y () - bottom <=
2400  (block->line_spacing -
2402  //done it
2403  overlap_result = ASSIGN;
2404  dest_row = row;
2405  }
2406  }
2407  else if (near_dist > 0
2408  && near_dist < bottom - row->max_y ()) {
      // The previous row in the list is nearer: step back and test against it.
2409  row_it.backward ();
2410  dest_row = row_it.data ();
2411  if (dest_row->min_y () - bottom <=
2412  (block->line_spacing -
2414  //done it
2415  overlap_result = ASSIGN;
2416  }
2417  }
2418  else {
2419  if (top - row->max_y () <=
2420  (block->line_spacing -
2421  block->line_size) * (textord_overlap_x +
2423  //done it
2424  overlap_result = ASSIGN;
2425  dest_row = row;
2426  }
2427  }
2428  }
2429  }
      // ASSIGN: move the blob out of the block list into dest_row.
2430  if (overlap_result == ASSIGN)
2431  dest_row->add_blob (blob_it.extract (), top, bottom,
2432  block->line_size);
      // NEW_ROW: create a row only if allowed and the blob height is below
      // block->max_blob_size; otherwise the blob is rejected (left in place).
2433  if (overlap_result == NEW_ROW) {
2434  if (make_new_rows && top - bottom < block->max_blob_size) {
2435  dest_row =
2436  new TO_ROW (blob_it.extract (), top, bottom,
2437  block->line_size);
2438  row_count++;
2439  if (bottom > row_it.data ()->min_y ())
2440  row_it.add_before_then_move (dest_row);
2441  //insert in right place
2442  else
2443  row_it.add_after_then_move (dest_row);
      // smooth_factor is recomputed from the new row count; the rest of the
      // expression is on a listing line missing from this rendering.
2444  smooth_factor =
2445  1.0 / (row_count * textord_skew_lag +
2447  }
2448  else
2449  overlap_result = REJECT;
2450  }
2451  }
      // No rows exist yet: start the first row if permitted, else reject.
2452  else if (make_new_rows && top - bottom < block->max_blob_size) {
2453  overlap_result = NEW_ROW;
2454  dest_row =
2455  new TO_ROW(blob_it.extract(), top, bottom, block->line_size);
2456  row_count++;
2457  row_it.add_after_then_move(dest_row);
2458  smooth_factor = 1.0 / (row_count * textord_skew_lag +
2460  }
2461  else
2462  overlap_result = REJECT;
      // Optional debug trace for the blob containing the test point.
2463  if (blob->bounding_box ().contains(testpt) && textord_debug_blob) {
2464  if (overlap_result != REJECT) {
2465  tprintf("Test blob assigned to row at (%g,%g) on pass %d\n",
2466  dest_row->min_y(), dest_row->max_y(), pass);
2467  }
2468  else {
2469  tprintf("Test blob assigned to no row on pass %d\n", pass);
2470  }
2471  }
2472  if (overlap_result != REJECT) {
      // Move the row at the iterator towards the front while its min_y
      // exceeds that of the row before it...
2473  while (!row_it.at_first() &&
2474  row_it.data()->min_y() > row_it.data_relative(-1)->min_y()) {
2475  row = row_it.extract();
2476  row_it.backward();
2477  row_it.add_before_then_move(row);
2478  }
      // ...and towards the back while its min_y is below that of the row
      // after it.
2479  while (!row_it.at_last() &&
2480  row_it.data ()->min_y() < row_it.data_relative (1)->min_y()) {
2481  row = row_it.extract();
2482  row_it.forward();
2483  // Keep rows in order.
2484  row_it.add_after_then_move(row);
2485  }
      // Update the running skew only if the row holds a single blob or the
      // blob before the last one in dest_row does not majorly overlap this
      // blob in x.
2486  BLOBNBOX_IT added_blob_it(dest_row->blob_list());
2487  added_blob_it.move_to_last();
2488  TBOX prev_box = added_blob_it.data_relative(-1)->bounding_box();
2489  if (dest_row->blob_list()->singleton() ||
2490  !prev_box.major_x_overlap(blob->bounding_box())) {
      // Blend of the old estimate and this blob's offset from the row's
      // initial_min_y, weighted by smooth_factor.
2491  block_skew = (1 - smooth_factor) * block_skew
2492  + smooth_factor * (blob->bounding_box().bottom() -
2493  dest_row->initial_min_y());
2494  }
2495  }
2496  }
2497  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
2498  if (row_it.data()->blob_list()->empty())
2499  delete row_it.extract(); // Discard empty rows.
2500  }
2501 }
bool textord_debug_blob
Definition: makerow.cpp:103
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
inT16 bottom() const
Definition: rect.h:61
OVERLAP_STATE
Definition: makerow.h:29
float line_spacing
Definition: blobbox.h:775
float min_y() const
Definition: blobbox.h:557
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2604
float max_y() const
Definition: blobbox.h:554
void SetCursor(int x, int y)
Definition: scrollview.cpp:525
bool textord_interpolating_skew
Definition: makerow.cpp:59
static const double kAscenderFraction
Definition: ccstruct.h:35
#define tprintf(...)
Definition: tprintf.h:31
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
Definition: rect.h:30
const TBOX & bounding_box() const
Definition: blobbox.h:215
static const double kDescenderFraction
Definition: ccstruct.h:33
OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, BOOL8 testing_blob)
Definition: makerow.cpp:2509
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59
short inT16
Definition: host.h:100
int textord_skewsmooth_offset2
Definition: makerow.cpp:61
int textord_test_x
Definition: makerow.cpp:62
BLOCK * block
Definition: blobbox.h:773
inT16 left() const
Definition: rect.h:68
int textord_test_y
Definition: makerow.cpp:63
integer coordinate
Definition: points.h:30
int textord_skewsmooth_offset
Definition: makerow.cpp:60
Definition: makerow.h:31
bool major_x_overlap(const TBOX &box) const
Definition: rect.h:402
Definition: makerow.h:32
float line_size
Definition: blobbox.h:781
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
float initial_min_y() const
Definition: blobbox.h:563
inT16 top() const
Definition: rect.h:54
bool contains(const FCOORD pt) const
Definition: rect.h:323
double textord_skew_lag
Definition: makerow.cpp:75
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
void DrawTo(int x, int y)
Definition: scrollview.cpp:531
double textord_overlap_x
Definition: makerow.cpp:81
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
Definition: blobbox.cpp:728
int blob_x_order ( const void *  item1,
const void *  item2 
)

Definition at line 2604 of file makerow.cpp.

// Body of blob_x_order (definition at makerow.cpp:2604).
// Comparator over two pointers-to-BLOBNBOX-pointer: returns -1, 1 or 0
// according to whether the first blob's bounding-box left edge is less than,
// greater than or equal to the second's.
2606  {
2607  //converted ptr
2608  BLOBNBOX *blob1 = *(BLOBNBOX **) item1;
2609  //converted ptr
2610  BLOBNBOX *blob2 = *(BLOBNBOX **) item2;
2611 
      // Order purely by left edge; equal left edges compare as equal.
2612  if (blob1->bounding_box ().left () < blob2->bounding_box ().left ())
2613  return -1;
2614  else if (blob1->bounding_box ().left () > blob2->bounding_box ().left ())
2615  return 1;
2616  else
2617  return 0;
2618 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
inT16 left() const
Definition: rect.h:68
void cleanup_rows_making ( ICOORD  page_tr,
TO_BLOCK * block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

cleanup_rows_making

Remove overlapping rows and fit all the blobs to what's left.

Definition at line 524 of file makerow.cpp.

// Body of cleanup_rows_making (definition at makerow.cpp:524).
// Re-fits and expands the rows, hands every row's blobs back to the block,
// then calls assign_blobs_to_rows three times (passes 1, 2, 3), adding the
// large blobs before pass 2 and the noise and small blobs before pass 3.
// NOTE(review): listing line 548 is absent from this rendering; lines
// 549-552 are the tail of a call whose name is not visible. The reference
// list after the listing names delete_non_dropout_rows, which presumably is
// that call - confirm against makerow.cpp.
531  {
532  //iterators
533  BLOBNBOX_IT blob_it = &block->blobs;
534  TO_ROW_IT row_it = block->get_rows ();
535 
536 #ifndef GRAPHICS_DISABLED
      // Create the debug window on demand when parallel rows are to be shown.
537  if (textord_show_parallel_rows && testing_on) {
538  if (to_win == NULL)
539  create_to_win(page_tr);
540  }
541 #endif
542  //get row coords
543  fit_parallel_rows(block,
544  gradient,
545  rotation,
546  block_edge,
547  textord_show_parallel_rows &&testing_on);
549  gradient,
550  rotation,
551  block_edge,
552  textord_show_parallel_rows &&testing_on);
553  expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on);
      // Append each row's blob list to the block's blob list.
554  blob_it.set_to_list (&block->blobs);
555  row_it.set_to_list (block->get_rows ());
556  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
557  blob_it.add_list_after (row_it.data ()->blob_list ());
558  //give blobs back
      // Pass 1: reject_misses = FALSE, make_new_rows = FALSE.
559  assign_blobs_to_rows (block, &gradient, 1, FALSE, FALSE, FALSE);
560  //now new rows must be genuine
561  blob_it.set_to_list (&block->blobs);
562  blob_it.add_list_after (&block->large_blobs);
      // Pass 2: reject_misses = TRUE, make_new_rows = TRUE.
563  assign_blobs_to_rows (block, &gradient, 2, TRUE, TRUE, FALSE);
564  //safe to use big ones now
565  blob_it.set_to_list (&block->blobs);
566  //throw all blobs in
567  blob_it.add_list_after (&block->noise_blobs);
568  blob_it.add_list_after (&block->small_blobs);
      // Pass 3: reject_misses = FALSE, make_new_rows = FALSE.
569  assign_blobs_to_rows (block, &gradient, 3, FALSE, FALSE, FALSE);
570 }
void expand_rows(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:975
#define TRUE
Definition: capi.h:45
bool textord_show_parallel_rows
Definition: makerow.cpp:46
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:772
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
#define FALSE
Definition: capi.h:46
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:770
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew)
Definition: makerow.cpp:2309
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:771
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
void fit_parallel_rows(TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:1961
void delete_non_dropout_rows(TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:577
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
void compute_dropout_distances ( inT32 * occupation,
inT32 * thresholds,
inT32  line_count 
)

Definition at line 928 of file makerow.cpp.

// Body of compute_dropout_distances (definition at makerow.cpp:928).
// Overwrites thresholds[] in place with signed distances: each run is first
// filled with decreasing negative values while scanning forward, and when
// the scan stops before the end of the array the tail of the run is
// back-filled with increasing positive values 1, 2, ...
// NOTE(review): both do/while loops execute at least once, so thresholds[0]
// is read and written even if line_count is 0.
932  {
933  inT32 line_index; //of thresholds line
934  inT32 distance; //from prev dropout
935  inT32 next_dist; //to next dropout
936  inT32 back_index; //for back filling
937  inT32 prev_threshold; //before overwrite
938 
      // Start at -line_count so the first run's magnitudes exceed any real
      // distance within the array.
939  distance = -line_count;
940  line_index = 0;
941  do {
      // Forward scan: keep the old threshold before overwriting it, since the
      // loop test compares it with the previous occupation value.
942  do {
943  distance--;
944  prev_threshold = thresholds[line_index];
945  //distance from prev
946  thresholds[line_index] = distance;
947  line_index++;
948  }
      // Continue while the current line is below its threshold or the
      // previous line was at/above its (original) threshold.
949  while (line_index < line_count
950  && (occupation[line_index] < thresholds[line_index]
951  || occupation[line_index - 1] >= prev_threshold));
952  if (line_index < line_count) {
      // Back-fill with positive distances; distance is incremented in step so
      // the fill stops where next_dist is no longer less than -distance.
953  back_index = line_index - 1;
954  next_dist = 1;
955  while (next_dist < -distance && back_index >= 0) {
956  thresholds[back_index] = next_dist;
957  back_index--;
958  next_dist++;
959  distance++;
960  }
      // Restart the distance for the next run (it is decremented before the
      // first write of that run).
961  distance = 1;
962  }
963  }
964  while (line_index < line_count);
965 }
int inT32
Definition: host.h:102
inT32 compute_height_modes ( STATS * heights,
inT32  min_height,
inT32  max_height,
inT32 * modes,
inT32  maxmodes 
)

Definition at line 1653 of file makerow.cpp.

// Body of compute_height_modes (definition at makerow.cpp:1653).
// Scans heights min_height..max_height in increasing order and collects, in
// modes[], up to maxmodes heights with a non-zero pile count. Once modes[]
// is full, a height whose count is >= the smallest stored count evicts that
// smallest entry; the survivors keep their order and the new height goes on
// the end. Returns dest_count.
1657  { // size of modes
1658  inT32 pile_count; // no in source pile
1659  inT32 src_count; // no of source entries
1660  inT32 src_index; // current entry
1661  inT32 least_count; // height of smallest
1662  inT32 least_index; // index of least
1663  inT32 dest_count; // index in modes
1664 
1665  src_count = max_height + 1 - min_height;
1666  dest_count = 0;
1667  least_count = MAX_INT32;
1668  least_index = -1;
1669  for (src_index = 0; src_index < src_count; src_index++) {
1670  pile_count = heights->pile_count(min_height + src_index);
1671  if (pile_count > 0) {
      // Still room: append, tracking which stored entry has the least count.
1672  if (dest_count < maxmodes) {
1673  if (pile_count < least_count) {
1674  // find smallest in array
1675  least_count = pile_count;
1676  least_index = dest_count;
1677  }
1678  modes[dest_count++] = min_height + src_index;
1679  } else if (pile_count >= least_count) {
      // Full: drop the least entry by shifting everything after it down one.
1680  while (least_index < maxmodes - 1) {
1681  modes[least_index] = modes[least_index + 1];
1682  // shuffle up
1683  least_index++;
1684  }
1685  // new one on end
1686  modes[maxmodes - 1] = min_height + src_index;
1687  if (pile_count == least_count) {
1688  // new smallest
1689  least_index = maxmodes - 1;
1690  } else {
      // Rescan modes[] for the new least; this loop reuses dest_count as its
      // index and leaves it equal to maxmodes.
1691  least_count = heights->pile_count(modes[0]);
1692  least_index = 0;
1693  for (dest_count = 1; dest_count < maxmodes; dest_count++) {
1694  pile_count = heights->pile_count(modes[dest_count]);
1695  if (pile_count < least_count) {
1696  // find smallest
1697  least_count = pile_count;
1698  least_index = dest_count;
1699  }
1700  }
1701  }
1702  }
1703  }
1704  }
1705  return dest_count;
1706 }
#define MAX_INT32
Definition: host.h:120
int inT32
Definition: host.h:102
inT32 pile_count(inT32 value) const
Definition: statistc.h:78
void compute_line_occupation ( TO_BLOCK * block,
float  gradient,
inT32  min_y,
inT32  max_y,
inT32 * occupation,
inT32 * deltas 
)

Definition at line 781 of file makerow.cpp.

// Body of compute_line_occupation (definition at makerow.cpp:781).
// Clears deltas[0..line_count-1], then for every blob in every row rotates
// the blob box by the inverse skew and adds the box width to deltas[] at the
// bottom's scan line and subtracts it at the top's scan line. occupation[]
// is then the running sum of deltas[].
// NOTE(review): the fprintf diagnostics only report an out-of-range scan
// line; deltas[index] is still written by the statement that follows.
// NOTE(review): occupation[0] and deltas[0] are accessed regardless of
// line_count.
788  {
789  inT32 line_count; //maxy-miny+1
790  inT32 line_index; //of scan line
791  int index; //array index for daft compilers
792  float top, bottom; //coords of blob
793  inT32 width; //of blob
794  TO_ROW *row; //current row
795  TO_ROW_IT row_it = block->get_rows ();
796  BLOBNBOX *blob; //current blob
797  BLOBNBOX_IT blob_it; //iterator
798  float length; //of skew vector
799  TBOX blob_box; //bounding box
800  FCOORD rotation; //inverse of skew
801 
802  line_count = max_y - min_y + 1;
      // Unit vector for the rotation that undoes the given gradient.
803  length = sqrt (gradient * gradient + 1);
804  rotation = FCOORD (1 / length, -gradient / length);
805  for (line_index = 0; line_index < line_count; line_index++)
806  deltas[line_index] = 0;
807  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
808  row = row_it.data ();
809  blob_it.set_to_list (row->blob_list ());
810  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
811  blob_it.forward ()) {
812  blob = blob_it.data ();
      // Work on a copy of the box so the blob itself is not modified.
813  blob_box = blob->bounding_box ();
814  blob_box.rotate (rotation);//de-skew it
815  top = blob_box.top ();
816  bottom = blob_box.bottom ();
817  width =
818  (inT32) floor ((FLOAT32) (blob_box.right () - blob_box.left ()));
      // Report (but do not skip) a bottom that maps outside [0, line_count).
819  if ((inT32) floor (bottom) < min_y
820  || (inT32) floor (bottom) - min_y >= line_count)
821  fprintf (stderr,
822  "Bad y coord of bottom, " INT32FORMAT "(" INT32FORMAT ","
823  INT32FORMAT ")\n", (inT32) floor (bottom), min_y, max_y);
824  //count transitions
825  index = (inT32) floor (bottom) - min_y;
826  deltas[index] += width;
      // Same report-only check for the top.
827  if ((inT32) floor (top) < min_y
828  || (inT32) floor (top) - min_y >= line_count)
829  fprintf (stderr,
830  "Bad y coord of top, " INT32FORMAT "(" INT32FORMAT ","
831  INT32FORMAT ")\n", (inT32) floor (top), min_y, max_y);
832  index = (inT32) floor (top) - min_y;
833  deltas[index] -= width;
834  }
835  }
      // Prefix-sum the deltas to obtain the occupation of each scan line.
836  occupation[0] = deltas[0];
837  for (line_index = 1; line_index < line_count; line_index++)
838  occupation[line_index] = occupation[line_index - 1] + deltas[line_index];
839 }
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
void rotate(const FCOORD &vec)
Definition: rect.h:189
inT16 bottom() const
Definition: rect.h:61
int inT32
Definition: host.h:102
Definition: points.h:189
float FLOAT32
Definition: host.h:111
Definition: rect.h:30
const TBOX & bounding_box() const
Definition: blobbox.h:215
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
#define INT32FORMAT
Definition: host.h:115
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
inT16 top() const
Definition: rect.h:54
void compute_occupation_threshold ( inT32  low_window,
inT32  high_window,
inT32  line_count,
inT32 * occupation,
inT32 * thresholds 
)

compute_occupation_threshold

Compute thresholds for textline or not for the occupation array.

Definition at line 847 of file makerow.cpp.

// Body of compute_occupation_threshold (definition at makerow.cpp:847).
// Fills thresholds[0..line_count-1] with (sum - min_occ) / divisor + min_occ,
// where sum and min_occ are taken over a window of occupation[] that slides
// along the array when low_window + high_window < line_count, and over the
// whole array otherwise.
853  {
854  inT32 line_index; //of thresholds line
855  inT32 low_index; //in occupation
856  inT32 high_index; //in occupation
857  inT32 sum; //current average
858  inT32 divisor; //to get thresholds
859  inT32 min_index; //of min occ
860  inT32 min_occ; //min in locality
861  inT32 test_index; //for finding min
862 
      // Window size scaled by 1 / textord_occupancy_threshold, rounded up.
863  divisor =
864  (inT32) ceil ((low_window + high_window) / textord_occupancy_threshold);
865  if (low_window + high_window < line_count) {
      // Sum the first low_window + high_window entries; high_index ends one
      // past the initial window.
866  for (sum = 0, high_index = 0; high_index < low_window; high_index++)
867  sum += occupation[high_index];
868  for (low_index = 0; low_index < high_window; low_index++, high_index++)
869  sum += occupation[high_index];
      // Minimum over the initial window (the last index wins on ties).
870  min_occ = occupation[0];
871  min_index = 0;
872  for (test_index = 1; test_index < high_index; test_index++) {
873  if (occupation[test_index] <= min_occ) {
874  min_occ = occupation[test_index];
875  min_index = test_index; //find min in region
876  }
877  }
      // The first low_window lines all take the initial window's threshold.
878  for (line_index = 0; line_index < low_window; line_index++)
879  thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
880  //same out to end
      // Slide the window one line at a time: drop occupation[low_index], add
      // occupation[high_index].
881  for (low_index = 0; high_index < line_count; low_index++, high_index++) {
882  sum -= occupation[low_index];
883  sum += occupation[high_index];
884  if (occupation[high_index] <= min_occ) {
885  //find min in region
886  min_occ = occupation[high_index];
887  min_index = high_index;
888  }
889  //lost min from region
      // The recorded minimum was at or before the dropped entry: rescan
      // low_index + 1 .. high_index for the new minimum.
890  if (min_index <= low_index) {
891  min_occ = occupation[low_index + 1];
892  min_index = low_index + 1;
893  for (test_index = low_index + 2; test_index <= high_index;
894  test_index++) {
895  if (occupation[test_index] <= min_occ) {
896  min_occ = occupation[test_index];
897  //find min in region
898  min_index = test_index;
899  }
900  }
901  }
902  thresholds[line_index++] = (sum - min_occ) / divisor + min_occ;
903  }
904  }
905  else {
      // Window covers the whole array: one sum and one minimum for all lines.
906  min_occ = occupation[0];
907  min_index = 0;
908  for (sum = 0, low_index = 0; low_index < line_count; low_index++) {
909  if (occupation[low_index] < min_occ) {
910  min_occ = occupation[low_index];
911  min_index = low_index;
912  }
913  sum += occupation[low_index];
914  }
915  line_index = 0;
916  }
      // Any lines not yet written take the threshold from the final sum and
      // minimum.
917  for (; line_index < line_count; line_index++)
918  thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
919  //same out to end
920 }
double textord_occupancy_threshold
Definition: makerow.cpp:86
int inT32
Definition: host.h:102
void compute_page_skew ( TO_BLOCK_LIST *  blocks,
float &  page_m,
float &  page_err 
)

Definition at line 287 of file makerow.cpp.

291  {
292  inT32 row_count; //total rows
293  inT32 blob_count; //total_blobs
294  inT32 row_err; //integer error
295  float *gradients; //of rows
296  float *errors; //of rows
297  inT32 row_index; //of total
298  TO_ROW *row; //current row
299  TO_BLOCK_IT block_it = blocks; //iterator
300  TO_ROW_IT row_it;
301 
302  row_count = 0;
303  blob_count = 0;
304  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
305  block_it.forward ()) {
306  POLY_BLOCK* pb = block_it.data()->block->poly_block();
307  if (pb != NULL && !pb->IsText())
308  continue; // Pretend non-text blocks don't exist.
309  row_count += block_it.data ()->get_rows ()->length ();
310  //count up rows
311  row_it.set_to_list (block_it.data ()->get_rows ());
312  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
313  blob_count += row_it.data ()->blob_list ()->length ();
314  }
315  if (row_count == 0) {
316  page_m = 0.0f;
317  page_err = 0.0f;
318  return;
319  }
320  gradients = (float *) alloc_mem (blob_count * sizeof (float));
321  //get mem
322  errors = (float *) alloc_mem (blob_count * sizeof (float));
323  if (gradients == NULL || errors == NULL)
324  MEMORY_OUT.error ("compute_page_skew", ABORT, NULL);
325 
326  row_index = 0;
327  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
328  block_it.forward ()) {
329  POLY_BLOCK* pb = block_it.data()->block->poly_block();
330  if (pb != NULL && !pb->IsText())
331  continue; // Pretend non-text blocks don't exist.
332  row_it.set_to_list (block_it.data ()->get_rows ());
333  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
334  row = row_it.data ();
335  blob_count = row->blob_list ()->length ();
336  row_err = (inT32) ceil (row->line_error ());
337  if (row_err <= 0)
338  row_err = 1;
339  if (textord_biased_skewcalc) {
340  blob_count /= row_err;
341  for (blob_count /= row_err; blob_count > 0; blob_count--) {
342  gradients[row_index] = row->line_m ();
343  errors[row_index] = row->line_error ();
344  row_index++;
345  }
346  }
347  else if (blob_count >= textord_min_blobs_in_row) {
348  //get gradient
349  gradients[row_index] = row->line_m ();
350  errors[row_index] = row->line_error ();
351  row_index++;
352  }
353  }
354  }
355  if (row_index == 0) {
356  //desperate
357  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
358  block_it.forward ()) {
359  POLY_BLOCK* pb = block_it.data()->block->poly_block();
360  if (pb != NULL && !pb->IsText())
361  continue; // Pretend non-text blocks don't exist.
362  row_it.set_to_list (block_it.data ()->get_rows ());
363  for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
364  row_it.forward ()) {
365  row = row_it.data ();
366  gradients[row_index] = row->line_m ();
367  errors[row_index] = row->line_error ();
368  row_index++;
369  }
370  }
371  }
372  row_count = row_index;
373  row_index = choose_nth_item ((inT32) (row_count * textord_skew_ile),
374  gradients, row_count);
375  page_m = gradients[row_index];
376  row_index = choose_nth_item ((inT32) (row_count * textord_skew_ile),
377  errors, row_count);
378  page_err = errors[row_index];
379  free_mem(gradients);
380  free_mem(errors);
381 }
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
inT32 choose_nth_item(inT32 index, float *array, inT32 count)
Definition: statistc.cpp:641
int inT32
Definition: host.h:102
double textord_skew_ile
Definition: makerow.cpp:74
int textord_min_blobs_in_row
Definition: makerow.cpp:64
Definition: errcode.h:30
void * alloc_mem(inT32 count)
Definition: memry.cpp:47
float line_error() const
Definition: blobbox.h:572
const ERRCODE MEMORY_OUT
Definition: stderr.h:25
float line_m() const
Definition: blobbox.h:566
bool textord_biased_skewcalc
Definition: makerow.cpp:58
bool IsText() const
Definition: polyblk.h:52
void error(const char *caller, TessErrorLogCode action, const char *format,...) const
Definition: errcode.cpp:40
void free_mem(void *oldchunk)
Definition: memry.cpp:55
inT32 compute_row_descdrop ( TO_ROW row,
float  gradient,
int  xheight_blob_count,
STATS asc_heights 
)

Definition at line 1593 of file makerow.cpp.

1594  {
1595  // Count how many potential ascenders are in this row.
1596  int i_min = asc_heights->min_bucket();
1597  if ((i_min / row->xheight) < textord_ascx_ratio_min) {
1598  i_min = static_cast<int>(
1599  floor(row->xheight * textord_ascx_ratio_min + 0.5));
1600  }
1601  int i_max = asc_heights->max_bucket();
1602  if ((i_max / row->xheight) > textord_ascx_ratio_max) {
1603  i_max = static_cast<int>(floor(row->xheight * textord_ascx_ratio_max));
1604  }
1605  int num_potential_asc = 0;
1606  for (int i = i_min; i <= i_max; ++i) {
1607  num_potential_asc += asc_heights->pile_count(i);
1608  }
1609  inT32 min_height =
1610  static_cast<inT32>(floor(row->xheight * textord_descx_ratio_min + 0.5));
1611  inT32 max_height =
1612  static_cast<inT32>(floor(row->xheight * textord_descx_ratio_max));
1613  float xcentre; // centre of blob
1614  float height; // height of blob
1615  BLOBNBOX_IT blob_it = row->blob_list();
1616  BLOBNBOX *blob; // current blob
1617  STATS heights (min_height, max_height + 1);
1618  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1619  blob = blob_it.data();
1620  if (!blob->joined_to_prev()) {
1621  xcentre = (blob->bounding_box().left() +
1622  blob->bounding_box().right()) / 2.0f;
1623  height = (gradient * xcentre + row->parallel_c() -
1624  blob->bounding_box().bottom());
1625  if (height >= min_height && height <= max_height)
1626  heights.add(static_cast<int>(floor(height + 0.5)), 1);
1627  }
1628  }
1629  int blob_index = heights.mode(); // find mode
1630  int blob_count = heights.pile_count(blob_index); // get count of mode
1631  float total_fraction =
1632  (textord_descheight_mode_fraction + textord_ascheight_mode_fraction);
1633  if (static_cast<float>(blob_count + num_potential_asc) <
1634  xheight_blob_count * total_fraction) {
1635  blob_count = 0;
1636  }
1637  int descdrop = blob_count > 0 ? -blob_index : 0;
1638  if (textord_debug_xheights) {
1639  tprintf("Descdrop: %d (potential ascenders %d, descenders %d)\n",
1640  descdrop, num_potential_asc, blob_count);
1641  heights.print();
1642  }
1643  return descdrop;
1644 }
bool joined_to_prev() const
Definition: blobbox.h:241
double textord_descheight_mode_fraction
Definition: makerow.cpp:95
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
inT16 bottom() const
Definition: rect.h:61
inT32 max_bucket() const
Definition: statistc.cpp:224
int inT32
Definition: host.h:102
bool textord_debug_xheights
Definition: makerow.cpp:57
double textord_descx_ratio_min
Definition: makerow.cpp:98
#define tprintf(...)
Definition: tprintf.h:31
float parallel_c() const
Definition: blobbox.h:575
float xheight
Definition: blobbox.h:653
const TBOX & bounding_box() const
Definition: blobbox.h:215
inT32 min_bucket() const
Definition: statistc.cpp:209
double textord_ascx_ratio_min
Definition: makerow.cpp:96
inT16 right() const
Definition: rect.h:75
Definition: statistc.h:33
inT32 pile_count(inT32 value) const
Definition: statistc.h:78
inT16 left() const
Definition: rect.h:68
double textord_ascheight_mode_fraction
Definition: makerow.cpp:93
double textord_ascx_ratio_max
Definition: makerow.cpp:97
double textord_descx_ratio_max
Definition: makerow.cpp:99
void compute_row_stats ( TO_BLOCK block,
BOOL8  testing_on 
)

Definition at line 1169 of file makerow.cpp.

1172  {
1173  inT32 row_index; //of median
1174  TO_ROW *row; //current row
1175  TO_ROW *prev_row; //previous row
1176  float iqr; //inter quartile range
1177  TO_ROW_IT row_it = block->get_rows ();
1178  //number of rows
1179  inT16 rowcount = row_it.length ();
1180  TO_ROW **rows; //for choose nth
1181 
1182  rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *));
1183  if (rows == NULL)
1184  MEMORY_OUT.error ("compute_row_stats", ABORT, NULL);
1185  rowcount = 0;
1186  prev_row = NULL;
1187  row_it.move_to_last (); //start at bottom
1188  do {
1189  row = row_it.data ();
1190  if (prev_row != NULL) {
1191  rows[rowcount++] = prev_row;
1192  prev_row->spacing = row->intercept () - prev_row->intercept ();
1193  if (testing_on)
1194  tprintf ("Row at %g yields spacing of %g\n",
1195  row->intercept (), prev_row->spacing);
1196  }
1197  prev_row = row;
1198  row_it.backward ();
1199  }
1200  while (!row_it.at_last ());
1201  block->key_row = prev_row;
1202  block->baseline_offset =
1203  fmod (prev_row->parallel_c (), block->line_spacing);
1204  if (testing_on)
1205  tprintf ("Blob based spacing=(%g,%g), offset=%g",
1206  block->line_size, block->line_spacing, block->baseline_offset);
1207  if (rowcount > 0) {
1208  row_index = choose_nth_item (rowcount * 3 / 4, rows, rowcount,
1209  sizeof (TO_ROW *), row_spacing_order);
1210  iqr = rows[row_index]->spacing;
1211  row_index = choose_nth_item (rowcount / 4, rows, rowcount,
1212  sizeof (TO_ROW *), row_spacing_order);
1213  iqr -= rows[row_index]->spacing;
1214  row_index = choose_nth_item (rowcount / 2, rows, rowcount,
1215  sizeof (TO_ROW *), row_spacing_order);
1216  block->key_row = rows[row_index];
1217  if (testing_on)
1218  tprintf (" row based=%g(%g)", rows[row_index]->spacing, iqr);
1219  if (rowcount > 2
1220  && iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) {
1221  if (!textord_new_initial_xheight) {
1222  if (rows[row_index]->spacing < block->line_spacing
1223  && rows[row_index]->spacing > block->line_size)
1224  //within range
1225  block->line_size = rows[row_index]->spacing;
1226  //spacing=size
1227  else if (rows[row_index]->spacing > block->line_spacing)
1228  block->line_size = block->line_spacing;
1229  //too big so use max
1230  }
1231  else {
1232  if (rows[row_index]->spacing < block->line_spacing)
1233  block->line_size = rows[row_index]->spacing;
1234  else
1235  block->line_size = block->line_spacing;
1236  //too big so use max
1237  }
1238  if (block->line_size < textord_min_xheight)
1239  block->line_size = (float) textord_min_xheight;
1240  block->line_spacing = rows[row_index]->spacing;
1241  block->max_blob_size =
1242  block->line_spacing * textord_excess_blobsize;
1243  }
1244  block->baseline_offset = fmod (rows[row_index]->intercept (),
1245  block->line_spacing);
1246  }
1247  if (testing_on)
1248  tprintf ("\nEstimate line size=%g, spacing=%g, offset=%g\n",
1249  block->line_size, block->line_spacing, block->baseline_offset);
1250  free_mem(rows);
1251 }
float line_spacing
Definition: blobbox.h:775
int textord_min_xheight
Definition: makerow.cpp:69
inT32 choose_nth_item(inT32 index, float *array, inT32 count)
Definition: statistc.cpp:641
int row_spacing_order(const void *item1, const void *item2)
Definition: makerow.cpp:2648
int inT32
Definition: host.h:102
float max_blob_size
Definition: blobbox.h:782
#define tprintf(...)
Definition: tprintf.h:31
float parallel_c() const
Definition: blobbox.h:575
bool textord_new_initial_xheight
Definition: makerow.cpp:102
TO_ROW * key_row
Definition: blobbox.h:794
double textord_excess_blobsize
Definition: makerow.cpp:85
short inT16
Definition: host.h:100
Definition: errcode.h:30
void * alloc_mem(inT32 count)
Definition: memry.cpp:47
const ERRCODE MEMORY_OUT
Definition: stderr.h:25
double textord_linespace_iqrlimit
Definition: makerow.cpp:76
float line_size
Definition: blobbox.h:781
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
float intercept() const
Definition: blobbox.h:584
float baseline_offset
Definition: blobbox.h:783
void error(const char *caller, TessErrorLogCode action, const char *format,...) const
Definition: errcode.cpp:40
void free_mem(void *oldchunk)
Definition: memry.cpp:55
float spacing
Definition: blobbox.h:652
int compute_xheight_from_modes ( STATS heights,
STATS floating_heights,
bool  cap_only,
int  min_height,
int  max_height,
float *  xheight,
float *  ascrise 
)

Definition at line 1497 of file makerow.cpp.

1499  {
1500  int blob_index = heights->mode(); // find mode
1501  int blob_count = heights->pile_count(blob_index); // get count of mode
1502  if (textord_debug_xheights) {
1503  tprintf("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d\n",
1504  min_height, max_height, blob_index, blob_count,
1505  heights->get_total());
1506  heights->print();
1507  floating_heights->print();
1508  }
1509  if (blob_count == 0) return 0;
1510  int modes[MAX_HEIGHT_MODES]; // biggest piles
1511  bool in_best_pile = FALSE;
1512  int prev_size = -MAX_INT32;
1513  int best_count = 0;
1514  int mode_count = compute_height_modes(heights, min_height, max_height,
1515  modes, MAX_HEIGHT_MODES);
1516  if (cap_only && mode_count > 1)
1517  mode_count = 1;
1518  int x;
1519  if (textord_debug_xheights) {
1520  tprintf("found %d modes: ", mode_count);
1521  for (x = 0; x < mode_count; x++) tprintf("%d ", modes[x]);
1522  tprintf("\n");
1523  }
1524 
1525  for (x = 0; x < mode_count - 1; x++) {
1526  if (modes[x] != prev_size + 1)
1527  in_best_pile = FALSE; // had empty height
1528  int modes_x_count = heights->pile_count(modes[x]) -
1529  floating_heights->pile_count(modes[x]);
1530  if ((modes_x_count >= blob_count * textord_xheight_mode_fraction) &&
1531  (in_best_pile || modes_x_count > best_count)) {
1532  for (int asc = x + 1; asc < mode_count; asc++) {
1533  float ratio =
1534  static_cast<float>(modes[asc]) / static_cast<float>(modes[x]);
1535  if (textord_ascx_ratio_min < ratio &&
1536  ratio < textord_ascx_ratio_max &&
1537  (heights->pile_count(modes[asc]) >=
1538  blob_count * textord_ascheight_mode_fraction)) {
1539  if (modes_x_count > best_count) {
1540  in_best_pile = true;
1541  best_count = modes_x_count;
1542  }
1543  if (textord_debug_xheights) {
1544  tprintf("X=%d, asc=%d, count=%d, ratio=%g\n",
1545  modes[x], modes[asc]-modes[x], modes_x_count, ratio);
1546  }
1547  prev_size = modes[x];
1548  *xheight = static_cast<float>(modes[x]);
1549  *ascrise = static_cast<float>(modes[asc] - modes[x]);
1550  }
1551  }
1552  }
1553  }
1554  if (*xheight == 0) { // single mode
1555  // Remove counts of the "floating" blobs (the one whose height is too
1556  // small in relation to it's top end of the bounding box) from heights
1557  // before computing the single-mode xheight.
1558  // Restore the counts in heights after the mode is found, since
1559  // floating blobs might be useful for determining potential ascenders
1560  // in compute_row_descdrop().
1561  if (floating_heights->get_total() > 0) {
1562  for (x = min_height; x < max_height; ++x) {
1563  heights->add(x, -(floating_heights->pile_count(x)));
1564  }
1565  blob_index = heights->mode(); // find the modified mode
1566  for (x = min_height; x < max_height; ++x) {
1567  heights->add(x, floating_heights->pile_count(x));
1568  }
1569  }
1570  *xheight = static_cast<float>(blob_index);
1571  *ascrise = 0.0f;
1572  best_count = heights->pile_count(blob_index);
1573  if (textord_debug_xheights)
1574  tprintf("Single mode xheight set to %g\n", *xheight);
1575  } else if (textord_debug_xheights) {
1576  tprintf("Multi-mode xheight set to %g, asc=%g\n", *xheight, *ascrise);
1577  }
1578  return best_count;
1579 }
inT32 mode() const
Definition: statistc.cpp:118
#define MAX_HEIGHT_MODES
Definition: makerow.cpp:105
#define MAX_INT32
Definition: host.h:120
double textord_xheight_mode_fraction
Definition: makerow.cpp:91
inT32 get_total() const
Definition: statistc.h:86
bool textord_debug_xheights
Definition: makerow.cpp:57
#define tprintf(...)
Definition: tprintf.h:31
#define FALSE
Definition: capi.h:46
double textord_ascx_ratio_min
Definition: makerow.cpp:96
inT32 pile_count(inT32 value) const
Definition: statistc.h:78
inT32 compute_height_modes(STATS *heights, inT32 min_height, inT32 max_height, inT32 *modes, inT32 maxmodes)
Definition: makerow.cpp:1653
double textord_ascheight_mode_fraction
Definition: makerow.cpp:93
double textord_ascx_ratio_max
Definition: makerow.cpp:97
void add(inT32 value, inT32 count)
Definition: statistc.cpp:104
void print() const
Definition: statistc.cpp:537
void correct_row_xheight ( TO_ROW row,
float  xheight,
float  ascrise,
float  descdrop 
)

Definition at line 1715 of file makerow.cpp.

1716  {
1717  ROW_CATEGORY row_category = get_row_category(row);
1718  if (textord_debug_xheights) {
1719  tprintf("correcting row xheight: row->xheight %.4f"
1720  ", row->acrise %.4f row->descdrop %.4f\n",
1721  row->xheight, row->ascrise, row->descdrop);
1722  }
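1723  bool normal_xheight =
1724  within_error_margin(row->xheight, xheight, textord_xheight_error_margin);
1725  bool cap_xheight =
1726  within_error_margin(row->xheight, xheight + ascrise,
1727  textord_xheight_error_margin);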
1728  // Use the average xheight/ascrise for the following cases:
1729  // -- the xheight of the row could not be determined at all
1730  // -- the row has descenders (e.g. "many groups", "ISBN 12345 p.3")
1731  // and its xheight is close to either cap height or average xheight
1732  // -- the row does not have ascenders or descenders, but its xheight
1733  // is close to the average block xheight (e.g. row with "www.mmm.com")
1734  if (row_category == ROW_ASCENDERS_FOUND) {
1735  if (row->descdrop >= 0.0) {
1736  row->descdrop = row->xheight * (descdrop / xheight);
1737  }
1738  } else if (row_category == ROW_INVALID ||
1739  (row_category == ROW_DESCENDERS_FOUND &&
1740  (normal_xheight || cap_xheight)) ||
1741  (row_category == ROW_UNKNOWN && normal_xheight)) {
1742  if (textord_debug_xheights) tprintf("using average xheight\n");
1743  row->xheight = xheight;
1744  row->ascrise = ascrise;
1745  row->descdrop = descdrop;
1746  } else if (row_category == ROW_DESCENDERS_FOUND) {
1747  // Assume this is a row with mostly lowercase letters and it's xheight
1748  // is computed correctly (unfortunately there is no way to distinguish
1749  // this from the case when descenders are found, but the most common
1750  // height is capheight).
1751  if (textord_debug_xheights) tprintf("lowercase, corrected ascrise\n");
1752  row->ascrise = row->xheight * (ascrise / xheight);
1753  } else if (row_category == ROW_UNKNOWN) {
1754  // Otherwise assume this row is an all-caps or small-caps row
1755  // and adjust xheight and ascrise of the row.
1756 
1757  row->all_caps = true;
1758  if (cap_xheight) { // regular all caps
1759  if (textord_debug_xheights) tprintf("all caps\n");
1760  row->xheight = xheight;
1761  row->ascrise = ascrise;
1762  row->descdrop = descdrop;
1763  } else { // small caps or caps with an odd xheight
1764  if (textord_debug_xheights) {
1765  if (row->xheight < xheight + ascrise && row->xheight > xheight) {
1766  tprintf("small caps\n");
1767  } else {
1768  tprintf("all caps with irregular xheight\n");
1769  }
1770  }
1771  row->ascrise = row->xheight * (ascrise / (xheight + ascrise));
1772  row->xheight -= row->ascrise;
1773  row->descdrop = row->xheight * (descdrop / xheight);
1774  }
1775  }
1776  if (textord_debug_xheights) {
1777  tprintf("corrected row->xheight = %.4f, row->acrise = %.4f, row->descdrop"
1778  " = %.4f\n", row->xheight, row->ascrise, row->descdrop);
1779  }
1780 }
bool textord_debug_xheights
Definition: makerow.cpp:57
#define tprintf(...)
Definition: tprintf.h:31
ROW_CATEGORY get_row_category(const TO_ROW *row)
Definition: makerow.h:123
float xheight
Definition: blobbox.h:653
ROW_CATEGORY
Definition: makerow.h:36
double textord_xheight_error_margin
Definition: makerow.cpp:100
float descdrop
Definition: blobbox.h:656
BOOL8 all_caps
Definition: blobbox.h:642
bool within_error_margin(float test, float num, float margin)
Definition: makerow.h:129
float ascrise
Definition: blobbox.h:655
void delete_non_dropout_rows ( TO_BLOCK block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

delete_non_dropout_rows

Compute the linespacing and offset.

Definition at line 577 of file makerow.cpp.

583  {
584  TBOX block_box; //deskewed block
585  inT32 *deltas; //change in occupation
586  inT32 *occupation; //of pixel coords
587  inT32 max_y; //in block
588  inT32 min_y;
589  inT32 line_index; //of scan line
590  inT32 line_count; //no of scan lines
591  inT32 distance; //to drop-out
592  inT32 xleft; //of block
593  inT32 ybottom; //of block
594  TO_ROW *row; //current row
595  TO_ROW_IT row_it = block->get_rows ();
596  BLOBNBOX_IT blob_it = &block->blobs;
597 
598  if (row_it.length () == 0)
599  return; //empty block
600  block_box = deskew_block_coords (block, gradient);
601  xleft = block->block->bounding_box ().left ();
602  ybottom = block->block->bounding_box ().bottom ();
603  min_y = block_box.bottom () - 1;
604  max_y = block_box.top () + 1;
605  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
606  line_index = (inT32) floor (row_it.data ()->intercept ());
607  if (line_index <= min_y)
608  min_y = line_index - 1;
609  if (line_index >= max_y)
610  max_y = line_index + 1;
611  }
612  line_count = max_y - min_y + 1;
613  if (line_count <= 0)
614  return; //empty block
615  deltas = (inT32 *) alloc_mem (line_count * sizeof (inT32));
616  occupation = (inT32 *) alloc_mem (line_count * sizeof (inT32));
617  if (deltas == NULL || occupation == NULL)
618  MEMORY_OUT.error ("compute_line_spacing", ABORT, NULL);
619 
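620  compute_line_occupation(block, gradient, min_y, max_y, occupation, deltas);
621  compute_occupation_threshold ((inT32)
622  ceil (block->line_spacing *
623  (tesseract::CCStruct::kDescenderFraction +
624  tesseract::CCStruct::kAscenderFraction)),
625  (inT32) ceil (block->line_spacing *
626  (tesseract::CCStruct::kXHeightFraction +
627  tesseract::CCStruct::kAscenderFraction)),
628  max_y - min_y + 1, occupation, deltas);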
629 #ifndef GRAPHICS_DISABLED
630  if (testing_on) {
631  draw_occupation(xleft, ybottom, min_y, max_y, occupation, deltas);
632  }
633 #endif
634  compute_dropout_distances(occupation, deltas, line_count);
635  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
636  row = row_it.data ();
637  line_index = (inT32) floor (row->intercept ());
638  distance = deltas[line_index - min_y];
639  if (find_best_dropout_row (row, distance, block->line_spacing / 2,
640  line_index, &row_it, testing_on)) {
641 #ifndef GRAPHICS_DISABLED
642  if (testing_on)
643  plot_parallel_row(row, gradient, block_edge,
644  ScrollView::WHITE, rotation);
645 #endif
646  blob_it.add_list_after (row_it.data ()->blob_list ());
647  delete row_it.extract (); //too far away
648  }
649  }
650  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
651  blob_it.add_list_after (row_it.data ()->blob_list ());
652  }
653 
654  free_mem(deltas);
655  free_mem(occupation);
656 }
TBOX deskew_block_coords(TO_BLOCK *block, float gradient)
Definition: makerow.cpp:745
inT16 bottom() const
Definition: rect.h:61
void compute_dropout_distances(inT32 *occupation, inT32 *thresholds, inT32 line_count)
Definition: makerow.cpp:928
float line_spacing
Definition: blobbox.h:775
void compute_line_occupation(TO_BLOCK *block, float gradient, inT32 min_y, inT32 max_y, inT32 *occupation, inT32 *deltas)
Definition: makerow.cpp:781
int inT32
Definition: host.h:102
static const double kAscenderFraction
Definition: ccstruct.h:35
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
Definition: rect.h:30
static const double kDescenderFraction
Definition: ccstruct.h:33
void draw_occupation(inT32 xleft, inT32 ybottom, inT32 min_y, inT32 max_y, inT32 occupation[], inT32 thresholds[])
Definition: drawtord.cpp:166
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59
Definition: errcode.h:30
void * alloc_mem(inT32 count)
Definition: memry.cpp:47
BLOCK * block
Definition: blobbox.h:773
void compute_occupation_threshold(inT32 low_window, inT32 high_window, inT32 line_count, inT32 *occupation, inT32 *thresholds)
Definition: makerow.cpp:847
const ERRCODE MEMORY_OUT
Definition: stderr.h:25
BOOL8 find_best_dropout_row(TO_ROW *row, inT32 distance, float dist_limit, inT32 line_index, TO_ROW_IT *row_it, BOOL8 testing_on)
Definition: makerow.cpp:665
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
float intercept() const
Definition: blobbox.h:584
inT16 top() const
Definition: rect.h:54
static const double kXHeightFraction
Definition: ccstruct.h:34
void plot_parallel_row(TO_ROW *row, float gradient, inT32 left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:125
void error(const char *caller, TessErrorLogCode action, const char *format,...) const
Definition: errcode.cpp:40
void free_mem(void *oldchunk)
Definition: memry.cpp:55
TBOX deskew_block_coords ( TO_BLOCK block,
float  gradient 
)

Definition at line 745 of file makerow.cpp.

748  {
749  TBOX result; //block bounds
750  TBOX blob_box; //of block
751  FCOORD rotation; //deskew vector
752  float length; //of gradient vector
753  TO_ROW_IT row_it = block->get_rows ();
754  TO_ROW *row; //current row
755  BLOBNBOX *blob; //current blob
756  BLOBNBOX_IT blob_it; //iterator
757 
758  length = sqrt (gradient * gradient + 1);
759  rotation = FCOORD (1 / length, -gradient / length);
760  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
761  row = row_it.data ();
762  blob_it.set_to_list (row->blob_list ());
763  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
764  blob_it.forward ()) {
765  blob = blob_it.data ();
766  blob_box = blob->bounding_box ();
767  blob_box.rotate (rotation);//de-skew it
768  result += blob_box;
769  }
770  }
771  return result;
772 }
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
void rotate(const FCOORD &vec)
Definition: rect.h:189
Definition: points.h:189
Definition: rect.h:30
const TBOX & bounding_box() const
Definition: blobbox.h:215
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
void expand_rows ( ICOORD  page_tr,
TO_BLOCK block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

Definition at line 975 of file makerow.cpp.

982  {
983  BOOL8 swallowed_row; //eaten a neighbour
984  float y_max, y_min; //new row limits
985  float y_bottom, y_top; //allowed limits
986  TO_ROW *test_row; //next row
987  TO_ROW *row; //current row
988  //iterators
989  BLOBNBOX_IT blob_it = &block->blobs;
990  TO_ROW_IT row_it = block->get_rows ();
991 
992 #ifndef GRAPHICS_DISABLED
993  if (textord_show_expanded_rows && testing_on) {
994  if (to_win == NULL)
995  create_to_win(page_tr);
996  }
997 #endif
998 
999  adjust_row_limits(block); //shift min,max.
1000  if (textord_new_initial_xheight) {
1001  if (block->get_rows ()->length () == 0)
1002  return;
1003  compute_row_stats(block, textord_show_expanded_rows &&testing_on);
1004  }
1005  assign_blobs_to_rows (block, &gradient, 4, TRUE, FALSE, FALSE);
1006  //get real membership
1007  if (block->get_rows ()->length () == 0)
1008  return;
1009  fit_parallel_rows(block,
1010  gradient,
1011  rotation,
1012  block_edge,
1013  textord_show_expanded_rows &&testing_on);
1014  if (!textord_new_initial_xheight)
1015  compute_row_stats(block, textord_show_expanded_rows &&testing_on);
1016  row_it.move_to_last ();
1017  do {
1018  row = row_it.data ();
1019  y_max = row->max_y (); //get current limits
1020  y_min = row->min_y ();
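1021  y_bottom = row->intercept () - block->line_size * textord_expansion_factor *
1022  tesseract::CCStruct::kDescenderFraction;
1023  y_top = row->intercept () + block->line_size * textord_expansion_factor *
1024  (tesseract::CCStruct::kXHeightFraction +
1025  tesseract::CCStruct::kAscenderFraction);
1026  if (y_min > y_bottom) { //expansion allowed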
1027  if (textord_show_expanded_rows && testing_on)
1028  tprintf("Expanding bottom of row at %f from %f to %f\n",
1029  row->intercept(), y_min, y_bottom);
1030  //expandable
1031  swallowed_row = TRUE;
1032  while (swallowed_row && !row_it.at_last ()) {
1033  swallowed_row = FALSE;
1034  //get next one
1035  test_row = row_it.data_relative (1);
1036  //overlaps space
1037  if (test_row->max_y () > y_bottom) {
1038  if (test_row->min_y () > y_bottom) {
1039  if (textord_show_expanded_rows && testing_on)
1040  tprintf("Eating row below at %f\n", test_row->intercept());
1041  row_it.forward ();
1042 #ifndef GRAPHICS_DISABLED
1043  if (textord_show_expanded_rows && testing_on)
1044  plot_parallel_row(test_row,
1045  gradient,
1046  block_edge,
1047  ScrollView::WHITE,
1048  rotation);
1049 #endif
1050  blob_it.set_to_list (row->blob_list ());
1051  blob_it.add_list_after (test_row->blob_list ());
1052  //swallow complete row
1053  delete row_it.extract ();
1054  row_it.backward ();
1055  swallowed_row = TRUE;
1056  }
1057  else if (test_row->max_y () < y_min) {
1058  //shorter limit
1059  y_bottom = test_row->max_y ();
1060  if (textord_show_expanded_rows && testing_on)
1061  tprintf("Truncating limit to %f due to touching row at %f\n",
1062  y_bottom, test_row->intercept());
1063  }
1064  else {
1065  y_bottom = y_min; //can't expand it
1066  if (textord_show_expanded_rows && testing_on)
1067  tprintf("Not expanding limit beyond %f due to touching row at %f\n",
1068  y_bottom, test_row->intercept());
1069  }
1070  }
1071  }
1072  y_min = y_bottom; //expand it
1073  }
1074  if (y_max < y_top) { //expansion allowed
1075  if (textord_show_expanded_rows && testing_on)
1076  tprintf("Expanding top of row at %f from %f to %f\n",
1077  row->intercept(), y_max, y_top);
1078  swallowed_row = TRUE;
1079  while (swallowed_row && !row_it.at_first ()) {
1080  swallowed_row = FALSE;
1081  //get one above
1082  test_row = row_it.data_relative (-1);
1083  if (test_row->min_y () < y_top) {
1084  if (test_row->max_y () < y_top) {
1085  if (textord_show_expanded_rows && testing_on)
1086  tprintf("Eating row above at %f\n", test_row->intercept());
1087  row_it.backward ();
1088  blob_it.set_to_list (row->blob_list ());
1089 #ifndef GRAPHICS_DISABLED
1090  if (textord_show_expanded_rows && testing_on)
1091  plot_parallel_row(test_row,
1092  gradient,
1093  block_edge,
1094  ScrollView::WHITE,
1095  rotation);
1096 #endif
1097  blob_it.add_list_after (test_row->blob_list ());
1098  //swallow complete row
1099  delete row_it.extract ();
1100  row_it.forward ();
1101  swallowed_row = TRUE;
1102  }
1103  else if (test_row->min_y () < y_max) {
1104  //shorter limit
1105  y_top = test_row->min_y ();
1106  if (textord_show_expanded_rows && testing_on)
1107  tprintf("Truncating limit to %f due to touching row at %f\n",
1108  y_top, test_row->intercept());
1109  }
1110  else {
1111  y_top = y_max; //can't expand it
1112  if (textord_show_expanded_rows && testing_on)
1113  tprintf("Not expanding limit beyond %f due to touching row at %f\n",
1114  y_top, test_row->intercept());
1115  }
1116  }
1117  }
1118  y_max = y_top;
1119  }
1120  //new limits
1121  row->set_limits (y_min, y_max);
1122  row_it.backward ();
1123  }
1124  while (!row_it.at_last ());
1125 }
bool textord_show_expanded_rows
Definition: makerow.cpp:47
#define TRUE
Definition: capi.h:45
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
void adjust_row_limits(TO_BLOCK *block)
Definition: makerow.cpp:1133
float min_y() const
Definition: blobbox.h:557
void set_limits(float new_min, float new_max)
Definition: blobbox.h:618
float max_y() const
Definition: blobbox.h:554
static const double kAscenderFraction
Definition: ccstruct.h:35
#define tprintf(...)
Definition: tprintf.h:31
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
bool textord_new_initial_xheight
Definition: makerow.cpp:102
static const double kDescenderFraction
Definition: ccstruct.h:33
#define FALSE
Definition: capi.h:46
double textord_expansion_factor
Definition: makerow.cpp:80
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew)
Definition: makerow.cpp:2309
float line_size
Definition: blobbox.h:781
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
float intercept() const
Definition: blobbox.h:584
void fit_parallel_rows(TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:1961
unsigned char BOOL8
Definition: host.h:113
static const double kXHeightFraction
Definition: ccstruct.h:34
void plot_parallel_row(TO_ROW *row, float gradient, inT32 left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:125
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
void compute_row_stats(TO_BLOCK *block, BOOL8 testing_on)
Definition: makerow.cpp:1169
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
void fill_heights ( TO_ROW *  row,
float  gradient,
int  min_height,
int  max_height,
STATS *  heights,
STATS *  floating_heights 
)

Definition at line 1436 of file makerow.cpp.

1437  {
1438  float xcentre; // centre of blob
1439  float top; // top y coord of blob
1440  float height; // height of blob
1441  BLOBNBOX *blob; // current blob
1442  int repeated_set;
1443  BLOBNBOX_IT blob_it = row->blob_list();
1444  if (blob_it.empty()) return; // no blobs in this row
1445  bool has_rep_chars =
1446  row->rep_chars_marked() && row->num_repeated_sets() > 0;
1447  do {
1448  blob = blob_it.data();
1449  if (!blob->joined_to_prev()) {
1450  xcentre = (blob->bounding_box().left() +
1451  blob->bounding_box().right()) / 2.0f;
1452  top = blob->bounding_box().top();
1453  height = blob->bounding_box().height();
1454  if (textord_fix_xheight_bug)
1455  top -= row->baseline.y(xcentre);
1456  else
1457  top -= gradient * xcentre + row->parallel_c();
1458  if (top >= min_height && top <= max_height) {
1459  heights->add(static_cast<inT32>(floor(top + 0.5)), 1);
1460  if (height / top < textord_min_blob_height_fraction) {
1461  floating_heights->add(static_cast<inT32>(floor(top + 0.5)), 1);
1462  }
1463  }
1464  }
1465  // Skip repeated chars, since they are likely to skew the height stats.
1466  if (has_rep_chars && blob->repeated_set() != 0) {
1467  repeated_set = blob->repeated_set();
1468  blob_it.forward();
1469  while (!blob_it.at_first() &&
1470  blob_it.data()->repeated_set() == repeated_set) {
1471  blob_it.forward();
1472  if (textord_debug_xheights)
1473  tprintf("Skipping repeated char when computing xheight\n");
1474  }
1475  } else {
1476  blob_it.forward();
1477  }
1478  } while (!blob_it.at_first());
1479 }
bool joined_to_prev() const
Definition: blobbox.h:241
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
bool textord_fix_xheight_bug
Definition: makerow.cpp:55
inT16 height() const
Definition: rect.h:104
bool textord_debug_xheights
Definition: makerow.cpp:57
#define tprintf(...)
Definition: tprintf.h:31
float parallel_c() const
Definition: blobbox.h:575
const TBOX & bounding_box() const
Definition: blobbox.h:215
int repeated_set() const
Definition: blobbox.h:247
inT16 right() const
Definition: rect.h:75
double y(double x) const
Definition: quspline.cpp:217
QSPLINE baseline
Definition: blobbox.h:666
inT16 left() const
Definition: rect.h:68
int num_repeated_sets() const
Definition: blobbox.h:633
inT16 top() const
Definition: rect.h:54
double textord_min_blob_height_fraction
Definition: makerow.cpp:89
bool rep_chars_marked() const
Definition: blobbox.h:627
void add(inT32 value, inT32 count)
Definition: statistc.cpp:104
BOOL8 find_best_dropout_row ( TO_ROW *  row,
inT32  distance,
float  dist_limit,
inT32  line_index,
TO_ROW_IT *  row_it,
BOOL8  testing_on 
)

Definition at line 665 of file makerow.cpp.

672  {
673  inT32 next_index; // of neighbouring row
674  inT32 row_offset; //from current row
675  inT32 abs_dist; //absolute distance
676  inT8 row_inc; //increment to row_index
677  TO_ROW *next_row; //neighbouring row
678 
679  if (testing_on)
680  tprintf ("Row at %g(%g), dropout dist=%d,",
681  row->intercept (), row->parallel_c (), distance);
682  if (distance < 0) {
683  row_inc = 1;
684  abs_dist = -distance;
685  }
686  else {
687  row_inc = -1;
688  abs_dist = distance;
689  }
690  if (abs_dist > dist_limit) {
691  if (testing_on) {
692  tprintf (" too far - deleting\n");
693  }
694  return TRUE;
695  }
696  if ((distance < 0 && !row_it->at_last ())
697  || (distance >= 0 && !row_it->at_first ())) {
698  row_offset = row_inc;
699  do {
700  next_row = row_it->data_relative (row_offset);
701  next_index = (inT32) floor (next_row->intercept ());
702  if ((distance < 0
703  && next_index < line_index
704  && next_index > line_index + distance + distance)
705  || (distance >= 0
706  && next_index > line_index
707  && next_index < line_index + distance + distance)) {
708  if (testing_on) {
709  tprintf (" nearer neighbour (%d) at %g\n",
710  line_index + distance - next_index,
711  next_row->intercept ());
712  }
713  return TRUE; //other is nearer
714  }
715  else if (next_index == line_index
716  || next_index == line_index + distance + distance) {
717  if (row->believability () <= next_row->believability ()) {
718  if (testing_on) {
719  tprintf (" equal but more believable at %g (%g/%g)\n",
720  next_row->intercept (),
721  row->believability (),
722  next_row->believability ());
723  }
724  return TRUE; //other is more believable
725  }
726  }
727  row_offset += row_inc;
728  }
729  while ((next_index == line_index
730  || next_index == line_index + distance + distance)
731  && row_offset < row_it->length ());
732  if (testing_on)
733  tprintf (" keeping\n");
734  }
735  return FALSE;
736 }
#define TRUE
Definition: capi.h:45
SIGNED char inT8
Definition: host.h:98
int inT32
Definition: host.h:102
float believability() const
Definition: blobbox.h:581
#define tprintf(...)
Definition: tprintf.h:31
float parallel_c() const
Definition: blobbox.h:575
#define FALSE
Definition: capi.h:46
float intercept() const
Definition: blobbox.h:584
void fit_lms_line ( TO_ROW *  row)

Definition at line 267 of file makerow.cpp.

267  {
268  float m, c; // fitted line
269  tesseract::DetLineFit lms;
270  BLOBNBOX_IT blob_it = row->blob_list();
271 
272  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
273  const TBOX& box = blob_it.data()->bounding_box();
274  lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom()));
275  }
276  double error = lms.Fit(&m, &c);
277  row->set_line(m, c, error);
278 }
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
inT16 bottom() const
Definition: rect.h:61
void set_line(float new_m, float new_c, float new_error)
Definition: blobbox.h:599
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
integer coordinate
Definition: points.h:30
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:52
void fit_parallel_lms ( float  gradient,
TO_ROW *  row 
)

Definition at line 2003 of file makerow.cpp.

2003  {
2004  float c; // fitted line
2005  int blobcount; // no of blobs
2006  tesseract::DetLineFit lms;
2007  BLOBNBOX_IT blob_it = row->blob_list();
2008 
2009  blobcount = 0;
2010  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
2011  if (!blob_it.data()->joined_to_prev()) {
2012  const TBOX& box = blob_it.data()->bounding_box();
2013  lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom()));
2014  blobcount++;
2015  }
2016  }
2017  double error = lms.ConstrainedFit(gradient, &c);
2018  row->set_parallel_line(gradient, c, error);
2019  if (textord_straight_baselines && blobcount > textord_lms_line_trials) {
2020  error = lms.Fit(&gradient, &c);
2021  }
2022  //set the other too
2023  row->set_line(gradient, c, error);
2024 }
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
inT16 bottom() const
Definition: rect.h:61
void set_line(float new_m, float new_c, float new_error)
Definition: blobbox.h:599
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
Definition: rect.h:30
bool textord_straight_baselines
Definition: makerow.cpp:52
inT16 right() const
Definition: rect.h:75
double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug, ICOORD *line_pt)
Definition: detlinefit.cpp:131
inT16 left() const
Definition: rect.h:68
int textord_lms_line_trials
Definition: makerow.cpp:101
integer coordinate
Definition: points.h:30
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:52
void set_parallel_line(float gradient, float new_c, float new_error)
Definition: blobbox.h:607
void fit_parallel_rows ( TO_BLOCK *  block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

Definition at line 1961 of file makerow.cpp.

1967  {
1968 #ifndef GRAPHICS_DISABLED
1969  ScrollView::Color colour; //of row
1970 #endif
1971  TO_ROW_IT row_it = block->get_rows ();
1972 
1973  row_it.move_to_first ();
1974  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1975  if (row_it.data ()->blob_list ()->empty ())
1976  delete row_it.extract (); //nothing in it
1977  else
1978  fit_parallel_lms (gradient, row_it.data ());
1979  }
1980 #ifndef GRAPHICS_DISABLED
1981  if (testing_on) {
1982  colour = ScrollView::RED;
1983  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1984  plot_parallel_row (row_it.data (), gradient,
1985  block_edge, colour, rotation);
1986  colour = (ScrollView::Color) (colour + 1);
1987  if (colour > ScrollView::MAGENTA)
1988  colour = ScrollView::RED;
1989  }
1990  }
1991 #endif
1992  row_it.sort (row_y_order); //may have gone out of order
1993 }
int row_y_order(const void *item1, const void *item2)
Definition: makerow.cpp:2626
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
void plot_parallel_row(TO_ROW *row, float gradient, inT32 left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:125
void fit_parallel_lms(float gradient, TO_ROW *row)
Definition: makerow.cpp:2003
double* linear_spline_baseline ( TO_ROW *  row,
TO_BLOCK *  block,
inT32 &  segments,
inT32  xstarts[] 
)

Definition at line 2218 of file makerow.cpp.

2223  {
2224  int blobcount; //no of blobs
2225  int blobindex; //current blob
2226  int index1, index2; //blob numbers
2227  int blobs_per_segment; //blobs in each
2228  TBOX box; //blob box
2229  TBOX new_box; //new_it box
2230  //blobs
2231  BLOBNBOX_IT blob_it = row->blob_list ();
2232  BLOBNBOX_IT new_it = blob_it; //front end
2233  float b, c; //fitted curve
2234  tesseract::DetLineFit lms;
2235  double *coeffs; //quadratic coeffs
2236  inT32 segment; //current segment
2237 
2238  box = box_next_pre_chopped (&blob_it);
2239  xstarts[0] = box.left ();
2240  blobcount = 1;
2241  while (!blob_it.at_first ()) {
2242  blobcount++;
2243  box = box_next_pre_chopped (&blob_it);
2244  }
2245  segments = blobcount / textord_spline_medianwin;
2246  if (segments < 1)
2247  segments = 1;
2248  blobs_per_segment = blobcount / segments;
2249  coeffs = (double *) alloc_mem (segments * 3 * sizeof (double));
2250  if (textord_oldbl_debug)
2251  tprintf
2252  ("Linear splining baseline of %d blobs at (%d,%d), into %d segments of %d blobs\n",
2253  blobcount, box.left (), box.bottom (), segments, blobs_per_segment);
2254  segment = 1;
2255  for (index2 = 0; index2 < blobs_per_segment / 2; index2++)
2256  box_next_pre_chopped(&new_it);
2257  index1 = 0;
2258  blobindex = index2;
2259  do {
2260  blobindex += blobs_per_segment;
2261  lms.Clear();
2262  while (index1 < blobindex || (segment == segments && index1 < blobcount)) {
2263  box = box_next_pre_chopped (&blob_it);
2264  int middle = (box.left() + box.right()) / 2;
2265  lms.Add(ICOORD(middle, box.bottom()));
2266  index1++;
2267  if (index1 == blobindex - blobs_per_segment / 2
2268  || index1 == blobcount - 1) {
2269  xstarts[segment] = box.left ();
2270  }
2271  }
2272  lms.Fit(&b, &c);
2273  coeffs[segment * 3 - 3] = 0;
2274  coeffs[segment * 3 - 2] = b;
2275  coeffs[segment * 3 - 1] = c;
2276  segment++;
2277  if (segment > segments)
2278  break;
2279 
2280  blobindex += blobs_per_segment;
2281  lms.Clear();
2282  while (index2 < blobindex || (segment == segments && index2 < blobcount)) {
2283  new_box = box_next_pre_chopped (&new_it);
2284  int middle = (new_box.left() + new_box.right()) / 2;
2285  lms.Add(ICOORD (middle, new_box.bottom()));
2286  index2++;
2287  if (index2 == blobindex - blobs_per_segment / 2
2288  || index2 == blobcount - 1) {
2289  xstarts[segment] = new_box.left ();
2290  }
2291  }
2292  lms.Fit(&b, &c);
2293  coeffs[segment * 3 - 3] = 0;
2294  coeffs[segment * 3 - 2] = b;
2295  coeffs[segment * 3 - 1] = c;
2296  segment++;
2297  }
2298  while (segment <= segments);
2299  return coeffs;
2300 }
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
inT16 bottom() const
Definition: rect.h:61
int textord_spline_medianwin
Definition: makerow.cpp:66
int inT32
Definition: host.h:102
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
#define tprintf(...)
Definition: tprintf.h:31
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
Definition: blobbox.cpp:660
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
void * alloc_mem(inT32 count)
Definition: memry.cpp:47
inT16 left() const
Definition: rect.h:68
integer coordinate
Definition: points.h:30
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:52
EXTERN bool textord_oldbl_debug
Definition: oldbasel.cpp:39
void make_baseline_spline ( TO_ROW *  row,
TO_BLOCK *  block 
)

Definition at line 2086 of file makerow.cpp.

2087  {
2088  inT32 *xstarts; // spline boundaries
2089  double *coeffs; // quadratic coeffs
2090  inT32 segments; // no of segments
2091 
2092  xstarts =
2093  (inT32 *) alloc_mem((row->blob_list()->length() + 1) * sizeof(inT32));
2094  if (segment_baseline(row, block, segments, xstarts)
2095  && !textord_straight_baselines && !textord_parallel_baselines) {
2096  coeffs = linear_spline_baseline(row, block, segments, xstarts);
2097  } else {
2098  xstarts[1] = xstarts[segments];
2099  segments = 1;
2100  coeffs = (double *) alloc_mem (3 * sizeof (double));
2101  coeffs[0] = 0;
2102  coeffs[1] = row->line_m ();
2103  coeffs[2] = row->line_c ();
2104  }
2105  row->baseline = QSPLINE (segments, xstarts, coeffs);
2106  free_mem(coeffs);
2107  free_mem(xstarts);
2108 }
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
int inT32
Definition: host.h:102
BOOL8 segment_baseline(TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
Definition: makerow.cpp:2119
double * linear_spline_baseline(TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
Definition: makerow.cpp:2218
bool textord_straight_baselines
Definition: makerow.cpp:52
QSPLINE baseline
Definition: blobbox.h:666
void * alloc_mem(inT32 count)
Definition: memry.cpp:47
float line_m() const
Definition: blobbox.h:566
bool textord_parallel_baselines
Definition: makerow.cpp:51
float line_c() const
Definition: blobbox.h:569
void free_mem(void *oldchunk)
Definition: memry.cpp:55
void make_initial_textrows ( ICOORD  page_tr,
TO_BLOCK *  block,
FCOORD  rotation,
BOOL8  testing_on 
)

Definition at line 227 of file makerow.cpp.

232  {
233  TO_ROW_IT row_it = block->get_rows ();
234 
235 #ifndef GRAPHICS_DISABLED
236  ScrollView::Color colour; //of row
237 
238  if (textord_show_initial_rows && testing_on) {
239  if (to_win == NULL)
240  create_to_win(page_tr);
241  }
242 #endif
243  //guess skew
244  assign_blobs_to_rows (block, NULL, 0, TRUE, TRUE, textord_show_initial_rows && testing_on);
245  row_it.move_to_first ();
246  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
247  fit_lms_line (row_it.data ());
248 #ifndef GRAPHICS_DISABLED
249  if (textord_show_initial_rows && testing_on) {
250  colour = ScrollView::RED;
251  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
252  plot_to_row (row_it.data (), colour, rotation);
253  colour = (ScrollView::Color) (colour + 1);
254  if (colour > ScrollView::MAGENTA)
255  colour = ScrollView::RED;
256  }
257  }
258 #endif
259 }
#define TRUE
Definition: capi.h:45
void fit_lms_line(TO_ROW *row)
Definition: makerow.cpp:267
void plot_to_row(TO_ROW *row, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:91
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew)
Definition: makerow.cpp:2309
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
bool textord_show_initial_rows
Definition: makerow.cpp:45
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
float make_rows ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks 
)

Definition at line 201 of file makerow.cpp.

201  {
202  float port_m; // global skew
203  float port_err; // global noise
204  TO_BLOCK_IT block_it; // iterator
205 
206  block_it.set_to_list(port_blocks);
207  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
208  block_it.forward())
209  make_initial_textrows(page_tr, block_it.data(), FCOORD(1.0f, 0.0f),
210  (BOOL8) textord_test_landscape);
211  // compute globally
212  compute_page_skew(port_blocks, port_m, port_err);
213  block_it.set_to_list(port_blocks);
214  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
215  cleanup_rows_making(page_tr, block_it.data(), port_m, FCOORD(1.0f, 0.0f),
216  block_it.data()->block->bounding_box().left(),
217  (BOOL8) textord_test_landscape);
218  }
219  return port_m; // global skew
220 }
bool textord_test_landscape
Definition: makerow.cpp:50
void make_initial_textrows(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
Definition: makerow.cpp:227
Definition: points.h:189
void compute_page_skew(TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
Definition: makerow.cpp:287
unsigned char BOOL8
Definition: host.h:113
void cleanup_rows_making(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:524
float make_single_row ( ICOORD  page_tr,
bool  allow_sub_blobs,
TO_BLOCK *  block,
TO_BLOCK_LIST *  blocks 
)

Definition at line 164 of file makerow.cpp.

165  {
166  BLOBNBOX_IT blob_it = &block->blobs;
167  TO_ROW_IT row_it = block->get_rows();
168 
169  // Include all the small blobs and large blobs.
170  blob_it.add_list_after(&block->small_blobs);
171  blob_it.add_list_after(&block->noise_blobs);
172  blob_it.add_list_after(&block->large_blobs);
173  if (block->blobs.singleton() && allow_sub_blobs) {
174  blob_it.move_to_first();
175  float size = MakeRowFromSubBlobs(block, blob_it.data()->cblob(), &row_it);
176  if (size > block->line_size)
177  block->line_size = size;
178  } else if (block->blobs.empty()) {
179  // Make a fake blob.
180  C_BLOB* blob = C_BLOB::FakeBlob(block->block->bounding_box());
181  // The blobnbox owns the blob.
182  BLOBNBOX* bblob = new BLOBNBOX(blob);
183  blob_it.add_after_then_move(bblob);
184  }
185  MakeRowFromBlobs(block->line_size, &blob_it, &row_it);
186  // Fit an LMS line to the rows.
187  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward())
188  fit_lms_line(row_it.data());
189  float gradient;
190  float fit_error;
191  // Compute the skew based on the fitted line.
192  compute_page_skew(blocks, gradient, fit_error);
193  return gradient;
194 }
void compute_page_skew(TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
Definition: makerow.cpp:287
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:772
void fit_lms_line(TO_ROW *row)
Definition: makerow.cpp:267
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:770
BLOCK * block
Definition: blobbox.h:773
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:238
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:771
float line_size
Definition: blobbox.h:781
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
float MakeRowFromSubBlobs(TO_BLOCK *block, C_BLOB *blob, TO_ROW_IT *row_it)
Definition: makerow.cpp:137
float MakeRowFromSubBlobs ( TO_BLOCK *  block,
C_BLOB *  blob,
TO_ROW_IT *  row_it 
)

Definition at line 137 of file makerow.cpp.

137  {
138  // The blobs made from the children will go in the small_blobs list.
139  BLOBNBOX_IT bb_it(&block->small_blobs);
140  C_OUTLINE_IT ol_it(blob->out_list());
141  // Get the children.
142  ol_it.set_to_list(ol_it.data()->child());
143  if (ol_it.empty())
144  return 0.0f;
145  for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
146  // Deep copy the child outline and use that to make a blob.
147  C_BLOB* blob = new C_BLOB(C_OUTLINE::deep_copy(ol_it.data()));
148  // Correct direction as needed.
149  blob->CheckInverseFlagAndDirection();
150  BLOBNBOX* bbox = new BLOBNBOX(blob);
151  bb_it.add_after_then_move(bbox);
152  }
153  // Now we can make a row from the blobs.
154  return MakeRowFromBlobs(block->line_size, &bb_it, row_it);
155 }
void CheckInverseFlagAndDirection()
Definition: stepblob.cpp:221
static C_OUTLINE * deep_copy(const C_OUTLINE *src)
Definition: coutln.h:259
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:64
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:771
float line_size
Definition: blobbox.h:781
void mark_repeated_chars ( TO_ROW *  row)

Definition at line 2670 of file makerow.cpp.

2670  {
2671  BLOBNBOX_IT box_it(row->blob_list()); // Iterator.
2672  int num_repeated_sets = 0;
2673  if (!box_it.empty()) {
2674  do {
2675  BLOBNBOX* bblob = box_it.data();
2676  int repeat_length = 1;
2677  if (bblob->flow() == BTFT_LEADER &&
2678  !bblob->joined_to_prev() && bblob->cblob() != NULL) {
2679  BLOBNBOX_IT test_it(box_it);
2680  for (test_it.forward(); !test_it.at_first();) {
2681  bblob = test_it.data();
2682  if (bblob->flow() != BTFT_LEADER)
2683  break;
2684  test_it.forward();
2685  bblob = test_it.data();
2686  if (bblob->joined_to_prev() || bblob->cblob() == NULL) {
2687  repeat_length = 0;
2688  break;
2689  }
2690  ++repeat_length;
2691  }
2692  }
2693  if (repeat_length >= kMinLeaderCount) {
2694  num_repeated_sets++;
2695  for (; repeat_length > 0; box_it.forward(), --repeat_length) {
2696  bblob = box_it.data();
2697  bblob->set_repeated_set(num_repeated_sets);
2698  }
2699  } else {
2700  bblob->set_repeated_set(0);
2701  box_it.forward();
2702  }
2703  } while (!box_it.at_first()); // until all done
2704  }
2705  row->set_num_repeated_sets(num_repeated_sets);
2706 }
void set_repeated_set(int set_id)
Definition: blobbox.h:250
bool joined_to_prev() const
Definition: blobbox.h:241
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
void set_num_repeated_sets(int num_sets)
Definition: blobbox.h:636
const int kMinLeaderCount
Definition: makerow.cpp:107
BlobTextFlowType flow() const
Definition: blobbox.h:280
C_BLOB * cblob() const
Definition: blobbox.h:253
OVERLAP_STATE most_overlapping_row ( TO_ROW_IT *  row_it,
TO_ROW *&  best_row,
float  top,
float  bottom,
float  rowsize,
BOOL8  testing_blob 
)

Definition at line 2509 of file makerow.cpp.

2516  {
2517  OVERLAP_STATE result; //result of tests
2518  float overlap; //of blob & row
2519  float bestover; //nearest row
2520  float merge_top, merge_bottom; //size of merged row
2521  ICOORD testpt; //testing only
2522  TO_ROW *row; //current row
2523  TO_ROW *test_row; //for multiple overlaps
2524  BLOBNBOX_IT blob_it; //for merging rows
2525 
2526  result = ASSIGN;
2527  row = row_it->data ();
2528  bestover = top - bottom;
2529  if (top > row->max_y ())
2530  bestover -= top - row->max_y ();
2531  if (bottom < row->min_y ())
2532  //compute overlap
2533  bestover -= row->min_y () - bottom;
2534  if (testing_blob && textord_debug_blob) {
2535  tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f\n",
2536  bottom, top, row->min_y(), row->max_y(), rowsize, bestover);
2537  }
2538  test_row = row;
2539  do {
2540  if (!row_it->at_last ()) {
2541  row_it->forward ();
2542  test_row = row_it->data ();
2543  if (test_row->min_y () <= top && test_row->max_y () >= bottom) {
2544  merge_top =
2545  test_row->max_y () >
2546  row->max_y ()? test_row->max_y () : row->max_y ();
2547  merge_bottom =
2548  test_row->min_y () <
2549  row->min_y ()? test_row->min_y () : row->min_y ();
2550  if (merge_top - merge_bottom <= rowsize) {
2551  if (testing_blob) {
2552  tprintf ("Merging rows at (%g,%g), (%g,%g)\n",
2553  row->min_y (), row->max_y (),
2554  test_row->min_y (), test_row->max_y ());
2555  }
2556  test_row->set_limits (merge_bottom, merge_top);
2557  blob_it.set_to_list (test_row->blob_list ());
2558  blob_it.add_list_after (row->blob_list ());
2559  blob_it.sort (blob_x_order);
2560  row_it->backward ();
2561  delete row_it->extract ();
2562  row_it->forward ();
2563  bestover = -1.0f; //force replacement
2564  }
2565  overlap = top - bottom;
2566  if (top > test_row->max_y ())
2567  overlap -= top - test_row->max_y ();
2568  if (bottom < test_row->min_y ())
2569  overlap -= test_row->min_y () - bottom;
2570  if (bestover >= rowsize - 1 && overlap >= rowsize - 1) {
2571  result = REJECT;
2572  }
2573  if (overlap > bestover) {
2574  bestover = overlap; //find biggest overlap
2575  row = test_row;
2576  }
2577  if (testing_blob && textord_debug_blob) {
2578  tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f->%f\n",
2579  bottom, top, test_row->min_y(), test_row->max_y(),
2580  rowsize, overlap, bestover);
2581  }
2582  }
2583  }
2584  }
2585  while (!row_it->at_last ()
2586  && test_row->min_y () <= top && test_row->max_y () >= bottom);
2587  while (row_it->data () != row)
2588  row_it->backward (); //make it point to row
2589  //doesn't overlap much
2590  if (top - bottom - bestover > rowsize * textord_overlap_x &&
2591  (!textord_fix_makerow_bug || bestover < rowsize * textord_overlap_x)
2592  && result == ASSIGN)
2593  result = NEW_ROW; //doesn't overlap enough
2594  best_row = row;
2595  return result;
2596 }
bool textord_debug_blob
Definition: makerow.cpp:103
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
OVERLAP_STATE
Definition: makerow.h:29
float min_y() const
Definition: blobbox.h:557
bool textord_fix_makerow_bug
Definition: makerow.cpp:56
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2604
void set_limits(float new_min, float new_max)
Definition: blobbox.h:618
float max_y() const
Definition: blobbox.h:554
#define tprintf(...)
Definition: tprintf.h:31
integer coordinate
Definition: points.h:30
Definition: makerow.h:31
Definition: makerow.h:32
double textord_overlap_x
Definition: makerow.cpp:81
void pre_associate_blobs ( ICOORD  page_tr,
TO_BLOCK *  block,
FCOORD  rotation,
BOOL8  testing_on 
)

Definition at line 1875 of file makerow.cpp.

1880  {
1881 #ifndef GRAPHICS_DISABLED
1882  ScrollView::Color colour; //of boxes
1883 #endif
1884  BLOBNBOX *blob; //current blob
1885  BLOBNBOX *nextblob; //next in list
1886  TBOX blob_box;
1887  FCOORD blob_rotation; //inverse of rotation
1888  BLOBNBOX_IT blob_it; //iterator
1889  BLOBNBOX_IT start_it; //iterator
1890  TO_ROW_IT row_it = block->get_rows ();
1891 
1892 #ifndef GRAPHICS_DISABLED
1893  colour = ScrollView::RED;
1894 #endif
1895 
1896  blob_rotation = FCOORD (rotation.x (), -rotation.y ());
1897  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1898  //get blobs
1899  blob_it.set_to_list (row_it.data ()->blob_list ());
1900  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
1901  blob_it.forward ()) {
1902  blob = blob_it.data ();
1903  blob_box = blob->bounding_box ();
1904  start_it = blob_it; //save start point
1905  // if (testing_on && textord_show_final_blobs)
1906  // {
1907  // tprintf("Blob at (%d,%d)->(%d,%d), addr=%x, count=%d\n",
1908  // blob_box.left(),blob_box.bottom(),
1909  // blob_box.right(),blob_box.top(),
1910  // (void*)blob,blob_it.length());
1911  // }
1912  bool overlap;
1913  do {
1914  overlap = false;
1915  if (!blob_it.at_last ()) {
1916  nextblob = blob_it.data_relative(1);
1917  overlap = blob_box.major_x_overlap(nextblob->bounding_box());
1918  if (overlap) {
1919  blob->merge(nextblob); // merge new blob
1920  blob_box = blob->bounding_box(); // get bigger box
1921  blob_it.forward();
1922  }
1923  }
1924  }
1925  while (overlap);
1926  blob->chop (&start_it, &blob_it,
1927  blob_rotation,
1928  block->line_size * tesseract::CCStruct::kXHeightFraction *
1929  textord_chop_width);
1930  //attempt chop
1931  }
1932 #ifndef GRAPHICS_DISABLED
1933  if (testing_on && textord_show_final_blobs) {
1934  if (to_win == NULL)
1935  create_to_win(page_tr);
1936  to_win->Pen(colour);
1937  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
1938  blob_it.forward ()) {
1939  blob = blob_it.data ();
1940  blob_box = blob->bounding_box ();
1941  blob_box.rotate (rotation);
1942  if (!blob->joined_to_prev ()) {
1943  to_win->Rectangle (blob_box.left (), blob_box.bottom (),
1944  blob_box.right (), blob_box.top ());
1945  }
1946  }
1947  colour = (ScrollView::Color) (colour + 1);
1948  if (colour > ScrollView::MAGENTA)
1949  colour = ScrollView::RED;
1950  }
1951 #endif
1952  }
1953 }
void merge(BLOBNBOX *nextblob)
Definition: blobbox.cpp:87
bool joined_to_prev() const
Definition: blobbox.h:241
void rotate(const FCOORD &vec)
Definition: rect.h:189
inT16 bottom() const
Definition: rect.h:61
bool textord_show_final_blobs
Definition: makerow.cpp:49
Definition: points.h:189
void Pen(Color color)
Definition: scrollview.cpp:726
Definition: rect.h:30
const TBOX & bounding_box() const
Definition: blobbox.h:215
inT16 right() const
Definition: rect.h:75
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:606
inT16 left() const
Definition: rect.h:68
float y() const
Definition: points.h:212
bool major_x_overlap(const TBOX &box) const
Definition: rect.h:402
float line_size
Definition: blobbox.h:781
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
float x() const
Definition: points.h:209
inT16 top() const
Definition: rect.h:54
static const double kXHeightFraction
Definition: ccstruct.h:34
double textord_chop_width
Definition: makerow.cpp:78
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
void chop(BLOBNBOX_IT *start_it, BLOBNBOX_IT *blob_it, FCOORD rotation, float xheight)
Definition: blobbox.cpp:115
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
int row_spacing_order ( const void *  item1,
const void *  item2 
)

Definition at line 2648 of file makerow.cpp.

2650  {
2651  //converted ptr
2652  TO_ROW *row1 = *(TO_ROW **) item1;
2653  //converted ptr
2654  TO_ROW *row2 = *(TO_ROW **) item2;
2655 
2656  if (row1->spacing < row2->spacing)
2657  return -1;
2658  else if (row1->spacing > row2->spacing)
2659  return 1;
2660  else
2661  return 0;
2662 }
float spacing
Definition: blobbox.h:652
int row_y_order ( const void *  item1,
const void *  item2 
)

Definition at line 2626 of file makerow.cpp.

2628  {
2629  //converted ptr
2630  TO_ROW *row1 = *(TO_ROW **) item1;
2631  //converted ptr
2632  TO_ROW *row2 = *(TO_ROW **) item2;
2633 
2634  if (row1->parallel_c () > row2->parallel_c ())
2635  return -1;
2636  else if (row1->parallel_c () < row2->parallel_c ())
2637  return 1;
2638  else
2639  return 0;
2640 }
float parallel_c() const
Definition: blobbox.h:575
BOOL8 segment_baseline ( TO_ROW *  row,
TO_BLOCK *  block,
inT32 &  segments,
inT32  xstarts[] 
)

Definition at line 2119 of file makerow.cpp.

2124  {
2125  BOOL8 needs_curve; //needs curved line
2126  int blobcount; //no of blobs
2127  int blobindex; //current blob
2128  int last_state; //above, on , below
2129  int state; //of current blob
2130  float yshift; //from baseline
2131  TBOX box; //blob box
2132  TBOX new_box; //new_it box
2133  float middle; //xcentre of blob
2134  //blobs
2135  BLOBNBOX_IT blob_it = row->blob_list ();
2136  BLOBNBOX_IT new_it = blob_it; //front end
2137  SORTED_FLOATS yshifts; //shifts from baseline
2138 
2139  needs_curve = FALSE;
2140  box = box_next_pre_chopped (&blob_it);
2141  xstarts[0] = box.left ();
2142  segments = 1;
2143  blobcount = row->blob_list ()->length ();
2144  if (textord_oldbl_debug)
2145  tprintf ("Segmenting baseline of %d blobs at (%d,%d)\n",
2146  blobcount, box.left (), box.bottom ());
2147  if (blobcount <= textord_spline_medianwin
2148  || blobcount < textord_spline_minblobs) {
2149  blob_it.move_to_last ();
2150  box = blob_it.data ()->bounding_box ();
2151  xstarts[1] = box.right ();
2152  return FALSE;
2153  }
2154  last_state = 0;
2155  new_it.mark_cycle_pt ();
2156  for (blobindex = 0; blobindex < textord_spline_medianwin; blobindex++) {
2157  new_box = box_next_pre_chopped (&new_it);
2158  middle = (new_box.left () + new_box.right ()) / 2.0;
2159  yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
2160  //record shift
2161  yshifts.add (yshift, blobindex);
2162  if (new_it.cycled_list ()) {
2163  xstarts[1] = new_box.right ();
2164  return FALSE;
2165  }
2166  }
2167  for (blobcount = 0; blobcount < textord_spline_medianwin / 2; blobcount++)
2168  box = box_next_pre_chopped (&blob_it);
2169  do {
2170  new_box = box_next_pre_chopped (&new_it);
2171  //get middle one
2172  yshift = yshifts[textord_spline_medianwin / 2];
2173  if (yshift > textord_spline_shift_fraction * block->line_size)
2174  state = 1;
2175  else if (-yshift > textord_spline_shift_fraction * block->line_size)
2176  state = -1;
2177  else
2178  state = 0;
2179  if (state != 0)
2180  needs_curve = TRUE;
2181  // tprintf("State=%d, prev=%d, shift=%g\n",
2182  // state,last_state,yshift);
2183  if (state != last_state && blobcount > textord_spline_minblobs) {
2184  xstarts[segments++] = box.left ();
2185  blobcount = 0;
2186  }
2187  last_state = state;
2188  yshifts.remove (blobindex - textord_spline_medianwin);
2189  box = box_next_pre_chopped (&blob_it);
2190  middle = (new_box.left () + new_box.right ()) / 2.0;
2191  yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
2192  yshifts.add (yshift, blobindex);
2193  blobindex++;
2194  blobcount++;
2195  }
2196  while (!new_it.cycled_list ());
2197  if (blobcount > textord_spline_minblobs || segments == 1) {
2198  xstarts[segments] = new_box.right ();
2199  }
2200  else {
2201  xstarts[--segments] = new_box.right ();
2202  }
2203  if (textord_oldbl_debug)
2204  tprintf ("Made %d segments on row at (%d,%d)\n",
2205  segments, box.right (), box.bottom ());
2206  return needs_curve;
2207 }
#define TRUE
Definition: capi.h:45
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
inT16 bottom() const
Definition: rect.h:61
int textord_spline_medianwin
Definition: makerow.cpp:66
#define tprintf(...)
Definition: tprintf.h:31
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
Definition: blobbox.cpp:660
Definition: rect.h:30
#define FALSE
Definition: capi.h:46
inT16 right() const
Definition: rect.h:75
inT16 left() const
Definition: rect.h:68
int textord_spline_minblobs
Definition: makerow.cpp:65
void remove(inT32 key)
Definition: sortflts.cpp:53
float line_m() const
Definition: blobbox.h:566
float line_size
Definition: blobbox.h:781
float line_c() const
Definition: blobbox.h:569
unsigned char BOOL8
Definition: host.h:113
void add(float value, inT32 key)
Definition: sortflts.cpp:28
double textord_spline_shift_fraction
Definition: makerow.cpp:71
EXTERN bool textord_oldbl_debug
Definition: oldbasel.cpp:39
void separate_underlines ( TO_BLOCK * block,
float  gradient,
FCOORD  rotation,
BOOL8  testing_on 
)

Definition at line 1802 of file makerow.cpp.

// separate_underlines: walk every row of the block and, for each blob wider
// than block->line_size * textord_underline_width, rotate a copy of its
// C_BLOB and decide where the blob belongs:
//   * test_underline() succeeds -> the blob is extracted from the row and
//     appended to block->underlines;
//   * otherwise, if it overlaps too many row blobs -> it is extracted and
//     appended to block->large_blobs.
// Parameters: block (rows are modified in place), gradient (skew gradient
// used to build the deskew vector), rotation (its conjugate is combined with
// the deskew vector), testing_on (enables debug printing together with
// textord_show_final_rows).
// NOTE(review): this listing skips line numbers 1843-1844 and 1854, so part
// of both conditions is not visible here; see the notes at those points.
1805  { // correct orientation
1806  BLOBNBOX *blob; // current blob
1807  C_BLOB *rotated_blob; // rotated blob
1808  TO_ROW *row; // current row
1809  float length; // of g_vec
1810  TBOX blob_box;
1811  FCOORD blob_rotation; // inverse of rotation
1812  FCOORD g_vec; // skew rotation
1813  BLOBNBOX_IT blob_it; // iterator
1814  // iterator
1815  BLOBNBOX_IT under_it = &block->underlines; // destination for underline blobs
1816  BLOBNBOX_IT large_it = &block->large_blobs; // destination for heavily overlapping blobs
1817  TO_ROW_IT row_it = block->get_rows();
1818  int min_blob_height = static_cast<int>(textord_min_blob_height_fraction *
1819  block->line_size + 0.5); // +0.5 rounds to nearest before truncation
1820 
1821  // length of vector
1822  length = sqrt(1 + gradient * gradient);
1823  g_vec = FCOORD(1 / length, -gradient / length); // (1, -gradient) scaled to unit length
1824  blob_rotation = FCOORD(rotation.x(), -rotation.y()); // rotation with its y component negated
1825  blob_rotation.rotate(g_vec); // undoing everything
1826  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1827  row = row_it.data();
1828  // get blobs
1829  blob_it.set_to_list(row->blob_list());
1830  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
1831  blob_it.forward()) {
1832  blob = blob_it.data();
1833  blob_box = blob->bounding_box();
1834  if (blob_box.width() > block->line_size * textord_underline_width) { // only wide blobs are candidates
1835  ASSERT_HOST(blob->cblob() != NULL);
1836  rotated_blob = crotate_cblob (blob->cblob(),
1837  blob_rotation); // new C_BLOB; deleted at the end of this if-body
1838  if (test_underline(
1839  testing_on && textord_show_final_rows,
1840  rotated_blob, static_cast<inT16>(row->intercept()),
1841  static_cast<inT16>(
1842  block->line_size *
// NOTE(review): listing lines 1843-1844 are missing here. The cross-reference
// list for this function names kXHeightFraction and kAscenderFraction, so the
// xheight argument presumably scales line_size by those -- confirm against
// the real source.
1845  under_it.add_after_then_move(blob_it.extract()); // move blob from the row to block->underlines
1846  if (testing_on && textord_show_final_rows) {
1847  tprintf("Underlined blob at:");
1848  rotated_blob->bounding_box().print();
1849  tprintf("Was:");
1850  blob_box.print();
1851  }
1852  } else if (CountOverlaps(blob->bounding_box(), min_blob_height,
1853  row->blob_list()) >
// NOTE(review): listing line 1854 (the right-hand side of this comparison) is
// missing; presumably textord_max_blob_overlaps, which the cross-reference
// list names -- confirm against the real source.
1855  large_it.add_after_then_move(blob_it.extract()); // move blob from the row to block->large_blobs
1856  if (testing_on && textord_show_final_rows) {
1857  tprintf("Large blob overlaps %d blobs at:",
1858  CountOverlaps(blob_box, min_blob_height,
1859  row->blob_list())); // overlap count recomputed for the debug message only
1860  blob_box.print();
1861  }
1862  }
1863  delete rotated_blob; // the rotated copy is only needed for the tests above
1864  }
1865  }
1866  }
1867 }
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
BLOBNBOX_LIST underlines
Definition: blobbox.h:769
Definition: points.h:189
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:772
BOOL8 test_underline(BOOL8 testing_on, C_BLOB *blob, inT16 baseline, inT16 xheight)
Definition: blkocc.cpp:53
static const double kAscenderFraction
Definition: ccstruct.h:35
int textord_max_blob_overlaps
Definition: makerow.cpp:68
#define tprintf(...)
Definition: tprintf.h:31
void rotate(const FCOORD vec)
Definition: ipoints.h:471
Definition: rect.h:30
const TBOX & bounding_box() const
Definition: blobbox.h:215
double textord_underline_width
Definition: makerow.cpp:87
float y() const
Definition: points.h:212
void print() const
Definition: rect.h:270
float line_size
Definition: blobbox.h:781
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
float intercept() const
Definition: blobbox.h:584
float x() const
Definition: points.h:209
bool textord_show_final_rows
Definition: makerow.cpp:48
static const double kXHeightFraction
Definition: ccstruct.h:34
C_BLOB * crotate_cblob(C_BLOB *blob, FCOORD rotation)
Definition: blobbox.cpp:606
#define ASSERT_HOST(x)
Definition: errcode.h:84
double textord_min_blob_height_fraction
Definition: makerow.cpp:89
inT16 width() const
Definition: rect.h:111
C_BLOB * cblob() const
Definition: blobbox.h:253
void vigorous_noise_removal ( TO_BLOCK * block)

Definition at line 473 of file makerow.cpp.

// vigorous_noise_removal: for every row of the block, estimate an xheight as
// the median height of blobs at least kMinSize tall, then delete each blob
// shorter than kNoiseSize * xheight unless dot_of_i() accepts it against the
// previous kept blob or the next blob in the row.
473  {
474  TO_ROW_IT row_it = block->get_rows ();
475  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
476  TO_ROW* row = row_it.data();
477  BLOBNBOX_IT b_it = row->blob_list();
478  // Estimate the xheight on the row.
479  int max_height = 0; // tallest blob in the row; sizes the histogram below
480  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
481  BLOBNBOX* blob = b_it.data();
482  if (blob->bounding_box().height() > max_height)
483  max_height = blob->bounding_box().height();
484  }
485  STATS hstats(0, max_height + 1); // constructed with bounds 0 and max_height + 1
486  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
487  BLOBNBOX* blob = b_it.data();
488  int height = blob->bounding_box().height();
489  if (height >= kMinSize) // blobs shorter than kMinSize do not contribute to the estimate
490  hstats.add(blob->bounding_box().height(), 1);
491  }
492  float xheight = hstats.median(); // median of the accepted heights is the xheight estimate
493  // Delete small objects.
494  BLOBNBOX* prev = NULL; // most recent blob that was not below the noise threshold
495  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
496  BLOBNBOX* blob = b_it.data();
497  const TBOX& box = blob->bounding_box();
498  if (box.height() < kNoiseSize * xheight) {
499  // Small so delete unless it looks like an i dot.
500  if (prev != NULL) {
501  if (dot_of_i(blob, prev, row))
502  continue; // Looks OK.
503  }
504  if (!b_it.at_last()) { // there is a following blob to test against
505  BLOBNBOX* next = b_it.data_relative(1);
506  if (dot_of_i(blob, next, row))
507  continue; // Looks OK.
508  }
509  // It might be noise so get rid of it.
510  delete blob->cblob(); // delete the blob's C_BLOB first
511  delete b_it.extract(); // then unlink the BLOBNBOX from the row list and delete it
512  } else {
513  prev = blob; // only blobs at or above the threshold become prev
514  }
515  }
516  }
517 }
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
const int kMinSize
Definition: makerow.cpp:384
inT16 height() const
Definition: rect.h:104
Definition: rect.h:30
const TBOX & bounding_box() const
Definition: blobbox.h:215
Definition: statistc.h:33
const double kNoiseSize
Definition: makerow.cpp:383
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
C_BLOB * cblob() const
Definition: blobbox.h:253

Variable Documentation

const int kMinLeaderCount = 5

Definition at line 107 of file makerow.cpp.

const int kMinSize = 8

Definition at line 384 of file makerow.cpp.

const double kNoiseSize = 0.5

Definition at line 383 of file makerow.cpp.

double textord_ascheight_mode_fraction = 0.08

"Min pile height to make ascheight"

Definition at line 93 of file makerow.cpp.

double textord_ascx_ratio_max = 1.8

"Max cap/xheight"

Definition at line 97 of file makerow.cpp.

double textord_ascx_ratio_min = 1.25

"Min cap/xheight"

Definition at line 96 of file makerow.cpp.

bool textord_biased_skewcalc = TRUE

"Bias skew estimates with line length"

Definition at line 58 of file makerow.cpp.

double textord_chop_width = 1.5

"Max width before chopping"

Definition at line 78 of file makerow.cpp.

bool textord_debug_blob = FALSE

"Print test blob information"

Definition at line 103 of file makerow.cpp.

bool textord_debug_xheights = FALSE

"Test xheight algorithms"

Definition at line 57 of file makerow.cpp.

double textord_descheight_mode_fraction = 0.08

"Min pile height to make descheight"

Definition at line 95 of file makerow.cpp.

double textord_descx_ratio_max = 0.6

"Max desc/xheight"

Definition at line 99 of file makerow.cpp.

double textord_descx_ratio_min = 0.25

"Min desc/xheight"

Definition at line 98 of file makerow.cpp.

double textord_excess_blobsize = 1.3

"New row made if blob makes row this big"

Definition at line 85 of file makerow.cpp.

double textord_expansion_factor = 1.0

"Factor to expand rows by in expand_rows"

Definition at line 80 of file makerow.cpp.

bool textord_fix_makerow_bug = TRUE

"Prevent multiple baselines"

Definition at line 56 of file makerow.cpp.

bool textord_fix_xheight_bug = TRUE

"Use spline baseline"

Definition at line 55 of file makerow.cpp.

bool textord_heavy_nr = FALSE

"Vigorously remove noise"

Definition at line 44 of file makerow.cpp.

bool textord_interpolating_skew = TRUE

"Interpolate across gaps"

Definition at line 59 of file makerow.cpp.

double textord_linespace_iqrlimit = 0.2

"Max iqr/median for linespace"

Definition at line 76 of file makerow.cpp.

int textord_lms_line_trials = 12

"Number of linew fits to do"

Definition at line 101 of file makerow.cpp.

int textord_max_blob_overlaps = 4

"Max number of blobs a big blob can overlap"

Definition at line 68 of file makerow.cpp.

double textord_min_blob_height_fraction = 0.75

"Min blob height/top to include blob top into xheight stats"

Definition at line 89 of file makerow.cpp.

int textord_min_blobs_in_row = 4

"Min blobs before gradient counted"

Definition at line 64 of file makerow.cpp.

double textord_min_linesize = 1.25

"* blob height for initial linesize"

Definition at line 83 of file makerow.cpp.

int textord_min_xheight = 10

"Min credible pixel xheight"

Definition at line 69 of file makerow.cpp.

double textord_minxh = 0.25

"fraction of linesize for min xheight"

Definition at line 82 of file makerow.cpp.

bool textord_new_initial_xheight = TRUE

"Use test xheight mechanism"

Definition at line 102 of file makerow.cpp.

double textord_occupancy_threshold = 0.4

"Fraction of neighbourhood"

Definition at line 86 of file makerow.cpp.

bool textord_old_baselines = TRUE

"Use old baseline algorithm"

Definition at line 53 of file makerow.cpp.

bool textord_old_xheight = FALSE

"Use old xheight algorithm"

Definition at line 54 of file makerow.cpp.

double textord_overlap_x = 0.375

"Fraction of linespace for good overlap"

Definition at line 81 of file makerow.cpp.

bool textord_parallel_baselines = TRUE

"Force parallel baselines"

Definition at line 51 of file makerow.cpp.

bool textord_show_expanded_rows = FALSE

"Display rows after expanding"

Definition at line 47 of file makerow.cpp.

bool textord_show_final_blobs = FALSE

"Display blob bounds after pre-ass"

Definition at line 49 of file makerow.cpp.

bool textord_show_final_rows = FALSE

"Display rows after final fitting"

Definition at line 48 of file makerow.cpp.

bool textord_show_initial_rows = FALSE

"Display row accumulation"

Definition at line 45 of file makerow.cpp.

bool textord_show_parallel_rows = FALSE

"Display page correlated rows"

Definition at line 46 of file makerow.cpp.

double textord_skew_ile = 0.5

"Ile of gradients for page skew"

Definition at line 74 of file makerow.cpp.

double textord_skew_lag = 0.02

"Lag for skew on row accumulation"

Definition at line 75 of file makerow.cpp.

int textord_skewsmooth_offset = 4

"For smooth factor"

Definition at line 60 of file makerow.cpp.

int textord_skewsmooth_offset2 = 1

"For smooth factor"

Definition at line 61 of file makerow.cpp.

int textord_spline_medianwin = 6

"Size of window for spline segmentation"

Definition at line 66 of file makerow.cpp.

int textord_spline_minblobs = 8

"Min blobs in each spline segment"

Definition at line 65 of file makerow.cpp.

double textord_spline_outlier_fraction = 0.1

"Fraction of line spacing for outlier"

Definition at line 73 of file makerow.cpp.

double textord_spline_shift_fraction = 0.02

"Fraction of line spacing for quad"

Definition at line 71 of file makerow.cpp.

bool textord_straight_baselines = FALSE

"Force straight baselines"

Definition at line 52 of file makerow.cpp.

bool textord_test_landscape = FALSE

"Tests refer to land/port"

Definition at line 50 of file makerow.cpp.

int textord_test_x = -MAX_INT32

"coord of test pt"

Definition at line 62 of file makerow.cpp.

int textord_test_y = -MAX_INT32

"coord of test pt"

Definition at line 63 of file makerow.cpp.

double textord_underline_width = 2.0

"Multiple of line_size for underline"

Definition at line 87 of file makerow.cpp.

double textord_width_limit = 8

"Max width of blobs to make rows"

Definition at line 77 of file makerow.cpp.

double textord_xheight_error_margin = 0.1

"Accepted variation"

Definition at line 100 of file makerow.cpp.

double textord_xheight_mode_fraction = 0.4

"Min pile height to make xheight"

Definition at line 91 of file makerow.cpp.