#include <cube_utils.h>
|
| static int | Prob2Cost (double prob_val) |
| |
| static double | Cost2Prob (int cost) |
| |
| static int | StrLen (const char_32 *str) |
| |
| static int | StrCmp (const char_32 *str1, const char_32 *str2) |
| |
| static char_32 * | StrDup (const char_32 *str) |
| |
| static CharSamp * | CharSampleFromPix (Pix *pix, int left, int top, int wid, int hgt) |
| |
| static Pix * | PixFromCharSample (CharSamp *char_samp) |
| |
| static bool | ReadFileToString (const string &file_name, string *str) |
| |
| static void | SplitStringUsing (const string &str, const string &delims, vector< string > *str_vec) |
| |
| static void | UTF8ToUTF32 (const char *utf8_str, string_32 *str32) |
| |
| static void | UTF32ToUTF8 (const char_32 *utf32_str, string *str) |
| |
| static bool | IsCaseInvariant (const char_32 *str32, CharSet *char_set) |
| |
| static char_32 * | ToLower (const char_32 *str32, CharSet *char_set) |
| |
| static char_32 * | ToUpper (const char_32 *str32, CharSet *char_set) |
| |
Definition at line 35 of file cube_utils.h.
| tesseract::CubeUtils::CubeUtils |
( |
| ) |
|
| tesseract::CubeUtils::~CubeUtils |
( |
| ) |
|
| CharSamp * tesseract::CubeUtils::CharSampleFromPix |
( |
Pix * |
pix, |
|
|
int |
left, |
|
|
int |
top, |
|
|
int |
wid, |
|
|
int |
hgt |
|
) |
| |
|
static |
Creates a CharSamp from the specified rectangular portion of the image.
Definition at line 104 of file cube_utils.cpp.
107 unsigned char *temp_buff = GetImageData(pix, left, top, wid, hgt);
108 if (temp_buff == NULL) {
static CharSamp * FromRawData(int left, int top, int wid, int hgt, unsigned char *data)
| double tesseract::CubeUtils::Cost2Prob |
( |
int |
cost | ) |
|
|
static |
| bool tesseract::CubeUtils::IsCaseInvariant |
( |
const char_32 * |
str32, |
|
|
CharSet * |
char_set |
|
) |
| |
|
static |
Definition at line 294 of file cube_utils.cpp.
295 bool all_one_case =
true;
308 first_upper = isupper(str32[0]);
309 first_lower = islower(str32[0]);
312 prev_upper = first_upper;
313 prev_lower = first_lower;
314 for (
int c = 1; str32[c] != 0; ++c) {
315 cur_upper = isupper(str32[c]);
316 cur_lower = islower(str32[c]);
317 if ((prev_upper && cur_lower) || (prev_lower && cur_upper))
318 all_one_case =
false;
321 prev_upper = cur_upper;
322 prev_lower = cur_lower;
325 UNICHARSET *unicharset = char_set->InternalUnicharset();
327 first_upper = unicharset->
get_isupper(char_set->ClassID(str32[0]));
328 first_lower = unicharset->
get_islower(char_set->ClassID(str32[0]));
331 prev_upper = first_upper;
332 prev_lower = first_lower;
334 for (
int c = 1; c <
StrLen(str32); ++c) {
335 cur_upper = unicharset->
get_isupper(char_set->ClassID(str32[c]));
336 cur_lower = unicharset->
get_islower(char_set->ClassID(str32[c]));
337 if ((prev_upper && cur_lower) || (prev_lower && cur_upper))
338 all_one_case =
false;
341 prev_upper = cur_upper;
342 prev_lower = cur_lower;
345 return all_one_case || capitalized;
bool get_islower(UNICHAR_ID unichar_id) const
static int StrLen(const char_32 *str)
bool get_isupper(UNICHAR_ID unichar_id) const
| Pix * tesseract::CubeUtils::PixFromCharSample |
( |
CharSamp * |
char_samp | ) |
|
|
static |
Creates a black-and-white image from a CharSamp.
Definition at line 123 of file cube_utils.cpp.
125 if (char_samp == NULL) {
130 int stride = char_samp->Stride();
131 int wid = char_samp->Width();
132 int hgt = char_samp->Height();
134 Pix *pix = pixCreate(wid, hgt, 1);
140 unsigned char *line = char_samp->RawData();
141 for (
int y = 0; y < hgt ; y++, line += stride) {
142 for (
int x = 0; x < wid; x++) {
144 pixSetPixel(pix, x, y, 0);
146 pixSetPixel(pix, x, y, 255);
| int tesseract::CubeUtils::Prob2Cost |
( |
double |
prob_val | ) |
|
|
static |
Converts a probability to a cost (negative log probability).
Definition at line 37 of file cube_utils.cpp.
| bool tesseract::CubeUtils::ReadFileToString |
( |
const string & |
file_name, |
|
|
string * |
str |
|
) |
| |
|
static |
Reads the contents of a file into a string.
Definition at line 195 of file cube_utils.cpp.
197 FILE *fp = fopen(file_name.c_str(),
"rb");
203 fseek(fp, 0, SEEK_END);
204 int file_size = ftell(fp);
210 str->reserve(file_size);
213 char *buff =
new char[file_size];
218 int read_bytes = fread(buff, 1, static_cast<int>(file_size), fp);
219 if (read_bytes == file_size) {
220 str->append(buff, file_size);
224 return (read_bytes == file_size);
| void tesseract::CubeUtils::SplitStringUsing |
( |
const string & |
str, |
|
|
const string & |
delims, |
|
|
vector< string > * |
str_vec |
|
) |
| |
|
static |
Splits a string into a vector of substrings based on the specified delimiters.
Definition at line 230 of file cube_utils.cpp.
234 if (delims[0] !=
'\0' && delims[1] ==
'\0') {
236 const char* p = str.data();
237 const char* end = p + str.size();
242 const char* start = p;
243 while (++p != end && *p != c);
244 str_vec->push_back(
string(start, p - start));
250 string::size_type begin_index, end_index;
251 begin_index = str.find_first_not_of(delims);
252 while (begin_index != string::npos) {
253 end_index = str.find_first_of(delims, begin_index);
254 if (end_index == string::npos) {
255 str_vec->push_back(str.substr(begin_index));
258 str_vec->push_back(str.substr(begin_index, (end_index - begin_index)));
259 begin_index = str.find_first_not_of(delims, end_index);
| int tesseract::CubeUtils::StrCmp |
( |
const char_32 * |
str1, |
|
|
const char_32 * |
str2 |
|
) |
| |
|
static |
Compares two char_32 strings.
Definition at line 66 of file cube_utils.cpp.
70 for (; (*pch1) != 0 && (*pch2) != 0; pch1++, pch2++) {
71 if ((*pch1) != (*pch2)) {
72 return (*pch1) - (*pch2);
Duplicates a 32-bit char buffer
Definition at line 90 of file cube_utils.cpp.
93 if (new_str == NULL) {
96 memcpy(new_str, str32, len *
sizeof(*str32));
static int StrLen(const char_32 *str)
| int tesseract::CubeUtils::StrLen |
( |
const char_32 * |
char_32_ptr | ) |
|
|
static |
Computes the length of a null-terminated char_32 string.
Definition at line 54 of file cube_utils.cpp.
55 if (char_32_ptr == NULL) {
59 while (char_32_ptr[++len]);
Definition at line 348 of file cube_utils.cpp.
352 UNICHARSET *unicharset = char_set->InternalUnicharset();
357 for (
int i = 0; i < len; ++i) {
359 if (ch == INVALID_UNICHAR_ID) {
364 if (unicharset->
get_isupper(char_set->ClassID(ch))) {
366 const char_32 *str32_lower = char_set->ClassString(uid_lower);
368 if (!str32_lower ||
StrLen(str32_lower) != 1) {
372 lower[i] = str32_lower[0];
static int StrLen(const char_32 *str)
UNICHAR_ID get_other_case(UNICHAR_ID unichar_id) const
bool get_isupper(UNICHAR_ID unichar_id) const
Definition at line 381 of file cube_utils.cpp.
385 UNICHARSET *unicharset = char_set->InternalUnicharset();
390 for (
int i = 0; i < len; ++i) {
392 if (ch == INVALID_UNICHAR_ID) {
397 if (unicharset->
get_islower(char_set->ClassID(ch))) {
399 const char_32 *str32_upper = char_set->ClassString(uid_upper);
401 if (!str32_upper ||
StrLen(str32_upper) != 1) {
405 upper[i] = str32_upper[0];
bool get_islower(UNICHAR_ID unichar_id) const
static int StrLen(const char_32 *str)
UNICHAR_ID get_other_case(UNICHAR_ID unichar_id) const
| void tesseract::CubeUtils::UTF32ToUTF8 |
( |
const char_32 * |
utf32_str, |
|
|
string * |
str |
|
) |
| |
|
static |
Converts a UTF-32 string to UTF-8.
Definition at line 282 of file cube_utils.cpp.
284 for (
const char_32 *ch_32 = utf32_str; (*ch_32) != 0; ch_32++) {
286 char *utf8 = uni_ch.utf8_str();
| void tesseract::CubeUtils::UTF8ToUTF32 |
( |
const char * |
utf8_str, |
|
|
string_32 * |
str32 |
|
) |
| |
|
static |
Converts a UTF-8 string to UTF-32.
Definition at line 266 of file cube_utils.cpp.
268 int len = strlen(utf8_str);
270 for (
int ch = 0; ch < len; ch += step) {
273 UNICHAR uni_ch(utf8_str + ch, step);
274 (*str32) += uni_ch.first_uni();
static int utf8_step(const char *utf8_str)
The documentation for this class was generated from the following files: