tesseract  3.05.00
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
renderer.cpp
Go to the documentation of this file.
1 // File: renderer.cpp
3 // Description: Rendering interface to inject into TessBaseAPI
4 //
5 // (C) Copyright 2011, Google Inc.
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
17 
18 #ifdef HAVE_CONFIG_H
19 #include "config_auto.h"
20 #endif
21 
22 #include <string.h>
23 #include "baseapi.h"
24 #include "genericvector.h"
25 #include "renderer.h"
26 
27 namespace tesseract {
28 
29 /**********************************************************************
30  * Base Renderer interface implementation
31  **********************************************************************/
33  const char* extension)
34  : file_extension_(extension),
35  title_(""), imagenum_(-1),
36  fout_(stdout),
37  next_(NULL),
38  happy_(true) {
39  if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
40  STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_);
41  fout_ = fopen(outfile.string(), "wb");
42  if (fout_ == NULL) {
43  happy_ = false;
44  }
45  }
46 }
47 
49  if (fout_ != stdout)
50  fclose(fout_);
51  else
52  clearerr(fout_);
53  delete next_;
54 }
55 
57  if (next == NULL) return;
58 
59  TessResultRenderer* remainder = next_;
60  next_ = next;
61  if (remainder) {
62  while (next->next_ != NULL) {
63  next = next->next_;
64  }
65  next->next_ = remainder;
66  }
67 }
68 
69 bool TessResultRenderer::BeginDocument(const char* title) {
70  if (!happy_) return false;
71  title_ = title;
72  imagenum_ = -1;
73  bool ok = BeginDocumentHandler();
74  if (next_) {
75  ok = next_->BeginDocument(title) && ok;
76  }
77  return ok;
78 }
79 
81  if (!happy_) return false;
82  ++imagenum_;
83  bool ok = AddImageHandler(api);
84  if (next_) {
85  ok = next_->AddImage(api) && ok;
86  }
87  return ok;
88 }
89 
91  if (!happy_) return false;
92  bool ok = EndDocumentHandler();
93  if (next_) {
94  ok = next_->EndDocument() && ok;
95  }
96  return ok;
97 }
98 
99 void TessResultRenderer::AppendString(const char* s) {
100  AppendData(s, strlen(s));
101 }
102 
103 void TessResultRenderer::AppendData(const char* s, int len) {
104  int n = fwrite(s, 1, len, fout_);
105  if (n != len) happy_ = false;
106 }
107 
109  return happy_;
110 }
111 
113  return happy_;
114 }
115 
116 
117 /**********************************************************************
118  * UTF8 Text Renderer interface implementation
119  **********************************************************************/
120 TessTextRenderer::TessTextRenderer(const char *outputbase)
121  : TessResultRenderer(outputbase, "txt") {
122 }
123 
125  char* utf8 = api->GetUTF8Text();
126  if (utf8 == NULL) {
127  return false;
128  }
129 
130  AppendString(utf8);
131  delete[] utf8;
132 
133  bool pageBreak = false;
134  api->GetBoolVariable("include_page_breaks", &pageBreak);
135  const char* pageSeparator = api->GetStringVariable("page_separator");
136  if (pageBreak) {
137  AppendString(pageSeparator);
138  }
139 
140  return true;
141 }
142 
143 /**********************************************************************
144  * HOcr Text Renderer interface implementation
145  **********************************************************************/
146 TessHOcrRenderer::TessHOcrRenderer(const char *outputbase)
147  : TessResultRenderer(outputbase, "hocr") {
148  font_info_ = false;
149 }
150 
151 TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info)
152  : TessResultRenderer(outputbase, "hocr") {
153  font_info_ = font_info;
154 }
155 
157  AppendString(
158  "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
159  "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
160  " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
161  "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" "
162  "lang=\"en\">\n <head>\n <title>");
163  AppendString(title());
164  AppendString(
165  "</title>\n"
166  "<meta http-equiv=\"Content-Type\" content=\"text/html;"
167  "charset=utf-8\" />\n"
168  " <meta name='ocr-system' content='tesseract " TESSERACT_VERSION_STR
169  "' />\n"
170  " <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par"
171  " ocr_line ocrx_word");
172  if (font_info_)
173  AppendString(
174  " ocrp_lang ocrp_dir ocrp_font ocrp_fsize ocrp_wconf");
175  AppendString(
176  "'/>\n"
177  "</head>\n<body>\n");
178 
179  return true;
180 }
181 
183  AppendString(" </body>\n</html>\n");
184 
185  return true;
186 }
187 
189  char* hocr = api->GetHOCRText(imagenum());
190  if (hocr == NULL) return false;
191 
192  AppendString(hocr);
193  delete[] hocr;
194 
195  return true;
196 }
197 
198 /**********************************************************************
199  * TSV Text Renderer interface implementation
200  **********************************************************************/
201 TessTsvRenderer::TessTsvRenderer(const char* outputbase)
202  : TessResultRenderer(outputbase, "tsv") {
203  font_info_ = false;
204 }
205 
206 TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info)
207  : TessResultRenderer(outputbase, "tsv") {
208  font_info_ = font_info;
209 }
210 
212  // Output TSV column headings
213  AppendString(
214  "level\tpage_num\tblock_num\tpar_num\tline_num\tword_"
215  "num\tleft\ttop\twidth\theight\tconf\ttext\n");
216  return true;
217 }
218 
219 bool TessTsvRenderer::EndDocumentHandler() { return true; }
220 
222  char* tsv = api->GetTSVText(imagenum());
223  if (tsv == NULL) return false;
224 
225  AppendString(tsv);
226  delete[] tsv;
227 
228  return true;
229 }
230 
231 /**********************************************************************
232  * UNLV Text Renderer interface implementation
233  **********************************************************************/
234 TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
235  : TessResultRenderer(outputbase, "unlv") {
236 }
237 
239  char* unlv = api->GetUNLVText();
240  if (unlv == NULL) return false;
241 
242  AppendString(unlv);
243  delete[] unlv;
244 
245  return true;
246 }
247 
248 /**********************************************************************
249  * BoxText Renderer interface implementation
250  **********************************************************************/
252  : TessResultRenderer(outputbase, "box") {
253 }
254 
256  char* text = api->GetBoxText(imagenum());
257  if (text == NULL) return false;
258 
259  AppendString(text);
260  delete[] text;
261 
262  return true;
263 }
264 
265 /**********************************************************************
266  * Osd Text Renderer interface implementation
267  **********************************************************************/
268 TessOsdRenderer::TessOsdRenderer(const char* outputbase)
269  : TessResultRenderer(outputbase, "osd") {}
270 
272  char* osd = api->GetOsdText(imagenum());
273  if (osd == NULL) return false;
274 
275  AppendString(osd);
276  delete[] osd;
277 
278  return true;
279 }
280 
281 } // namespace tesseract
TessTextRenderer(const char *outputbase)
Definition: renderer.cpp:120
TessTsvRenderer(const char *outputbase, bool font_info)
Definition: renderer.cpp:206
TessHOcrRenderer(const char *outputbase, bool font_info)
Definition: renderer.cpp:151
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:124
TessOsdRenderer(const char *outputbase)
Definition: renderer.cpp:268
virtual bool EndDocumentHandler()
Definition: renderer.cpp:219
void AppendString(const char *s)
Definition: renderer.cpp:99
Definition: strngs.h:44
virtual bool AddImageHandler(TessBaseAPI *api)=0
bool BeginDocument(const char *title)
Definition: renderer.cpp:69
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:271
TessBoxTextRenderer(const char *outputbase)
Definition: renderer.cpp:251
virtual bool EndDocumentHandler()
Definition: renderer.cpp:182
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:221
const char * GetStringVariable(const char *name) const
Definition: baseapi.cpp:242
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:238
virtual bool BeginDocumentHandler()
Definition: renderer.cpp:156
virtual bool BeginDocumentHandler()
Definition: renderer.cpp:211
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:188
#define TESSERACT_VERSION_STR
Definition: baseapi.h:23
TessResultRenderer * next()
Definition: renderer.h:55
const char * title() const
Definition: renderer.h:80
virtual bool BeginDocumentHandler()
Definition: renderer.cpp:108
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:255
char * GetOsdText(int page_number)
Definition: baseapi.cpp:1942
void AppendData(const char *s, int len)
Definition: renderer.cpp:103
char * GetTSVText(int page_number)
Definition: baseapi.cpp:1613
char * GetBoxText(int page_number)
Definition: baseapi.cpp:1745
virtual bool EndDocumentHandler()
Definition: renderer.cpp:112
bool AddImage(TessBaseAPI *api)
Definition: renderer.cpp:80
const char * string() const
Definition: strngs.cpp:201
char * GetHOCRText(ETEXT_DESC *monitor, int page_number)
Definition: baseapi.cpp:1442
TessResultRenderer(const char *outputbase, const char *extension)
Definition: renderer.cpp:32
TessUnlvRenderer(const char *outputbase)
Definition: renderer.cpp:234
void insert(TessResultRenderer *next)
Definition: renderer.cpp:56
bool GetBoolVariable(const char *name, bool *value) const
Definition: baseapi.cpp:234