tesseract  3.05.00
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
renderer.h
Go to the documentation of this file.
1 // File: renderer.h
3 // Description: Rendering interface to inject into TessBaseAPI
4 //
5 // (C) Copyright 2011, Google Inc.
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
17 
18 #ifndef TESSERACT_API_RENDERER_H__
19 #define TESSERACT_API_RENDERER_H__
20 
21 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
22 // complexity of includes here. Use forward declarations wherever possible
23 // and hide includes of complex types in baseapi.cpp.
24 #include "genericvector.h"
25 #include "platform.h"
26 #include "publictypes.h"
27 
28 namespace tesseract {
29 
30 class TessBaseAPI;
31 
46  public:
47  virtual ~TessResultRenderer();
48 
49  // Takes ownership of pointer so must be new'd instance.
50 // Renderers aren't ordered, but this appends the sequences of the next parameter
51  // and existing next(). The renderers should be unique across both lists.
52  void insert(TessResultRenderer* next);
53 
54  // Returns the next renderer or NULL.
55  TessResultRenderer* next() { return next_; }
56 
61  bool BeginDocument(const char* title);
62 
71  bool AddImage(TessBaseAPI* api);
72 
77  bool EndDocument();
78 
79  const char* file_extension() const { return file_extension_; }
80  const char* title() const { return title_.c_str(); }
81 
91  int imagenum() const { return imagenum_; }
92 
93  protected:
104  TessResultRenderer(const char *outputbase,
105  const char* extension);
106 
107  // Hook for specialized handling in BeginDocument()
108  virtual bool BeginDocumentHandler();
109 
110 // This must be overridden to render the OCR'd results
111  virtual bool AddImageHandler(TessBaseAPI* api) = 0;
112 
113  // Hook for specialized handling in EndDocument()
114  virtual bool EndDocumentHandler();
115 
116  // Renderers can call this to append '\0' terminated strings into
117  // the output string returned by GetOutput.
118  // This method will grow the output buffer if needed.
119  void AppendString(const char* s);
120 
121  // Renderers can call this to append binary byte sequences into
122  // the output string returned by GetOutput. Note that s is not necessarily
123  // '\0' terminated (and can contain '\0' within it).
124  // This method will grow the output buffer if needed.
125  void AppendData(const char* s, int len);
126 
127  private:
128  const char* file_extension_; // standard extension for generated output
129 STRING title_; // title of document being rendered
130  int imagenum_; // index of last image added
131 
132  FILE* fout_; // output file pointer
133  TessResultRenderer* next_; // Can link multiple renderers together
134  bool happy_; // I get grumpy when the disk fills up, etc.
135 };
136 
141  public:
142  explicit TessTextRenderer(const char *outputbase);
143 
144  protected:
145  virtual bool AddImageHandler(TessBaseAPI* api);
146 };
147 
152  public:
153  explicit TessHOcrRenderer(const char *outputbase, bool font_info);
154  explicit TessHOcrRenderer(const char *outputbase);
155 
156  protected:
157  virtual bool BeginDocumentHandler();
158  virtual bool AddImageHandler(TessBaseAPI* api);
159  virtual bool EndDocumentHandler();
160 
161  private:
162  bool font_info_; // whether to print font information
163 };
164 
169  public:
170  explicit TessTsvRenderer(const char* outputbase, bool font_info);
171  explicit TessTsvRenderer(const char* outputbase);
172 
173  protected:
174  virtual bool BeginDocumentHandler();
175  virtual bool AddImageHandler(TessBaseAPI* api);
176  virtual bool EndDocumentHandler();
177 
178  private:
179  bool font_info_; // whether to print font information
180 };
181 
186  public:
187  // datadir is the location of the TESSDATA. We need it because
188  // we load a custom PDF font from this location.
189  TessPDFRenderer(const char *outputbase, const char *datadir);
190 
191  protected:
192  virtual bool BeginDocumentHandler();
193  virtual bool AddImageHandler(TessBaseAPI* api);
194  virtual bool EndDocumentHandler();
195 
196  private:
197  // We don't want to have every image in memory at once,
198  // so we store some metadata as we go along producing
199  // PDFs one page at a time. At the end that metadata is
200  // used to make everything that isn't easily handled in a
201  // streaming fashion.
202  long int obj_; // counter for PDF objects
203  GenericVector<long int> offsets_; // offset of every PDF object in bytes
204  GenericVector<long int> pages_; // object number for every /Page object
205  const char *datadir_; // where to find the custom font
206  // Bookkeeping only. DIY = Do It Yourself.
207  void AppendPDFObjectDIY(size_t objectsize);
208  // Bookkeeping + emit data.
209  void AppendPDFObject(const char *data);
210  // Create the /Contents object for an entire page.
211  static char* GetPDFTextObjects(TessBaseAPI* api,
212  double width, double height);
213  // Turn an image into a PDF object. Only transcode if we have to.
214  static bool imageToPDFObj(Pix *pix, char *filename, long int objnum,
215  char **pdf_object, long int *pdf_object_size);
216 };
217 
218 
223  public:
224  explicit TessUnlvRenderer(const char *outputbase);
225 
226  protected:
227  virtual bool AddImageHandler(TessBaseAPI* api);
228 };
229 
234  public:
235  explicit TessBoxTextRenderer(const char *outputbase);
236 
237  protected:
238  virtual bool AddImageHandler(TessBaseAPI* api);
239 };
240 
245  public:
246  explicit TessOsdRenderer(const char* outputbase);
247 
248  protected:
249  virtual bool AddImageHandler(TessBaseAPI* api);
250 };
251 
252 } // namespace tesseract.
253 
254 #endif // TESSERACT_API_RENDERER_H__
struct TessBaseAPI TessBaseAPI
Definition: capi.h:86
struct TessBoxTextRenderer TessBoxTextRenderer
Definition: capi.h:85
Definition: strngs.h:44
struct TessHOcrRenderer TessHOcrRenderer
Definition: capi.h:82
const char * file_extension() const
Definition: renderer.h:79
struct TessPDFRenderer TessPDFRenderer
Definition: capi.h:83
struct TessUnlvRenderer TessUnlvRenderer
Definition: capi.h:84
struct TessResultRenderer TessResultRenderer
Definition: capi.h:80
void insert(LIST list, void *node)
Definition: oldlist.cpp:221
TessResultRenderer * next()
Definition: renderer.h:55
const char * title() const
Definition: renderer.h:80
struct TessTextRenderer TessTextRenderer
Definition: capi.h:81
#define TESS_API
Definition: platform.h:79