#include <renderer.h>
Renders Tesseract output into a searchable PDF.
Definition at line 185 of file renderer.h.
tesseract::TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir)
Definition at line 181 of file pdfrenderer.cpp.
TessResultRenderer(const char *outputbase, const char *extension)
bool tesseract::TessPDFRenderer::AddImageHandler(TessBaseAPI *api)
[protected, virtual]
Implements tesseract::TessResultRenderer.
Definition at line 837 of file pdfrenderer.cpp.
840 Pix *pix =
api->GetInputImage();
842 int ppi =
api->GetSourceYResolution();
843 if (!pix || ppi <= 0)
845 double width = pixGetWidth(pix) * 72.0 / ppi;
846 double height = pixGetHeight(pix) * 72.0 / ppi;
849 n = snprintf(buf,
sizeof(buf),
854 " /MediaBox [0 0 %.2f %.2f]\n"
855 " /Contents %ld 0 R\n"
858 " /XObject << /Im1 %ld 0 R >>\n"
859 " /ProcSet [ /PDF /Text /ImageB /ImageI /ImageC ]\n"
860 " /Font << /f-0-0 %ld 0 R >>\n"
871 if (n >=
sizeof(buf))
return false;
873 AppendPDFObject(buf);
876 char* pdftext = GetPDFTextObjects(
api, width, height);
877 long pdftext_len = strlen(pdftext);
878 unsigned char *pdftext_casted =
reinterpret_cast<unsigned char *
>(pdftext);
880 unsigned char *comp_pdftext =
881 zlibCompress(pdftext_casted, pdftext_len, &len);
882 long comp_pdftext_len = len;
883 n = snprintf(buf,
sizeof(buf),
886 " /Length %ld /Filter /FlateDecode\n"
888 "stream\n", obj_, comp_pdftext_len);
889 if (n >=
sizeof(buf)) {
891 lept_free(comp_pdftext);
895 long objsize = strlen(buf);
896 AppendData(reinterpret_cast<char *>(comp_pdftext), comp_pdftext_len);
897 objsize += comp_pdftext_len;
898 lept_free(comp_pdftext);
904 objsize += strlen(b2);
905 AppendPDFObjectDIY(objsize);
908 if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize)) {
912 AppendPDFObjectDIY(objsize);
void AppendString(const char *s)
void AppendData(const char *s, int len)
bool tesseract::TessPDFRenderer::BeginDocumentHandler()
[protected, virtual]
Reimplemented from tesseract::TessResultRenderer.
Definition at line 485 of file pdfrenderer.cpp.
489 n = snprintf(buf,
sizeof(buf),
492 0xDE, 0xAD, 0xBE, 0xEB);
493 if (n >=
sizeof(buf))
return false;
494 AppendPDFObject(buf);
497 n = snprintf(buf,
sizeof(buf),
505 if (n >=
sizeof(buf))
return false;
506 AppendPDFObject(buf);
514 n = snprintf(buf,
sizeof(buf),
517 " /BaseFont /GlyphLessFont\n"
518 " /DescendantFonts [ %ld 0 R ]\n"
519 " /Encoding /Identity-H\n"
521 " /ToUnicode %ld 0 R\n"
528 if (n >=
sizeof(buf))
return false;
529 AppendPDFObject(buf);
532 n = snprintf(buf,
sizeof(buf),
535 " /BaseFont /GlyphLessFont\n"
536 " /CIDToGIDMap %ld 0 R\n"
539 " /Ordering (Identity)\n"
540 " /Registry (Adobe)\n"
543 " /FontDescriptor %ld 0 R\n"
544 " /Subtype /CIDFontType2\n"
552 if (n >=
sizeof(buf))
return false;
553 AppendPDFObject(buf);
556 const int kCIDToGIDMapSize = 2 * (1 << 16);
557 unsigned char *cidtogidmap =
new unsigned char[kCIDToGIDMapSize];
558 for (
int i = 0; i < kCIDToGIDMapSize; i++) {
559 cidtogidmap[i] = (i % 2) ? 1 : 0;
562 unsigned char *comp =
563 zlibCompress(cidtogidmap, kCIDToGIDMapSize, &len);
564 delete[] cidtogidmap;
565 n = snprintf(buf,
sizeof(buf),
568 " /Length %lu /Filter /FlateDecode\n"
572 if (n >=
sizeof(buf)) {
577 long objsize = strlen(buf);
578 AppendData(reinterpret_cast<char *>(comp), len);
581 const char *endstream_endobj =
585 objsize += strlen(endstream_endobj);
586 AppendPDFObjectDIY(objsize);
589 "/CIDInit /ProcSet findresource begin\n"
594 " /Registry (Adobe)\n"
598 "/CMapName /Adobe-Identify-UCS def\n"
600 "1 begincodespacerange\n"
602 "endcodespacerange\n"
604 "<0000> <FFFF> <0000>\n"
607 "CMapName currentdict /CMap defineresource pop\n"
612 n = snprintf(buf,
sizeof(buf),
614 "<< /Length %lu >>\n"
618 "endobj\n", (
unsigned long) strlen(stream), stream);
619 if (n >=
sizeof(buf))
return false;
620 AppendPDFObject(buf);
623 n = snprintf(buf,
sizeof(buf),
630 " /FontBBox [ 0 0 %d %d ]\n"
631 " /FontFile2 %ld 0 R\n"
632 " /FontName /GlyphLessFont\n"
635 " /Type /FontDescriptor\n"
644 if (n >=
sizeof(buf))
return false;
645 AppendPDFObject(buf);
647 n = snprintf(buf,
sizeof(buf),
"%s/pdf.ttf", datadir_);
648 if (n >=
sizeof(buf))
return false;
649 FILE *fp = fopen(buf,
"rb");
651 tprintf(
"Can not open file \"%s\"!\n", buf);
654 fseek(fp, 0, SEEK_END);
655 long int size = ftell(fp);
656 fseek(fp, 0, SEEK_SET);
657 char *buffer =
new char[size];
658 if (fread(buffer, 1, size, fp) != size) {
665 n = snprintf(buf,
sizeof(buf),
671 "stream\n", size, size);
672 if (n >=
sizeof(buf)) {
677 objsize = strlen(buf);
682 objsize += strlen(endstream_endobj);
683 AppendPDFObjectDIY(objsize);
void AppendString(const char *s)
void AppendData(const char *s, int len)
bool tesseract::TessPDFRenderer::EndDocumentHandler()
[protected, virtual]
Reimplemented from tesseract::TessResultRenderer.
Definition at line 918 of file pdfrenderer.cpp.
929 const long int kPagesObjectNumber = 2;
930 offsets_[kPagesObjectNumber] = offsets_.
back();
931 n = snprintf(buf,
sizeof(buf),
935 " /Kids [ ", kPagesObjectNumber);
936 if (n >=
sizeof(buf))
return false;
938 size_t pages_objsize = strlen(buf);
939 for (
size_t i = 0; i < pages_.
size(); i++) {
940 n = snprintf(buf,
sizeof(buf),
941 "%ld 0 R ", pages_[i]);
942 if (n >=
sizeof(buf))
return false;
944 pages_objsize += strlen(buf);
946 n = snprintf(buf,
sizeof(buf),
950 "endobj\n", pages_.
size());
951 if (n >=
sizeof(buf))
return false;
953 pages_objsize += strlen(buf);
954 offsets_.
back() += pages_objsize;
957 char* datestr = l_getFormattedDate();
958 n = snprintf(buf,
sizeof(buf),
961 " /Producer (Tesseract %s)\n"
962 " /CreationDate (D:%s)\n"
967 if (n >=
sizeof(buf))
return false;
968 AppendPDFObject(buf);
969 n = snprintf(buf,
sizeof(buf),
972 "0000000000 65535 f \n", obj_);
973 if (n >=
sizeof(buf))
return false;
975 for (
int i = 1; i < obj_; i++) {
976 n = snprintf(buf,
sizeof(buf),
"%010ld 00000 n \n", offsets_[i]);
977 if (n >=
sizeof(buf))
return false;
980 n = snprintf(buf,
sizeof(buf),
994 if (n >=
sizeof(buf))
return false;
void AppendString(const char *s)
#define TESSERACT_VERSION_STR
const char * title() const
The documentation for this class was generated from the following files: