├── manual.pdf ├── .gitignore ├── src ├── config.h ├── cmd_exec.h ├── pdf_filters.h ├── debug.h ├── Makefile ├── geometry.h ├── common.h ├── debug.cpp ├── common.cpp ├── fileio.h ├── doc_edit.h ├── crypt.h ├── pdf_doc.h ├── geometry.cpp ├── fileio.cpp ├── main.cpp ├── pdf_objects.h ├── pdf_filters.cpp ├── doc_edit.cpp ├── crypt.cpp ├── pdf_doc.cpp └── pdf_objects.cpp ├── README.md ├── pdfcook.1 └── LICENSE /manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ksharindam/pdfcook/HEAD/manual.pdf -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Libraries 2 | *.so 3 | *.o 4 | 5 | # Compiled Binaries 6 | pdfcook 7 | 8 | # User created dirs 9 | build/ 10 | test/ 11 | -------------------------------------------------------------------------------- /src/config.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define PROG_VERSION "0.4.5" 4 | 5 | 6 | /* Do we have LZW decompression lib */ 7 | #define HAVE_LZW 1 8 | 9 | /* have asprintf() func */ 10 | #define HAVE_ASPRINTF 1 11 | -------------------------------------------------------------------------------- /src/cmd_exec.h: -------------------------------------------------------------------------------- 1 | /* This file is a part of pdfcook program, which is GNU GPLv2 licensed */ 2 | #pragma once 3 | #include "common.h" 4 | #include "fileio.h" 5 | #include "pdf_doc.h" 6 | #include 7 | 8 | 9 | class Command; 10 | 11 | typedef std::list CmdList; 12 | 13 | void parse_commands(CmdList &cmd_list, MYFILE *f); 14 | void doc_exec_commands(PdfDocument &doc, CmdList &cmd_list); 15 | 16 | void print_cmd_info(FILE *f); 17 | -------------------------------------------------------------------------------- /src/pdf_filters.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | /* This file is a part of pdfcook program, which is GNU GPLv2 licensed */ 3 | #include "common.h" 4 | #include "pdf_objects.h" 5 | 6 | 7 | int zlib_compress_filter(char **stream, size_t *len, DictObj &dict); 8 | int flate_decode_filter(char **stream, size_t *len, DictObj &dict); 9 | 10 | 11 | #if (HAVE_LZW) 12 | int lzw_decompress_filter(char **stream, size_t *len, DictObj &dict); 13 | #else 14 | #define lzw_decompress_filter NULL 15 | #endif 16 | 17 | typedef struct { 18 | const char *name; 19 | int (*filter)(char **stream, size_t *len, DictObj &dict); 20 | } stream_filters; 21 | 22 | int apply_filter(const char *name, char **stream, size_t *len, DictObj &dict, stream_filters *filters, size_t f_len); 23 | int apply_compress_filter(const char *name, char **stream, size_t *len, DictObj &dict); 24 | int apply_decompress_filter(const char *name, char **stream, size_t *len, DictObj &dict); 25 | 26 | -------------------------------------------------------------------------------- /src/debug.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | /* This file is a part of pdfcook program, which is GNU GPLv2 licensed */ 3 | #include 4 | #include 5 | #include 6 | 7 | 8 | extern int quiet_mode; 9 | 10 | /* Here we use two debug functions, debug() and message(). 11 | debug() is only for developer. message() is for user. 12 | for lower level errors or warnings that a normal user won't understand, 13 | debug() is used. 14 | for fatal errors and for user understandable errors message() is used. 
15 | */ 16 | 17 | /* message types */ 18 | enum { 19 | LOG,//only print message 20 | WARN,//print message with "warning : " prefix 21 | ERROR,//print message with "error : " prefix 22 | FATAL//print message with "error : " prefix, and exits program immediately 23 | }; 24 | 25 | void message(int type, const char *format, ...); 26 | 27 | // print message only when DEBUG is defined 28 | void debug(const char *format, ...); 29 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | 2 | CC = gcc 3 | CXX = g++ 4 | CFLAGS = -Wall -O2 5 | CXXFLAGS = -Wall -O2 -std=c++11 -DDEBUG 6 | INCLUDES = 7 | LFLAGS = -s 8 | LIBS = -lm -lz 9 | 10 | BUILD_DIR = ../build 11 | SOURCES = $(wildcard *.cpp) 12 | OBJS = $(SOURCES:%.cpp=$(BUILD_DIR)/%.o) 13 | 14 | pdfcook: ${OBJS} 15 | ${CXX} ${LFLAGS} -o $@ ${OBJS} ${LIBS} 16 | 17 | clean: 18 | rm -f $(BUILD_DIR)/*.o pdfcook 19 | 20 | # c 21 | $(BUILD_DIR)/%.o: %.c 22 | @mkdir -p $(@D) 23 | ${CC} ${CFLAGS} ${INCLUDES} -c $< -o $@ 24 | 25 | # c++ 26 | $(BUILD_DIR)/%.o: %.cpp 27 | @mkdir -p $(@D) 28 | ${CXX} ${CXXFLAGS} ${INCLUDES} -c $< -o $@ 29 | 30 | # requires full groff package installed 31 | manual: 32 | groff -m man -T pdf ../pdfcook.1 > ../manual.pdf 33 | 34 | install: pdfcook 35 | install pdfcook /usr/local/bin 36 | 37 | uninstall: 38 | rm /usr/local/bin/pdfcook 39 | 40 | installman: 41 | cp ../pdfcook.1 /usr/share/man/man1 42 | 43 | removeman: 44 | rm /usr/share/man/man1/pdfcook.1 45 | -------------------------------------------------------------------------------- /src/geometry.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | /* This file is a part of pdfcook program, which is GNU GPLv2 licensed */ 3 | #include "pdf_objects.h" 4 | 5 | class Point 6 | { 7 | public: 8 | float x = 0; 9 | float y = 0; 10 | 11 | Point(); 12 | Point(float x, float y); 13 | }; 
14 | 15 | class Rect { 16 | public: 17 | Point left; // bottom left (0,0) 18 | Point right;// top right 19 | 20 | bool isZero(); 21 | bool isLandscape(); 22 | bool getFromObject(PdfObject *src, ObjectTable &table); 23 | bool setToObject(PdfObject *dst); 24 | }; 25 | 26 | // transformation order : scale -> rotate -> translate 27 | 28 | class Matrix 29 | { 30 | public: 31 | float mat[3][3]; 32 | Matrix();// creates identity matrix 33 | Matrix( float m00, float m01, float m02, 34 | float m10, float m11, float m12, 35 | float m20, float m21, float m22); 36 | bool isIdentity(); 37 | void multiply(const Matrix &m); 38 | void scale (float scale); 39 | void rotate (float angle_deg); 40 | void translate (float dx, float dy); 41 | 42 | void transform (Point &point); 43 | void transform (Rect &dim); 44 | }; 45 | 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pdfcook 2 | Preprinting preparation tool for PDF ebooks. 3 | 4 | ### Build and Install 5 | Enter directory src 6 | `cd src` 7 | run 8 | ``` 9 | make -j4 10 | sudo make install 11 | ``` 12 | Install manpage 13 | `sudo make installman` 14 | 15 | **Windows Build** 16 | On windows create a folder build/ beside src/ directory. 17 | And edit Makefile and remove lines with 18 | ` @mkdir -p $(@D)` 19 | Then to build run... 
20 | `make -j4` 21 | 22 | ### Features 23 | * PDF v1.7 support 24 | * Decrypt encrypted PDFs 25 | * Join or Split PDFs 26 | * Scale to any paper size, with specified margin 27 | * Write Page numbers 28 | * Write text 29 | * Transform pages (rotate, flip, move) 30 | * Booklet format arrange 31 | * 2 or 4 pages per page (2-up, 4-up) 32 | * More readable output syntax for easy debugging 33 | 34 | ### Usage 35 | See manual page (PDF or man page) for detailed usage 36 | 37 | Scale to print in A4 size paper 38 | `pdfcook 'scaleto(a4)' input.pdf output.pdf` 39 | 40 | Add binding margin after scaling (? for odd pages, + for even pages) 41 | `pdfcook 'scaleto(a4) move(20){?} move(-20){+}' input.pdf output.pdf` 42 | 43 | Add page numbers 44 | `pdfcook 'number' input.pdf output.pdf` 45 | 46 | Booklet format 47 | `pdfcook 'book nup(2, paper=a4)' input.pdf output.pdf` 48 | -------------------------------------------------------------------------------- /src/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | /* This file is a part of pdfcook program, which is GNU GPLv2 licensed */ 3 | #ifndef _GNU_SOURCE 4 | #define _GNU_SOURCE// for asprintf() 5 | #endif 6 | #include "config.h" 7 | #include 8 | #include 9 | #include // memcpy and other string func 10 | #include 11 | #include 12 | //#include // uint32_t type 13 | //#include // toupper() isspace() etc 14 | 15 | extern bool repair_mode; 16 | 17 | typedef unsigned int uint; 18 | // M_PI is not available in mingw32, so using and defining PI 19 | #define PI 3.14159265358979323846 20 | 21 | #if (!HAVE_ASPRINTF) 22 | int asprintf(char **strp, const char *fmt, ...); 23 | #endif 24 | 25 | // check if string s1 starts with s2 26 | #define starts(s1, s2) (strncmp(s1,s2,strlen(s2)) == 0) 27 | 28 | #define MAX(a,b) ((a)>(b) ? (a):(b)) 29 | #define MIN(a,b) ((a)<(b) ? 
(a):(b)) 30 | 31 | // read a big endian integer provided as char array 32 | int arr2int(char *arr, int len); 33 | 34 | // like %f but strips trailing zeros 35 | std::string double2str(double num); 36 | 37 | // like malloc() but exits the program when allocation fails. use this where little memory 38 | // is needed, and where we can not ignore the allocation failure 39 | inline void* malloc2(size_t size) 40 | { 41 | void *ptr = malloc(size); 42 | if (size!=0 && !ptr){// a NULL result for a zero size request is not treated as failure 43 | fprintf(stdout, "error : malloc() failed !\n");// NOTE(review): reported on stdout, while message() in debug.cpp writes to stderr 44 | exit(1); 45 | } 46 | return ptr; 47 | } 48 | -------------------------------------------------------------------------------- /src/debug.cpp: -------------------------------------------------------------------------------- 1 | /* This file is a part of pdfcook program, which is GNU GPLv2 licensed */ 2 | #include "debug.h" 3 | #include 4 | #include 5 | #include 6 | 7 | int quiet_mode = 0; 8 | 9 | #define MAX_MSG_LEN 255 /* maximum formatted message length */ 10 | // print a formatted message (prefixed according to type) on stderr; exits the program when type is FATAL 11 | void message(int type, const char *format, ...) 12 | { 13 | if (quiet_mode && type!=FATAL)// quiet mode suppresses everything except FATAL 14 | return; 15 | char msgbuf[MAX_MSG_LEN+1] = {}; /* buffer in which to put the message */ 16 | char *bufptr = msgbuf ; /* message buffer pointer */ 17 | int pos = 0; 18 | // should put newline if column is not 0 in terminal 19 | if (type==WARN) { 20 | snprintf(bufptr, 11, "warning : "); 21 | bufptr += 10; 22 | pos += 10; 23 | } 24 | else if (type==ERROR || type==FATAL) { 25 | snprintf(bufptr, 9, "error : "); 26 | bufptr += 8; 27 | pos += 8; 28 | } 29 | va_list args ; 30 | va_start(args, format); 31 | vsnprintf(bufptr, MAX_MSG_LEN-pos, format, args);// text that does not fit in the remaining space is truncated 32 | va_end(args); 33 | // write the string followed by a newline to stderr 34 | fwrite(msgbuf, strlen(msgbuf), 1, stderr); 35 | fwrite("\n", 1, 1, stderr); 36 | if ( type==FATAL ) // exit program after the FATAL msg 37 | exit(1) ; 38 | } 39 | 40 | void debug(const char *format, ...)
41 | { 42 | #ifdef DEBUG 43 | va_list args ; 44 | va_start(args, format); 45 | vprintf(format, args); 46 | va_end(args); 47 | printf("\n"); 48 | #endif 49 | } 50 | -------------------------------------------------------------------------------- /src/common.cpp: -------------------------------------------------------------------------------- 1 | /* This file is a part of pdfcook program, which is GNU GPLv2 licensed */ 2 | #include "common.h" 3 | 4 | // read a big endian integer provided as char array 5 | int arr2int(char *arr, int len) 6 | { 7 | unsigned char tmp[4] = {}; 8 | for (int i=0; i 18 | int asprintf(char **strp, const char *fmt, ...){ 19 | /* Guess we need no more than 100 bytes. */ 20 | int n, size = 100; 21 | char *p, *np; 22 | va_list ap; 23 | 24 | if ((p = (char*)malloc(size)) == NULL){ 25 | return -1; 26 | } 27 | 28 | while (1) { 29 | /* Try to print in the allocated space. */ 30 | va_start(ap, fmt); 31 | n = vsnprintf (p, size, fmt, ap); 32 | va_end(ap); 33 | /* If that worked, return the string. */ 34 | if ((n > -1) && (n < size)){ 35 | *strp = p; 36 | return n; 37 | } 38 | /* Else try again with more space. */ 39 | if (n > -1) /* glibc 2.1 */ 40 | size = n+1; /* precisely what is needed */ 41 | else /* glibc 2.0 */ 42 | size *= 2; /* twice the old size */ 43 | if ((np = (char*)realloc (p, size)) == NULL) { 44 | free(p); 45 | return -1; 46 | } else { 47 | p = np; 48 | } 49 | } 50 | return -1; 51 | } 52 | #endif 53 | 54 | // like %f but strips trailing zeros 55 | std::string double2str(double real) 56 | { 57 | int len = std::snprintf(nullptr, 0, "%f", real);// get length 58 | char buf[len+1]; 59 | std::snprintf(buf, len+1, "%f", real); 60 | while (buf[len-1]=='0')// strip trailing zeros 61 | len--; 62 | if (buf[len-1]=='.')// keep a zero after decimal point eg. 
2.0 63 | len++; 64 | return std::string(buf, len); 65 | } 66 | -------------------------------------------------------------------------------- /src/fileio.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | /* This file is a part of pdfcook program, which is GNU GPLv2 licensed */ 3 | #include 4 | #include // toupper() isspace() etc 5 | 6 | typedef struct { 7 | FILE *f; 8 | unsigned char *buf;// must be unsigned, otherwise char 255 becomes -1 (i.e EOF) 9 | unsigned char *ptr; 10 | unsigned char *end; 11 | long pos;// offset of *end from the beginning file/string 12 | int eof;// eof==EOF if no data left to read from file to internal buffer 13 | // used in command parsing 14 | int column; 15 | int row; 16 | int lastc; 17 | } MYFILE; 18 | 19 | // returns current seek position 20 | #define myftell(f) ((f->pos)-((f->end) - (f->ptr))) 21 | // returns true if seek pos is at the end 22 | #define myfeof(f) (f->eof==EOF && (f->ptr>=f->end)) 23 | // returns the current char and seek 1 byte forward 24 | #define mygetc(f) ((f->ptr < f->end) ? *(f->ptr)++ : slow_mygetc(f)) 25 | // seek 1 byte backward 26 | #define myungetc(f) (f->ptr = ((f->ptr)>(f->buf)) ? (f->ptr-1) : (f->buf)) 27 | 28 | 29 | // open a file stream by given filename 30 | MYFILE * myfopen(const char * filename, const char *mode); 31 | // close a stream 32 | int myfclose(MYFILE *stream); 33 | 34 | // returns 0 on success and -1 on failure 35 | int myfseek(MYFILE *stream, long offset, int origin); 36 | 37 | // read size*nmemb bytes from *stream and put data in *where 38 | size_t myfread(void *where, size_t size, size_t nmemb, MYFILE *stream); 39 | 40 | // read string upto next newline or atmost size-1 bytes. 
41 | // Unlike fgets() it does not put newline character 42 | char* myfgets(char *line, int size, MYFILE *stream); 43 | // it is getc() for MYFILE when re-reading the file is needed to fill buffer 44 | int slow_mygetc(MYFILE * f); 45 | 46 | // create a MYFILE from null terminated string 47 | MYFILE * stropen(const char *str); 48 | // create a MYFILE any stream with given len 49 | MYFILE * streamopen(const char *str, size_t len); 50 | 51 | inline void skipspace(MYFILE *f) { 52 | int c; 53 | while ((c = mygetc(f))!=EOF && isspace(c)); 54 | if (c!=EOF) 55 | myungetc(f); 56 | } 57 | 58 | bool file_exist (const char *name); 59 | -------------------------------------------------------------------------------- /src/doc_edit.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | /* This file is a part of pdfcook program, which is GNU GPLv2 licensed */ 3 | #include "pdf_doc.h" 4 | #include 5 | 6 | typedef enum { 7 | PAGE_SET_ALL, 8 | PAGE_SET_ODD, 9 | PAGE_SET_EVEN, 10 | PAGE_SET_RANGE 11 | } PageSetType; 12 | 13 | 14 | class PageRange 15 | { 16 | public: 17 | PageSetType type; 18 | int16_t begin; 19 | int16_t end; 20 | bool negative; 21 | 22 | PageRange (); 23 | PageRange (PageSetType _type); 24 | PageRange (int begin, int end, bool neg); 25 | }; 26 | 27 | typedef std::vector::iterator PageNumIter; 28 | 29 | class PageRanges { 30 | public: 31 | std::list array; 32 | std::vector page_num_array; 33 | 34 | void append(PageRange range); 35 | // convert list of PageRange to list of page numbers 36 | void initPageNums(int max_page_num); 37 | void sort(); 38 | void clear(); 39 | PageNumIter begin(); 40 | PageNumIter end(); 41 | }; 42 | 43 | 44 | bool doc_pages_transform(PdfDocument &doc, PageRanges &pages, Matrix mat); 45 | 46 | bool doc_pages_scaleto (PdfDocument &doc, PageRanges &pages, Rect paper, 47 | float top, float right, float bottom, float left); 48 | 49 | bool doc_pages_translate(PdfDocument &doc, PageRanges &pages, 
float x, float y); 50 | 51 | bool doc_pages_delete(PdfDocument &doc, PageRanges &pages); 52 | 53 | bool doc_pages_arrange(PdfDocument &doc, PageRanges &pages); 54 | 55 | bool doc_pages_number(PdfDocument &doc, PageRanges &pages, 56 | int x, int y, int start, const char *text, int size, const char *font); 57 | bool doc_pages_text(PdfDocument &doc, PageRanges &pages, 58 | int x, int y, const char *text, int size, const char *font); 59 | 60 | bool doc_pages_crop (PdfDocument &doc, PageRanges &pages, Rect box); 61 | 62 | bool doc_pages_set_paper_size (PdfDocument &doc, PageRanges &pages, Rect paper); 63 | 64 | bool add_new_paper_size (std::string name, float w, float h); 65 | 66 | typedef enum { 67 | ORIENT_AUTO, 68 | ORIENT_PORTRAIT, 69 | ORIENT_LANDSCAPE 70 | } Orientation; 71 | 72 | bool set_paper_from_name(Rect &paper, std::string name, Orientation orientation); 73 | void paper_set_orientation (Rect &paper, Orientation orientation); 74 | void print_paper_sizes(); 75 | -------------------------------------------------------------------------------- /src/crypt.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "pdf_objects.h" 4 | 5 | 6 | class Crypt 7 | { 8 | public: 9 | bool can_decrypt; 10 | Crypt(); 11 | bool decryptionSupported(); 12 | bool authenticate(const char *password); 13 | bool getEncryptionInfo(PdfObject *encrypt_dict, PdfObject *p_trailer); 14 | void decryptIndirectObject(PdfObject *obj, int obj_no, int gen_no); 15 | private: 16 | int version; 17 | int revision; 18 | int keylen;// in bytes 19 | int perm; 20 | std::string O; 21 | std::string U; 22 | std::string id0; 23 | 24 | std::string encryption_key;// calculated from password, /O, permission and trailer ID 25 | bool authenticateUserPassword(std::string password); 26 | }; 27 | 28 | 29 | 30 | typedef unsigned char uchar; 31 | 32 | class RC4 33 | { 34 | public: 35 | uchar init_state[256];// initial state 36 | 37 | RC4(std::string key); 
38 | void crypt(uchar *data, int len); 39 | }; 40 | 41 | 42 | 43 | // a small class for calculating MD5 hashes of strings or byte arrays 44 | // it is not meant to be fast or secure 45 | // assumes that char is 8 bit and int is 32 bit 46 | class MD5 47 | { 48 | public: 49 | uchar digest[16];// the result 50 | 51 | MD5(); 52 | MD5(const std::string& text); 53 | void init(); 54 | void update(const uchar *buf, size_t length); 55 | void update(const char *buf, size_t length); 56 | MD5& finalize(); 57 | 58 | private: 59 | typedef uint8_t uint1; 60 | typedef uint32_t uint4; 61 | enum {blocksize = 64}; // VC6 won't eat a const static int here 62 | 63 | void transform(const uint1 block[blocksize]); 64 | static void decode(uint4 output[], const uint1 input[], size_t len); 65 | static void encode(uint1 output[], const uint4 input[], size_t len); 66 | 67 | bool finalized; 68 | uint1 buffer[blocksize]; // bytes that didn't fit in last 64 byte chunk 69 | uint4 count[2]; // 64bit counter for number of bits (lo, hi) 70 | uint4 state[4]; // digest so far 71 | 72 | // low level logic operations 73 | static inline uint4 F(uint4 x, uint4 y, uint4 z); 74 | static inline uint4 G(uint4 x, uint4 y, uint4 z); 75 | static inline uint4 H(uint4 x, uint4 y, uint4 z); 76 | static inline uint4 I(uint4 x, uint4 y, uint4 z); 77 | static inline uint4 rotate_left(uint4 x, int n); 78 | static inline void FF(uint4 &a, uint4 b, uint4 c, uint4 d, uint4 x, uint4 s, uint4 ac); 79 | static inline void GG(uint4 &a, uint4 b, uint4 c, uint4 d, uint4 x, uint4 s, uint4 ac); 80 | static inline void HH(uint4 &a, uint4 b, uint4 c, uint4 d, uint4 x, uint4 s, uint4 ac); 81 | static inline void II(uint4 &a, uint4 b, uint4 c, uint4 d, uint4 x, uint4 s, uint4 ac); 82 | }; 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /src/pdf_doc.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | /* This file is a part of 
pdfcook program, which is GNU GPLv2 licensed */ 3 | #include "pdf_objects.h" 4 | #include "geometry.h" 5 | #include "crypt.h" 6 | #include 7 | 8 | class PdfDocument; 9 | 10 | typedef struct { 11 | const char *name; 12 | int major; 13 | int minor; 14 | } Font; 15 | 16 | void print_font_names(); 17 | 18 | 19 | class PdfPage 20 | { 21 | public: 22 | Rect paper; 23 | Matrix matrix; 24 | int major;// of Page Object 25 | int minor; 26 | bool compressed;// converted to xobject 27 | PdfDocument *doc; 28 | 29 | PdfPage(); 30 | Rect pageSize();// get CropBox or MediaBox 31 | void drawLine (Point begin, Point end, float width); 32 | void drawText (const char *text, Point &pos, int size, Font font); 33 | void crop (Rect box); 34 | void mergePage (PdfPage &p2); 35 | void transform (Matrix mat); 36 | void applyTransformation (); 37 | //void duplicateContent(); 38 | }; 39 | 40 | typedef std::vector::iterator PageIter; 41 | 42 | class PageList 43 | { 44 | public: 45 | std::vector array; 46 | 47 | int count(); 48 | void append(PdfPage &page); 49 | void remove(int index);// page index val is one less than page number 50 | void clear(); 51 | // allows range based for-loop 52 | PageIter begin(); 53 | PageIter end(); 54 | // allows indexing operator 55 | PdfPage& operator[] (int index) { 56 | assert( index>=0 && index<(int)array.size() ); 57 | return array[index]; 58 | } 59 | }; 60 | 61 | class PdfDocument 62 | { 63 | public: 64 | const char *filename; 65 | int v_major; 66 | int v_minor; 67 | //List of PdfPage 68 | PageList page_list; 69 | ObjectTable obj_table; 70 | PdfObject *trailer; 71 | 72 | bool encrypted; 73 | bool have_encrypt_info; 74 | bool decryption_supported; 75 | Crypt crypt; 76 | 77 | PdfDocument(); 78 | ~PdfDocument(); 79 | 80 | bool getPdfHeader (MYFILE *f, char *line); 81 | bool getPdfTrailer (MYFILE *f, char *line, long offset); 82 | bool getAllPages (MYFILE *f); 83 | bool getPdfPages (MYFILE *f, int major, int minor); 84 | bool open (const char *fname); 85 | bool 
decrypt(const char *password); 86 | void mergeDocument(PdfDocument &doc); 87 | 88 | void putPdfPages(); 89 | bool save (const char *filename); 90 | 91 | Font newFontObject(const char *font); 92 | bool newBlankPage(int page_num); 93 | void applyTransformations(); 94 | }; 95 | 96 | /* -------- Handling Errors ----------- 97 | 1. free obj is referenced by an indirect obj 98 | sol. - before saving those indirect ref objs are changed to null obj. 99 | 100 | 2. object offset is 0 for nonfree obj in Object table entry 101 | sol. - the obj is set a null obj 102 | 103 | 3. obj no 0 is nonfree obj in object table 104 | sol. - obj 0 is set as free obj 105 | */ 106 | -------------------------------------------------------------------------------- /src/geometry.cpp: -------------------------------------------------------------------------------- 1 | /* This file is a part of pdfcook program, which is GNU GPLv2 licensed */ 2 | #include "common.h" 3 | #include "geometry.h" 4 | #include "debug.h" 5 | 6 | static Point min_coordinate (Point p1, Point p2); 7 | static Point max_coordinate (Point p1, Point p2); 8 | 9 | 10 | Point:: Point() : x(0), y(0) 11 | { } 12 | 13 | Point:: Point(float _x, float _y) : x(_x), y(_y) 14 | { } 15 | 16 | 17 | bool Rect:: isZero() 18 | { 19 | return (left.x==0 && left.y==0 && right.x==0 && right.y==0); 20 | } 21 | 22 | bool Rect:: isLandscape() 23 | { 24 | return right.x > right.y; 25 | } 26 | 27 | 28 | bool Rect:: getFromObject(PdfObject *src, ObjectTable &obj_table) 29 | { 30 | if (!src) 31 | return false; 32 | if (isRef(src)) { 33 | src = obj_table.getObject(src->indirect.major, src->indirect.minor); 34 | } 35 | assert(isArray(src)); 36 | //message(WARN, "failed to get Rect : obj type isn't array"); 37 | int val, i=0; 38 | for (auto iter=src->array->begin(); iter!=src->array->end(); iter++, i++){ 39 | PdfObject *obj = (*iter); 40 | if (!isInt(obj) && !isReal(obj)){ 41 | message(WARN,"failed to get Rect : array item isn't number"); 42 | continue; 43 | 
} 44 | val = round( isReal(obj) ? (obj->real) : (obj->integer)); 45 | switch (i) { 46 | case 0: 47 | left.x = val; 48 | break; 49 | case 1: 50 | left.y = val; 51 | break; 52 | case 2: 53 | right.x = val; 54 | break; 55 | case 3: 56 | right.y = val; 57 | break; 58 | default: 59 | message(WARN,"wrong boundaries"); 60 | } 61 | } 62 | if (i<4){ 63 | message(FATAL, "wrong boundaries");//todo warn only 64 | } 65 | return true; 66 | } 67 | // write this rect into dst as an array [ left.x left.y right.x right.y ]; returns false if dst is NULL or the conversion fails 68 | bool Rect:: setToObject(PdfObject *dst) 69 | { 70 | if (dst==NULL) return false; 71 | 72 | char *str = NULL; 73 | // asprintf() returns -1 on failure and then str must not be used 74 | if (asprintf(&str,"[ %f %f %f %f ]", left.x, left.y, right.x, right.y) < 0) return false; 75 | dst->clear();//free previous data (if any) 76 | bool ok = dst->readFromString(str);// keep the call outside assert(), NDEBUG builds drop the whole assert expression 77 | free(str); 78 | assert (ok); 79 | return ok; 80 | } 81 | 82 | 83 | Matrix:: Matrix() : Matrix(1,0,0, 0,1,0, 0,0,1) 84 | { 85 | } 86 | 87 | Matrix:: Matrix(float m00, float m01, float m02, 88 | float m10, float m11, float m12, 89 | float m20, float m21, float m22) 90 | { 91 | mat[0][0]=m00; mat[0][1]=m01; mat[0][2]=m02; 92 | mat[1][0]=m10; mat[1][1]=m11; mat[1][2]=m12; 93 | mat[2][0]=m20; mat[2][1]=m21; mat[2][2]=m22; 94 | } 95 | 96 | bool Matrix:: isIdentity() 97 | { 98 | if ( mat[0][0]==1 && mat[0][1]==0 && mat[0][2]==0 99 | && mat[1][0]==0 && mat[1][1]==1 && mat[1][2]==0 100 | && mat[2][0]==0 && mat[2][1]==0 && mat[2][2]==1) { 101 | return true; 102 | } 103 | return false; 104 | } 105 | 106 | // A.B = AB 107 | void Matrix:: multiply (const Matrix &B) 108 | { 109 | Matrix AB; 110 | for (int i=0; i<3; ++i){//each row in 1st matrix 111 | for (int j=0; j<3; ++j){// each col in 2nd matrix 112 | AB.mat[i][j] = 0; 113 | for (int k=0; k<3; ++k){ 114 | AB.mat[i][j] += mat[i][k] * B.mat[k][j]; 115 | } 116 | } 117 | } 118 | memcpy(&mat, &AB.mat, sizeof(mat)); 119 | } 120 | 121 | // transformation order : scale -> rotate -> translate 122 | void Matrix:: scale (float scale) 123 | { 124 | Matrix matrix(scale,0,0, 0,scale,0, 0,0,1); 125 | this->multiply(matrix); 126 | } 127 | 128 | void
Matrix:: rotate (float angle_deg) 129 | { 130 | // intended to round off so that value of cos90 becomes zero instead of 6.12323e-17. NOTE(review): narrowing to float alone does not do that (6.12323e-17 is representable as float) - confirm intent 131 | float sinx = sin((angle_deg*PI)/180.0); 132 | float cosx = cos((angle_deg*PI)/180.0); 133 | Matrix matrix(cosx,-sinx,0, sinx,cosx,0, 0,0,1);// rotation along z axis 134 | this->multiply(matrix); 135 | } 136 | // multiply this matrix by a translation of (dx, dy) 137 | void Matrix:: translate (float dx, float dy) 138 | { 139 | Matrix matrix(1,0,0, 0,1,0, dx,dy,1); 140 | this->multiply(matrix); 141 | } 142 | // transform point in place, treating it as the row vector [x y 1] multiplied by mat 143 | void Matrix:: transform (Point &point) 144 | { 145 | float x = point.x; 146 | float y = point.y; 147 | point.x = x* mat[0][0] + y* mat[1][0] + mat[2][0]; 148 | point.y = x* mat[0][1] + y* mat[1][1] + mat[2][1]; 149 | } 150 | 151 | /* apply transformation matrix, then set min coordinate as bottom left 152 | and max coordinate as top right coordinate 153 | _________ 154 | | p2 | 155 | | | 156 | | p1 | 157 | */ 158 | void Matrix:: transform (Rect &dim) 159 | { 160 | Point p1 = dim.left; 161 | Point p2 = dim.right; 162 | // transforming only the 2 opposite corners is enough only while rotations are limited to multiples of 90 deg 163 | transform(p1); 164 | transform(p2); 165 | 166 | dim.left = min_coordinate(p1, p2); 167 | dim.right = max_coordinate(p1, p2); 168 | } 169 | 170 | 171 | static Point max_coordinate (Point p1, Point p2) 172 | { 173 | p1.x = MAX(p1.x, p2.x); 174 | p1.y = MAX(p1.y, p2.y); 175 | return p1; 176 | } 177 | 178 | static Point min_coordinate (Point p1, Point p2) 179 | { 180 | p1.x = MIN(p1.x, p2.x); 181 | p1.y = MIN(p1.y, p2.y); 182 | return p1; 183 | } 184 | 185 | /* 186 | Rect max_dimension (Rect d1, Rect d2) 187 | { 188 | if (d1.isZero()) { 189 | return d2; 190 | } 191 | if (d2.isZero()) { 192 | return d1; 193 | } 194 | d1.left = min_coordinate(d1.left, d2.left); 195 | d1.right = max_coordinate(d1.right, d2.right); 196 | return d1; 197 | } 198 | */ 199 | -------------------------------------------------------------------------------- /src/fileio.cpp:
-------------------------------------------------------------------------------- 1 | /* This file is a part of pdfcook program, which is GNU GPLv2 licensed */ 2 | #include 3 | #include 4 | #include "fileio.h" 5 | #include "debug.h" 6 | #include "common.h" 7 | 8 | // 16KB buffer for MYFILE 9 | #define BUFSIZE 16384 10 | 11 | #define crlf(x) (((x)=='\r') || ((x)=='\n')) 12 | 13 | // create a MYFILE from a null terminated string; returns NULL if str is NULL or allocation fails 14 | MYFILE * stropen(const char *str) 15 | { 16 | if (str==NULL) 17 | return NULL; 18 | size_t len = strlen(str); 19 | return streamopen(str, len); 20 | } 21 | 22 | // read the whole string in buffer, MYFILE uses the buffer to read data 23 | MYFILE * streamopen(const char *str, size_t len) 24 | { 25 | if (str==NULL){ 26 | return NULL; 27 | } 28 | MYFILE *f = (MYFILE*) malloc2(sizeof(MYFILE)); 29 | f->f = NULL; 30 | 31 | // allocate buffer and read whole string. malloc(0) may return NULL, so an empty stream still asks for 1 byte 32 | f->buf = (unsigned char *) malloc(len ? len : 1); 33 | if (f->buf==NULL){ 34 | free(f); 35 | return NULL; 36 | } 37 | memcpy(f->buf, str, len); 38 | f->ptr = f->buf; 39 | f->end = f->buf + len; 40 | f->pos = len; 41 | f->eof = EOF;// means no data left to read from string 42 | f->row = 1; 43 | f->column = 0; 44 | f->lastc = 0; 45 | return f; 46 | } 47 | 48 | /* Open a file from filename and mode, creates a buffer. 49 | this buffer is used to store and read file data. */ 50 | MYFILE * myfopen(const char *filename, const char *mode) 51 | { 52 | MYFILE *f = (MYFILE*) malloc2(sizeof(MYFILE)); 53 | 54 | f->f = fopen(filename, mode); 55 | 56 | if (f->f==NULL){ 57 | free(f); 58 | return NULL; 59 | } 60 | 61 | f->buf = (unsigned char*) malloc(BUFSIZE); 62 | if (f->buf==NULL){ 63 | fclose(f->f); 64 | free(f); 65 | return NULL; 66 | } 67 | f->ptr = f->end = f->buf;// this indicates we have not read buffer 68 | f->pos = 0; 69 | f->eof = 0; 70 | f->row = 1; f->column = 0; f->lastc = 0;// same initial state as streamopen(), these fields were left uninitialized 71 | return f; 72 | } 73 | 74 | int myfclose(MYFILE *stream) 75 | { 76 | int ret = stream->f ?
fclose(stream->f) : 0; 77 | free(stream->buf); 78 | free(stream); 79 | if (ret==EOF){ 80 | return -1; 81 | } 82 | return 0; 83 | } 84 | 85 | int myfseek(MYFILE *stream, long offset, int origin) 86 | { 87 | if (stream->f==NULL) 88 | { 89 | switch (origin){ 90 | case SEEK_SET: 91 | stream->ptr = stream->buf + offset; 92 | break; 93 | case SEEK_END: 94 | stream->ptr = stream->end + offset; 95 | break; 96 | case SEEK_CUR: 97 | stream->ptr = stream->ptr + offset; 98 | break; 99 | } 100 | if (stream->ptr < stream->buf || stream->ptr > stream->end){ 101 | return -1; 102 | } 103 | return 0; 104 | } 105 | stream->eof = fseek(stream->f, offset, origin); 106 | if (stream->eof==0) { 107 | stream->pos = ftell(stream->f); 108 | stream->ptr = stream->end = stream->buf; 109 | return 0; 110 | } 111 | stream->eof = EOF; 112 | return -1; 113 | } 114 | 115 | // it gets called only when re-reading of the whole buffer needed. 116 | int slow_mygetc(MYFILE *f) 117 | { 118 | if (myfeof(f)){ 119 | return EOF; 120 | } 121 | // does not reach here if MYFILE created from file (not from string) 122 | size_t len = fread(f->buf, 1, BUFSIZE, f->f); 123 | f->pos += len; 124 | f->ptr = f->buf; 125 | f->end = f->buf + len; 126 | if (len!=BUFSIZE){ 127 | f->eof = EOF; 128 | if (len==0) 129 | return EOF; 130 | } 131 | return *(f->ptr)++; 132 | } 133 | 134 | 135 | size_t myfread (void *where, size_t size, size_t nmemb, MYFILE *stream) 136 | { 137 | char *str = (char *) where; 138 | size_t read; 139 | int c; 140 | long pos = myftell(stream); 141 | 142 | if (stream->f != NULL){ 143 | if (myfseek(stream, pos, SEEK_SET)==-1){ 144 | message(FATAL,"seek error"); 145 | } 146 | read = fread(where, size, nmemb, stream->f); 147 | stream->pos = ftell(stream->f); 148 | stream->ptr = stream->end = stream->buf; 149 | return read; 150 | } 151 | // if MYFILE was created from string 152 | for (read=0; readeof = EOF; 187 | *(line-1) = 0; 188 | return NULL; 189 | } 190 | 191 | switch (*(line-1)) { 192 | case '\n': 193 | 
*(line-1) = 0; 194 | break; 195 | case '\r': 196 | *(line-1) = 0; 197 | 198 | if (mygetc(f)!='\n') { 199 | myungetc(f); 200 | } 201 | break; 202 | } 203 | *line = 0; 204 | return buf; 205 | } 206 | 207 | bool file_exist (const char *name) 208 | { 209 | FILE *f = fopen(name,"r"); 210 | if (f==NULL) { 211 | return false; 212 | } 213 | fclose(f); 214 | return true; 215 | } 216 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | pdfcook : A prepress preparation tool for PDF files 3 | Copyright (C) 2021-2024 Arindam Chaudhuri 4 | 5 | This program is free software; you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation; either version 2 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along 16 | with this program; if not, see . 17 | */ 18 | #include "common.h" 19 | #include "debug.h" 20 | #include "pdf_doc.h" 21 | #include "doc_edit.h" 22 | #include "cmd_exec.h" 23 | #include 24 | #include 25 | 26 | /* when no commands are provided, no used pdf objects are removed, dict filters not applied. 27 | As new single Xref table created, so /Prev entry is removed from trailer dict. */ 28 | bool repair_mode = false; 29 | 30 | 31 | char pusage[][LLEN] = { 32 | "Usage: pdfcook [] [] ... ", 33 | " -h Display this help screen", 34 | " -q --quiet Supress warning and log messages", 35 | " --fonts Show available standard font names", 36 | " -p --papers Show available paper sizes", 37 | "commands: ' ... 
'", 38 | "command: name(arg_1, ... arg_name=arg_value){page_range1 page_range2 ...}", 39 | "args eg. : 12, 12.0, a4, \"Helvetica\"", 40 | " 612.0 (without unit pt) or 8.5in (with unit mm,cm,in)", 41 | "list of commands :" 42 | }; 43 | 44 | static void print_help (FILE * stream, int exit_code) 45 | { 46 | fprintf(stream, "pdfcook %s\n", PROG_VERSION); 47 | for (size_t i = 0; i < sizeof(pusage) / LLEN; ++i){ 48 | fprintf(stream, "%s\n", pusage[i]); 49 | } 50 | print_cmd_info(stream); 51 | exit(exit_code); 52 | } 53 | // if an option requires argument, put a colon (:) after it in shortoptions 54 | static const char *short_options = "hqfp"; 55 | // here, in 4th column, any integer can be used instead 56 | static struct option long_options[] = { 57 | {"help", no_argument, 0, 'h'}, 58 | {"quiet", no_argument, 0, 'q'}, 59 | {"fonts", no_argument, 0, 'f'}, 60 | {"papers", no_argument, 0, 'p'}, 61 | {NULL, 0, 0, 0} 62 | }; 63 | 64 | typedef struct { 65 | int infile; 66 | int outfile; 67 | char *commands; 68 | } Conf; 69 | 70 | 71 | static void parseargs (int argc, char *argv[], Conf * conf) 72 | { 73 | conf->infile = -1; 74 | conf->outfile = -1; 75 | conf->commands = NULL; 76 | int next_opt; 77 | while ((next_opt = getopt_long(argc, argv, short_options, long_options, NULL))!= -1) { 78 | 79 | switch (next_opt) { 80 | case '?'://unknown option, or option requires argument but not provided 81 | case 'h': 82 | print_help(stderr, 1); 83 | break; 84 | case 'q': 85 | quiet_mode = 1; 86 | break; 87 | case 'f': 88 | print_font_names(); 89 | exit(1); 90 | case 'p': 91 | print_paper_sizes(); 92 | exit(1); 93 | } 94 | } 95 | // now optind is index of first non-option argument 96 | switch (argc-optind) { 97 | case 1: 98 | conf->infile = optind; 99 | break; 100 | case 2: 101 | conf->infile = optind; 102 | conf->outfile = optind + 1; 103 | repair_mode = true; 104 | break; 105 | case 3: 106 | conf->commands = argv[optind]; 107 | conf->infile = optind + 1; 108 | conf->outfile = optind + 2; 
109 | break; 110 | default:// for more than 3 args, first is command, last is outfile and rest are infiles 111 | if (argc-optind>3) { 112 | conf->commands = argv[optind]; 113 | conf->infile = optind + 1; 114 | conf->outfile = argc - 1; 115 | break; 116 | } 117 | print_help(stderr, 1);//no argument 118 | break; 119 | } 120 | } 121 | 122 | 123 | bool open_document(PdfDocument &doc, char *filename) 124 | { 125 | if (not doc.open(filename)) 126 | message(FATAL, "Failed to open file '%s'", filename); 127 | 128 | if (doc.encrypted) { 129 | if (doc.decryption_supported) { 130 | printf("Enter Password : "); 131 | char pwd[128]; 132 | scanf("%s", pwd); 133 | if (!doc.decrypt(pwd)) 134 | return false; 135 | } 136 | else return false; 137 | } 138 | return true; 139 | } 140 | 141 | int main (int argc, char *argv[]) 142 | { 143 | // parse command line arguments 144 | Conf conf; 145 | parseargs(argc, argv, &conf);// if no args given, program exits here 146 | 147 | PdfDocument doc; 148 | if (not open_document(doc, argv[conf.infile])) 149 | return -1; 150 | // read all other input files (if any) and join them 151 | for (int i=conf.infile + 1; i0; i++){ 152 | PdfDocument new_doc; 153 | if (not open_document(new_doc, argv[i])) 154 | return -1; 155 | doc.mergeDocument(new_doc); 156 | } 157 | // build command tree 158 | CmdList cmd_list; 159 | MYFILE *commands = stropen(conf.commands); 160 | if (commands != NULL) { 161 | parse_commands(cmd_list, commands); 162 | myfclose(commands); 163 | // execute command tree 164 | doc_exec_commands(doc, cmd_list); 165 | } 166 | 167 | if (conf.outfile != -1){ 168 | if (not doc.save( argv[conf.outfile] )) 169 | return -1; 170 | } 171 | return 0; 172 | } 173 | 174 | 175 | -------------------------------------------------------------------------------- /pdfcook.1: -------------------------------------------------------------------------------- 1 | .TH pdfcook 1 "2021-04-10" 2 | .SH NAME 3 | pdfcook - tool for prepress preparation of PDF documents 4 | 5 
| .SH SYNOPSIS 6 | .B pdfcook 7 | [OPTIONS] [\fIcommands\fR] \fIinfile\fR [infile2..] [\fIoutfile\fR] 8 | 9 | .SH DESCRIPTION 10 | .I pdfcook 11 | is used for preparing PDF documents before printing. 12 | .br 13 | It can split, join PDFs, 14 | add page numbers, text, draw lines, scale, rotate, add binding margin, 15 | arrange in 2-up, 4-up, booklet format. 16 | 17 | .SH OPTIONS 18 | .TP 19 | .B "\-q \-\-quiet" 20 | Suppress warnings 21 | .TP 22 | .B "\-p \-\-papers" 23 | Show list of available paper sizes 24 | .TP 25 | .B " \-\-fonts" 26 | Show available standard fonts 27 | 28 | .SH COMMANDS 29 | Commands follow this syntax : 30 | .RS 31 | .TP 10 32 | .I commands 33 | = "command1 command2 ..." 34 | .TP 35 | .I command 36 | = name(arg, name1=arg1 ...){page_selections} 37 | .TP 38 | .I {page_selections} 39 | space separated list of page_selection inside curly bracket. 40 | All commands support page_selections. 41 | 42 | .RE 43 | Argument types : 44 | .RS 45 | .TP 12 46 | int : 47 | an integer number (1,2, 45 etc) 48 | .TP 49 | real : 50 | a number with or without decimal point. (5, 1.2 etc) 51 | .TP 52 | measure : 53 | An integer or real followed by a unit like pt,cm,mm,in. 54 | If unit is omitted, the unit is assumed as point (pt). 55 | eg- 595, 595.0, 210mm, 21.0cm 56 | .TP 57 | id : 58 | An identifier is unquoted string. eg- a4, landscape 59 | .TP 60 | str : 61 | Double-quoted string. eg- "Page No %d" 62 | 63 | .RE 64 | page_selection : 65 | .RS 66 | .TP 67 | .B number 68 | page number 69 | .TP 70 | .B -number 71 | page from end of the list. -1 is last page 72 | .TP 73 | .B number..number 74 | 8..20 means all pages from 8 to 20 75 | .TP 76 | .B -number..number 77 | -1..5 means from last page to fifth last page 78 | .TP 79 | .B $ 80 | last page 81 | .TP 82 | .B ? 
83 | odd set of pages 84 | .TP 85 | .B + 86 | even set of pages 87 | .RE 88 | 89 | 90 | List of supported commands : 91 | 92 | Commands for working with documents : 93 | 94 | .RS 95 | .TP 12 96 | .B read 97 | .I (name) 98 | 99 | Open document and merge with previous document 100 | .TP 101 | .B write 102 | .I (name) 103 | 104 | Save document by given filename 105 | 106 | .RE 107 | Commands for arranging or selecting pages : 108 | .RS 109 | 110 | .TP 111 | .B new 112 | .I {page_ranges} 113 | 114 | Add new pages. 115 | .br 116 | If page_ranges is not provided, the page is added to the end 117 | 118 | .TP 119 | .B del 120 | .I {page_ranges} 121 | 122 | Delete selected pages 123 | 124 | .TP 125 | .B select 126 | .I {page_ranges} 127 | 128 | Keep only selected pages in page_ranges. 129 | 130 | .TP 131 | .B modulo 132 | .I (step, round){page_ranges} 133 | 134 | Special type of select pages. 135 | .br 136 | This command iterates over pages by the given step. 137 | .br 138 | such as if step=4, index are 0, 4, 8, 12 ... 139 | .br 140 | if command is modulo(4){1 2 3} then chosen pages are, 141 | .br 142 | for index 0 -> 4*0+1, 4*0+2, 4*0+3 = 1, 2, 3 143 | .br 144 | for index 1 -> 5, 6, 7 145 | .br 146 | thus page numbers will be in 1,2,3,5,6,7,9,10,11.. order 147 | .br 148 | if command is modulo(4){-1}, selected pages are 149 | .br 150 | -(4*0+1), -(4*1+1), -(4*2+1), -(4*3+1) ... 151 | .br 152 | = -1, -5, -9, -13, ... 153 | .br 154 | \&'round' option adds extra blank pages to make the total page count a multiple of round 155 | 156 | .TP 157 | .B book 158 | Arrange pages for printing booklets. Use nup command after this. 159 | So pages can be center-folded after printing. 160 | 161 | .TP 162 | .B nup 163 | .I (n, cols, dx, dy, paper, orient) 164 | 165 | Puts n pages in one paper. cols is number of columns. 166 | dx and dy are spacings between pages along x and y axis. 
167 | 168 | .RE 169 | Command for pages' content transform: 170 | .RS 171 | .TP 10 172 | .B crop 173 | .I (paper, orient) 174 | 175 | Make outside area blank white. eg- crop(a4, portrait) 176 | 177 | .TP 178 | .B crop2 179 | .I (lx, ly, hx, hy) 180 | 181 | Crop pages to the exact size 182 | 183 | .TP 184 | .B flip 185 | .I (mode=) 186 | 187 | horizontal or vertical flip. eg- flip(v) or flip(vertical) 188 | 189 | .TP 190 | .B line 191 | .I (lx, ly, hx, hy, width) 192 | 193 | Draw line on page 194 | 195 | .TP 196 | .B matrix 197 | .I (a,b,c,d,e,f) 198 | 199 | Transform pages by 3x3 matrix [a,b,0, c,d,0, e,f,1] 200 | 201 | .TP 202 | .B move 203 | .I (x, y) 204 | 205 | Move page to right by x and to top by y 206 | 207 | .TP 208 | .B number 209 | .I (x, y, start, text, size, font) 210 | 211 | Write page numbers over pages. numbering is started from 'start' page no. 212 | eg - number(start=3) 213 | 214 | .TP 215 | .B paper 216 | .I (paper, orient) 217 | 218 | Set paper size without scaling content. 219 | 220 | .TP 221 | .B paper2 222 | .I (w, h) 223 | 224 | Sets paper size to exact width and height 225 | 226 | .TP 227 | .B rotate 228 | .I (angle) 229 | 230 | Rotate page clockwise. angle must be multiple of 90 231 | 232 | .TP 233 | .B scale 234 | .I (scale) 235 | 236 | Scale pages by specified scale factor 237 | 238 | .TP 239 | .B scaleto 240 | .I (paper, top, right, bottom, left, orient) 241 | 242 | Fit page to paper size and specified margins. 
eg - scaleto(a4) 243 | 244 | .TP 245 | .B scaleto2 246 | .I (w, h, top, right, bottom, left) 247 | 248 | Scale to particular width and height and margins 249 | 250 | .TP 251 | .B text 252 | .I (x, y, text, font, size) 253 | 254 | Write text on page at position (x,y) 255 | 256 | .RE 257 | 258 | 259 | .SH EXAMPLES 260 | Booklet format in A4 page 261 | .sp 262 | .ce 263 | .B book nup(2, paper=a4) 264 | .sp 265 | Select first five and last five pages from document 266 | .sp 267 | .ce 268 | .B select{1..5 -5..1} 269 | .sp 270 | Reverse pages 271 | .sp 272 | .ce 273 | .B select{$..1} 274 | .sp 275 | or 276 | .sp 277 | .ce 278 | .B modulo(1){-1} 279 | .sp 280 | Select even pages 281 | .sp 282 | .ce 283 | .B select{+} 284 | .sp 285 | Fit page to a4 landscape 286 | .sp 287 | .ce 288 | .B scaleto(a4, orient=landscape) 289 | 290 | .SH AUTHORS 291 | Arindam Chaudhuri 292 | .SH TRADEMARKS 293 | .B PDF 294 | is trademark of Adobe Systems Incorporated. 295 | 296 | 297 | -------------------------------------------------------------------------------- /src/pdf_objects.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | /* This file is a part of pdfcook program, which is GNU GPLv2 licensed */ 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "fileio.h" 8 | 9 | #define PDF_NAME_MAX_LEN 255 10 | #define PDF_ID_MAX_LEN 255 11 | #define XREF_ENT_LEN 18// [10 digit obj no][5 digit gen no][f or n] 12 | #define LLEN 256 13 | #define STARTXREF_OFFSET 64 // how much to seek from end to read startxref 14 | 15 | /* 16 | PDF includes eight basic types of objects: Boolean values, Integer and Real numbers, 17 | Strings, Names, Arrays, Dictionaries, Streams, and the null object. 18 | Objects may be labelled so that they can be referred to by other objects. A labelled 19 | object is called an indirect object. 
20 | */ 21 | class Token; 22 | class PdfObject; 23 | class ObjectTable; 24 | 25 | typedef struct { 26 | char *data; 27 | int len;// string length excluding null character 28 | } String; 29 | 30 | enum { 31 | BYTE_STR, 32 | HEX_STR 33 | }; 34 | 35 | typedef bool BoolObj;// keyword 'true' and 'false' 36 | typedef int IntObj; 37 | typedef double RealObj;// eg. 2.0, 0.2, 2., .2, +2.0, -2.0, -2., -.2 etc 38 | typedef char* NameObj; // name starting with '/' , eg - /Page , /Count 39 | typedef String StringObj; // (abcd) or 40 | 41 | std::string pdfstr2bytes(String str, int *str_type); 42 | void bytes2pdfstr(std::string str, String &out_str, int str_type); 43 | 44 | 45 | typedef std::vector::iterator ArrayIter; 46 | 47 | class ArrayObj 48 | { 49 | public: 50 | std::vector array; 51 | 52 | int count(); 53 | PdfObject* at (int index); 54 | void append (PdfObject *item); 55 | void deleteItems(); 56 | int write (FILE *f); 57 | //allows range based for-loop 58 | ArrayIter begin(); 59 | ArrayIter end(); 60 | }; 61 | 62 | typedef std::map::iterator MapIter; 63 | // filter class used to remove unnecessary items in dict 64 | typedef std::set DictFilter; 65 | 66 | class DictObj 67 | { 68 | public: 69 | std::map dict; 70 | 71 | bool contains (std::string key); 72 | PdfObject* get (std::string key); 73 | void add (std::string key, PdfObject *val); 74 | PdfObject* newItem (std::string key);// if val exist, clear and return obj, else create new PdfObject 75 | void deleteItem (std::string key); 76 | void deleteItems(); 77 | void setDict (std::map &map); 78 | void merge (DictObj *src_dict);// hard copy new items, overwrite old items 79 | void filter (DictFilter &filter_set);// remove all objects which are not in filter_set 80 | int write (FILE *f); 81 | MapIter begin(); 82 | MapIter end(); 83 | PdfObject* operator[] (std::string key); 84 | }; 85 | 86 | 87 | class StreamObj 88 | { 89 | public: 90 | size_t begin;//pos where stream begins in file 91 | size_t len; 92 | bool decompressed; 93 
| DictObj dict; 94 | char *stream; 95 | int write(FILE *f); 96 | bool decompress(); 97 | bool compress (const char *filter); 98 | 99 | StreamObj(); 100 | ~StreamObj(); 101 | }; 102 | 103 | typedef struct { 104 | int major; 105 | int minor; 106 | PdfObject *obj; 107 | } IndirectObj; 108 | 109 | 110 | 111 | typedef enum { 112 | PDF_OBJ_BOOL, PDF_OBJ_INT, PDF_OBJ_REAL, PDF_OBJ_STR, 113 | PDF_OBJ_NAME, PDF_OBJ_ARRAY, PDF_OBJ_DICT, PDF_OBJ_STREAM, 114 | PDF_OBJ_INDIRECT, PDF_OBJ_INDIRECT_REF, PDF_OBJ_NULL, PDF_OBJ_UNKNOWN 115 | } ObjectType; 116 | 117 | // can't store objects inside union that contain std::map, vector (eg. DictObj). 118 | // So storing pointers of these objects inside union. 119 | 120 | class PdfObject 121 | { 122 | public: 123 | ObjectType type; 124 | union { 125 | BoolObj boolean; 126 | IntObj integer; 127 | RealObj real; 128 | StringObj str; 129 | NameObj name; 130 | DictObj *dict;// always allocates dict 131 | ArrayObj *array;// always allocates array 132 | StreamObj *stream;// always allocates stream, may allocate stream->stream 133 | IndirectObj indirect;// always allocates indirect.obj if PDF_OBJ_INDIRECT 134 | }; 135 | PdfObject(); 136 | void setType(ObjectType obj_type); 137 | bool read (MYFILE *f, ObjectTable *xref, Token *last_tok); 138 | bool readFromString (const char *str); 139 | int write (FILE *f); 140 | int copyFrom (PdfObject *src_obj); 141 | void clear(); 142 | ~PdfObject(); 143 | }; 144 | /* iterate array of pointers like this ... 
145 | for (auto item = obj->array->begin(); item != obj->array->end(); item++) { 146 | val = (*item)->integer; 147 | } 148 | */ 149 | 150 | enum { 151 | XREF_INVALID, 152 | XREF_TABLE, 153 | XREF_STREAM 154 | }; 155 | 156 | enum { 157 | FREE_OBJ, 158 | NONFREE_OBJ, 159 | COMPRESSED_OBJ, 160 | }; 161 | 162 | int getXrefType(MYFILE *f); 163 | 164 | // always obj==NULL for free obj, and never NULL for nonfree obj 165 | typedef struct { 166 | PdfObject *obj; 167 | int8_t type; // free(f), nonfree(n), compressed 168 | int major; // object no. 169 | int minor; // gen id for type 1, (always 0 for type 2) 170 | union { 171 | int offset; // offset of obj from beginning of file (type 1 only) 172 | int next_free;// obj no of next free obj (type 0 only) 173 | int obj_stm; // obj no. of object stream where obj is stored (type 2 only) 174 | }; 175 | int index;// index no. within the obj stream (for type 2) 176 | bool used; 177 | } ObjectTableItem; 178 | 179 | 180 | /* If there is any error in object table, it should be fixed while reading the table. 
*/ 181 | 182 | class ObjectTable 183 | { 184 | public: 185 | std::vector table; 186 | 187 | int count(); 188 | void expandToFit(size_t size); 189 | int addObject (PdfObject *obj); 190 | PdfObject* getObject(int major, int minor); 191 | bool read (MYFILE *f, size_t xref_pos); 192 | bool read (PdfObject *stream, PdfObject *p_trailer); 193 | bool readObject(MYFILE *f, int major); 194 | void readObjects(MYFILE *f); 195 | void writeObjects(FILE *f); 196 | void writeXref (FILE *f); 197 | 198 | ObjectTableItem& operator[] (int index); 199 | }; 200 | 201 | 202 | /* constants defining whitespace characters */ 203 | enum { CHAR_NULL=0, CHAR_TAB=9, CHAR_LF=10, CHAR_FF=12, CHAR_CR=13, CHAR_SP=32}; 204 | 205 | /* constants identifying the token type */ 206 | typedef enum { 207 | TOK_INT, TOK_REAL, TOK_STR, TOK_NAME, TOK_BDICT, TOK_EDICT, 208 | TOK_BARRAY, TOK_EARRAY, TOK_ID, TOK_EOF, TOK_UNKNOWN 209 | } TokType; 210 | 211 | class Token 212 | { 213 | public: 214 | TokType type; 215 | double real; 216 | int integer; 217 | char name[PDF_NAME_MAX_LEN]; 218 | char id[PDF_ID_MAX_LEN]; 219 | String str; 220 | bool new_line;//if there is newline before parsed token 221 | int sign;// 1='+', -1='-', 0=no sign 222 | 223 | Token(); 224 | bool get(MYFILE *f); 225 | void freeData(); 226 | }; 227 | 228 | #define isInt(obj) (((obj)!=NULL) && ((obj)->type==PDF_OBJ_INT)) 229 | #define isReal(obj) (((obj)!=NULL) && ((obj)->type==PDF_OBJ_REAL)) 230 | #define isName(obj) (((obj)!=NULL) && ((obj)->type==PDF_OBJ_NAME)) 231 | #define isString(obj) (((obj)!=NULL) && ((obj)->type==PDF_OBJ_STR)) 232 | #define isArray(obj) (((obj)!=NULL) && ((obj)->type==PDF_OBJ_ARRAY)) 233 | #define isDict(obj) (((obj)!=NULL) && ((obj)->type==PDF_OBJ_DICT)) 234 | #define isStream(obj) (((obj)!=NULL) && ((obj)->type==PDF_OBJ_STREAM)) 235 | #define isRef(obj) (((obj)!=NULL) && ((obj)->type==PDF_OBJ_INDIRECT_REF)) 236 | 237 | // dereferencing a pdf object 238 | inline PdfObject* derefObject(PdfObject *obj, ObjectTable 
&obj_table) 239 | { 240 | while (isRef(obj)){ 241 | obj = obj_table.getObject(obj->indirect.major, obj->indirect.minor); 242 | } 243 | return obj; 244 | } 245 | -------------------------------------------------------------------------------- /src/pdf_filters.cpp: -------------------------------------------------------------------------------- 1 | /* This file is a part of pdfcook program, which is GNU GPLv2 licensed */ 2 | #include "pdf_filters.h" 3 | #include "debug.h" 4 | #include 5 | 6 | int flate_decode_filter(char **stream, size_t *len, DictObj &dict) 7 | { 8 | if (*len==0) return 0; // in some stream dict /Length in 0 9 | // decompress stream using zlib 10 | char *buff; 11 | int predictor; 12 | PdfObject *dec_params; 13 | 14 | size_t new_stream_len = 3 * (*len); 15 | char *new_stream_content = (char *) malloc(new_stream_len); 16 | if (new_stream_content==NULL){ 17 | message(WARN, "zlib : malloc() failed !"); 18 | return -1; 19 | } 20 | _z_d_try: 21 | switch (uncompress((Bytef*)new_stream_content, (uLongf*)&new_stream_len, 22 | (Bytef*)*stream, *len)) 23 | { 24 | case Z_OK: 25 | break; 26 | case Z_BUF_ERROR: 27 | new_stream_len *= 2; 28 | buff = (char*) realloc(new_stream_content, new_stream_len); 29 | if (buff==NULL){ 30 | message(WARN, "zlib : realloc() failed !"); 31 | goto fail; 32 | } 33 | new_stream_content = buff; 34 | goto _z_d_try; 35 | case Z_DATA_ERROR: 36 | message(WARN, "zlib : invalid input data"); 37 | case Z_MEM_ERROR: 38 | default: 39 | goto fail; 40 | } 41 | // decode the decompressed stream 42 | predictor = 1; 43 | dec_params = dict["DecodeParms"]; 44 | if (dec_params) 45 | predictor = dec_params->dict->get("Predictor")->integer; 46 | // 10-15 = png filter 47 | if (predictor == 12) { 48 | int cols = dec_params->dict->get("Columns")->integer; 49 | cols++; // the leading extra byte that stores filter type 50 | int rows = new_stream_len/cols; 51 | char *row_data, *prev_row_data; 52 | char *empty_row = (char *) calloc(1,cols);; 53 | 
prev_row_data = empty_row; 54 | for (int row=0;row1) { 70 | message(WARN, "Unsupported FlateDecode predictor of type %d", predictor); 71 | goto fail; 72 | } 73 | free(*stream); 74 | // shrink to content size 75 | buff = (char*) realloc(new_stream_content, new_stream_len); 76 | if (buff){ 77 | new_stream_content = buff; 78 | } 79 | // new_stream_len my be zero after decompress, in that case realloc() frees memory 80 | if (!new_stream_len){ 81 | new_stream_content = NULL; 82 | } 83 | *stream = new_stream_content; 84 | *len = new_stream_len; 85 | return 0; 86 | fail: 87 | free(new_stream_content); 88 | return -1; 89 | } 90 | 91 | int zlib_compress_filter(char **stream, size_t *len, DictObj &dict) 92 | { 93 | char *buff; 94 | long new_stream_len = 2 * (*len); 95 | char *new_stream_content = (char *) malloc(new_stream_len); 96 | if (new_stream_content==NULL) 97 | return -1; 98 | try_comp: 99 | switch (compress((Bytef*)new_stream_content, (uLongf*)&new_stream_len, 100 | (Bytef*)*stream, *len)) 101 | { 102 | case Z_OK: 103 | break; 104 | case Z_BUF_ERROR: 105 | new_stream_len *= 2; 106 | buff = (char*) realloc(new_stream_content, new_stream_len); 107 | if (buff==NULL){ 108 | message(WARN, "zlib : realloc() failed !"); 109 | goto fail; 110 | } 111 | new_stream_content = buff; 112 | goto try_comp; 113 | case Z_MEM_ERROR: 114 | case Z_DATA_ERROR: 115 | default: 116 | goto fail; 117 | } 118 | free(*stream); 119 | // shrink to content size 120 | buff = (char*) realloc(new_stream_content, new_stream_len); 121 | if (buff){ 122 | new_stream_content = buff; 123 | } 124 | *stream = new_stream_content; 125 | *len = new_stream_len; 126 | return 0; 127 | fail: 128 | free(new_stream_content); 129 | return -1; 130 | } 131 | 132 | 133 | #if (HAVE_LZW) 134 | #define DICT_LEN 4096 135 | struct lzw_dict{ 136 | size_t symbol; 137 | size_t prev; 138 | size_t len; 139 | }; 140 | 141 | enum { LZW_CL_DICT = 256, LZW_END_STREAM = 257 }; 142 | 143 | static int lzw_raw_get_ch(unsigned char * 
buf,size_t len,size_t * index, size_t * offset,size_t length){ 144 | int out; 145 | if (*index == len){ 146 | return EOF; 147 | } 148 | out = ( (1<<(8 - *offset)) - 1) & buf[*index]; 149 | /* 150 | if (length<=(8-*offset)){ 151 | (*offset) += length; 152 | (*offset) %= 8; 153 | out &=((1<=8){ 163 | length-=8; 164 | out = (out << 8) | buf[*index]; 165 | (*index)++; 166 | if (*index == len){ 167 | return EOF; 168 | } 169 | } 170 | if (length){ 171 | out = (out << (length)) | ((buf[*index]>>(8-length)) & ((1<DICT_LEN;++i){ 186 | dict[i].symbol = 0; 187 | dict[i].prev = 0; 188 | dict[i].len = 0; 189 | } 190 | } 191 | 192 | #define lzw_add_prefix(dict,prev_word,word,d_index) do{\ 193 | if (d_index>DICT_LEN){\ 194 | message(WARN,"Bad LZW stream - expected clear-table code");\ 195 | return -1;\ 196 | }\ 197 | if ((size_t)word=0){\ 198 | lzw_dict[d_index].symbol = word;\ 199 | lzw_dict[d_index].prev = prev_word;\ 200 | lzw_dict[d_index].len = dict[prev_word].len + 1;\ 201 | prev_word=word;\ 202 | while (lzw_dict[prev_word].prev){\ 203 | prev_word = lzw_dict[prev_word].prev;\ 204 | }\ 205 | lzw_dict[d_index].symbol = prev_word;\ 206 | }\ 207 | else{\ 208 | if ((size_t)word==d_index){\ 209 | lzw_dict[d_index].prev = prev_word;\ 210 | lzw_dict[d_index].len = dict[prev_word].len + 1;\ 211 | while (lzw_dict[prev_word].prev){\ 212 | prev_word = lzw_dict[prev_word].prev;\ 213 | }\ 214 | lzw_dict[d_index].symbol = prev_word;\ 215 | }\ 216 | else{\ 217 | message(WARN,"Bad LZW stream - unexpected code");\ 218 | return -1;\ 219 | }\ 220 | }\ 221 | ++d_index;\ 222 | switch (d_index + early){\ 223 | case 512:\ 224 | w_size=10;\ 225 | break;\ 226 | case 1024:\ 227 | w_size=11;\ 228 | break;\ 229 | case 2048:\ 230 | w_size=12;\ 231 | break;\ 232 | }\ 233 | }while(0) 234 | 235 | static void lzw_put_prefix(int word, struct lzw_dict dict[DICT_LEN], char ** out, int * len, int * index){ 236 | char * tmp; 237 | if ( word>DICT_LEN || word<0 || dict[word].len<=0){ 238 | return; 239 | } 240 | if 
(dict[word].len == 1){ 241 | if (*len == *index){ 242 | *len *= 2; 243 | tmp = (char*) realloc(*out, *len); 244 | if (tmp==NULL){ 245 | message(FATAL, "realloc() failed !"); 246 | } 247 | *out = tmp; 248 | } 249 | 250 | (*out)[*index] = dict[word].symbol; 251 | (*index)++; 252 | } 253 | else{ 254 | lzw_put_prefix(dict[word].prev,dict,out,len,index); 255 | lzw_put_prefix(dict[word].symbol,dict,out,len,index); 256 | } 257 | } 258 | 259 | int lzw_decompress_filter(char **stream, size_t *len, DictObj &dict) 260 | { 261 | if (len==0) 262 | return 0; 263 | struct lzw_dict lzw_dict[DICT_LEN +1]; 264 | size_t index, offset, w_size, d_index = LZW_END_STREAM + 1; 265 | int word; 266 | int prev_word = LZW_CL_DICT; 267 | int out_index = 0; 268 | int early = 1; 269 | int out_len = 3 * (*len); 270 | 271 | char *out_buf = (char *) malloc(out_len); 272 | if (out_buf==NULL){ 273 | message(WARN, "lzw : malloc() failed !"); 274 | return -1; 275 | } 276 | 277 | PdfObject *early_val = dict["EarlyChange"]; 278 | if (early_val!=NULL && early_val->type == PDF_OBJ_INT){ 279 | early = early_val->integer; 280 | } 281 | 282 | index = 0; 283 | w_size = 9; 284 | 285 | do { 286 | offset = 0; 287 | word = lzw_raw_get_ch((unsigned char *)*stream,*len,&index,&offset,w_size); 288 | } while(word != EOF && word != LZW_CL_DICT); 289 | 290 | if (word == EOF){ 291 | message(WARN, "lzw : reached EOF !"); 292 | free(out_buf); 293 | return -1; 294 | } 295 | 296 | do { 297 | if (word==LZW_CL_DICT){ 298 | lzw_clear_dict(lzw_dict,256); 299 | d_index = LZW_END_STREAM + 1; 300 | w_size=9; 301 | } 302 | else{ 303 | if (prev_word!=LZW_CL_DICT){ 304 | lzw_add_prefix(lzw_dict,prev_word,word,d_index); 305 | } 306 | lzw_put_prefix(word,lzw_dict,&out_buf,&out_len,&out_index); 307 | } 308 | prev_word = word; 309 | word = lzw_raw_get_ch((unsigned char *)*stream,*len,&index,&offset,w_size); 310 | } while(word != EOF && word != LZW_END_STREAM); 311 | 312 | *len = out_index; 313 | free(*stream); 314 | *stream = out_buf; 315 
| return 0; 316 | } 317 | #endif 318 | 319 | /*filter mapping for decompression*/ 320 | stream_filters _decompress_filters[]= { 321 | {"FlateDecode", flate_decode_filter}, 322 | {"LZWDecode", lzw_decompress_filter}, 323 | {"ASCII85Decode", NULL}, 324 | {"DCTDecode", NULL}, 325 | {"RunLengthDecode", NULL}, 326 | {"CCITTFaxDecode", NULL}, 327 | {"JBIG2Decode", NULL}, 328 | {"JPXDecode", NULL}, 329 | {"Crypt", NULL} 330 | }; 331 | /*filter mapping for compressions*/ 332 | stream_filters _compress_filters[] = { 333 | {"FlateDecode", zlib_compress_filter}, 334 | {"LZWDecode", NULL}, 335 | {"ASCII85Decode", NULL}, 336 | {"DCTDecode", NULL}, 337 | {"RunLengthDecode", NULL}, 338 | {"CCITTFaxDecode", NULL}, 339 | {"JBIG2Decode", NULL}, 340 | {"JPXDecode", NULL}, 341 | {"Crypt", NULL} 342 | }; 343 | 344 | int apply_filter(const char *name, char **stream, size_t *len, DictObj &dict, stream_filters *filters, size_t f_len) 345 | { 346 | for (size_t i=0; i// sort() 6 | 7 | PageRange:: PageRange () { 8 | type = PAGE_SET_ALL; 9 | begin = 1; end = -1; negative = false; 10 | } 11 | PageRange:: PageRange (PageSetType _type) : PageRange() { 12 | type = _type; 13 | } 14 | PageRange:: PageRange (int begin, int end, bool neg) : begin(begin), end(end), negative(neg) { 15 | type = PAGE_SET_RANGE; 16 | } 17 | 18 | //------------------ List of PageRange object ------------------------- 19 | 20 | void PageRanges:: append (PageRange range) { 21 | array.push_back(range); 22 | } 23 | 24 | void PageRanges:: initPageNums (int max_page_num) 25 | { 26 | for (PageRange range : array) { 27 | switch (range.type) { 28 | case PAGE_SET_RANGE: 29 | if (range.negative) { 30 | range.begin *= -1; 31 | range.end *= -1; 32 | } 33 | if (range.begin < 0) {// converts -1 to last page no 34 | range.begin = max_page_num + range.begin + 1; 35 | } 36 | if (range.end < 0) { 37 | range.end = max_page_num + range.end + 1; 38 | } 39 | if (range.begin <= range.end) {// eg. 
-> 1..5 or 4..4 40 | for (int i=range.begin; i<=range.end /*&& i<=max_page_num*/; i++) 41 | page_num_array.push_back(i); 42 | } 43 | else if (range.begin<=max_page_num) {// eg. -> 5..1 44 | for (int i=range.begin; i>=range.end; i--) 45 | page_num_array.push_back(i); 46 | } 47 | break; 48 | case PAGE_SET_ODD: 49 | for (int i=1; i<=max_page_num; i+=2) 50 | page_num_array.push_back(i); 51 | break; 52 | case PAGE_SET_EVEN: 53 | for (int i=2; i<=max_page_num; i+=2) 54 | page_num_array.push_back(i); 55 | break; 56 | case PAGE_SET_ALL: 57 | default: 58 | for (int i=1; i<=max_page_num; i++) 59 | page_num_array.push_back(i); 60 | } 61 | } 62 | } 63 | 64 | static bool sort_func (int i, int j) { return i pg_list.count()) 101 | return false; 102 | PdfPage page = pg_list[page_num-1]; 103 | doc.page_list.append(page); 104 | } 105 | return true; 106 | } 107 | 108 | bool doc_pages_number (PdfDocument &doc, PageRanges &pages, 109 | int x, int y, int start, const char *text, int size, const char *font_name) 110 | { 111 | start -= 1;// this will help to calc page number to print 112 | Point poz; 113 | char *str; 114 | // make sure that given text contains a %d, which is replaced by page number 115 | const char *p1 = strstr(text, "%d");// find %d in the given text 116 | const char *p2 = strstr(text, "%");// first % is followed by d 117 | if (p1==NULL || p1!=p2){ 118 | message(ERROR, "text does not contain %%d"); 119 | return false; 120 | } 121 | p2 = strstr(p1+1, "%");// no other % character in string 122 | if (p2!=NULL){ 123 | return false; 124 | } 125 | Font font = doc.newFontObject(font_name); 126 | 127 | for (int page_num : pages) { 128 | if (page_num-start<1) 129 | continue; 130 | PdfPage &page = doc.page_list[page_num-1];// page index = page_num -1 131 | Rect page_size = page.pageSize(); 132 | poz.x = (x!=-1) ? page_size.left.x +x : 133 | page_size.left.x + (page_size.right.x - page_size.left.x)/2; 134 | poz.y = (y!=-1) ? 
page_size.left.y + y : page_size.left.y + size+10; 135 | asprintf(&str, text, page_num-start); 136 | page.drawText(str, poz, size, font); 137 | free(str); 138 | } 139 | return true; 140 | } 141 | 142 | bool doc_pages_text (PdfDocument &doc, PageRanges &pages, 143 | int x, int y, const char *text, int size, const char *font_name) 144 | { 145 | Point poz; 146 | Font font = doc.newFontObject(font_name); 147 | 148 | for (int page_num : pages) { 149 | PdfPage &page = doc.page_list[page_num-1];// page index = page_num -1 150 | Rect page_size = page.pageSize(); 151 | poz.x = page_size.left.x + x; 152 | poz.y = page_size.left.y + y; 153 | page.drawText(text, poz, size, font); 154 | } 155 | return true; 156 | } 157 | 158 | bool doc_pages_crop (PdfDocument &doc, PageRanges &pages, Rect crop_area) 159 | { 160 | for (int page_num : pages){ 161 | PdfPage &page = doc.page_list[page_num-1]; 162 | page.crop(crop_area); 163 | } 164 | return true; 165 | } 166 | 167 | bool doc_pages_transform(PdfDocument &doc, PageRanges &pages, Matrix mat) 168 | { 169 | for (int page_num : pages) { 170 | PdfPage &page = doc.page_list[page_num-1];// page index = page_num -1 171 | page.transform(mat); 172 | } 173 | return true; 174 | } 175 | 176 | bool doc_pages_translate(PdfDocument &doc, PageRanges &pages, float x, float y) 177 | { 178 | Rect paper; 179 | 180 | Matrix matrix; 181 | matrix.translate(x,y); 182 | 183 | for (int page_num : pages) { 184 | PdfPage &page = doc.page_list[page_num-1]; 185 | Rect page_size = page.pageSize(); 186 | // this transforms page content, paper size 187 | page.transform(matrix); 188 | // we dont want to transform paper so restoring it 189 | page.paper = page_size; 190 | } 191 | return true; 192 | } 193 | 194 | bool doc_pages_scaleto (PdfDocument &doc, PageRanges &pages, Rect paper, 195 | float top, float right, float bottom, float left)//margins 196 | { 197 | double scale, scale_x, scale_y; 198 | double move_x, move_y; 199 | double old_page_w, old_page_h, avail_w, 
avail_h; 200 | 201 | Rect bbox = paper; 202 | bbox.right.x -= right; 203 | bbox.right.y -= top; 204 | bbox.left.x += left; 205 | bbox.left.y += bottom; 206 | // available width and height inside margin 207 | avail_w = bbox.right.x - bbox.left.x; 208 | avail_h = bbox.right.y - bbox.left.y; 209 | 210 | for (int page_num : pages){ 211 | PdfPage &page = doc.page_list[page_num-1]; 212 | Rect page_size = page.pageSize(); 213 | // using paper size instead of bounding box size, because viewers show paper 214 | // size as page size, and you dont see the bounding box rect in a viewer 215 | old_page_w = page_size.right.x - page_size.left.x; 216 | old_page_h = page_size.right.y - page_size.left.y; 217 | // get scale value to fit inside margin of new page 218 | scale_x = avail_w / old_page_w; 219 | scale_y = avail_h / old_page_h; 220 | scale = MIN(scale_x, scale_y); 221 | // adjust for new margin 222 | move_x = bbox.left.x; 223 | move_y = bbox.left.y; 224 | // adjust to fit center 225 | move_x += (avail_w - (scale * old_page_w)) / 2; 226 | move_y += (avail_h - (scale * old_page_h)) / 2; 227 | // adjust in case, old paper bottom left is not (0,0) 228 | // as the old page is scaled, dimension is also scaled 229 | move_x -= scale*page_size.left.x; 230 | move_y -= scale*page_size.left.y; 231 | 232 | Matrix matrix; 233 | matrix.scale(scale); 234 | matrix.translate(move_x, move_y); 235 | 236 | page.transform(matrix); 237 | page.paper = paper; 238 | } 239 | return true; 240 | } 241 | 242 | bool doc_pages_set_paper_size (PdfDocument &doc, PageRanges &pages, Rect paper) 243 | { 244 | for (int page_num : pages){ 245 | PdfPage &page = doc.page_list[page_num-1]; 246 | page.paper = paper; 247 | } 248 | return true; 249 | } 250 | 251 | 252 | typedef struct { 253 | std::string name; 254 | float width; 255 | float height; 256 | } PaperSize; 257 | 258 | /* list of supported paper sizes. 259 | all format names must be in lowercase. 
260 | sizes are in points (1/72 inch) 261 | */ 262 | static std::list paper_sizes({ 263 | { "a0", 2382, 3369 }, // 84.1cm * 118.8cm 264 | { "a1", 1684, 2382 }, // 59.4cm * 84.1cm 265 | { "a2", 1191, 1684 }, // 42cm * 59.4cm 266 | { "a3", 842, 1191 }, // 29.7cm * 42cm 267 | { "a4", 595, 842 }, // 21cm * 29.7cm 268 | { "a5", 421, 595 }, // 14.85cm * 21cm 269 | { "a6", 297, 420 }, // 10.5cm * 14.85 cm 270 | { "a7", 210, 297 }, // 7.4cm * 10.5cm 271 | { "a8", 148, 210 }, // 5.2cm * 7.4cm 272 | { "a9", 105, 148 }, // 3.7cm * 5.2cm 273 | { "a10", 73, 105 }, // 2.6cm * 3.7cm 274 | { "b0", 2835, 4008 }, // 100cm * 141.4cm 275 | { "b1", 2004, 2835 }, // 70.7cm * 100cm 276 | { "b2", 1417, 2004 }, // 50cm * 70.7cm 277 | { "b3", 1001, 1417 }, // 35.3cm * 50cm 278 | { "b4", 709, 1001 }, // 25cm * 35.3cm 279 | { "b5", 499, 709 }, // 17.6cm * 25cm 280 | { "b6", 354, 499 }, // 12.5cm * 17.6cm 281 | { "jisb0", 2920, 4127 },// 103.0cm * 145.6cm 282 | { "jisb1", 2064, 2920 },// 72.8cm * 103.0cm 283 | { "jisb2", 1460, 2064 },// 51.5cm * 72.8cm 284 | { "jisb3", 1032, 1460 },// 36.4cm * 51.5cm 285 | { "jisb4", 729, 1032 },// 25.7cm * 36.4cm 286 | { "jisb5", 516, 729 },// 18.2cm * 25.7cm 287 | { "jisb6", 363, 516 },// 12.8cm * 18.2cm 288 | { "c0", 2599, 3677 }, // 91.7cm * 129.7cm 289 | { "c1", 1837, 2599 }, // 64.8cm * 91.7cm 290 | { "c2", 1298, 1837 }, // 45.8cm * 64.8cm 291 | { "c3", 918, 1298 }, // 32.4cm * 45.8cm 292 | { "c4", 649, 918 }, // 22.9cm * 32.4cm 293 | { "c5", 459, 649 }, // 16.2cm * 22.9cm 294 | { "c6", 323, 459 }, // 11.4cm * 16.2cm 295 | { "ledger", 1224, 792 }, // 17in * 11in 296 | { "tabloid", 792, 1224 }, // 11in * 17in 297 | { "letter", 612, 792 }, // 8.5in * 11in 298 | { "halfletter",396, 612 }, // 5.5in * 8.5in 299 | { "statement", 396, 612 }, // 5.5in * 8.5in 300 | { "legal", 612, 1008 }, // 8.5in * 14in 301 | { "executive", 540, 720 }, // 7.6in * 10in 302 | { "folio", 612, 936 }, // 8.5in * 13in 303 | { "quarto", 610, 780 }, // 8.5in * 10.83in 304 | { "10x14", 
720, 1008 }, // 10in * 14in 305 | { "arche", 2592, 3456 }, // 34in * 44in 306 | { "archd", 1728, 2592 }, // 22in * 34in 307 | { "archc", 1296, 1728 }, // 17in * 22in 308 | { "archb", 864, 1296 }, // 11in * 17in 309 | { "archa", 648, 864 }, // 8.5in * 11in 310 | { "flsa", 612, 936 }, // 8.5in * 13in (U.S. foolscap) 311 | { "flse", 612, 936 } // 8.5in * 13in (European foolscap) 312 | }); 313 | 314 | // add user defined paper size 315 | bool add_new_paper_size (std::string name, float w, float h) 316 | { 317 | transform(name.begin(), name.end(), name.begin(), ::tolower); 318 | PaperSize new_size = {name, w, h}; 319 | paper_sizes.push_front(new_size); 320 | return true; 321 | } 322 | 323 | bool set_paper_from_name(Rect &paper, std::string name, Orientation orientation) 324 | { 325 | transform(name.begin(), name.end(), name.begin(), ::tolower); 326 | 327 | for (auto &paper_size : paper_sizes) { 328 | if (paper_size.name == name) { 329 | paper.left = Point(0, 0); 330 | paper.right = Point(paper_size.width, paper_size.height); 331 | paper_set_orientation(paper, orientation); 332 | return true; 333 | } 334 | } 335 | return false; 336 | } 337 | 338 | void paper_set_orientation (Rect &paper, Orientation orientation) 339 | { 340 | // switch width & height if landscape is required 341 | if ( (orientation==ORIENT_PORTRAIT && paper.isLandscape()) 342 | or (orientation==ORIENT_LANDSCAPE && (not paper.isLandscape())) ) { 343 | paper.right = Point(paper.right.y, paper.right.x); 344 | } 345 | } 346 | 347 | void print_paper_sizes() 348 | { 349 | for (auto &paper_size : paper_sizes) { 350 | fprintf(stderr, "%s\n", paper_size.name.c_str()); 351 | } 352 | } 353 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, 
Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 
38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. 
The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 
102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | <one line to give the program's name and a brief idea of what it does.> 294 | Copyright (C) <year> <name of author> 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /src/crypt.cpp: -------------------------------------------------------------------------------- 1 | #include "crypt.h" 2 | #include "debug.h" 3 | #include "common.h" 4 | #include 5 | 6 | static uchar padding_arr[] = { 7 | 0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41, 8 | 0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08, 9 | 0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80, 10 | 0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A 11 | }; 12 | static char *padding_str = (char*)padding_arr; 13 | 14 | Crypt:: Crypt() 15 | { 16 | version = 1;// only 1 and 2 are supported 17 | revision = 3; 18 | keylen = 5; 19 | O = ""; 20 | U = ""; 21 | perm = 0; 22 | id0 = ""; 23 | } 24 | 25 | bool 26 | Crypt:: decryptionSupported() 27 | { 28 | return ((version>=1 && version<=2) and (keylen>=5 && keylen<=16) and 29 | perm!=0 and O.size()==32 and id0.size()==16); 30 | } 31 | 32 | 33 | bool 34 | Crypt:: getEncryptionInfo(PdfObject *encrypt_dict, PdfObject *p_trailer) 35 | { 36 | if (!encrypt_dict || !p_trailer) 37 | return false; 38 | int str_type; 39 | PdfObject *obj = encrypt_dict->dict->get("Filter"); 40 | if (obj && obj->type==PDF_OBJ_NAME && strcmp(obj->name, "Standard")!=0){ 41 | debug("error : unsupported Encrypt filter '%s'", obj->name); 42 | return false; 43 | } 44 | obj = encrypt_dict->dict->get("V"); 45 | if (isInt(obj)){ 46 | this->version = obj->integer; 47 | } 48 | obj = encrypt_dict->dict->get("R"); 49 | if (isInt(obj)){ 50 | this->revision = obj->integer; 51 | } 52 | obj = 
encrypt_dict->dict->get("Length"); 53 | if (isInt(obj)){ 54 | this->keylen = obj->integer/8;// converting bits to bytes 55 | } 56 | obj = encrypt_dict->dict->get("U"); 57 | if (obj){ 58 | if (obj->type==PDF_OBJ_STR) { 59 | this->U = pdfstr2bytes(obj->str, &str_type); 60 | } 61 | else { 62 | debug("error : Encrypt dict /U entry is not string obj"); 63 | return false; 64 | } 65 | } 66 | obj = encrypt_dict->dict->get("O"); 67 | if (isString(obj)){ 68 | this->O = pdfstr2bytes(obj->str, &str_type); 69 | if (this->O.size()!=32){ 70 | debug("error : Encrypt dict /O entry size is not 32"); 71 | return false; 72 | } 73 | } 74 | else { 75 | debug("error : Encrypt dict does not have valid /O entry"); 76 | return false; 77 | } 78 | obj = encrypt_dict->dict->get("P"); 79 | if (isInt(obj)){ 80 | this->perm = obj->integer; 81 | } 82 | // if any previous fails, id0 val will be empty 83 | obj = p_trailer->dict->get("ID"); 84 | if (isArray(obj) && obj->array->count()==2) { 85 | PdfObject *id_obj = obj->array->at(0); 86 | this->id0 = pdfstr2bytes(id_obj->str, &str_type); 87 | } 88 | else { 89 | debug("error : failed to get trailer ID for decryption"); 90 | return false; 91 | } 92 | return true; 93 | } 94 | 95 | bool 96 | Crypt:: authenticateUserPassword(std::string password) 97 | { 98 | // Using algorithm 3.2 (PDF 1.4) 99 | // pad or truncate entered password to exactly 32 bytes 100 | std::string pwd = password + std::string(padding_str, 32); 101 | pwd.resize(32); 102 | pwd += O; // append /O entry from Encrypt dictionary 103 | pwd.append((char*)&perm, 4);// append /P entry as 4 byte little-endian integer 104 | pwd += id0;// append first character from trailer /ID entry 105 | MD5 hash(pwd); 106 | if (revision==3){ 107 | for (int i=0; i<50; i++){ 108 | hash = MD5(std::string((char*)hash.digest, keylen)); 109 | } 110 | } 111 | encryption_key = std::string((char*)hash.digest, keylen); 112 | 113 | if (U.empty()) 114 | return true; 115 | 116 | if (revision==2){ 117 | char tmp_U[32]; 118 | 
memcpy(tmp_U, padding_str, 32); 119 | RC4 rc4(encryption_key); 120 | rc4.crypt((uchar*)tmp_U, 32); 121 | if (strncmp(tmp_U, U.data(), 32)==0) 122 | return true; 123 | } 124 | else if (revision==3) { 125 | std::string str(padding_str, 32); 126 | str += id0; 127 | MD5 hash(str); 128 | RC4 rc4(encryption_key); 129 | rc4.crypt(hash.digest, 16); 130 | 131 | char rc4_key[128]; 132 | for (int i=1; i<=19; i++) { 133 | for (int j=0; j=0; i--){ 176 | for (int j=0; jtype){ 192 | case PDF_OBJ_STR: 193 | if (obj->str.len>0 && obj->str.data!=NULL){ 194 | int str_type; 195 | std::string str = pdfstr2bytes(obj->str, &str_type); 196 | char *str_data = (char*) malloc2(str.size()); 197 | memcpy(str_data, str.data(), str.size()); 198 | rc4.crypt((uchar*)str_data, str.size()); 199 | bytes2pdfstr(std::string(str_data, str.size()), obj->str, str_type); 200 | } 201 | return; 202 | case PDF_OBJ_ARRAY: 203 | for (auto child : *obj->array) { 204 | decryptObject(child, key); 205 | } 206 | return; 207 | case PDF_OBJ_DICT: 208 | for (auto it : *obj->dict){ 209 | decryptObject(it.second, key); 210 | } 211 | return; 212 | case PDF_OBJ_STREAM: 213 | if (obj->stream->len>0 && obj->stream->stream){ 214 | rc4.crypt((uchar*)obj->stream->stream, obj->stream->len); 215 | } 216 | for (auto it : obj->stream->dict){ 217 | decryptObject(it.second, key); 218 | } 219 | return; 220 | default: 221 | return; 222 | } 223 | } 224 | 225 | void 226 | Crypt:: decryptIndirectObject(PdfObject *obj, int obj_no, int gen_no) 227 | { 228 | std::string key_str = encryption_key; 229 | key_str.append((char*)&obj_no, 3); 230 | key_str.append((char*)&gen_no, 2); 231 | int rc4key_len = key_str.size() > 16 ? 
16 : key_str.size(); 232 | MD5 hash(key_str); 233 | key_str = std::string((char*)hash.digest, rc4key_len); 234 | decryptObject(obj, key_str); 235 | } 236 | 237 | 238 | // ****************** ARC4 Algorithm Class ***************** 239 | 240 | #define swap_byte(x,y) t = *(x); *(x) = *(y); *(y) = t 241 | 242 | 243 | RC4:: RC4(std::string key) 244 | { 245 | uchar t; 246 | int keylen = key.size(); 247 | 248 | for (short i=0; i<256; i++) 249 | init_state[i] = i; 250 | 251 | for (short i=0, j=0; i<256; i++) { 252 | j = (j + init_state[i] + key[i%keylen]) % 256; 253 | swap_byte(&init_state[i], &init_state[j]); 254 | } 255 | } 256 | 257 | void 258 | RC4:: crypt(uchar *data, int len) 259 | { 260 | uchar state[256]; 261 | for (int i=0; i<256; i++){ 262 | state[i] = init_state[i]; 263 | } 264 | uchar t, xorIndex, x=0, y=0; 265 | 266 | for (int i=0; i> (32-n)); 350 | } 351 | 352 | // FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4. 353 | // Rotation is separate from addition to prevent recomputation. 
354 | inline void MD5::FF(uint4 &a, uint4 b, uint4 c, uint4 d, uint4 x, uint4 s, uint4 ac) { 355 | a = rotate_left(a+ F(b,c,d) + x + ac, s) + b; 356 | } 357 | 358 | inline void MD5::GG(uint4 &a, uint4 b, uint4 c, uint4 d, uint4 x, uint4 s, uint4 ac) { 359 | a = rotate_left(a + G(b,c,d) + x + ac, s) + b; 360 | } 361 | 362 | inline void MD5::HH(uint4 &a, uint4 b, uint4 c, uint4 d, uint4 x, uint4 s, uint4 ac) { 363 | a = rotate_left(a + H(b,c,d) + x + ac, s) + b; 364 | } 365 | 366 | inline void MD5::II(uint4 &a, uint4 b, uint4 c, uint4 d, uint4 x, uint4 s, uint4 ac) { 367 | a = rotate_left(a + I(b,c,d) + x + ac, s) + b; 368 | } 369 | 370 | 371 | // default ctor, just initailize 372 | MD5::MD5() 373 | { 374 | init(); 375 | } 376 | 377 | // nifty shortcut ctor, compute MD5 for string and finalize it right away 378 | MD5::MD5(const std::string &text) 379 | { 380 | init(); 381 | update(text.data(), text.length()); 382 | finalize(); 383 | } 384 | 385 | void MD5::init() 386 | { 387 | finalized=false; 388 | 389 | count[0] = 0; 390 | count[1] = 0; 391 | // load magic initialization constants. 392 | state[0] = 0x67452301; 393 | state[1] = 0xefcdab89; 394 | state[2] = 0x98badcfe; 395 | state[3] = 0x10325476; 396 | } 397 | 398 | 399 | // decodes input (unsigned char) into output (uint4). Assumes len is a multiple of 4. 400 | void MD5::decode(uint4 output[], const uint1 input[], size_t len) 401 | { 402 | for (unsigned int i = 0, j = 0; j < len; i++, j += 4) { 403 | output[i] = ((uint4)input[j]) | (((uint4)input[j+1]) << 8) | 404 | (((uint4)input[j+2]) << 16) | (((uint4)input[j+3]) << 24); 405 | } 406 | } 407 | 408 | 409 | // encodes input (uint4) into output (unsigned char). Assumes len is 410 | // a multiple of 4. 
411 | void MD5::encode(uint1 output[], const uint4 input[], size_t len) 412 | { 413 | for (size_t i = 0, j = 0; j < len; i++, j += 4) { 414 | output[j] = input[i] & 0xff; 415 | output[j+1] = (input[i] >> 8) & 0xff; 416 | output[j+2] = (input[i] >> 16) & 0xff; 417 | output[j+3] = (input[i] >> 24) & 0xff; 418 | } 419 | } 420 | 421 | 422 | // apply MD5 algo on a block 423 | void MD5::transform(const uint1 block[blocksize]) 424 | { 425 | uint4 a = state[0], b = state[1], c = state[2], d = state[3], x[16]; 426 | decode (x, block, blocksize); 427 | 428 | /* Round 1 */ 429 | FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */ 430 | FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */ 431 | FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */ 432 | FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */ 433 | FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */ 434 | FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */ 435 | FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */ 436 | FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */ 437 | FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */ 438 | FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */ 439 | FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */ 440 | FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */ 441 | FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */ 442 | FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */ 443 | FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */ 444 | FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */ 445 | 446 | /* Round 2 */ 447 | GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */ 448 | GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */ 449 | GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */ 450 | GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */ 451 | GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */ 452 | GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */ 453 | GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */ 454 | GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */ 455 | GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */ 456 | GG (d, 
a, b, c, x[14], S22, 0xc33707d6); /* 26 */ 457 | GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */ 458 | GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */ 459 | GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */ 460 | GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */ 461 | GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */ 462 | GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */ 463 | 464 | /* Round 3 */ 465 | HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */ 466 | HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */ 467 | HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */ 468 | HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */ 469 | HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */ 470 | HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */ 471 | HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */ 472 | HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */ 473 | HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */ 474 | HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */ 475 | HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */ 476 | HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */ 477 | HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */ 478 | HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */ 479 | HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */ 480 | HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */ 481 | 482 | /* Round 4 */ 483 | II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */ 484 | II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */ 485 | II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */ 486 | II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */ 487 | II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */ 488 | II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */ 489 | II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */ 490 | II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */ 491 | II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */ 492 | II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */ 493 | II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */ 494 | II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */ 
495 | II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */ 496 | II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */ 497 | II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */ 498 | II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */ 499 | 500 | state[0] += a; 501 | state[1] += b; 502 | state[2] += c; 503 | state[3] += d; 504 | 505 | // Zeroize sensitive information. 506 | memset(x, 0, sizeof x); 507 | } 508 | 509 | 510 | // MD5 block update operation. Continues an MD5 message-digest 511 | // operation, processing another message block 512 | void MD5::update(const unsigned char input[], size_t length) 513 | { 514 | // compute number of bytes mod 64 515 | size_t index = count[0] / 8 % blocksize; 516 | 517 | // Update number of bits 518 | if ((count[0] += (length << 3)) < (length << 3)) 519 | count[1]++; 520 | count[1] += (length >> 29); 521 | 522 | // number of bytes we need to fill in buffer 523 | size_t firstpart = 64 - index; 524 | 525 | size_t i; 526 | 527 | // transform as many times as possible. 528 | if (length >= firstpart) { 529 | // fill buffer first, transform 530 | memcpy(&buffer[index], input, firstpart); 531 | transform(buffer); 532 | // transform chunks of blocksize (64 bytes) 533 | for (i = firstpart; i + blocksize <= length; i += blocksize) 534 | transform(&input[i]); 535 | 536 | index = 0; 537 | } 538 | else 539 | i = 0; 540 | 541 | // buffer remaining input 542 | memcpy(&buffer[index], &input[i], length-i); 543 | } 544 | 545 | 546 | // for convenience provide a verson with signed char 547 | void MD5::update(const char input[], size_t length) 548 | { 549 | update((const unsigned char*)input, length); 550 | } 551 | 552 | 553 | // MD5 finalization. Ends an MD5 message-digest operation, writing the 554 | // the message digest and zeroizing the context. 
555 | MD5& MD5::finalize() 556 | { 557 | static unsigned char padding[64] = { 558 | 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 559 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 560 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 561 | }; 562 | 563 | if (!finalized) { 564 | // Save number of bits 565 | unsigned char bits[8]; 566 | encode(bits, count, 8); 567 | 568 | // pad out to 56 mod 64. 569 | size_t index = count[0] / 8 % 64; 570 | size_t padLen = (index < 56) ? (56 - index) : (120 - index); 571 | update(padding, padLen); 572 | 573 | // Append length (before padding) 574 | update(bits, 8); 575 | 576 | // Store state in digest 577 | encode(digest, state, 16); 578 | 579 | // Zeroize sensitive information. 580 | memset(buffer, 0, sizeof buffer); 581 | memset(count, 0, sizeof count); 582 | 583 | finalized=true; 584 | } 585 | return *this; 586 | } 587 | 588 | -------------------------------------------------------------------------------- /src/pdf_doc.cpp: -------------------------------------------------------------------------------- 1 | /* This file is a part of pdfcook program, which is GNU GPLv2 licensed */ 2 | #include "common.h" 3 | #include "pdf_doc.h" 4 | #include "debug.h" 5 | #include 6 | 7 | static void updateRefs(PdfDocument &doc); 8 | static void deleteUnusedObjects(PdfDocument &doc); 9 | 10 | static DictFilter trailer_filter({ "Size", "Root", "ID"}); 11 | static DictFilter catalog_filter({ "Pages", "Type"}); 12 | static DictFilter page_filter({ "Type", "Parent", "Resources", "Contents" }); 13 | static DictFilter xobject_filter({ "Type", "Subtype", "FormType", "BBox", "Resources", "Length", "Filter"}); 14 | 15 | // These standard 14 font names are supported by all PDF viewers 16 | static std::set standard_fonts({ 17 | "Times-Roman", "Times-Bold", "Times-Italic", "Times-BoldItalic", 18 | "Helvetica", "Helvetica-Bold", "Helvetica-Oblique", "Helvetica-BoldOblique", 19 | "Courier", "Courier-Bold", 
"Courier-Oblique", "Courier-BoldOblique", 20 | "Symbol", "ZapfDingbats" 21 | }); 22 | 23 | void print_font_names() 24 | { 25 | fprintf(stderr, "Standard 14 Fonts :\n"); 26 | for (auto font : standard_fonts) { 27 | fprintf(stderr, " %s\n", font.c_str()); 28 | } 29 | } 30 | 31 | void PageList:: append(PdfPage &page) { 32 | array.push_back(page); 33 | } 34 | void PageList:: remove(int index) 35 | { 36 | array.erase(array.begin()+index); 37 | } 38 | void PageList:: clear() 39 | { 40 | array.clear(); 41 | } 42 | 43 | int PageList:: count() { 44 | return array.size(); 45 | } 46 | PageIter PageList:: begin() { 47 | return array.begin(); 48 | } 49 | PageIter PageList:: end() { 50 | return array.end(); 51 | } 52 | 53 | 54 | PdfDocument:: PdfDocument() 55 | { 56 | trailer = new PdfObject(); 57 | trailer->setType(PDF_OBJ_DICT); 58 | // set default paper size 59 | //paper.right = Point(595, 842); 60 | //cropbox = paper; 61 | encrypted = false; 62 | have_encrypt_info = false; 63 | decryption_supported = false; 64 | } 65 | 66 | PdfDocument:: ~PdfDocument() 67 | { 68 | page_list.clear(); 69 | int count = obj_table.count(); 70 | for (int i=0; iv_major = major; 100 | this->v_minor = minor; 101 | return true; 102 | } 103 | 104 | bool PdfDocument:: getPdfTrailer (MYFILE *f, char *line, long offset) 105 | { 106 | // read from end of file and find last xref offset 107 | if (offset==-1){ 108 | int i, n, c; 109 | char *p; 110 | char buff[STARTXREF_OFFSET + 1]; 111 | 112 | if (myfseek(f, -1*STARTXREF_OFFSET, SEEK_END)==-1){ 113 | message(ERROR, "Seek error"); 114 | return false; 115 | } 116 | for (n=0; n= 0; --i) { 122 | if (!strncmp(buff+i, "startxref", 9)) 123 | break; 124 | } 125 | if (i < 0) { 126 | message(FATAL,"'startxref' not found"); 127 | return false; 128 | } 129 | for (p = buff+i+9; isspace(*p); p++); 130 | offset = atol(p); 131 | } 132 | myfseek(f, offset, SEEK_SET); 133 | 134 | int xref_type = getXrefType(f); 135 | if (xref_type==XREF_INVALID){ 136 | message(ERROR, "failed 
to determine xref type"); 137 | return false; 138 | } 139 | 140 | if (xref_type==XREF_TABLE){ 141 | if (not obj_table.read(f, offset)){ 142 | message(FATAL,"xreftable read error"); 143 | } 144 | // skip trailer keyword 145 | long fpos; 146 | do { 147 | fpos = myftell(f); 148 | if (myfgets(line,LLEN,f)==NULL){ 149 | message(ERROR, "'trailer' keyword not found"); 150 | return false; 151 | } 152 | } 153 | while (!starts(line,"trailer")); 154 | // some pdfs may have space after trailer keyword instead of newline 155 | // set seek pos just after trailer keyword 156 | myfseek(f, fpos+7, SEEK_SET); 157 | } 158 | // read trailer dictionary 159 | PdfObject content; 160 | if (not content.read(f, NULL, NULL)) { 161 | message(FATAL, "Unable to read trailer object"); 162 | } 163 | PdfObject *p_trailer = new PdfObject(); 164 | 165 | if (content.type==PDF_OBJ_INDIRECT && content.indirect.obj->type==PDF_OBJ_STREAM){ 166 | p_trailer->setType(PDF_OBJ_DICT); 167 | p_trailer->dict->merge(&content.indirect.obj->stream->dict); 168 | } 169 | else if (content.type==PDF_OBJ_DICT) { 170 | p_trailer->copyFrom(&content); 171 | } 172 | else { 173 | debug("trailer obj is neither dict nor stream"); 174 | delete p_trailer; 175 | return false; 176 | } 177 | if (xref_type==XREF_STREAM) { 178 | if (not obj_table.read(content.indirect.obj, p_trailer)){ 179 | message(FATAL,"xreftable read error"); 180 | return false; 181 | } 182 | } 183 | if (p_trailer->dict->contains("Encrypt")){ 184 | if (xref_type==XREF_STREAM){ 185 | message(FATAL, "Can not handle encrypted PDF with Xref stream"); 186 | } 187 | if (!have_encrypt_info) { 188 | encrypted = true; 189 | PdfObject *encrypt_dict = p_trailer->dict->get("Encrypt"); 190 | 191 | if (isRef(encrypt_dict)){ 192 | if (not obj_table.readObject(f, encrypt_dict->indirect.major)) 193 | return false; 194 | encrypt_dict = obj_table.getObject(encrypt_dict->indirect.major, 195 | encrypt_dict->indirect.minor); 196 | } 197 | if (isDict(encrypt_dict)){ 198 | 
crypt.getEncryptionInfo(encrypt_dict, p_trailer); 199 | have_encrypt_info = true; 200 | decryption_supported = crypt.decryptionSupported(); 201 | } 202 | } 203 | p_trailer->dict->deleteItem("Encrypt"); 204 | } 205 | PdfObject *prev = p_trailer->dict->get("Prev"); 206 | if (prev){ 207 | if (prev->type!=PDF_OBJ_INT){ 208 | message(FATAL,"Object in dict of trailer Prev is not int"); 209 | return false; 210 | } 211 | if (not getPdfTrailer(f, line, prev->integer)){ 212 | return false; 213 | } // this->trailer = Prev trailer, p_trailer = current trailer 214 | p_trailer->dict->deleteItem("Prev"); 215 | } 216 | if (not repair_mode) 217 | p_trailer->dict->filter(trailer_filter); 218 | this->trailer->dict->merge(p_trailer->dict); 219 | delete p_trailer; 220 | return true; 221 | } 222 | 223 | bool PdfDocument:: getAllPages(MYFILE *f) 224 | { 225 | PdfObject *pobj = trailer->dict->get("Root");//get Catalog 226 | 227 | if (not isRef(pobj)) { 228 | message(FATAL,"Trailer dictionary doesn't contain Root entry"); 229 | } 230 | pobj = obj_table.getObject(pobj->indirect.major, pobj->indirect.minor); 231 | if (not repair_mode) 232 | pobj->dict->filter(catalog_filter); 233 | pobj = pobj->dict->get("Pages"); 234 | if (not isRef(pobj)){ 235 | message(FATAL,"Catalog dictionary dosn't contain Pages entry"); 236 | } 237 | bool ret_val = getPdfPages(f, pobj->indirect.major, pobj->indirect.minor); 238 | return ret_val; 239 | } 240 | 241 | bool PdfDocument:: open (const char *fname) 242 | { 243 | MYFILE *f; 244 | char iobuffer[LLEN]; 245 | 246 | if (!file_exist(fname)){ 247 | message(FATAL,"File '%s' not found", fname); 248 | } 249 | if ((f=myfopen(fname, "rb"))==NULL){ 250 | return false; 251 | } 252 | filename = fname; 253 | if (not getPdfHeader(f,iobuffer)){ 254 | message(ERROR, "failed to read PDF header"); 255 | myfclose(f); 256 | return false; 257 | } 258 | message(LOG, fname); 259 | if (not getPdfTrailer(f,iobuffer,-1)){ 260 | message(ERROR, "failed to read PDF trailer"); 261 | 
myfclose(f); 262 | return false; 263 | } 264 | if (encrypted){ 265 | if (have_encrypt_info){ 266 | decrypt("");// if user password is empty, we can decrypt it 267 | return true; 268 | } 269 | return false; 270 | } 271 | obj_table.readObjects(f); 272 | getAllPages(f); 273 | myfclose(f); 274 | 275 | debug(" Version : %d.%d", v_major, v_minor); 276 | debug(" Objects : %d", obj_table.table.size()); 277 | message(LOG, " Pages : %d", page_list.count()); 278 | return true; 279 | } 280 | 281 | bool PdfDocument:: decrypt(const char *password) 282 | { 283 | MYFILE *f; 284 | if ((f=myfopen(filename, "rb"))==NULL){ 285 | return false; 286 | } 287 | if (!decryption_supported){ 288 | message(ERROR, "decryption is not supported for this PDF"); 289 | return false; 290 | } 291 | // if object table is loaded, decrypt all objects in object table 292 | if (not crypt.authenticate(password)){ 293 | if (strlen(password)!=0) 294 | message(ERROR, "Incorrect password !"); 295 | return false; 296 | } 297 | obj_table.readObjects(f); 298 | 299 | for (int obj_no=0; obj_nodict->get("Type"); 321 | if (not isName(pages_type)){ 322 | message(FATAL,"Pages or Page dictionary dosn't contain /Type entry"); 323 | } 324 | /*Pages node*/ 325 | if (strcmp("Pages", pages_type->name)==0){ 326 | // get paper size and cropbox 327 | mediabox = pages->dict->get("MediaBox"); 328 | cropbox = pages->dict->get("CropBox"); 329 | // get all childs, each child may be a Pages Node, or a Page Object 330 | kids = derefObject(pages->dict->get("Kids"), obj_table); 331 | 332 | if (not isArray(kids)){ 333 | message(FATAL,"Pages dictionary doesn't contain /Kids entry"); 334 | } 335 | resources = derefObject(pages->dict->get("Resources"), obj_table); 336 | 337 | for (auto kid=kids->array->begin(); kid!=kids->array->end(); kid++) 338 | { 339 | if ((*kid)->type!=PDF_OBJ_INDIRECT_REF){ 340 | message(FATAL,"Kids array item is not indirect ref object"); 341 | } 342 | child_pg = obj_table.getObject((*kid)->indirect.major, 
(*kid)->indirect.minor); 343 | // copy MediaBox and CropBox of Pages Node to child node 344 | if (mediabox and child_pg->dict->get("MediaBox")==NULL){ 345 | child_pg->dict->newItem("MediaBox")->copyFrom(mediabox); 346 | } 347 | if (cropbox and child_pg->dict->get("CropBox")==NULL){ 348 | child_pg->dict->newItem("CropBox")->copyFrom(cropbox); 349 | } 350 | // add resources of Pages Node to child page Resources 351 | if (isDict(resources)){ 352 | // child has Resources entry, merge with parent's Resources Dict 353 | if ((child_resources = child_pg->dict->get("Resources"))!=NULL){ 354 | child_resources = derefObject(child_resources, obj_table); 355 | assert(child_resources->type==PDF_OBJ_DICT); 356 | // both resources may be same indirect obj, no need to merge then 357 | if (resources != child_resources){ 358 | PdfObject *new_res = new PdfObject(); 359 | new_res->copyFrom(resources); 360 | new_res->dict->merge(child_resources->dict); 361 | child_pg->dict->deleteItem("Resources"); 362 | child_pg->dict->add("Resources", new_res); 363 | } 364 | } 365 | else {// child doesn't have Resources entry, copy all Resources from parent 366 | child_pg->dict->newItem("Resources")->copyFrom(resources); 367 | } 368 | } 369 | getPdfPages(f, (*kid)->indirect.major, (*kid)->indirect.minor); 370 | } 371 | return true; 372 | } 373 | /*Page leaf*/ 374 | if (strcmp("Page", pages_type->name)==0){ 375 | PdfPage new_page; 376 | /* Page Boundaries are very confusing. There are 4 types of Boundaries 377 | MediaBox = Paper Size on which page is printed 378 | CropBox = this is the displayed page size in Viewer. When printed, outside 379 | this area is not printed. The printer fits CropBox inside printable 380 | area of paper when FitToPaper is on in printer settings. 381 | TrimBox = Same as CropBox When FitToPage is on in printer, otherwise no effect. 
382 | BleedBox and ArtBpx has no effect either in printer or in viewer 383 | */ 384 | if (!new_page.paper.getFromObject(pages->dict->get("MediaBox"), obj_table)) { 385 | message(FATAL, "Page does not have MediaBox entry"); 386 | } 387 | // in a pdfviewer, the visible page size is the CropBox 388 | Rect cropbox; 389 | if (cropbox.getFromObject(pages->dict->get("CropBox"), obj_table)){ 390 | new_page.paper = cropbox; 391 | } 392 | if (not repair_mode) 393 | pages->dict->filter(page_filter); 394 | new_page.major = major; 395 | new_page.minor = minor; 396 | new_page.doc = this; 397 | page_list.append(new_page); 398 | return true; 399 | } 400 | message(FATAL,"PdfDocument::getPdfPages : Object isn't Page or Pages"); 401 | return false; 402 | } 403 | 404 | #define NODE_MAX 50 405 | /* distribute the pages in a tree, so that each Pages node contains maximum of 406 | 50 childs (Page objects or Pages nodes). So, if document contains 100 pages, 407 | at first run it creates 50 Pages nodes, it is run once again recursively and 408 | create a single Pages node that contains previous 50 nodes 409 | @arg nodes -> array of object numbers of pages 410 | @arg pages_count -> count of members in nodes array 411 | */ 412 | static int makePagesTree(int *nodes, int pages_count, ObjectTable &obj_table) 413 | { 414 | PdfObject *node, *page, *kids, *pobj, *count_obj; 415 | int nodes_count, count, major=0; 416 | // calculate how many nodes we need to contain all pages 417 | nodes_count = (pages_count/NODE_MAX)+((pages_count%NODE_MAX)?1:0); 418 | for (int i=0; ireadFromString("<< /Type /Pages /Count 0 /Kids [ ] /Parent 0 0 R >>"); 422 | major = obj_table.addObject(node); 423 | kids = node->dict->get("Kids"); 424 | count = 0; 425 | for (int j=0; j=pages_count){ 428 | break; 429 | } 430 | page = obj_table[ nodes[pg_num] ].obj;// Page leaf or Pages Node 431 | if (((pobj=page->dict->get("Count"))!=NULL) && isInt(pobj)){ 432 | count += pobj->integer; 433 | } 434 | else{ 435 | count++; 436 | } 437 
| pobj = page->dict->get("Parent"); 438 | pobj->indirect.major = major; 439 | pobj->indirect.minor = obj_table[major].minor; 440 | 441 | // add the ref of Page obj to Kids array of Pages node 442 | pobj = new PdfObject(); 443 | pobj->setType(PDF_OBJ_INDIRECT_REF); 444 | pobj->indirect.major = nodes[pg_num]; 445 | pobj->indirect.minor = obj_table[ nodes[pg_num] ].minor; 446 | kids->array->append(pobj); 447 | } 448 | count_obj = node->dict->get("Count"); 449 | count_obj->integer = count; 450 | // add this node to nodes array, so this function can be run recursively 451 | nodes[i] = major; 452 | } 453 | if (nodes_count>1) { 454 | return makePagesTree(nodes, nodes_count, obj_table); 455 | } 456 | if (nodes_count==1) { 457 | node = obj_table[ nodes[0] ].obj; 458 | node->dict->deleteItem("Parent"); 459 | return major; 460 | } 461 | return -1;//nodes_count==0 462 | } 463 | 464 | 465 | void PdfDocument:: putPdfPages() 466 | { 467 | PdfObject *pobj; 468 | 469 | if (page_list.count()<1){ 470 | message(FATAL, "Cannot create PDF with zero pages"); 471 | } 472 | int *nodes = (int*) malloc2(sizeof(int) * page_list.count()); 473 | int count=0; 474 | // store major nums in nodes array, which is required for creating pages tree 475 | for (auto page=page_list.begin(); page!=page_list.end(); page++,count++){ 476 | nodes[count] = page->major; 477 | pobj = obj_table.getObject(page->major, page->minor); 478 | // set paper size in Page Dict 479 | page->paper.setToObject(pobj->dict->newItem("MediaBox")); 480 | } 481 | makePagesTree(nodes, count, obj_table); 482 | 483 | // get catalog object from trailer, 484 | pobj = trailer->dict->get("Root"); 485 | pobj = obj_table[pobj->indirect.major].obj; 486 | // get Pages node obj from catalog 487 | pobj = pobj->dict->get("Pages"); 488 | // set reference of Pages Node to root of pages tree 489 | pobj->indirect.major = nodes[0]; 490 | pobj->indirect.minor = obj_table[ nodes[0] ].minor; 491 | free(nodes); 492 | } 493 | 494 | bool PdfDocument:: save 
(const char *filename) 495 | { 496 | PdfObject *pobj; 497 | FILE *f = stdout; 498 | 499 | if (strcmp(filename,"-")!=0){ 500 | f = fopen(filename,"wb"); 501 | if (f==NULL){ 502 | message(ERROR, "Cannot open for writing file '%s'",filename); 503 | return false; 504 | } 505 | } 506 | // write header 507 | fprintf(f, "%%PDF-%d.%d\n", v_major, v_minor); 508 | // second line of file should contain at least 4 non-ASCII characters in 509 | char binary[] = {(char)0xDE,(char)0xAD,' ',(char)0xBE,(char)0xEF,'\n',0}; 510 | fprintf(f, "%s", binary); 511 | // build Pages tree, and insert root Pages node in Catalog 512 | applyTransformations();// apply transformation matrix of all pages 513 | putPdfPages(); 514 | deleteUnusedObjects(*this);//remove unused objects from object table 515 | obj_table.writeObjects(f); 516 | // write cross reference table 517 | long xref_poz = ftell(f); 518 | obj_table.writeXref(f); 519 | // write trailer dictionary 520 | fprintf(f, "trailer\n"); 521 | pobj = trailer->dict->get("Size"); 522 | pobj->integer = obj_table.count(); 523 | trailer->write(f); 524 | // startxref, xref offset, and %%EOF must be in three separate lines 525 | fprintf(f, "\nstartxref\n%ld\n%%%%EOF\n", xref_poz); 526 | fclose(f); 527 | return true; 528 | } 529 | 530 | // insert parameter doc structure into current doc structure 531 | void 532 | PdfDocument:: mergeDocument(PdfDocument &doc) 533 | { 534 | int offset = obj_table.count(); 535 | // new obj_table size is one less than size of the two tables. 536 | // because, we dont need to copy first item of the second obj_table. 
537 | obj_table.expandToFit(obj_table.count()+doc.obj_table.count()-1); 538 | 539 | for (int i=1; itype){ 562 | case PDF_OBJ_BOOL: 563 | case PDF_OBJ_INT: 564 | case PDF_OBJ_REAL: 565 | case PDF_OBJ_STR: 566 | case PDF_OBJ_NAME: 567 | case PDF_OBJ_NULL: 568 | return; 569 | case PDF_OBJ_ARRAY: 570 | for (auto it : *obj->array) { 571 | flag_used_objects(it, table); 572 | } 573 | return; 574 | case PDF_OBJ_DICT: 575 | for (auto it : *obj->dict){ 576 | flag_used_objects(it.second, table); 577 | } 578 | return; 579 | case PDF_OBJ_STREAM: 580 | for (auto it : obj->stream->dict){ 581 | flag_used_objects(it.second, table); 582 | } 583 | return; 584 | case PDF_OBJ_INDIRECT_REF: 585 | if (table[obj->indirect.major].used){ 586 | return; 587 | } 588 | if (table[obj->indirect.major].obj==NULL){ 589 | // in some bad pdfs even if the object is free, the object is referenced 590 | debug("warning : referencing free obj : %d %d R", obj->indirect.major, obj->indirect.minor); 591 | obj->type = PDF_OBJ_NULL; 592 | return; 593 | } 594 | table[obj->indirect.major].used = true; 595 | flag_used_objects(table[obj->indirect.major].obj, table); 596 | return; 597 | default: 598 | assert(0); 599 | } 600 | } 601 | 602 | // Replace old references with new references of same object 603 | static void update_obj_ref(PdfObject *obj, ObjectTable &table) 604 | { 605 | switch (obj->type){ 606 | case PDF_OBJ_BOOL: 607 | case PDF_OBJ_INT: 608 | case PDF_OBJ_REAL: 609 | case PDF_OBJ_STR: 610 | case PDF_OBJ_NAME: 611 | case PDF_OBJ_NULL: 612 | return; 613 | case PDF_OBJ_ARRAY: 614 | for (auto it : *obj->array) { 615 | update_obj_ref(it, table); 616 | } 617 | return; 618 | case PDF_OBJ_DICT: 619 | for (auto it : *obj->dict){ 620 | update_obj_ref(it.second, table); 621 | } 622 | return; 623 | case PDF_OBJ_STREAM: 624 | for (auto it : obj->stream->dict){ 625 | update_obj_ref(it.second, table); 626 | } 627 | return; 628 | case PDF_OBJ_INDIRECT_REF: 629 | obj->indirect.minor = table[obj->indirect.major].minor; 
630 | obj->indirect.major = table[obj->indirect.major].major; 631 | return; 632 | default: 633 | assert(0); 634 | } 635 | } 636 | 637 | static void updateRefs(PdfDocument &doc) 638 | { 639 | for (auto& page : doc.page_list) {// page.minor must be set before page.major 640 | page.minor = doc.obj_table[page.major].minor; 641 | page.major = doc.obj_table[page.major].major; 642 | } 643 | update_obj_ref(doc.trailer, doc.obj_table); 644 | for (int i=1; i (page_list.count()+1)) { 712 | message(WARN, "newBlankPage() : invalid page num %d", page_num); 713 | return false; 714 | } 715 | PdfObject *page, *content; 716 | 717 | page = new PdfObject(); 718 | // to set this page as compressed=false, Resources dict must be present (even empty) 719 | assert(page->readFromString("<< /Type /Page /Parent 3 0 R /Resources << >> \n >> ")); 720 | // create an empty stream object and add to obj_table, then use it as content stream 721 | content = new PdfObject(); 722 | content->setType(PDF_OBJ_STREAM); 723 | int major = obj_table.addObject(content); 724 | 725 | content = page->dict->newItem("Contents"); 726 | content->setType(PDF_OBJ_INDIRECT_REF); 727 | content->indirect.major = major; 728 | content->indirect.minor = obj_table[major].minor; 729 | 730 | major = obj_table.addObject(page); 731 | PdfPage p_page; 732 | p_page.major = major; 733 | p_page.minor = obj_table[major].minor; 734 | p_page.compressed = false; 735 | p_page.doc = this; 736 | int ref_page_num = page_num;// if odd page and not last page, use next page size 737 | // if new page is last page or page no. 
is even, use prev page size 738 | if (page_num > page_list.count() or page_num%2==0) 739 | ref_page_num = page_num-1; 740 | // use the same cropbox and papersize as reference page 741 | p_page.paper = page_list[ref_page_num-1].paper; 742 | 743 | if (page_num > page_list.count()) { 744 | page_list.append(p_page); 745 | } 746 | else { 747 | page_list.array.emplace(page_list.array.begin()+(page_num-1), p_page);//c++11 748 | } 749 | return true; 750 | } 751 | 752 | Font 753 | PdfDocument:: newFontObject(const char *font_name) 754 | { 755 | Font font; 756 | if (font_name == NULL){ 757 | font_name = "Helvetica"; 758 | } 759 | else if (standard_fonts.count(font_name)==0) { 760 | message(LOG, "'%s' is not a standard font, using Helvetica Font instead", font_name); 761 | font_name = "Helvetica"; 762 | } 763 | char *str; 764 | asprintf(&str,"<< /Type /Font /Subtype /Type1 /BaseFont /%s /Name /F%s /Encoding /MacRomanEncoding >>",font_name, font_name); 765 | PdfObject *font_obj = new PdfObject(); 766 | assert(font_obj->readFromString(str)); 767 | free(str); 768 | font.major = obj_table.addObject(font_obj); 769 | font.minor = obj_table[font.major].minor; 770 | font.name = font_name; 771 | return font; 772 | } 773 | 774 | static void pdf_stream_prepend(PdfObject *stream, const char *str, int len) 775 | { 776 | if (len==0 or str==NULL) 777 | return; 778 | char *new_stream = (char*) malloc2(len + stream->stream->len); 779 | memcpy(new_stream, str, len); 780 | if (stream->stream->len!=0) { 781 | memcpy(new_stream+len, stream->stream->stream, stream->stream->len); 782 | } 783 | if (stream->stream->stream){ 784 | free(stream->stream->stream); 785 | } 786 | stream->stream->stream = new_stream; 787 | stream->stream->len += len; 788 | } 789 | 790 | // get a stream object and append a char stream to it 791 | static void pdf_stream_append(PdfObject *stream, const char *str, int len) 792 | { 793 | if (len==0 or str==NULL) 794 | return; 795 | int old_len = stream->stream->len; 796 | 
stream->stream->len += len; 797 | stream->stream->stream = (char*) realloc(stream->stream->stream, stream->stream->len); 798 | if (stream->stream->stream==NULL) 799 | message(FATAL, "realloc() failed !"); 800 | char *ptr = stream->stream->stream + old_len; 801 | memcpy(ptr, str, len); 802 | } 803 | 804 | /* 805 | takes a content stream obj and a page obj, creates a new xobject, then copy the 806 | content stream and resources of page to the xobject 807 | contents = a content stream (stream obj) 808 | contents must be direct or indirect stream obj 809 | */ 810 | static int 811 | stream_to_xobj (PdfObject *contents, PdfObject *page, Rect &bbox, ObjectTable &obj_table) 812 | { 813 | PdfObject * xobj, *tmp, *pg_res, *xobj_res; 814 | 815 | while (isRef(contents)){ 816 | contents = obj_table.getObject(contents->indirect.major, contents->indirect.minor); 817 | } 818 | assert(isStream(contents)); 819 | 820 | xobj = new PdfObject(); 821 | xobj->copyFrom(contents); 822 | 823 | tmp = new PdfObject; 824 | tmp->readFromString("<< /Type /XObject /Subtype /Form /FormType 1 >>"); 825 | bbox.setToObject(tmp->dict->newItem("BBox")); 826 | xobj->stream->dict.merge(tmp->dict); 827 | delete tmp; 828 | // copy page resources to xobject resources 829 | pg_res = derefObject(page->dict->get("Resources"), obj_table); 830 | 831 | if (pg_res!=NULL){ 832 | assert(pg_res->type==PDF_OBJ_DICT); 833 | xobj_res = xobj->stream->dict.newItem("Resources"); 834 | xobj_res->copyFrom(pg_res); 835 | } 836 | xobj->stream->dict.filter(xobject_filter); 837 | return obj_table.addObject(xobj); 838 | } 839 | 840 | /* first create a new page object, and add this to object table. get old page contents, 841 | create new XObject using the contents, and add it to object table. 
842 | */ 843 | static void pdf_page_to_xobj (PdfPage *page) 844 | { 845 | PdfDocument *doc = page->doc; 846 | int major; 847 | PdfObject *new_page, *new_page_contents, *new_page_xobject, 848 | *contents, *cont, *pg, *xobj, *xobj_val; 849 | char * xobjname; 850 | char * stream_content = NULL; 851 | static int revision = 1; 852 | if (not page->compressed)// we have already converted to xobj, nothing to do 853 | return; 854 | 855 | //get_page_object 856 | pg = doc->obj_table.getObject(page->major, page->minor); 857 | 858 | // create new_page object 859 | new_page = new PdfObject(); 860 | new_page->readFromString("<> >> >>"); 861 | //add new page to xref table 862 | major = doc->obj_table.addObject(new_page); 863 | page->major = major; 864 | page->minor = doc->obj_table[major].minor; 865 | page->compressed = false; 866 | 867 | new_page_xobject = new_page->dict->get("Resources")->dict->get("XObject"); 868 | 869 | cont = derefObject(pg->dict->get("Contents"), doc->obj_table);// it may be null 870 | 871 | if (isStream(cont)){ 872 | major = stream_to_xobj(cont, pg, page->paper, doc->obj_table); 873 | asprintf(&xobjname, "xo%d", revision++); 874 | xobj_val = new_page_xobject->dict->newItem(xobjname); 875 | 876 | xobj_val->setType(PDF_OBJ_INDIRECT_REF); 877 | xobj_val->indirect.major = major; 878 | xobj_val->indirect.minor = doc->obj_table[major].minor; 879 | 880 | asprintf(&stream_content,"q /%s Do Q", xobjname); 881 | free(xobjname); 882 | } 883 | else if (isArray(cont)) { 884 | // array contains indirect objects of streams. Join all streams 885 | // to create a single merged stream. 
create xobject from that stream 886 | if (cont->array->count()==0){// if empty array, nothing to do 887 | goto empty_cont; 888 | } 889 | PdfObject *tmp_stream = NULL; 890 | PdfObject *new_stream = new PdfObject; 891 | new_stream->setType(PDF_OBJ_STREAM); 892 | 893 | for (auto it = cont->array->begin(); it!=cont->array->end(); it++) 894 | { 895 | tmp_stream = derefObject((*it), doc->obj_table);//decompressed stream 896 | if (not tmp_stream->stream->decompress() ){ 897 | message(FATAL, "Can not decompress content stream"); 898 | } 899 | pdf_stream_append(new_stream, " ", 1); 900 | pdf_stream_append(new_stream, tmp_stream->stream->stream, 901 | tmp_stream->stream->len); 902 | } 903 | major = stream_to_xobj(new_stream, pg, page->paper, doc->obj_table); 904 | 905 | xobj = doc->obj_table.getObject(major, doc->obj_table[major].minor); 906 | assert( xobj->stream->compress("FlateDecode") ); 907 | // each time different xobject rev numbers are used, so that we can 908 | // join content streams of two pages without conflict 909 | asprintf(&xobjname, "xo%d", revision++); 910 | xobj_val = new_page_xobject->dict->newItem(xobjname); 911 | 912 | xobj_val->setType(PDF_OBJ_INDIRECT_REF); 913 | xobj_val->indirect.major = major; 914 | xobj_val->indirect.minor = doc->obj_table[major].minor; 915 | 916 | asprintf(&stream_content,"q /%s Do Q", xobjname); 917 | free(xobjname); 918 | } 919 | else { 920 | message(WARN, "Page contents is neither stream nor array obj"); 921 | empty_cont: 922 | asprintf(&stream_content, " "); 923 | } 924 | //create content stream for new page 925 | contents = new PdfObject(); 926 | contents->setType(PDF_OBJ_STREAM); 927 | contents->stream->len = strlen(stream_content); 928 | contents->stream->stream = stream_content; 929 | // add content stream to object table 930 | major = doc->obj_table.addObject(contents); 931 | 932 | new_page_contents = new_page->dict->get("Contents"); 933 | new_page_contents->indirect.major = major; 934 | new_page_contents->indirect.minor 
= doc->obj_table[major].minor; 935 | } 936 | 937 | 938 | // *************----------- PdfPage Object -------------************* 939 | 940 | // this constructor is called in getPdfPages() and newBlankPage() function 941 | PdfPage:: PdfPage() 942 | { 943 | major = 0; 944 | compressed = true; 945 | doc = NULL; 946 | } 947 | 948 | Rect 949 | PdfPage:: pageSize() 950 | { 951 | return paper; 952 | } 953 | 954 | void 955 | PdfPage:: drawLine (Point begin, Point end, float width) 956 | { 957 | PdfObject *page_obj, *cont; 958 | char *cmd; 959 | 960 | asprintf(&cmd, "\nq %g w %g %g m %g %g l S Q", width, begin.x, begin.y, end.x, end.y); 961 | applyTransformation(); 962 | // convert to xobject so that drawing commands can be appended 963 | pdf_page_to_xobj(this); 964 | page_obj = doc->obj_table.getObject(this->major, this->minor); 965 | // create new stream by joining page stream and line drawing commands 966 | cont = page_obj->dict->get("Contents"); 967 | cont = doc->obj_table.getObject(cont->indirect.major, cont->indirect.minor); 968 | pdf_stream_append(cont, cmd, strlen(cmd)); 969 | free(cmd); 970 | } 971 | 972 | /* when used as resources, F is prepended before font name. 973 | if font.name is "Helvetica", its name used in resource dict is FHelvetica. 974 | As only base fonts are used, we can safely merge font dictionary and content stream. 975 | That means if the page contains /FHelvetica already, and we are drawing text using 976 | Helvetica fonts, this will replace and add the same /FHelvetica item, and both font 977 | objects will be identical. 
978 | */ 979 | void 980 | PdfPage:: drawText (const char *text, Point &pos, int size, Font font) 981 | { 982 | char *str; 983 | PdfObject *font_obj, *page, *stream, *cont, *res, *font_dict; 984 | 985 | applyTransformation(); 986 | pdf_page_to_xobj(this); 987 | page = doc->obj_table.getObject(this->major, this->minor); 988 | // /Resources << /Font << /FHelvetica 4 0 R >> XObject <> >> 989 | res = page->dict->get("Resources"); 990 | font_dict = res->dict->get("Font"); 991 | if (not font_dict) { 992 | font_dict = res->dict->newItem("Font"); 993 | font_dict->setType(PDF_OBJ_DICT); 994 | } 995 | asprintf(&str, "F%s", font.name); 996 | font_obj = font_dict->dict->newItem(str); 997 | font_obj->setType(PDF_OBJ_INDIRECT_REF); 998 | font_obj->indirect.major = font.major; 999 | font_obj->indirect.minor = font.minor; 1000 | free(str); 1001 | 1002 | cont = page->dict->get("Contents"); 1003 | stream = doc->obj_table.getObject(cont->indirect.major, cont->indirect.minor); 1004 | // we dont want trailing zeros in a float, so we used %g instead of %f 1005 | asprintf(&str, "\nq BT /F%s %d Tf %g %g Td (%s) Tj ET Q", font.name, size, pos.x, pos.y, text); 1006 | pdf_stream_append(stream, str, strlen(str)); 1007 | free(str); 1008 | } 1009 | 1010 | void 1011 | PdfPage:: crop (Rect box) 1012 | { 1013 | PdfObject *page_obj, *cont; 1014 | char *cmd; 1015 | 1016 | asprintf(&cmd, "q %g %g %g %g re W n\n", box.left.x, box.left.y, box.right.x-box.left.x, box.right.y-box.left.y); 1017 | applyTransformation(); 1018 | // convert to xobject so that drawing commands can be appended 1019 | pdf_page_to_xobj(this); 1020 | page_obj = doc->obj_table.getObject(this->major, this->minor); 1021 | // create new stream by joining page stream and crop commands 1022 | cont = page_obj->dict->get("Contents"); 1023 | cont = doc->obj_table.getObject(cont->indirect.major, cont->indirect.minor); 1024 | pdf_stream_prepend(cont, cmd, strlen(cmd)); 1025 | pdf_stream_append(cont, " Q", 2); 1026 | free(cmd); 1027 | } 1028 | 
1029 | void 1030 | PdfPage:: mergePage (PdfPage &p2) 1031 | { 1032 | PdfObject *page1, *page2, *res1, *res2, *cont, *stream1, *stream2; 1033 | 1034 | applyTransformation(); 1035 | p2.applyTransformation(); 1036 | pdf_page_to_xobj(this); 1037 | pdf_page_to_xobj(&p2); 1038 | 1039 | page1 = doc->obj_table.getObject(this->major, this->minor); 1040 | page2 = doc->obj_table.getObject(p2.major, p2.minor); 1041 | //page2->write(stdout); 1042 | // pages has been already converted to xobject. So xobjects and fonts are the 1043 | // only resources of page objects. No two different XObjects or Fonts have 1044 | // same name. So we can merge the Resources dicts safely 1045 | res1 = page1->dict->get("Resources"); 1046 | res2 = page2->dict->get("Resources"); 1047 | //res2->write(stdout); 1048 | res1->dict->merge(res2->dict); 1049 | 1050 | cont = page1->dict->get("Contents"); 1051 | stream1 = doc->obj_table.getObject(cont->indirect.major, cont->indirect.minor); 1052 | cont = page2->dict->get("Contents"); 1053 | stream2 = doc->obj_table.getObject(cont->indirect.major, cont->indirect.minor); 1054 | 1055 | pdf_stream_append(stream1, " ", 1); 1056 | pdf_stream_append(stream1, stream2->stream->stream, stream2->stream->len); 1057 | } 1058 | 1059 | /* Apply the transformation matrix in PdfPage if the matrix is not unity matrix 1060 | transform_page() must be called before save doc, draw line, draw text and crop page. 
1061 | */ 1062 | void 1063 | PdfPage:: applyTransformation() 1064 | { 1065 | if (this->matrix.isIdentity()) { 1066 | return; 1067 | } 1068 | PdfObject *page_obj, *stream; 1069 | char *str; 1070 | 1071 | page_obj = doc->obj_table.getObject(this->major, this->minor); 1072 | 1073 | stream = page_obj->dict->get("Contents"); 1074 | stream = doc->obj_table.getObject(stream->indirect.major, stream->indirect.minor); 1075 | if (stream->stream->len==0){ 1076 | return; 1077 | } 1078 | asprintf(&str, "q %s %s %s %s %s %s cm\n", 1079 | double2str(matrix.mat[0][0]).c_str(), double2str(matrix.mat[0][1]).c_str(), 1080 | double2str(matrix.mat[1][0]).c_str(), double2str(matrix.mat[1][1]).c_str(), 1081 | double2str(matrix.mat[2][0]).c_str(), double2str(matrix.mat[2][1]).c_str() ); 1082 | 1083 | pdf_stream_prepend(stream, str, strlen(str)); 1084 | pdf_stream_append(stream, " Q", 2); 1085 | free(str); 1086 | 1087 | Matrix identity_matrix; 1088 | this->matrix = identity_matrix; 1089 | } 1090 | 1091 | /* transforms page content, bounding box and paper using the given matrix */ 1092 | void 1093 | PdfPage:: transform (Matrix mat) 1094 | { 1095 | pdf_page_to_xobj(this); 1096 | // transform page content 1097 | matrix.multiply(mat); 1098 | // transform paper 1099 | mat.transform(paper); 1100 | } 1101 | 1102 | -------------------------------------------------------------------------------- /src/pdf_objects.cpp: -------------------------------------------------------------------------------- 1 | /* This file is a part of pdfcook program, which is GNU GPLv2 licensed */ 2 | #include "common.h" 3 | #include "pdf_objects.h" 4 | #include 5 | #include 6 | #include "debug.h" 7 | #include "pdf_filters.h" 8 | 9 | 10 | // *********** ------------- Array Object ----------------- *********** 11 | //allows range based for loop 12 | ArrayIter ArrayObj:: begin() { 13 | return array.begin(); 14 | } 15 | ArrayIter ArrayObj:: end() { 16 | return array.end(); 17 | } 18 | int ArrayObj:: count () { 19 | return 
array.size(); 20 | } 21 | PdfObject* ArrayObj:: at (int index) { 22 | return array[index]; 23 | } 24 | void ArrayObj:: append (PdfObject *item) { 25 | array.push_back(item); 26 | } 27 | 28 | void ArrayObj:: deleteItems() 29 | { 30 | for (PdfObject *item : array){ 31 | delete item; 32 | } 33 | array.clear(); 34 | } 35 | 36 | int ArrayObj:: write (FILE *f) 37 | { 38 | int ret_val = fprintf(f, "[ "); 39 | for (PdfObject *obj : this->array){ 40 | ret_val = obj->write(f); 41 | ret_val = fprintf(f, " "); 42 | } 43 | ret_val = fprintf(f, "]"); 44 | return ret_val<0?ret_val:0; 45 | } 46 | 47 | // *********** ------------ Dictionary Object -------------- *********** 48 | void DictObj:: setDict (std::map &map){ 49 | this->dict = map; 50 | } 51 | 52 | bool DictObj:: contains (std::string key) { 53 | return (dict.count(key) > 0); 54 | } 55 | 56 | PdfObject* DictObj:: get (std::string key) { 57 | if (dict.count(key) < 1) 58 | return NULL; 59 | return dict[key]; 60 | } 61 | 62 | void DictObj:: add (std::string key, PdfObject *val) { 63 | dict[key] = val; 64 | }; 65 | 66 | PdfObject* DictObj:: newItem (std::string key) 67 | { 68 | if (dict.count(key) > 0) { 69 | dict[key]->clear(); 70 | } 71 | else { 72 | dict[key] = new PdfObject(); 73 | } 74 | return dict[key]; 75 | } 76 | 77 | // hard copy all items from src_dict to this, overwrite if exists 78 | // this dict and src_dict must be different object, otherwise will cause segfault 79 | void DictObj:: merge(DictObj *src_dict) 80 | { 81 | for (auto it : src_dict->dict) { 82 | // if val of key is dict obj, merge the dicts 83 | if (this->contains(it.first) && dict[it.first]->type==PDF_OBJ_DICT && 84 | it.second->type==PDF_OBJ_DICT) { 85 | dict[it.first]->dict->merge(it.second->dict); 86 | continue; 87 | } 88 | PdfObject *item = this->newItem(it.first); 89 | item->copyFrom(it.second); 90 | } 91 | } 92 | 93 | void DictObj:: filter(DictFilter &filter_set) 94 | { 95 | for (auto it=dict.begin(); it!=dict.end();) { 96 | std::string key = 
it->first; 97 | PdfObject *val = it->second; 98 | it++;//it must be placed before dict.erase() 99 | if (filter_set.count(key) == 0) { 100 | delete val; 101 | dict.erase(key); 102 | } 103 | } 104 | } 105 | 106 | void DictObj:: deleteItem (std::string key) 107 | { 108 | if (dict.count(key) > 0) { 109 | PdfObject *val = dict[key]; 110 | delete val; 111 | dict.erase(key); 112 | } 113 | } 114 | 115 | void DictObj:: deleteItems() 116 | { 117 | for (auto it : dict) { 118 | delete it.second; 119 | } 120 | dict.clear(); 121 | } 122 | 123 | int DictObj:: write (FILE *f) 124 | { 125 | fprintf(f, "<<\n"); 126 | 127 | for (auto it : dict){ 128 | fprintf(f, "/%s ", it.first.c_str()); 129 | PdfObject *val = it.second; 130 | val->write(f); 131 | fprintf(f,"\n"); 132 | } 133 | fprintf(f,">>"); 134 | return 0; 135 | } 136 | 137 | MapIter DictObj:: begin() { 138 | return dict.begin(); 139 | } 140 | MapIter DictObj:: end() { 141 | return dict.end(); 142 | } 143 | 144 | PdfObject* DictObj:: operator[] (std::string key) { 145 | if (dict.count(key) < 1) 146 | return NULL; 147 | return dict[key]; 148 | } 149 | 150 | 151 | // *********** ------------- Stream Object ----------------- *********** 152 | 153 | static int get_correct_stream_len(MYFILE *f, size_t begin); 154 | 155 | 156 | StreamObj:: StreamObj() { 157 | stream = NULL; 158 | len = 0; 159 | decompressed = false; 160 | } 161 | 162 | int StreamObj:: write (FILE *f) 163 | { 164 | if (!dict.contains("Length")){ 165 | PdfObject *item = this->dict.newItem("Length"); 166 | item->type = PDF_OBJ_INT; 167 | } 168 | this->dict["Length"]->integer = this->len; 169 | this->dict.write(f); 170 | fprintf(f,"\nstream\n"); 171 | 172 | if (this->len){ 173 | assert (this->stream!=NULL);// TODO : remove it later 174 | if (fwrite(this->stream, 1, this->len, f) != this->len){ 175 | message(FATAL, "StreamObj : fwrite() error"); 176 | } 177 | } 178 | fprintf(f, "\nendstream"); 179 | return 0; 180 | } 181 | 182 | bool StreamObj:: decompress() 183 | { 184 | 
if (decompressed) 185 | return true; 186 | PdfObject *p_obj = this->dict["Filter"]; 187 | if (!p_obj or len==0) { 188 | decompressed = true; 189 | return true; 190 | } 191 | switch (p_obj->type){ 192 | case PDF_OBJ_ARRAY: 193 | { 194 | for (PdfObject *filter : *p_obj->array){ 195 | assert(filter->type==PDF_OBJ_NAME); 196 | if (apply_decompress_filter(filter->name, &(this->stream), &(this->len), this->dict) != 0){ 197 | message(WARN, "failed to apply decompress filter %s", filter->name); 198 | return false; 199 | } 200 | } 201 | break; 202 | } 203 | case PDF_OBJ_NAME: 204 | if (apply_decompress_filter(p_obj->name, &(this->stream), &(this->len), this->dict) != 0){ 205 | message(WARN, "failed to apply decompress filter %s", p_obj->name); 206 | return false; 207 | } 208 | break; 209 | default: // FIXME : it can be indirect object 210 | message(WARN, "could not decompress stream obj of type %d", p_obj->type); 211 | return false; 212 | } 213 | this->dict.deleteItem("Filter"); 214 | decompressed = true; 215 | return true; 216 | } 217 | 218 | bool StreamObj:: compress (const char *filter) 219 | { 220 | char *ch; 221 | if (len==0) 222 | return true; 223 | 224 | if (apply_compress_filter(filter, &(this->stream), &(this->len), this->dict) != 0){ 225 | return false; 226 | } 227 | PdfObject *filter_obj = this->dict.get("Filter"); 228 | 229 | if (!filter_obj) { 230 | filter_obj = this->dict.newItem("Filter"); 231 | asprintf(&ch,"/%s",filter); 232 | filter_obj->readFromString(ch); 233 | free(ch); 234 | } 235 | else {// already contains a filter 236 | switch (filter_obj->type){ 237 | case PDF_OBJ_ARRAY: 238 | { 239 | asprintf(&ch,"/%s",filter); 240 | PdfObject *array_item = new PdfObject(); 241 | array_item->readFromString(ch); 242 | free(ch); 243 | filter_obj->array->append(array_item); 244 | } 245 | break; 246 | case PDF_OBJ_NAME: 247 | asprintf(&ch, " [ /%s /%s ] ", filter_obj->name, filter); 248 | filter_obj->clear(); 249 | filter_obj->readFromString(ch); 250 | free(ch); 251 | 
break; 252 | default: 253 | assert(0); 254 | } 255 | } 256 | return true; 257 | } 258 | 259 | StreamObj:: ~StreamObj() { 260 | if (stream!=NULL){ 261 | free(stream); 262 | } 263 | dict.deleteItems(); 264 | } 265 | 266 | 267 | // *********** -------------- Pdf Object ----------------- *********** 268 | 269 | enum { 270 | DICT_KEY, 271 | DICT_VAL 272 | }; 273 | 274 | PdfObject:: PdfObject() { 275 | type = PDF_OBJ_UNKNOWN; 276 | } 277 | 278 | void 279 | PdfObject:: setType(ObjectType obj_type) 280 | { 281 | if (obj_type!=PDF_OBJ_UNKNOWN) 282 | this->clear(); 283 | type = obj_type; 284 | switch (type) 285 | { 286 | case PDF_OBJ_DICT: 287 | dict = new DictObj(); 288 | break; 289 | case PDF_OBJ_ARRAY: 290 | array = new ArrayObj(); 291 | break; 292 | case PDF_OBJ_STREAM: 293 | stream = new StreamObj(); 294 | break; 295 | case PDF_OBJ_INDIRECT: 296 | indirect.obj = new PdfObject(); 297 | break; 298 | default: 299 | break; 300 | } 301 | } 302 | 303 | // create a MYFILE from given string and call PdfObject::get() 304 | bool 305 | PdfObject:: readFromString (const char *str) 306 | { 307 | MYFILE *f = stropen(str); 308 | if (f==NULL){ 309 | return false; 310 | } 311 | bool retval = this->read(f, NULL, NULL); 312 | myfclose(f); 313 | return retval; 314 | } 315 | 316 | /*To read an obj at particular pos, seek file in that pos and call this function. 317 | Returns false if object is completely unusable and should be discarded. 318 | Returns true if obj is usable, even if not read properly. 319 | Dictionary and Array return false only if ending bracket not found before reaching EOF. 
320 | */ 321 | bool 322 | PdfObject:: read (MYFILE *f, ObjectTable *xref, Token *last_tok) 323 | { 324 | int stream_len = 0; 325 | size_t fpos/*, key_pos, nextkey_pos, val_pos*/; 326 | Token tok; 327 | if (last_tok==NULL){ 328 | last_tok=&tok; 329 | } 330 | //printf("get obj %ld\n", myftell(f)); 331 | while (last_tok->get(f)){ 332 | switch (last_tok->type){ 333 | case TOK_INT://maybe integer, indirect, or indirect reference obj 334 | { 335 | this->setType(PDF_OBJ_INT); 336 | this->integer = last_tok->integer; 337 | if (last_tok->sign){//it is integer, not indirect object 338 | return true; 339 | } 340 | fpos = myftell(f); 341 | last_tok->get(f); 342 | if (last_tok->type!=TOK_INT || last_tok->sign){// not indirect object 343 | last_tok->freeData(); 344 | if ( myfseek(f, fpos, SEEK_SET)==-1 ){ 345 | message(FATAL,"myfseek() error in file %s at line %d", __FILE__, __LINE__); 346 | } 347 | return true; 348 | } 349 | // we have two integers, check if there is 'obj' or 'R' next to it 350 | last_tok->get(f); 351 | if (last_tok->type!=TOK_ID){ 352 | last_tok->freeData(); 353 | if (myfseek(f,fpos,SEEK_SET)==-1){ 354 | message(FATAL,"myfseek() error in file %s at line %d",__FILE__, __LINE__); 355 | } 356 | return true; 357 | } 358 | this->indirect.major = this->integer; 359 | this->indirect.minor = last_tok->integer; 360 | 361 | if (strcmp(last_tok->id,"R")==0){ 362 | this->setType(PDF_OBJ_INDIRECT_REF); 363 | return true; 364 | } 365 | if (strcmp(last_tok->id,"obj")==0){ 366 | this->setType(PDF_OBJ_INDIRECT); 367 | if (not this->indirect.obj->read(f,xref,last_tok)){ 368 | debug("IndirectObj %d %d : failed to read", indirect.major, indirect.minor); 369 | return false; 370 | } 371 | last_tok->get(f); 372 | if (last_tok->type!=TOK_ID || strcmp(last_tok->id,"endobj")!=0){ 373 | debug("IndirectObj %d %d : endobj keyword not found", indirect.major, indirect.minor); 374 | } 375 | return true; 376 | } 377 | // two int numbers and a TOK_ID next to it other than 'R' and 'obj' 378 | 
if (myfseek(f,fpos,SEEK_SET)==-1){ 379 | message(FATAL,"myfseek() error in file %s at line %d",__FILE__,__LINE__); 380 | } 381 | return true; 382 | } 383 | case TOK_REAL: 384 | { 385 | this->setType(PDF_OBJ_REAL); 386 | this->real = last_tok->real; 387 | return true; 388 | } 389 | case TOK_NAME: 390 | { 391 | this->setType(PDF_OBJ_NAME); 392 | this->name = strdup(last_tok->name); 393 | return true; 394 | } 395 | case TOK_STR: 396 | { 397 | this->setType(PDF_OBJ_STR); 398 | this->str = last_tok->str; 399 | //this->str.type = last_tok->str.type; 400 | return true; 401 | } 402 | case TOK_BDICT:// dictionary or stream obj 403 | { 404 | std::map new_dict; 405 | PdfObject *obj, *val=NULL, *len_obj=NULL; 406 | std::string key; 407 | int next_obj = DICT_KEY; 408 | while ((obj = new PdfObject())) { 409 | if (not obj->read(f, xref, last_tok)) { 410 | delete obj; 411 | if (last_tok->type==TOK_EDICT or last_tok->type==TOK_EOF){ 412 | if (val) 413 | new_dict[key] = val; 414 | break; 415 | } 416 | next_obj = DICT_KEY;// if could not read key or val, next object should be key 417 | } 418 | else if (next_obj==DICT_KEY){ 419 | if (obj->type==PDF_OBJ_NAME){ 420 | if (val){ 421 | new_dict[key] = val; 422 | val = NULL; 423 | } 424 | key = obj->name; 425 | next_obj = DICT_VAL; 426 | } 427 | else if (val) {// have read object, but it is not PdfName 428 | delete val;// previous val is invalid 429 | val = NULL; 430 | } 431 | delete obj; 432 | } 433 | else {// next_obj==DICT_VAL 434 | val = obj; 435 | next_obj = DICT_KEY; 436 | } 437 | } 438 | if (last_tok->type==TOK_EOF){// last token should be TOK_EDICT 439 | debug("Dictionary : ending bracket not found"); 440 | this->setType(PDF_OBJ_DICT); 441 | this->dict->setDict(new_dict); 442 | return false; 443 | } 444 | fpos = myftell(f); 445 | // if dict has stream keyword, then it is stream object 446 | if ( (not last_tok->get(f)) 447 | || last_tok->type!=TOK_ID 448 | || strcmp(last_tok->id, "stream")!=0) { 449 | this->setType(PDF_OBJ_DICT); 450 
| this->dict->setDict(new_dict); 451 | myfseek(f, fpos, SEEK_SET); 452 | return true; 453 | } 454 | this->setType(PDF_OBJ_STREAM); 455 | this->stream->dict.setDict(new_dict); 456 | // if stream length is indirect obj, get length as integer 457 | if (new_dict.count("Length")==0){ 458 | debug("StreamObj : /Length key not found"); 459 | return false; 460 | } 461 | len_obj = new_dict["Length"]; 462 | if (len_obj->type==PDF_OBJ_INDIRECT_REF){ 463 | fpos = myftell(f); 464 | xref->readObject(f, len_obj->indirect.major); 465 | len_obj = xref->table[len_obj->indirect.major].obj; 466 | myfseek(f, fpos, SEEK_SET); 467 | } 468 | if (!isInt(len_obj)){ 469 | debug("StreamObj : invalid stream length obj type %d", len_obj->type); 470 | return false; 471 | } 472 | stream_len = len_obj->integer; 473 | this->stream->dict.deleteItem("Length"); 474 | // read stream after the newline 475 | switch (mygetc(f)){ 476 | case EOF: 477 | return false; 478 | case CHAR_CR: 479 | if (mygetc(f)!=CHAR_LF){ 480 | myungetc(f); 481 | } 482 | case CHAR_LF: 483 | break; 484 | default: 485 | myungetc(f); 486 | break; 487 | } 488 | this->stream->begin = myftell(f); 489 | read_stream: 490 | this->stream->len = stream_len; 491 | if (stream_len){ 492 | this->stream->stream = (char*) malloc(stream_len); 493 | if (this->stream->stream==NULL){ 494 | message(WARN,"StreamObj : failed to allocate memory of size %d", stream_len); 495 | this->stream->len = 0; 496 | return false; 497 | } 498 | if (myfread(this->stream->stream,1,stream_len,f)!=(size_t)stream_len){ 499 | message(WARN,"failed to read stream data of size %d at pos %d", 500 | stream_len, this->stream->begin); 501 | this->stream->len = 0; 502 | return false; 503 | } 504 | } 505 | 506 | if (not last_tok->get(f) 507 | || last_tok->type!=TOK_ID 508 | || strcmp(last_tok->id,"endstream")!=0)// may be wrong stream Length 509 | { 510 | stream_len = get_correct_stream_len(f, this->stream->begin); 511 | if (stream_len == -1){ 512 | debug("StreamObj : endstream 
keyword not found"); 513 | return false; 514 | } 515 | debug("StreamObj : fixing wrong value of stream length"); 516 | if (this->stream->stream){ 517 | free(this->stream->stream); 518 | this->stream->stream = NULL; 519 | } 520 | assert(myfseek(f, this->stream->begin, SEEK_SET)==0); 521 | goto read_stream; 522 | } 523 | return true; 524 | } 525 | case TOK_BARRAY: 526 | { 527 | this->setType(PDF_OBJ_ARRAY); 528 | // if start bracket is found, read until end bracked or EOF is reached 529 | PdfObject *item_obj; 530 | while ((item_obj = new PdfObject())){ 531 | if (item_obj->read(f,xref,last_tok)){ 532 | this->array->append(item_obj); 533 | } 534 | else { 535 | delete item_obj; 536 | if (last_tok->type==TOK_EARRAY || last_tok->type==TOK_EOF) 537 | break; 538 | } 539 | } 540 | if (last_tok->type!=TOK_EARRAY){ 541 | debug("Array : ending bracket not found"); 542 | return false; 543 | } 544 | return true; 545 | } 546 | case TOK_ID: 547 | { 548 | if (strcmp(last_tok->id,"null")==0){ 549 | this->setType(PDF_OBJ_NULL); 550 | return true; 551 | } 552 | if (strcmp(last_tok->id,"true")==0){ 553 | this->setType(PDF_OBJ_BOOL); 554 | this->boolean = true; 555 | return true; 556 | } 557 | if (strcmp(last_tok->id,"false")==0){ 558 | this->setType(PDF_OBJ_BOOL); 559 | this->boolean = false; 560 | return true; 561 | } 562 | debug("unknown id '%s'", last_tok->id); 563 | return false; 564 | } 565 | case TOK_EOF: 566 | case TOK_EARRAY: 567 | case TOK_EDICT: 568 | case TOK_UNKNOWN: 569 | default: 570 | return false; 571 | } 572 | } 573 | return false; 574 | } 575 | 576 | int 577 | PdfObject:: write (FILE * f) 578 | { 579 | int ret_val = -1; 580 | 581 | switch (this->type) 582 | { 583 | case PDF_OBJ_BOOL: 584 | if (this->boolean){ 585 | ret_val = fprintf(f, "true"); 586 | } 587 | else { 588 | ret_val = fprintf(f, "false"); 589 | } 590 | return ret_val<0?ret_val:0; 591 | case PDF_OBJ_INT: 592 | ret_val = fprintf(f, "%d", this->integer); 593 | return ret_val<0?ret_val:0; 594 | case 
PDF_OBJ_REAL: 595 | { 596 | ret_val = fprintf(f, "%s", double2str(this->real).c_str()); 597 | return ret_val<0?ret_val:0; 598 | } 599 | case PDF_OBJ_STR: 600 | ret_val = fwrite(this->str.data, this->str.len, 1, f); 601 | return ret_val<0?ret_val:0; 602 | case PDF_OBJ_NAME: 603 | ret_val = fprintf(f, "/%s", this->name); 604 | return ret_val<0?ret_val:0; 605 | case PDF_OBJ_ARRAY: 606 | return this->array->write(f); 607 | case PDF_OBJ_DICT: 608 | return this->dict->write(f); 609 | case PDF_OBJ_STREAM: 610 | return this->stream->write(f); 611 | case PDF_OBJ_NULL: 612 | fprintf(f, "null"); 613 | return 0; 614 | case PDF_OBJ_INDIRECT: 615 | fprintf(f, "%d %d obj\n", this->indirect.major, this->indirect.minor); 616 | this->indirect.obj->write(f); 617 | fprintf(f, "\nendobj\n"); 618 | return 0; 619 | case PDF_OBJ_INDIRECT_REF: 620 | fprintf(f, "%d %d R", this->indirect.major, this->indirect.minor); 621 | return 0; 622 | default: 623 | assert(0); 624 | } 625 | return 0; 626 | } 627 | 628 | int 629 | PdfObject:: copyFrom (PdfObject *src_obj){ 630 | // create deep copy of all objects 631 | this->setType(src_obj->type); 632 | switch (src_obj->type){ 633 | case PDF_OBJ_BOOL: 634 | this->boolean = src_obj->boolean; 635 | return true; 636 | case PDF_OBJ_INT: 637 | this->integer = src_obj->integer; 638 | return true; 639 | case PDF_OBJ_REAL: 640 | this->real = src_obj->real; 641 | return true; 642 | case PDF_OBJ_STR: 643 | str.len = src_obj->str.len; 644 | str.data = (char*) malloc2( str.len+1); 645 | memcpy(str.data, src_obj->str.data, str.len+1); 646 | return true; 647 | case PDF_OBJ_NAME: 648 | this->name = strdup(src_obj->name); 649 | assert(this->name!=NULL); 650 | return true; 651 | case PDF_OBJ_ARRAY: 652 | for (PdfObject *item : *src_obj->array){ 653 | PdfObject *new_item = new PdfObject(); 654 | new_item->copyFrom(item); 655 | this->array->append(new_item); 656 | } 657 | return true; 658 | case PDF_OBJ_DICT: 659 | for (auto it : *src_obj->dict){ 660 | PdfObject *new_obj = 
new PdfObject(); 661 | new_obj->copyFrom(it.second); 662 | this->dict->add(it.first, new_obj); 663 | } 664 | return true; 665 | case PDF_OBJ_STREAM: 666 | this->stream->len = src_obj->stream->len; 667 | // copy stream dictionary recursively 668 | for (auto it : src_obj->stream->dict){ 669 | PdfObject *new_obj = new PdfObject(); 670 | new_obj->copyFrom(it.second); 671 | this->stream->dict.add(it.first, new_obj); 672 | } 673 | if (src_obj->stream->len){ 674 | this->stream->stream = (char*) malloc2(src_obj->stream->len); 675 | memcpy(this->stream->stream, src_obj->stream->stream, src_obj->stream->len); 676 | } 677 | return true; 678 | case PDF_OBJ_INDIRECT: 679 | this->indirect.major = src_obj->indirect.major; 680 | this->indirect.minor = src_obj->indirect.minor; 681 | this->indirect.obj = new PdfObject(); 682 | this->indirect.obj->copyFrom(src_obj->indirect.obj); 683 | return true; 684 | case PDF_OBJ_INDIRECT_REF: 685 | this->indirect.major = src_obj->indirect.major; 686 | this->indirect.minor = src_obj->indirect.minor; 687 | return true; 688 | case PDF_OBJ_NULL: 689 | return true; 690 | default: 691 | assert(0); 692 | } 693 | return true; 694 | } 695 | 696 | void PdfObject:: clear() 697 | { 698 | switch (type) 699 | { 700 | case PDF_OBJ_STR: 701 | free(str.data); 702 | break; 703 | case PDF_OBJ_NAME: 704 | free(name); 705 | break; 706 | case PDF_OBJ_ARRAY: 707 | array->deleteItems(); 708 | delete array; 709 | break; 710 | case PDF_OBJ_DICT: 711 | dict->deleteItems(); 712 | delete dict; 713 | break; 714 | case PDF_OBJ_STREAM: 715 | delete stream; 716 | break; 717 | case PDF_OBJ_INDIRECT: 718 | delete indirect.obj; 719 | break; 720 | default: 721 | break; 722 | } 723 | this->type = PDF_OBJ_UNKNOWN; 724 | } 725 | 726 | PdfObject:: ~PdfObject() { 727 | clear(); 728 | } 729 | 730 | 731 | // *********** -------------- Pdf ObjectTable ----------------- *********** 732 | int 733 | ObjectTable:: count() { 734 | return table.size(); 735 | } 736 | 737 | void 738 | 
ObjectTable:: expandToFit (size_t size) { 739 | if (size > table.size()) { 740 | ObjectTableItem item = {NULL,0,0,0,0,0,0}; 741 | table.resize(size, item); 742 | } 743 | } 744 | 745 | // take obj no. and get and indirect object using this table 746 | bool 747 | ObjectTable:: readObject(MYFILE *f, int major) 748 | { 749 | if (table[major].obj != NULL) return true;// already read 750 | // read object if nonfree object 751 | if (table[major].type==NONFREE_OBJ) 752 | { 753 | int offset = table[major].offset; 754 | // some bad xref table may have offset==0, or offset > file size 755 | if (offset==0 or myfseek(f, offset, SEEK_SET)){ 756 | debug("object %d : invalid offset %d", major, offset); 757 | goto fail; 758 | } 759 | PdfObject obj; 760 | if (!obj.read(f, this, NULL) or obj.type!=PDF_OBJ_INDIRECT){ 761 | debug("object %d : failed to parse object", major); 762 | goto fail; 763 | } 764 | if (obj.indirect.major!=major || obj.indirect.minor!=table[major].minor){ 765 | debug("object %d : mismatched obj_no %d or gen_no %d", obj.indirect.major, obj.indirect.minor); 766 | } 767 | table[major].obj = obj.indirect.obj; 768 | obj.type = PDF_OBJ_UNKNOWN;// this is to prevent obj.indirect.obj from being deleted 769 | } 770 | // read object if compressed nonfree object 771 | else if (table[major].type==COMPRESSED_OBJ) { 772 | // this object is inside a object stream. 
773 | int obj_stm_no = table[major].obj_stm; 774 | this->readObject(f, obj_stm_no); 775 | if (not isStream(table[obj_stm_no].obj)) { 776 | debug("object %d : invalid source obj stream %d", major, obj_stm_no); 777 | goto fail; 778 | } 779 | StreamObj *obj_stm = table[obj_stm_no].obj->stream; 780 | if (not obj_stm->decompress()) 781 | goto fail; 782 | int n = obj_stm->dict["N"]->integer; // number of objects in this stream 783 | int first = obj_stm->dict["First"]->integer;// offset of first member inside stream 784 | // open stream as file, parse and get all objects inside it 785 | // stream contains : obj_no1 offset1 obj_no2 offset2 ... obj_1 obj2 ... 786 | MYFILE *file = streamopen(obj_stm->stream, obj_stm->len); 787 | Token tok; 788 | for (int i=0; iread(file, this, NULL)){ 799 | debug("compressed obj %d : failed to read", obj_no); 800 | new_obj->type = PDF_OBJ_NULL; 801 | } 802 | table[obj_no].obj = new_obj; 803 | myfseek(file, last_seek, SEEK_SET); 804 | } 805 | myfclose(file); 806 | // the object stream is no longer required, as we have loaded all objects inside it 807 | delete table[obj_stm_no].obj; 808 | table[obj_stm_no].obj = NULL; 809 | table[obj_stm_no].type = FREE_OBJ; 810 | } 811 | return true; 812 | fail: 813 | table[major].obj = new PdfObject(); 814 | table[major].obj->type = PDF_OBJ_NULL; 815 | return false; 816 | } 817 | 818 | // read all objects after loading xref table 819 | void ObjectTable:: readObjects(MYFILE *f) 820 | { 821 | // at first load nonfree objects and then decompress object streams 822 | for (size_t i=1; itable[i].type); 824 | switch (table[i].type) { 825 | case FREE_OBJ: 826 | break; 827 | case NONFREE_OBJ: 828 | readObject(f, i); 829 | break; 830 | case COMPRESSED_OBJ: 831 | readObject(f, i);// here obj has been decompressed and read 832 | table[i].type = NONFREE_OBJ; 833 | break; 834 | default: 835 | debug("obj_table item %d : invalid obj type", i); 836 | } 837 | } 838 | } 839 | 840 | int getXrefType(MYFILE *f) 841 | { 842 | char 
line[LLEN]; 843 | skipspace(f); 844 | long fpos = myftell(f); 845 | if (myfgets(line, LLEN, f)==NULL){ 846 | return XREF_INVALID; 847 | } 848 | if (starts(line, "xref")) { 849 | myfseek(f, fpos, SEEK_SET); 850 | return XREF_TABLE; 851 | } 852 | // if it is indirect obj, then it is xref stream 853 | myfseek(f, fpos, SEEK_SET); 854 | Token tok; 855 | if (tok.get(f) && tok.type==TOK_INT && tok.get(f) && tok.type==TOK_INT 856 | && tok.get(f) && tok.type==TOK_ID){ 857 | myfseek(f, fpos, SEEK_SET); 858 | return XREF_STREAM; 859 | } 860 | tok.freeData(); 861 | return XREF_INVALID; 862 | } 863 | 864 | bool ObjectTable:: read (MYFILE *f, size_t xref_pos) 865 | { 866 | size_t pos=0; 867 | int len=0, object_id=0, object_count=0; 868 | char line[LLEN]; 869 | ObjectTableItem *elm; 870 | 871 | if (myfseek(f, xref_pos, SEEK_SET)==-1){ 872 | return false; 873 | } 874 | // bad pdf may contain a newline before 'xref' 875 | skipspace(f); 876 | if (myfgets(line,LLEN,f)==NULL){ 877 | return false; 878 | } 879 | if (!starts(line, "xref")) { 880 | return false; 881 | } 882 | //FILE *fd = fopen("xref", "wb"); 883 | while ((pos = myftell(f)) && myfgets(line,LLEN,f)!=NULL){ 884 | char *entry = line; 885 | while (isspace(*entry)) // fixes for leading spaces in xref table 886 | entry++; 887 | len = strlen(entry)-1; 888 | if (len==-1) continue; // skip empty lines 889 | while (len >= 0 && isspace((unsigned char)(entry[len]))){ 890 | entry[len] = 0; 891 | --len; 892 | } 893 | if (strlen(entry)==XREF_ENT_LEN){ 894 | int field1, field2; 895 | char obj_type; 896 | if (sscanf(entry,"%d %d %c", &field1, &field2, &obj_type)!=3){ 897 | break; 898 | } 899 | //fprintf(fd, "%s\n", entry); 900 | elm = &(this->table[object_id]); 901 | if (elm->type==0){ // skip if already set by next xreftable 902 | elm->major = object_id; 903 | elm->type = obj_type=='f'? 
FREE_OBJ : NONFREE_OBJ; 904 | elm->offset = field1; 905 | elm->minor = field2; 906 | } 907 | object_id++; 908 | object_count--; 909 | } 910 | else { 911 | int object_begin_tmp, object_count_tmp; 912 | if (sscanf(entry,"%d %d", &object_begin_tmp, &object_count_tmp)!=2){ 913 | myfseek(f, pos, SEEK_SET);// seek before trailer keyword 914 | break; 915 | } 916 | object_id = object_begin_tmp; 917 | object_count = object_count_tmp; 918 | this->expandToFit(object_begin_tmp + object_count); 919 | } 920 | } 921 | //fclose(fd); 922 | if (object_count!=0){ 923 | return false; 924 | } 925 | // in some pdf table size is found to be 0 926 | if (table.size()>0 and table[0].type!=FREE_OBJ){// obj 0 is nonfree in bad xref tables 927 | debug("obj no 0 is not free"); 928 | table[0].type = FREE_OBJ;// obj 0 is always free 929 | table[0].minor = 65535; // and it has maximum gen id 930 | table[0].offset = 0; 931 | } 932 | return true; 933 | } 934 | 935 | // From PDF 1.5 the xref table can be a stream in an indirect object. 936 | // The dictionary of that stream is the trailer dictionary. 937 | // Essential keys : Type, Size and W . Optional keys : Index, Prev . Returns false when the stream cannot be decompressed or when Size or W is missing. 938 | bool ObjectTable:: read (PdfObject *stream, PdfObject *p_trailer) 939 | { 940 | //FILE *fd; 941 | //fd = fopen("xref", "wb"); 942 | if (not stream->stream->decompress()) 943 | return false; 944 | // table_size is the max object number + 1 945 | PdfObject *size_obj = p_trailer->dict->get("Size"); PdfObject *w_obj = p_trailer->dict->get("W"); if (size_obj==NULL || w_obj==NULL) { debug("xref stream : Size or W key is missing"); return false; }/* get() returns NULL for an absent key, so both required keys are checked before use */ int table_size = size_obj->integer; 946 | this->expandToFit(table_size); 947 | // split stream into table, W parameter is array of length 3 948 | ArrayObj *w_arr_obj = w_obj->array; 949 | int w_arr[3]; 950 | for (int i=0; i<3; ++i) { 951 | w_arr[i] = w_arr_obj->at(i)->integer; 952 | } 953 | int row_len = w_arr[0] + w_arr[1] + w_arr[2]; 954 | // Index is array of pairs of integers.
Each pair has obj number and obj count 955 | PdfObject *index = p_trailer->dict->get("Index"); 956 | if (index==NULL) { 957 | index = new PdfObject(); 958 | char s[24]; 959 | snprintf(s, 23, "[ 0 %d ]", table_size); 960 | bool parsed = index->readFromString(s);/* the call is kept outside assert() so that it still runs when NDEBUG is defined */ assert(parsed); (void)parsed; 961 | p_trailer->dict->add("Index", index); 962 | } 963 | auto item = index->array->begin(); 964 | for (int i=0; item != index->array->end(); item++) { 965 | int first = (*item)->integer; 966 | item++; if (item == index->array->end()) break;/* odd number of entries in Index : the last object number has no count, ignore it */ 967 | int count = (*item)->integer; 968 | for (int major=first; majortable[major].type!=FREE_OBJ){//skip when already set by next xref table 970 | i+=row_len; 971 | continue; 972 | } 973 | if (row_len<0 || (size_t)(i+row_len) > (size_t)stream->stream->len) break;/* truncated stream : do not read a row that lies beyond the decoded data */ char *row = stream->stream->stream + i; 974 | int field1 = w_arr[0] ? arr2int(row, w_arr[0]) : 1;// this field may be absent 975 | int field2 = arr2int(row+w_arr[0], w_arr[1]); 976 | int field3 = w_arr[2] ? arr2int(row+w_arr[0]+w_arr[1], w_arr[2]) : 0; 977 | //fprintf(fd, "%d %d %d %d\n", major, field1, field2, field3); 978 | ObjectTableItem *elm = &(this->table[major]); 979 | elm->major = major; 980 | elm->type = field1; 981 | switch (field1) { 982 | case FREE_OBJ: 983 | elm->next_free = field2; 984 | elm->minor = field3; 985 | break; 986 | case NONFREE_OBJ: 987 | elm->offset = field2; 988 | elm->minor = field3; 989 | break; 990 | case COMPRESSED_OBJ: 991 | elm->obj_stm = field2;// minor=0 992 | elm->index = field3; 993 | break; 994 | default: 995 | break; 996 | } 997 | i += row_len; 998 | } 999 | } 1000 | //fflush(fd); 1001 | //fclose(fd); 1002 | if (table.size()>0 and table[0].type!=FREE_OBJ){//in some bad xref tables; the size test matches the xref table reader and covers an empty table 1003 | debug("obj no 0 is not free"); 1004 | table[0].type = FREE_OBJ; 1005 | table[0].minor = 65535; 1006 | table[0].offset = 0; 1007 | } 1008 | return true; 1009 | } 1010 | /* Append obj as a new NONFREE_OBJ entry at the end of the table and return its object number. */ 1011 | int ObjectTable:: addObject (PdfObject *obj) 1012 | { 1013 | int major = table.size(); 1014 | ObjectTableItem item = {NULL,0,0,0,0,0,0}; 1015 | table.resize(major+1, item); 1016 | 1017 | table[major].major = major; 1018 | table[major].type =
NONFREE_OBJ; 1019 | table[major].obj = obj; 1020 | return major; 1021 | } 1022 | 1023 | PdfObject* ObjectTable:: getObject(int major, int minor) 1024 | { 1025 | if (major<(int)table.size() && minor==table[major].minor) 1026 | return table[major].obj; 1027 | debug("warning : could not get object (%d,%d) from ObjectTable", major,minor); 1028 | return NULL; 1029 | } 1030 | 1031 | void ObjectTable:: writeObjects (FILE *f) 1032 | { 1033 | for (size_t i=1; iwrite(f)<0){ 1043 | message(FATAL,"writeObjects() : I/O error"); 1044 | } 1045 | if (fprintf(f,"\nendobj\n")<0){ 1046 | message(FATAL,"writeObjects() : I/O error"); 1047 | } 1048 | break; 1049 | default: 1050 | assert(0); 1051 | } 1052 | } 1053 | } 1054 | 1055 | void ObjectTable:: writeXref (FILE *f) 1056 | { 1057 | fprintf(f, "xref\n%d %d\n", 0, (int)table.size()); 1058 | for (size_t i=0; i=0 && index<(int)table.size()); 1068 | return table[index]; 1069 | } 1070 | 1071 | // *********** ------------- Token Parser ----------------- *********** 1072 | 1073 | // unlike c_str() this is not null terminated string 1074 | typedef struct { 1075 | char *buff; 1076 | size_t len;// content size 1077 | size_t buff_size; 1078 | } mystring; 1079 | 1080 | static mystring mystring_new() 1081 | { 1082 | mystring str = {NULL, 0, 16}; 1083 | str.buff = (char*) malloc2(str.buff_size); 1084 | return str; 1085 | } 1086 | 1087 | static void mystring_add_char(mystring *str, char c) 1088 | { 1089 | if (str->buff_size == str->len){ 1090 | str->buff_size *= 2; 1091 | str->buff = (char*) realloc(str->buff, str->buff_size); 1092 | if (str->buff==NULL){ 1093 | message(FATAL, "realloc() failed !"); 1094 | } 1095 | } 1096 | str->buff[str->len] = c; 1097 | str->len++; 1098 | } 1099 | 1100 | 1101 | Token:: Token() { 1102 | type = TOK_UNKNOWN; 1103 | } 1104 | 1105 | bool 1106 | Token:: get (MYFILE * f) 1107 | { 1108 | int c, minus=0, parenthes, number; 1109 | double real_number, frac; 1110 | // skip whitespace characters 1111 | int newline = 0; 1112 | 
while (1){ 1113 | c = mygetc(f); 1114 | switch (c){ 1115 | case EOF: 1116 | this->type = TOK_EOF; 1117 | return true; 1118 | // white space 1119 | case CHAR_FF: 1120 | case CHAR_SP: 1121 | case CHAR_TAB: 1122 | newline = 0; 1123 | break; 1124 | // new line 1125 | case CHAR_LF: 1126 | case CHAR_CR: 1127 | newline = 1; 1128 | break; 1129 | default: 1130 | goto end_wh_sp; 1131 | } 1132 | } 1133 | end_wh_sp: 1134 | this->new_line = newline;/* 1 when the last white space character before the token was LF or CR */ 1135 | switch (c){ 1136 | case '0': 1137 | case '1': 1138 | case '2': 1139 | case '3': 1140 | case '4': 1141 | case '5': 1142 | case '6': 1143 | case '7': 1144 | case '8': 1145 | case '9': 1146 | case '-': 1147 | case '+': 1148 | case '.': 1149 | if (c!='+' && c!='-'){// digit or leading '.' 1150 | number = (c=='.') ? 0 : c-'0';/* a leading '.' has no integer part; '.'-'0' would start the value at -2 */ 1151 | this->sign = 0; 1152 | } 1153 | else { 1154 | number = 0; 1155 | switch (c) { 1156 | case '+': 1157 | this->sign = 1; 1158 | break; 1159 | case '-': 1160 | minus = -1; 1161 | this->sign = -1; 1162 | break; 1163 | } 1164 | } 1165 | if (c=='.'){ // number beginning with '.'
eg - .21 1166 | goto real_num; 1167 | } 1168 | while ((c=mygetc(f))!=EOF && isdigit(c)){ 1169 | number = number*10+(c-'0'); 1170 | } 1171 | switch (c){ 1172 | // white spaces 1173 | case CHAR_FF: 1174 | case CHAR_SP: 1175 | case CHAR_TAB: 1176 | case CHAR_LF: 1177 | case CHAR_CR: 1178 | default: 1179 | myungetc(f);/* push back the character that ended the digits, then fall through */ 1180 | case EOF: 1181 | this->type = TOK_INT; 1182 | this->integer = number * ((minus==-1)?-1:1); 1183 | return true; 1184 | case '.': 1185 | break;/* a fraction follows : continue at real_num */ 1186 | } 1187 | real_num: 1188 | real_number = number; 1189 | frac = 10;/* divisor of the next fractional digit : 10, 100, 1000 ... */ 1190 | while ((c=mygetc(f))!=EOF && isdigit(c)){ 1191 | real_number = real_number + (c-'0')/frac; 1192 | frac = frac * 10; 1193 | } 1194 | switch(c){ 1195 | /* white space */ 1196 | case CHAR_FF: 1197 | case CHAR_SP: 1198 | case CHAR_TAB: 1199 | // new line 1200 | case CHAR_LF: 1201 | case CHAR_CR: 1202 | case ']': 1203 | case '>': 1204 | case '/': 1205 | myungetc(f);/* leave the terminator for the next token, then fall through */ 1206 | case EOF: 1207 | this->type = TOK_REAL; 1208 | this->real = real_number * ((minus==-1)?-1:1); 1209 | return true; 1210 | default:/* any other character right after a real number is rejected and is not pushed back */ 1211 | this->type = TOK_UNKNOWN; 1212 | return false; 1213 | } 1214 | break; 1215 | 1216 | case '[': /*begin array*/ 1217 | this->type = TOK_BARRAY; 1218 | return true; 1219 | case ']': /*end array*/ 1220 | this->type = TOK_EARRAY; 1221 | return true; 1222 | case '<': /*hexadecimal string or dictionary*/ 1223 | { 1224 | if ((c=mygetc(f))=='<'){/* "<<" begins a dictionary */ 1225 | this->type = TOK_BDICT; 1226 | return true; 1227 | } 1228 | //this->str.type=PDF_STR_HEX; 1229 | mystring mstr = mystring_new(); 1230 | mystring_add_char(&mstr, '<'); 1231 | /*hexadecimal string : copy everything up to the closing '>' */ 1232 | while (c!=EOF && c!='>'){ 1233 | mystring_add_char(&mstr,c); 1234 | c = mygetc(f); 1235 | } 1236 | if (c=='>') { 1237 | mystring_add_char(&mstr, '>'); 1238 | char *buff = (char*) realloc(mstr.buff, mstr.len);// shrink buffer 1239 | if (buff){// even realloc to smaller size may fail 1240 | mstr.buff = buff; 1241 | } 1242 | this->type = TOK_STR; 1243 | this->str.len = mstr.len; 1244 |
this->str.data = mstr.buff; 1245 | return true; 1246 | } 1247 | // EOF was reached before the closing '>' 1248 | free(mstr.buff); 1249 | this->type = TOK_UNKNOWN; 1250 | return false; 1251 | } 1252 | case '>': //end dictionary 1253 | { 1254 | if (mygetc(f)=='>'){ 1255 | this->type = TOK_EDICT; 1256 | return true; 1257 | } 1258 | this->type = TOK_UNKNOWN;/* a single '>' is not a token : push back what followed it and fail */ 1259 | myungetc(f); 1260 | return false; 1261 | } 1262 | case '(': // literal string, it may contain balanced parentheses 1263 | { 1264 | parenthes = 0;/* nesting depth of unescaped '(' inside the string */ 1265 | //this->str.type=PDF_STR_CHR; 1266 | mystring mstr = mystring_new(); 1267 | mystring_add_char(&mstr, '('); 1268 | while ((c=mygetc(f))!=EOF){ 1269 | switch(c){ 1270 | case '\\':/* store the backslash and the character after it unchanged, so an escaped parenthesis is not counted */ 1271 | mystring_add_char(&mstr,c); 1272 | c = mygetc(f); 1273 | break; 1274 | case '(': 1275 | parenthes++; 1276 | break; 1277 | case ')': 1278 | if (parenthes==0){/* this ')' closes the string itself */ 1279 | goto end_lit_str; 1280 | } 1281 | --parenthes; 1282 | break; 1283 | } 1284 | mystring_add_char(&mstr,c); 1285 | } 1286 | end_lit_str: 1287 | if (c==')') { 1288 | mystring_add_char(&mstr, ')'); 1289 | char *buff = (char*) realloc(mstr.buff, mstr.len);// shrink buffer 1290 | if (buff){ 1291 | mstr.buff = buff; 1292 | } 1293 | this->type = TOK_STR; 1294 | this->str.len = mstr.len; 1295 | this->str.data = mstr.buff; 1296 | return true; 1297 | } 1298 | // EOF was reached before the closing ')' 1299 | free(mstr.buff); 1300 | this->type = TOK_UNKNOWN; 1301 | return false; 1302 | } 1303 | case '/': //name object 1304 | { 1305 | int i=0; 1306 | while ((c=mygetc(f))!=EOF){ 1307 | switch(c){ 1308 | case CHAR_FF: 1309 | case CHAR_SP: 1310 | case CHAR_TAB: 1311 | case CHAR_LF: 1312 | case CHAR_CR: 1313 | case '<': 1314 | case '>': 1315 | case '{': 1316 | case '}': 1317 | case '/': 1318 | case '%': 1319 | case '(': 1320 | case ')': 1321 | case '[': 1322 | case ']': 1323 | myungetc(f);/* white space or a delimiter ends the name and is left for the next token */ 1324 | goto end_name; 1325 | } 1326 | if (i+1name[i] = c; 1328 | ++i; 1329 | } 1330 | else { 1331 | break; 1332 | } 1333 | } 1334 | end_name: 1335 | this->name[i] = 0;/* the stored name is null terminated and does not include the leading '/' */ 1336 | this->type = TOK_NAME; 1337 | return true;
1338 | } 1339 | case '%': //comment, skip characters to end of line, then find next token 1340 | while ((c=mygetc(f))!=EOF && c!=CHAR_LF && c!=CHAR_CR) 1341 | ; 1342 | if (c==EOF){/* NOTE(review): the white space loop reports end of file as TOK_EOF, here it is TOK_UNKNOWN with a true return - confirm this is intended */ 1343 | this->type = TOK_UNKNOWN; 1344 | return true; 1345 | } 1346 | else { 1347 | myungetc(f); 1348 | return this->get(f); 1349 | } 1350 | break; 1351 | default:/* anything else is read as an identifier / keyword */ 1352 | int i=0; 1353 | do { 1354 | switch (c){ 1355 | case CHAR_FF: 1356 | case CHAR_SP: 1357 | case CHAR_TAB: 1358 | case CHAR_LF: 1359 | case CHAR_CR: 1360 | case '<': 1361 | case '>': 1362 | case '{': 1363 | case '}': 1364 | case '/': 1365 | case '%': 1366 | case '(': 1367 | case ')': 1368 | case '[': 1369 | case ']': 1370 | myungetc(f);/* NOTE(review): when the very first character is ')', '{' or '}' it is pushed back and an empty TOK_ID is returned, so the next call sees the same character again - confirm callers cannot loop on this */ 1371 | goto end_id; 1372 | } 1373 | if (i+1id[i] = c; 1375 | ++i; 1376 | } 1377 | else { 1378 | break; 1379 | } 1380 | } while ((c=mygetc(f))!=EOF); 1381 | end_id: 1382 | this->id[i] = 0; 1383 | this->type = TOK_ID; 1384 | return true; 1385 | } 1386 | return true; 1387 | } 1388 | /* Free the string buffer of a TOK_STR token; does nothing for every other type. NOTE(review): type and str.data are not reset, so a second call on the same token would free the buffer again - confirm callers call it once. */ 1389 | void 1390 | Token:: freeData() 1391 | { 1392 | switch (this->type) { 1393 | case TOK_STR: 1394 | free(this->str.data); 1395 | break; 1396 | default: 1397 | break; 1398 | } 1399 | } 1400 | 1401 | 1402 | /* Return the value 0-15 of one hexadecimal digit (upper or lower case); throws std::invalid_argument for any other character. */ 1403 | static int char2int(char input) 1404 | { 1405 | if(input >= '0' && input <= '9') 1406 | return input - '0'; 1407 | if(input >= 'A' && input <= 'F') 1408 | return input - 'A' + 10; 1409 | if(input >= 'a' && input <= 'f') 1410 | return input - 'a' + 10; 1411 | throw std::invalid_argument("Invalid input string"); 1412 | } 1413 | 1414 | // convert literal and hex pdfstring to normal string; *str_type is set to BYTE_STR or HEX_STR according to the delimiters found 1415 | std::string pdfstr2bytes(String str, int *str_type) 1416 | { 1417 | std::string out_str=""; 1418 | if (str.data[0]=='(' && str.data[str.len-1]==')') 1419 | { 1420 | *str_type = BYTE_STR; 1421 | 1422 | for (int i=1; i') 1464 | { 1465 | *str_type = HEX_STR; 1466 | // if no.
of chars is odd, last char is assumed to be 0 1467 | if (str.len%2!=0) 1468 | str.data[str.len-1] = '0'; 1469 | 1470 | for (int i=1; i'); 1492 | } 1493 | else {//BYTE_STR 1494 | tmp_str.push_back('('); 1495 | for (unsigned int i=0; i