├── screenshots ├── fb-12.png └── fb-23.png ├── drmhovacui.c ├── fbhovacui.c ├── xhovacui.c ├── cairoio-drm.h ├── cairoio-fb.h ├── cairoio-x11.h ├── hovacui.conf ├── hovacui.desktop ├── hovacui.h ├── hovacui.conf.testing ├── vt.h ├── pdfannot.1 ├── cairofb.h ├── pdftextview ├── cairodrm.h ├── cairodrm-main.c ├── hovacui-main.c ├── Makefile ├── pdftoebook ├── vt.c ├── pdfrects.1 ├── pdfinteractive ├── pdfrecur.1 ├── README.md ├── pdftext.h ├── cairoui.h ├── cairofb.c ├── cairoio.h ├── textarea.txt ├── pdfrecur.c ├── cairoio-fb.c ├── pdftoroff.c ├── pdffit.1 ├── cairoui-main.c ├── pdfhscript ├── cairoio-drm.c ├── pdfrects-main.c ├── pdffit.c ├── white-support.txt ├── pdfrects.h ├── cairoio-x11.c ├── pdfannot.c ├── pdftoroff.1 ├── pdftext.c └── cairodrm.c /screenshots/fb-12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgerwk/pdftoroff/HEAD/screenshots/fb-12.png -------------------------------------------------------------------------------- /screenshots/fb-23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgerwk/pdftoroff/HEAD/screenshots/fb-23.png -------------------------------------------------------------------------------- /drmhovacui.c: -------------------------------------------------------------------------------- 1 | #include "cairoio-drm.h" 2 | #include "hovacui.h" 3 | 4 | /* 5 | * main 6 | */ 7 | int main(int argn, char *argv[]) { 8 | return hovacui(argn, argv, &cairodevicedrm); 9 | } 10 | -------------------------------------------------------------------------------- /fbhovacui.c: -------------------------------------------------------------------------------- 1 | #include "cairoio-fb.h" 2 | #include "hovacui.h" 3 | 4 | /* 5 | * main 6 | */ 7 | int main(int argn, char *argv[]) { 8 | return hovacui(argn, argv, &cairodevicefb); 9 | } 10 | 
-------------------------------------------------------------------------------- /xhovacui.c: -------------------------------------------------------------------------------- 1 | #include "cairoio-x11.h" 2 | #include "hovacui.h" 3 | 4 | /* 5 | * main 6 | */ 7 | int main(int argn, char *argv[]) { 8 | return hovacui(argn, argv, &cairodevicex11); 9 | } 10 | -------------------------------------------------------------------------------- /cairoio-drm.h: -------------------------------------------------------------------------------- 1 | /* 2 | * cairoio-drm.h 3 | */ 4 | 5 | #ifdef _CAIRO_DRM 6 | #else 7 | #define _CAIRO_DRM 8 | 9 | /* 10 | * the cairo device for drm 11 | */ 12 | extern struct cairodevice cairodevicedrm; 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /cairoio-fb.h: -------------------------------------------------------------------------------- 1 | /* 2 | * cairoio-fb.h 3 | */ 4 | 5 | #ifdef _CAIRO_FB 6 | #else 7 | #define _CAIRO_FB 8 | 9 | /* 10 | * the cairo device for the framebuffer 11 | */ 12 | extern struct cairodevice cairodevicefb; 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /cairoio-x11.h: -------------------------------------------------------------------------------- 1 | /* 2 | * cairoio-x11.h 3 | */ 4 | 5 | #ifdef _CAIROIO_X11 6 | #else 7 | #define _CAIROIO_X11 8 | 9 | /* 10 | * the cairodevice for x11 11 | */ 12 | extern struct cairodevice cairodevicex11; 13 | 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /hovacui.conf: -------------------------------------------------------------------------------- 1 | # example hovacui configuration file 2 | # copy to $HOME/.config/hovacui/ and change as needed 3 | totalpages 4 | postsave pdftoroff -t selection-%d.pdf > selection-%d.txt 5 | script pdfhscript 'l[links]_s[save document]y[yank text]N[view notes]A[add notes]E[edit file]' 6 
| -------------------------------------------------------------------------------- /hovacui.desktop: -------------------------------------------------------------------------------- 1 | [Desktop Entry] 2 | Name=Hovacui 3 | MimeType=application/pdf; 4 | GenericName=PDF Viewer 5 | GenericName[it]=Visualizzatore PDF 6 | Exec=hovacui %f 7 | Icon=text-x-generic 8 | Type=Application 9 | Terminal=false 10 | X-KDE-StartupNotify=false 11 | Categories=Graphics; 12 | -------------------------------------------------------------------------------- /hovacui.h: -------------------------------------------------------------------------------- 1 | /* 2 | * hovacui.h 3 | */ 4 | 5 | #ifdef _HOVACUI_H 6 | #else 7 | #define _HOVACUI_H 8 | 9 | #include "cairoio.h" 10 | 11 | /* 12 | * show a pdf file on an arbitrary cairo device 13 | */ 14 | int hovacui(int argn, char *argv[], struct cairodevice *cairodevice); 15 | 16 | #endif 17 | 18 | -------------------------------------------------------------------------------- /hovacui.conf.testing: -------------------------------------------------------------------------------- 1 | aspect 4:3 2 | minwidth 560 3 | notutorial 4 | noinitlabels 5 | totalpages 6 | clock 7 | postsave pdftoroff -t selection-%d.pdf > selection-%d.txt 8 | script pdfhscript 'l[links]_s[save document]y[yank text]N[view notes]A[add notes]E[edit file]0[test]1[test]2[test]3[test]' 9 | # nopagelabel 10 | # immediate 11 | # order quick 12 | # presentation 13 | # fifo fifoname 14 | # outfile boxes.txt 15 | -------------------------------------------------------------------------------- /vt.h: -------------------------------------------------------------------------------- 1 | /* 2 | * vt.h 3 | * 4 | * vt switch handling 5 | */ 6 | 7 | #ifdef _VT_H 8 | #else 9 | #define _VT_H 10 | 11 | /* terminal is suspended (switched out) */ 12 | extern int vt_suspend; 13 | 14 | /* terminal needs redrawing */ 15 | extern int vt_redraw; 16 | 17 | /* setup virtual terminal for suspend and resume 
*/ 18 | void vt_setup(void (*switcher)(int, void *), void *data); 19 | 20 | #endif 21 | 22 | -------------------------------------------------------------------------------- /pdfannot.1: -------------------------------------------------------------------------------- 1 | .TH pdfannot 1 "Sept. 28, 2019" 2 | 3 | . 4 | . 5 | . 6 | .SH NAME 7 | 8 | pdfannot - extract annotations and actions from PDF files 9 | 10 | . 11 | . 12 | . 13 | .SH SYNOPSIS 14 | 15 | .B pdfannot 16 | [\fI-t\fP] 17 | [\fI-w\fP] 18 | [\fI-a\fP] 19 | [\fI-l\fP] 20 | [\fI-d\fP] 21 | [\fI-h\fP] 22 | .I file.pdf 23 | [\fIpagenumber\fP] 24 | 25 | . 26 | . 27 | . 28 | .SH OPTIONS 29 | 30 | .TP 31 | .B -t 32 | output is text only 33 | .TP 34 | .B -w 35 | output is html 36 | .TP 37 | .B -a 38 | only output annotations 39 | .TP 40 | .B -l 41 | only output links 42 | .TP 43 | .B -d 44 | also output some text at destination of inner links 45 | .TP 46 | .B -h 47 | online help 48 | 49 | . 50 | . 51 | . 52 | .SH SEE ALSO 53 | 54 | .I pdftoroff(1) 55 | 56 | -------------------------------------------------------------------------------- /cairofb.h: -------------------------------------------------------------------------------- 1 | /* 2 | * cairofb.h 3 | * 4 | * a cairo context for drawing in a linux framebuffer 5 | */ 6 | 7 | #ifdef _CAIROFB_H 8 | #else 9 | #define _CAIROFB_H 10 | 11 | #include 12 | 13 | struct cairofb { 14 | /* public */ 15 | cairo_surface_t *surface; 16 | cairo_t *cr; 17 | int width; 18 | int height; 19 | 20 | /* private */ 21 | int dev; 22 | unsigned char *img; 23 | unsigned char *dbuf; 24 | int length; 25 | }; 26 | 27 | struct cairofb *cairofb_init(char *devname, int doublebuffering); 28 | void cairofb_clear(struct cairofb *cairofb, 29 | double red, double green, double blue); 30 | int cairofb_doublebuffering(struct cairofb *cairofb); 31 | void cairofb_flush(struct cairofb *cairofb); 32 | void cairofb_finish(struct cairofb *cairo); 33 | 34 | #endif 35 | 36 | 
-------------------------------------------------------------------------------- /pdftextview: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # view text of a pdf file 4 | # 5 | # pdfview [-h] [-w] [-d distance] file.pdf 6 | # 7 | # default distance is 15; lower values like 10 or 12 may be useful on 8 | # multiple-column pages or documents with headers or footers 9 | 10 | [ $# -lt 1 ] && echo "pdf file name missing" && exit 1 11 | FILTER=less 12 | while echo "$1" | grep -q '^-'; 13 | do 14 | [ "$1" = -h ] && FILTER=head && shift 15 | [ "$1" = -w ] && WAIT=read && shift 16 | [ "$1" = -d ] && DISTANCE="-d $2" && shift && shift 17 | done 18 | [ ! -f "$1" ] && echo "file $1 does not exist" && exit 1 19 | 20 | [ "$COLUMN" != "" ] && WIDTH=80 || \ 21 | WIDTH=$(stty -a | grep -o 'columns [0-9][0-9]*' | cut -d' ' -f2) 22 | 23 | pdftoroff -t -m 3 $DISTANCE "$1" | \ 24 | sed 's,^,_PAR_,' | \ 25 | fmt -s -w $WIDTH | \ 26 | sed 's,^_PAR_, ,' | \ 27 | $FILTER 28 | 29 | $WAIT 30 | 31 | -------------------------------------------------------------------------------- /cairodrm.h: -------------------------------------------------------------------------------- 1 | /* 2 | * cairodrm.h 3 | * 4 | * a cairo context for drawing on the linux direct rendering infrastructure 5 | */ 6 | 7 | #ifdef _CAIRODRM_H 8 | #else 9 | #define _CAIRODRM_H 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #define CAIRODRM_DOUBLEBUFFERING 0x0001 16 | #define CAIRODRM_EXACT 0x0002 17 | 18 | struct cairodrm { 19 | /* public */ 20 | cairo_surface_t *surface; 21 | cairo_t *cr; 22 | int width; 23 | int height; 24 | 25 | /* private */ 26 | int dev; 27 | int handle; 28 | int buf_id; 29 | void *img; 30 | void *dbuf; 31 | int size; 32 | 33 | drmModeResPtr resptr; 34 | int *enabled; 35 | drmModeCrtcPtr *curr, *prev; 36 | }; 37 | 38 | struct cairodrm *cairodrm_init(char *devname, 39 | char *connectors, char *size, int flags); 40 | void 
cairodrm_switcher(struct cairodrm *cairodrm, int inout); 41 | void cairodrm_clear(struct cairodrm *cairofb, 42 | double red, double green, double blue); 43 | int cairodrm_doublebuffering(struct cairodrm *cairofb); 44 | void cairodrm_flush(struct cairodrm *cairodrm); 45 | void cairodrm_finish(struct cairodrm *cairo); 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /cairodrm-main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * cairodrm-main.c 3 | * 4 | * testing program for cairodrm.{c,h} 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "cairodrm.h" 12 | 13 | int main(int argc, char *argv[]) { 14 | struct cairodrm *cairodrm; 15 | 16 | /* create a cairodrm */ 17 | 18 | cairodrm = cairodrm_init("/dev/dri/card0", argv[1], 19 | argc - 1 > 1 ? argv[2] : NULL, 0); 20 | if (cairodrm == NULL) 21 | exit(EXIT_FAILURE); 22 | 23 | /* draw something */ 24 | 25 | cairodrm_clear(cairodrm, 1.0, 1.0, 1.0); 26 | cairo_set_source_rgb(cairodrm->cr, 0.0, 1.0, 0.0); 27 | cairo_rectangle(cairodrm->cr, 28 | cairodrm->width - 100, cairodrm->height - 100, 100, 100); 29 | cairo_fill(cairodrm->cr); 30 | 31 | cairo_select_font_face(cairodrm->cr, "serif", 32 | CAIRO_FONT_SLANT_NORMAL, CAIRO_FONT_WEIGHT_BOLD); 33 | cairo_set_font_size(cairodrm->cr, 32.0); 34 | cairo_set_source_rgb(cairodrm->cr, 0.0, 0.0, 1.0); 35 | cairo_move_to(cairodrm->cr, 10.0, 50.0); 36 | cairo_show_text(cairodrm->cr, "Hello, world"); 37 | 38 | cairo_set_source_rgb(cairodrm->cr, 1.0, 0.0, 0.0); 39 | cairo_set_line_width(cairodrm->cr, 5.0); 40 | cairo_move_to(cairodrm->cr, 20.0, 10.0); 41 | cairo_line_to(cairodrm->cr, 220.0, 70.0); 42 | cairo_stroke(cairodrm->cr); 43 | 44 | /* flush */ 45 | 46 | cairodrm_flush(cairodrm); 47 | getchar(); 48 | 49 | /* finish */ 50 | 51 | cairodrm_finish(cairodrm); 52 | 53 | return EXIT_SUCCESS; 54 | } 55 | 
-------------------------------------------------------------------------------- /hovacui-main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "cairoio.h" 5 | #include "cairoio-fb.h" 6 | #include "cairoio-drm.h" 7 | #include "cairoio-x11.h" 8 | #include "hovacui.h" 9 | 10 | int main(int argn, char *argv[]) { 11 | int opt; 12 | struct cairodevice *cairodevice; 13 | int cargn; 14 | char **cargv; 15 | char *usage; 16 | 17 | /* collect usage of output devices */ 18 | 19 | usage = malloc(strlen(cairodevicefb.usage) + 1 + 20 | strlen(cairodevicedrm.usage) + 1 + 21 | strlen(cairodevicex11.usage) + 1); 22 | strcpy(usage, ""); 23 | // strcat(usage, cairodevicefb.usage); 24 | // strcat(usage, "\n"); 25 | strcat(usage, cairodevicedrm.usage); 26 | strcat(usage, "\n"); 27 | strcat(usage, cairodevicex11.usage); 28 | 29 | /* determine device */ 30 | 31 | cairodevice = NULL; 32 | if (getenv("DISPLAY")) 33 | cairodevice = &cairodevicex11; 34 | 35 | opterr = 0; 36 | cargv = malloc(argn * sizeof(char *)); 37 | 38 | optind = 1; 39 | cargn = argn; 40 | memcpy(cargv, argv, argn * sizeof(char *)); 41 | while (-1 != (opt = getopt(cargn, cargv, cairodevicedrm.options))) 42 | if (opt != '?') 43 | cairodevice = &cairodevicedrm; 44 | 45 | optind = 1; 46 | cargn = argn; 47 | memcpy(cargv, argv, argn * sizeof(char *)); 48 | while (-1 != (opt = getopt(cargn, cargv, cairodevicex11.options))) 49 | if (opt != '?') 50 | cairodevice = &cairodevicex11; 51 | 52 | free(cargv); 53 | 54 | if (cairodevice == NULL) { 55 | cairodevice = &cairodevicefb; 56 | cairodevice->usage = usage; 57 | } 58 | 59 | /* run hovacui */ 60 | 61 | opterr = 1; 62 | optind = 1; 63 | return hovacui(argn, argv, cairodevice); 64 | } 65 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PROGS=pdftoroff pdffit pdfrects 
pdfrecur pdfannot \ 2 | hovacui fbhovacui drmhovacui xhovacui cairoui 3 | 4 | CFLAGS+=-g -Wall -Wextra -Wformat -Wformat-security 5 | CFLAGS+=${shell pkg-config --cflags poppler-glib} 6 | LDLIBS+=${shell pkg-config --libs poppler-glib} 7 | fbhovacui drmhovacui hovacui cairoui: LDLIBS+=\ 8 | ${shell pkg-config --libs ncurses || echo '' -lncurses -ltinfo} 9 | drmhovacui.o cairoio-drm.o cairodrm.o: CFLAGS+=\ 10 | ${shell pkg-config --cflags libdrm} 11 | drmhovacui hovacui cairodrm: LDLIBS+=${shell pkg-config --libs libdrm} 12 | xhovacui hovacui cairoui: LDLIBS+=${shell pkg-config --libs x11} 13 | 14 | all: ${PROGS} 15 | 16 | install: all 17 | mkdir -p ${DESTDIR}/etc 18 | cp hovacui.conf ${DESTDIR}/etc 19 | mkdir -p ${DESTDIR}/usr/bin 20 | cp hovacui pdfhscript pdfinteractive ${DESTDIR}/usr/bin 21 | cp pdftoroff pdftoebook ${DESTDIR}/usr/bin 22 | cp pdffit pdfrects pdfrecur pdfannot ${DESTDIR}/usr/bin 23 | mkdir -p ${DESTDIR}/usr/share/man/man1 24 | cp hovacui.1 pdftoroff.1 ${DESTDIR}/usr/share/man/man1 25 | cp pdffit.1 pdfrects.1 pdfrecur.1 ${DESTDIR}/usr/share/man/man1 26 | 27 | pdftoroff: pdftext.o 28 | pdfrects: pdfrects-main.o 29 | pdftoroff pdffit pdfrects pdfrecur: pdfrects.o 30 | hovacui fbhovacui drmhovacui xhovacui: pdfrects.o 31 | fbhovacui: cairofb.o vt.o cairoio-fb.o cairoui.o hovacui.o fbhovacui.o 32 | drmhovacui: cairodrm.o vt.o cairoio-drm.o cairoui.o hovacui.o drmhovacui.o 33 | xhovacui: cairoio-x11.o cairoui.o hovacui.o xhovacui.o 34 | hovacui: cairofb.o cairodrm.o vt.o cairoio-x11.o cairoio-fb.o cairoio-drm.o \ 35 | cairoui.o hovacui.o hovacui-main.o 36 | cairoui: cairofb.o vt.o cairoio-fb.o cairoio-x11.o cairoui.o cairoui-main.o 37 | cairodrm: cairodrm-main.o 38 | 39 | clean: 40 | rm -f *.o ${PROGS} cairodrm cairoui-out.txt hovacui-out.txt 41 | 42 | -------------------------------------------------------------------------------- /pdftoebook: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # 
reformat a pdf to a smaller page by extracting text and compiling with groff 4 | # 5 | # -h for inline help 6 | # 7 | # to select only a region of each page, either: 8 | # pdftoebook -p '-m 2 -b [2,3-460,682]' file.pdf 9 | # pdftoebook -p '-m 3 -b [2,3-460,682]' file.pdf 10 | # to obtain the coordinates, press key 'b' in hovacui to save the current box 11 | # in hovacui-boxes.txt; enlarge slightly (~5.0) since only characters that fall 12 | # completely in the box are included, and the current box may lack for example 13 | # tall letters in the first line 14 | 15 | # options 16 | 17 | WIDTH=200 18 | HEIGHT=250 19 | MARGIN=5 20 | RECUR=-n 21 | 22 | while getopts "?w:h:m:p:" opt; 23 | do 24 | case $opt in 25 | w) WIDTH=$OPTARG;; 26 | h) HEIGHT=$OPTARG;; 27 | m) MARGIN=$OPTARG;; 28 | n) RECUR="";; 29 | p) CONVOPTS="$OPTARG";; 30 | *) echo -en "usage:\n\tpdftoebook [-w width] [-h height] " 31 | echo -e "[-m margin] in.pdf [out.pdf]" 32 | echo -e "\t\t-w width\twidth in points, default 200" 33 | echo -e "\t\t-w height\theight in points, default 250" 34 | echo -e "\t\t-w margin\tmargin in points, default 5" 35 | echo -e "\t\t-p options\tpass options to pdftoroff" 36 | exit 1;; 37 | esac 38 | done 39 | 40 | # input and output file names 41 | 42 | shift $((OPTIND - 1)) 43 | [ $# -lt 1 ] && echo "pdf file missing" && exit 1 44 | IN="$1" 45 | shift 1 46 | OUT=$(basename "$IN" .pdf)-ebook.pdf 47 | [ $# -gt 0 ] && OUT="$1" 48 | 49 | echo "$IN -> $OUT ${WIDTH}x$HEIGHT +$MARGIN" 50 | 51 | # calculate line and page 52 | 53 | LINE=$(($WIDTH-$MARGIN*2)) 54 | PAGE=$(($HEIGHT-$MARGIN*2)) 55 | 56 | # extract text from pdf and recompile with groff 57 | 58 | { 59 | cat < "$OUT" 73 | 74 | -------------------------------------------------------------------------------- /vt.c: -------------------------------------------------------------------------------- 1 | /* 2 | * vt.c 3 | * 4 | * vt switch handling 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 
#include 13 | #include 14 | 15 | /* 16 | * whether the terminal is switched out or not 17 | */ 18 | volatile int vt_suspend; 19 | 20 | /* 21 | * terminal needs redrawing 22 | */ 23 | volatile int vt_redraw; 24 | 25 | /* 26 | * called when the terminal switches in (1) or out (0) 27 | */ 28 | void *switcherdata; 29 | void (*switcher)(int, void *); 30 | void noswitcher(int inout, void *data) { 31 | (void) inout; 32 | (void) data; 33 | } 34 | 35 | /* 36 | * signal handler: leave the virtual terminal 37 | */ 38 | void sigusr1(int s) { 39 | (void) s; 40 | switcher(0, switcherdata); 41 | ioctl(STDIN_FILENO, VT_RELDISP, 1); 42 | vt_suspend = 1; 43 | } 44 | 45 | /* 46 | * signal handler: enter the virtual terminal 47 | */ 48 | void sigusr2(int s) { 49 | (void) s; 50 | switcher(1, switcherdata); 51 | ioctl(STDIN_FILENO, VT_RELDISP, VT_ACKACQ); 52 | vt_suspend = 0; 53 | vt_redraw = 1; 54 | } 55 | 56 | /* 57 | * setup virtual terminal for suspend and resume 58 | */ 59 | void vt_setup(void (*switcherfunction)(int, void *), void *data) { 60 | struct vt_mode vtmode; 61 | int ttyfd; 62 | int res; 63 | 64 | switcher = switcherfunction ? 
switcherfunction : noswitcher; 65 | switcherdata = data; 66 | 67 | vt_suspend = 0; 68 | vt_redraw = 0; 69 | signal(SIGUSR1, sigusr1); 70 | signal(SIGUSR2, sigusr2); 71 | 72 | ttyfd = STDIN_FILENO; 73 | 74 | res = ioctl(ttyfd, VT_GETMODE, &vtmode); 75 | if (res == -1) { 76 | perror("VT_GETMODE"); 77 | return; 78 | } 79 | vtmode.mode = VT_PROCESS; 80 | vtmode.relsig = SIGUSR1; 81 | vtmode.acqsig = SIGUSR2; 82 | res = ioctl(ttyfd, VT_SETMODE, &vtmode); 83 | if (res == -1) { 84 | perror("VT_SETMODE"); 85 | return; 86 | } 87 | 88 | // tell the kernel not to restore the text on page 89 | // uncomment when program is finished 90 | // res = ioctl(tty, KDSETMODE, KD_GRAPHICS); 91 | // check res 92 | } 93 | 94 | -------------------------------------------------------------------------------- /pdfrects.1: -------------------------------------------------------------------------------- 1 | .TH pdfrects 1 "November 1, 2017" 2 | . 3 | . 4 | . 5 | .SH NAME 6 | pdfrects - bounding box or text area of the pages of a pdf file 7 | . 8 | . 9 | . 10 | .SH SYNOPSIS 11 | 12 | .PD 0 13 | .TP 9 14 | .B pdfrects 15 | [\fB-f\fP \fIpage\fP] [\fB-l\fP \fIpage\fP] 16 | [\fB-p\fP|\fB-t\fP] [\fB-b\fP] [\fB-e\fP \fIdirection\fP] 17 | .IP 18 | [\fB-d\fP \fIdistance\fP] 19 | [\fB-n\fP [\fB-s\fP \fIn\fP] [\fB-i\fP]] 20 | .IP 21 | [\fB-a\fP] [\fB-r\fP \fIlevel\fP] [\fB-h\fP] \fIfile.pdf\fP 22 | .PD 23 | 24 | .SH DESCRIPTION 25 | 26 | .B pdfrects 27 | finds either the bounding box or the area of the text in the pages of a pdf 28 | file. The first is the smallest rectangle enclosing all text. The second is a 29 | list of rectangles enclosing the text as tightly as possible while obeying a 30 | minimal distance between them (15 by default). 31 | 32 | These rectangles are drawn on the pages of the output \fIfile-boxes.pdf\fP in 33 | random colors, and printed on stdout in YAML format. 
34 | 35 | .SH OPTIONS 36 | 37 | .TP 38 | \fB-f\fP \fIpage\fP 39 | first page 40 | .TP 41 | \fB-l\fP \fIpage\fP 42 | last page 43 | .TP 44 | .B -p 45 | use the painted area instead of the textarea; it is obtained by dividing the 46 | page into a grid of squares and checking which ones contain something 47 | .TP 48 | .B -t 49 | use the rows instead of the textarea; they are obtained by joining 50 | vertically-overlapping characters 51 | .TP 52 | .B -b 53 | bounding box; the default is the textarea 54 | .TP 55 | .B -m 56 | only the largest block of text in each page 57 | .TP 58 | .BI -e " direction 59 | text extents in the given direction (\fIhorizontal\fP or \fIvertical\fP); this 60 | is like the textarea, but boxes that overlap horizontally or vertically are 61 | merged; the resulting set of rectangles tells how much horizontal or vertical 62 | space text takes 63 | .TP 64 | \fB-d\fP \fIdistance\fP 65 | the minimal distance between boxes in the textarea; a lower value implies a 66 | larger number of rectangles 67 | .TP 68 | .B -n 69 | number boxes in the pdf output 70 | .TP 71 | .B -i 72 | print the box numbers inside the boxes, rather than on their side 73 | .TP 74 | \fB-s\fP \fIn\fP 75 | sort boxes by method \fIn\fP: 0=quick, 1=twostep, 2=char 76 | .TP 77 | .B -a 78 | test the box addition function: a 100x100 box is added to a location free of 79 | text 80 | .TP 81 | \fB-r\fP \fIlevel\fP 82 | test the textarea algorithm 83 | .TP 84 | .B -h 85 | help 86 | . 87 | . 88 | . 
89 | .SH SEE ALSO 90 | \fBpdffit\fP(\fI1\fP), 91 | \fBpdftoroff\fP(\fI1\fP) 92 | 93 | -------------------------------------------------------------------------------- /pdfinteractive: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # operate on a point or rectangle of a pdf file 4 | # emphasize 5 | # clip all pages 6 | # erase rectangle (draw in white) 7 | # cover rectangle (draw in black) 8 | # write text 9 | 10 | #SIMULATE=echo 11 | 12 | # tty settings 13 | 14 | clear 15 | PREVSTTY=$(stty -g) 16 | onexit() { 17 | setterm --cursor off 18 | stty $PREVSTTY 19 | } 20 | trap onexit EXIT 21 | stty icrnl onlcr echo icanon 22 | setterm --cursor on 23 | 24 | # arguments 25 | 26 | echo "$@" 27 | KEY="$1" 28 | ID="$2" 29 | FILE="$3" 30 | PAGE="$4" 31 | BOXNUMBER="$5" 32 | SCROLLX="$6" 33 | SCROLLY="$7" 34 | TOTPAGES="$8" 35 | TEXTBOX="$9" 36 | DEST="${10}" 37 | RECT="${11}" 38 | 39 | [ $RECT = '[]' ] && RECT="$TEXTBOX" 40 | echo $RECT 41 | 42 | # coordinates 43 | 44 | X0=$(echo "$RECT" | cut -d, -f1 | cut -d[ -f2) 45 | Y0=$(echo "$RECT" | cut -d, -f2 | cut -d- -f1) 46 | X1=$(echo "$RECT" | cut -d- -f2 | cut -d, -f1) 47 | Y1=$(echo "$RECT" | cut -d, -f3 | cut -d] -f1) 48 | CORNER="[$X0,$Y0]" 49 | echo "file $FILE" 50 | echo "page $PAGE" 51 | echo "box $RECT" 52 | echo "corner $CORNER" 53 | 54 | # output file 55 | 56 | BASE=${FILE%%.pdf} 57 | BASE=${BASE%%-edit-*} 58 | EDIT=${BASE}-edit-%d.pdf 59 | I=1 60 | while [ -f $(printf $EDIT $I) ]; 61 | do 62 | I=$((I+1)) 63 | done 64 | OUT=$(printf $EDIT $I) 65 | TEMP=$(printf "${BASE}-temp-%d.pdf" $I) 66 | echo $OUT 67 | 68 | # commands 69 | 70 | COMMANDS='emphasize clip_page clip fit erase cover exit' 71 | [ $X0 = $X1 ] && [ $Y0 = $Y1 ] && COMMANDS='write red exit' 72 | N=$(echo $COMMANDS | wc -w) 73 | 74 | # operate on file 75 | 76 | PS3="choose command [1-$N]: " 77 | select COMMAND in $COMMANDS; 78 | do 79 | case $COMMAND in 80 | emphasize) 81 | echo emphasize "$RECT" of 
page "$PAGE" of file "$FILE" 82 | $SIMULATE \ 83 | pdfdrawover "$FILE" output "$OUT" page "$PAGE" \ 84 | color [1.0,0.0,0.0,0.5] filledbox "$RECT";; 85 | clip_page) 86 | echo clip page "$RECT" of page "$PAGE" of file "$FILE" 87 | $SIMULATE \ 88 | pdfdrawover "$FILE" output "$OUT" page "$PAGE" \ 89 | emptypage clip "$RECT" paste;; 90 | clip) 91 | echo clip "$RECT" of page "$PAGE" of file "$FILE" 92 | $SIMULATE \ 93 | pdfdrawover "$FILE" output "$OUT" \ 94 | emptypage clip "$RECT" paste;; 95 | fit) 96 | echo fit "$RECT" of file "$FILE" 97 | $SIMULATE \ 98 | pdfdrawover "$FILE" output "$TEMP" \ 99 | emptypage clip "$RECT" paste 100 | $SIMULATE \ 101 | pdffit -o "$OUT" -x "$RECT" "$TEMP" 102 | rm -f "$TEMP";; 103 | erase) 104 | echo erase "$RECT" of page "$PAGE" of file "$FILE" 105 | $SIMULATE \ 106 | pdfdrawover "$FILE" output "$OUT" page "$PAGE" \ 107 | color white filledbox "$RECT";; 108 | cover) 109 | echo cover "$RECT" of page "$PAGE" of file "$FILE" 110 | $SIMULATE \ 111 | pdfdrawover "$FILE" output "$OUT" page "$PAGE" \ 112 | filledbox "$RECT";; 113 | red) COLOR="color [1.0,0.0,0.0]";& 114 | write) 115 | echo write at "$CORNER" in page "$PAGE" of file "$FILE" 116 | echo -n "string to write: " 117 | read S 118 | $SIMULATE \ 119 | pdfdrawover "$FILE" output "$OUT" page "$PAGE" \ 120 | $COLOR moveto "$CORNER" print "$S";; 121 | exit) 122 | echo -n "unchanged" 1>&3 123 | echo 0 > pdfinteractive-ret.txt 124 | exit 0;; 125 | esac 126 | break 127 | done 128 | 129 | echo "$PAGE $BOXNUMBER $SCROLLX $SCROLLY" 1>&3 130 | echo -n "$OUT" 1>&3 131 | echo 3 > pdfinteractive-ret.txt 132 | exit 3 133 | 134 | -------------------------------------------------------------------------------- /pdfrecur.1: -------------------------------------------------------------------------------- 1 | .TH pdfrecur 1 "September 11, 2019" 2 | 3 | . 4 | . 5 | . 6 | .SH NAME 7 | pdfrecur - locate or remove the recurring blocks of text in a PDF file 8 | 9 | . 10 | . 11 | . 
12 | .SH SYNOPSIS 13 | .TP 9 14 | .B pdfrecur 15 | [\fI-s height\fP] [\fI-t distance\fP] [\fI-d\fP] 16 | [\fI-m\fP] [\fI-c\fP] [\fI-n\fP] [\fI-h\fP] 17 | .I file.pdf 18 | 19 | . 20 | . 21 | . 22 | .SH DESCRIPTION 23 | 24 | Locate or remove page numbers, headers and footers from a PDF file. 25 | 26 | These are not part of the main content of the document. Removing them improves 27 | the quality of a following text extraction, for example by \fIpdftoroff(1)\fP. 28 | 29 | . 30 | . 31 | . 32 | .SH OPTIONS 33 | .TP 34 | .BI -s " height 35 | maximal height of recurring text 36 | .TP 37 | .BI -t " distance 38 | text-to-text distance 39 | .TP 40 | .B -c 41 | do not remove recurring text; useful with \fI-d\fP 42 | .TP 43 | .B -m 44 | remove everything outside the main text in the page 45 | .TP 46 | .BI -d 47 | draw a box around recurring text; with \fI-m\fP, shade the main text in the 48 | page 49 | .TP 50 | .B -n 51 | only print location of recurring elements 52 | .TP 53 | .B -h 54 | online help 55 | 56 | . 57 | . 58 | . 59 | .SH DETECTION 60 | 61 | Page numbers, headers and footers are located by three features they usually 62 | exhibit: 63 | 64 | .IP " * " 4 65 | they are short, one or two lines tall at most 66 | .IP " * " 67 | they have the same size in more than one page 68 | .IP " * " 69 | they have the same vertical placement and similar horizontal placement in many 70 | pages 71 | 72 | .P 73 | 74 | For example, page numbers are blocks of text made of a single line; this is the 75 | first feature that helps identifying them. If they are located in the 76 | lower-right corner in odd pages and lower-left in even pages, numbers 3, 5, 7 77 | and 9 have the very same position and size; numbers 11, 13, 15 also have. This 78 | is the second feature: same position in more than one page. Finally, 9 and 11, 79 | as well as any other pair such as 5 and 25, have same vertical placement and 80 | they do not differ too much horizontally. 
Still better, the space taken by one 81 | contains the space taken by the other (the space of 11 contains that of 9). 82 | 83 | Detecting all three features relies on identifying the blocks of text in the 84 | page, which requires the text-to-text distance (the minimal white space that 85 | makes two pieces of text not to be considered in the same block of text). This 86 | amount can be specified by option \fI-t\fP. 87 | 88 | Option \fI-s\fP tells the maximal height of blocks of text considered for being 89 | candidate recurring elements. If the font in the headers is at most 12 points 90 | tall, passing 12 helps the algorithm by excluding all blocks of text made of 91 | two lines or more. 92 | 93 | Alternatively, instead of removing the recurring elements, the largest 94 | rectangle remaining in the page when they are removed is used to clip the text, 95 | if option \fI-m\fP is used. 96 | 97 | The remaining options control whether the recurring elements are only to be 98 | detected without producing an output file (\fI-n\fP), whether to draw a box 99 | around detected recurring elements (for testing purposes, option \fI-d\fP), and 100 | whether the output file contains the recurring elements (again for testing, 101 | option \fI-c\fP). 102 | 103 | . 104 | . 105 | . 106 | .SH SEE ALSO 107 | 108 | \fIpdfrects(1)\fP, \fIpdftoroff(1)\fP, \fIpdffit(1)\fP, \fIhovacui(1)\fP 109 | 110 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pdftoroff 2 | 3 | pdf viewer, scaler, converter (to text, html, etc.) 
by blocks of text 4 | 5 | - hovacui: a pdf viewer for the Linux framebuffer and X11 6 | - pdffit: scale a pdf file to fit a given page size with given margins 7 | - pdftoroff: convert from pdf to roff, html, text, TeX 8 | 9 | ## hovacui 10 | 11 | The **hovacui** pdf viewer for the Linux framebuffer and X11 automatically 12 | zooms to the blocks of text. It is aimed at viewing files on small screens, and 13 | is especially handy for multiple-column documents. Details in the 14 | [hovacui web page](http://sgerwk.altervista.org/hovacui/hovacui.html). 15 | 16 | - a screenshot of hovacui with the goto page dialog: 17 | 18 | ![hovacui: screenshot of the goto page field](/screenshots/fb-12.png?raw=true "hovacui: the gotopage dialog") 19 | 20 | - the main menu: 21 | 22 | ![hovacui: screenshot of the main menu](/screenshots/fb-23.png?raw=true "hovacui: the main menu") 23 | 24 | ## pdffit 25 | 26 | The **pdffit** scaler shrinks or enlarges the pages of a pdf file so that 27 | their text fits into a given paper size (e.g., A4 or letter) with a given 28 | margin. It can also be used to reduce or increase the margin in the 29 | document (the white area around the text). 30 | 31 | The related program **pdfrects** finds the bounding box or the text 32 | area of the pages of a pdf file. 33 | 34 | ## pdftoroff 35 | 36 | The **pdftoroff** program extracts text from pdf files, trying to undo page, 37 | column and paragraph formatting while retaining italic and bold faces. It 38 | outputs text in various text formats: groff, html, plain TeX, text, or a 39 | user-defined format. It is used by the included **pdftoebook** script to 40 | reformat a pdf file to a smaller page, so that it becomes suited to be read on 41 | a small tablet or e-ink ebook reader. 42 | 43 | ## pdfrecur 44 | 45 | The **pdfrecur** filter removes page numbers, headers and footers. 
46 | 47 | ## installation 48 | 49 | Generic instructions: 50 | 51 | ``` 52 | make 53 | make install 54 | ``` 55 | 56 | ### archlinux 57 | 58 | This package is in AUR. Still, the PKGBUILD file is also accessible as an asset 59 | from github. 60 | 61 | - go to the [release page](../../releases) and download the latest ``PKGBUILD`` file to an empty directory 62 | - in that directory, run `makepkg` 63 | - install: `sudo pacman -U pdftoroff...tar.xz` 64 | 65 | ### opensuse 66 | 67 | If the latest release in the [release page](../../releases) is for 68 | example `1.0.0`: 69 | 70 | - download sources: `curl -L -o $HOME/rpmbuild/SOURCES/pdftoroff-1.0.0.tar.gz https://github.com/sgerwk/pdftoroff/archive/v1.0.0.tar.gz` (replace `1.0.0` with the latest version number) 71 | - download `pdftoroff.spec` from the [release page](../../releases) 72 | - make the package: `rpmbuild -bb pdftoroff.spec` 73 | - install: `sudo rpm -i $HOME/rpmbuild/RPMS/pdftoroff-version-etc.rpm` 74 | 75 | ### debian 76 | 77 | - download the sources, unpack and compile them (do not install yet) 78 | - make a directory `somewhere/pkg/DEBIAN` 79 | - download there the `control` file from the [release page](../../releases) 80 | - check and possibly replace the field `Architecture:` in `control` 81 | - in the pdftoroff source directory run `make DESTDIR=somewhere/pkg install` 82 | - create the package: `dpkg-deb -b somewhere/pkg .` 83 | - install: `sudo dpkg -i pdftoroff...deb` 84 | 85 | ## what's new 86 | 87 | - optionally show the current time (Sept. 2022) 88 | - select only the visible part of the current textbox (Sept. 2022) 89 | - night mode: show pdf in reverse colors (Apr. 2023) 90 | - key 'G': move back to the position prior to jumping or searching (Apr. 2023) 91 | - cache position and search string (Apr. 
2023) 92 | - key 'x' to position a cursor on the page (May 2023) 93 | - some editing via an external script (May 2023) 94 | - pdffit can try to ignore headers and footers (June 2023) 95 | 96 | -------------------------------------------------------------------------------- /pdftext.h: -------------------------------------------------------------------------------- 1 | /* 2 | * pdftext.h 3 | * 4 | * convert pdf to text or rich text (roff, html, tex) 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "pdfrects.h" 12 | 13 | /* 14 | * parameters for the input (D=delta, %=percentage of page) 15 | */ 16 | struct measure { 17 | int newline; /* more Dy than this is a newline */ 18 | int newpar; /* more Dy than this is a new paragraph */ 19 | int rightreturn; /* line end before this x% is a new paragraph */ 20 | int newcolumnx; /* more than this Dx% is new column (and) */ 21 | int newcolumny; /* more than this -Dy% is new column (and) */ 22 | int indent; /* more than this at start of line is indent */ 23 | int headfooter; /* ignore x,y of chars at begin/end of page */ 24 | int blockdistance; /* distance between blocks of text */ 25 | char hyphen; /* this character is an hyphen */ 26 | }; 27 | 28 | /* 29 | * output strings 30 | */ 31 | struct format { 32 | char *parstart; /* paragraph start */ 33 | char *parend; /* paragraph end */ 34 | 35 | char *fontname; /* format for printing font names, or NULL */ 36 | 37 | char *plain; /* set font face */ 38 | char *italic; 39 | char *bold; 40 | char *bolditalic; 41 | 42 | char *italicbegin; /* begin/end font face */ 43 | char *italicend; 44 | char *boldbegin; 45 | char *boldend; 46 | 47 | gboolean reset; /* reset and restart face at par breaks */ 48 | 49 | char *backslash; 50 | char *firstdot; /* substitute this for dot at start of line */ 51 | char *less; 52 | char *greater; 53 | char *and; 54 | }; 55 | 56 | /* 57 | * known output formats 58 | */ 59 | extern struct format format_roff; 60 | extern struct 
format format_html; 61 | extern struct format format_tex; 62 | extern struct format format_textfont; 63 | extern struct format format_text; 64 | 65 | /* print reason for a paragraph break */ 66 | extern gboolean debugpar; 67 | 68 | /* previous character, keep START at the end */ 69 | #define NONE '\0' 70 | #define START '\1' 71 | 72 | /* data for processing the characters */ 73 | struct scandata; 74 | 75 | /* start processing a document */ 76 | void startdocument(FILE *fd, 77 | int method, struct measure *measure, struct format *format, 78 | struct scandata *scandata); 79 | 80 | /* start processing a page (no end needed) */ 81 | void startpage(struct scandata *scanpage); 82 | 83 | /* show the characters in a box in a page */ 84 | void showregion(FILE *fd, PopplerRectangle *zone, RectangleList *textarea, 85 | char *text, GList *attrlist, 86 | PopplerRectangle *rects, guint nrects, 87 | struct measure *measure, struct format *format, 88 | struct scandata *scandata, gboolean detectcolumn); 89 | 90 | /* show the characters in a page */ 91 | void showpage(FILE *fd, PopplerPage *page, 92 | PopplerRectangle *zone, 93 | int method, int order, 94 | struct measure *measure, struct format *format, 95 | struct scandata *scandata); 96 | 97 | /* end processing a document */ 98 | void enddocument(FILE *fd, 99 | int method, struct measure *measure, struct format *format, 100 | struct scandata *scandata); 101 | 102 | /* show some pages of a pdf document */ 103 | void showdocumentpart(FILE *fd, PopplerDocument *doc, int first, int last, 104 | PopplerRectangle *zone, 105 | int method, int order, 106 | struct measure *measure, struct format *format); 107 | 108 | /* show a pdf document */ 109 | void showdocument(FILE *fd, PopplerDocument *doc, 110 | PopplerRectangle *zone, 111 | int method, int order, 112 | struct measure *measure, struct format *format); 113 | 114 | /* show a pdf file */ 115 | void showfile(FILE *fd, char *filename, int first, int last, 116 | PopplerRectangle *zone, 
117 | int method, int order, 118 | struct measure *measure, struct format *format); 119 | 120 | /* parse a string into a struct format */ 121 | struct format *parseformat(char *s); 122 | 123 | -------------------------------------------------------------------------------- /cairoui.h: -------------------------------------------------------------------------------- 1 | /* 2 | * cairoui.h 3 | */ 4 | 5 | #ifdef _CAIROUI_H 6 | #else 7 | #define _CAIROUI_H 8 | 9 | /* 10 | * return values of windows, other than a window 11 | */ 12 | #define CAIROUI_DONE -1 13 | #define CAIROUI_LEAVE -2 14 | #define CAIROUI_FAIL -3 15 | #define CAIROUI_INVALID -4 16 | #define CAIROUI_UNCHANGED -5 17 | #define CAIROUI_CHANGED -6 18 | #define CAIROUI_REFRESH -7 19 | #define CAIROUI_EXIT -8 20 | #define CAIROUI_OUT(res) (_cairoui_out(res)) 21 | int _cairoui_out(int res); 22 | 23 | /* 24 | * the cairoui structure, passed to all functions 25 | */ 26 | struct cairoui { 27 | /* cairo device */ 28 | struct cairodevice *cairodevice; 29 | 30 | /* cairo surface */ 31 | cairo_t *cr; 32 | 33 | /* destination rectangle */ 34 | int usearea; 35 | cairo_rectangle_t area; 36 | cairo_rectangle_t dest; 37 | int margin; 38 | 39 | /* size of font */ 40 | int fontsize; 41 | cairo_font_extents_t extents; 42 | 43 | /* whether the output is to be flushed */ 44 | int flush; 45 | 46 | /* whether the document has to be redrawn */ 47 | int redraw; 48 | 49 | /* whether to update/reload the document */ 50 | int reload; 51 | 52 | /* if not NO_TIMEOUT, stop input on timeout and return KEY_TIMEOUT */ 53 | int timeout; 54 | 55 | /* pasted text */ 56 | char *paste; 57 | 58 | /* external command */ 59 | struct command command; 60 | 61 | /* log file */ 62 | int log; 63 | char *outname; 64 | FILE *outfile; 65 | 66 | /* callback data */ 67 | void *cb; 68 | 69 | /* window list */ 70 | struct windowlist { 71 | int window; 72 | char *name; 73 | int (*function)(int, struct cairoui *); 74 | } *windowlist; 75 | 76 | /* label list */ 
77 | void (**labellist)(struct cairoui *); 78 | 79 | /* draw the document */ 80 | void (*draw)(struct cairoui *cairoui); 81 | 82 | /* resize function */ 83 | void (*resize)(struct cairoui *cairoui); 84 | 85 | /* update/reload function */ 86 | void (*update)(struct cairoui *cairoui); 87 | 88 | /* external command */ 89 | int (*external)(struct cairoui *cairoui, int window); 90 | }; 91 | 92 | /* 93 | * a changeable rectangle 94 | */ 95 | int cairoui_rectangle(int c, struct cairoui *cairoui, 96 | int *corner, cairo_rectangle_t *rect, int cross); 97 | 98 | /* 99 | * a list of strings, possibly with a selected one 100 | */ 101 | int cairoui_list(int c, struct cairoui *cairoui, char *viewtext[], 102 | int *line, int *selected); 103 | 104 | /* 105 | * a textfield 106 | */ 107 | int cairoui_field(int c, struct cairoui *cairoui, 108 | char *prompt, char *current, int *pos, char *error); 109 | 110 | /* 111 | * a textfield for an integer number 112 | */ 113 | int cairoui_number(int c, struct cairoui *cairoui, 114 | char *prompt, char *current, int *pos, char *error, 115 | int *destination, double min, double max); 116 | 117 | /* 118 | * a label at the given number of labels from the bottom 119 | */ 120 | void cairoui_label(struct cairoui *cairoui, char *string, int bottom); 121 | 122 | /* 123 | * formatted print to a label; timeout=NO_TIMEOUT means infinite 124 | */ 125 | int cairoui_printlabel(struct cairoui *cairoui, char *help, int timeout, 126 | char *format, ...); 127 | 128 | /* 129 | * initialize the labels 130 | */ 131 | void cairoui_initlabels(struct cairoui *cairoui); 132 | 133 | /* 134 | * cairoui structure default 135 | */ 136 | void cairoui_default(struct cairoui *cairoui); 137 | 138 | /* 139 | * cairoui reset 140 | */ 141 | void cairoui_reset(struct cairoui *cairoui); 142 | 143 | /* 144 | * ensure the output file is open 145 | */ 146 | int ensureoutputfile(struct cairoui *cairoui); 147 | 148 | /* 149 | * logging function 150 | */ 151 | #define LEVEL_MAIN 0x0001 
152 | #define LEVEL_DRAW 0x0002 153 | void cairoui_logstatus(int level, char *prefix, int window, 154 | struct cairoui *cairoui, int c); 155 | 156 | /* 157 | * main loop 158 | */ 159 | void cairoui_main(struct cairoui *cairoui, int firstwindow); 160 | 161 | #endif 162 | 163 | -------------------------------------------------------------------------------- /cairofb.c: -------------------------------------------------------------------------------- 1 | /* 2 | * cairofb.c 3 | * 4 | * a cairo context for drawing on a linux framebuffer 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "cairofb.h" 16 | 17 | /* 18 | * create a cairo context from a framebuffer device 19 | */ 20 | struct cairofb *cairofb_init(char *devname, int doublebuffering) { 21 | struct cairofb *cairofb; 22 | struct fb_fix_screeninfo finfo; 23 | struct fb_var_screeninfo vinfo; 24 | int res; 25 | int stride; 26 | cairo_format_t format; 27 | cairo_status_t status; 28 | 29 | cairofb = malloc(sizeof(struct cairofb)); 30 | 31 | /* open device, get info */ 32 | 33 | cairofb->dev = open(devname, O_RDWR); 34 | if (cairofb->dev == -1) { 35 | perror(devname); 36 | free(cairofb); 37 | return NULL; 38 | } 39 | 40 | res = ioctl(cairofb->dev, FBIOGET_FSCREENINFO, &finfo); 41 | if (res == -1) { 42 | perror("FBIOGET_FSCREENINFO"); 43 | free(cairofb); 44 | return NULL; 45 | } 46 | res = ioctl(cairofb->dev, FBIOGET_VSCREENINFO, &vinfo); 47 | if (res == -1) { 48 | perror("FBIOGET_VSCREENINFO"); 49 | free(cairofb); 50 | return NULL; 51 | } 52 | 53 | cairofb->width = vinfo.xres; 54 | cairofb->height = vinfo.yres; 55 | stride = finfo.line_length; 56 | cairofb->length = finfo.smem_len; 57 | format = CAIRO_FORMAT_INVALID; 58 | if (finfo.type == FB_TYPE_PACKED_PIXELS && 59 | finfo.visual == FB_VISUAL_TRUECOLOR) { 60 | if (vinfo.bits_per_pixel == 16) 61 | format = CAIRO_FORMAT_RGB16_565; 62 | if (vinfo.bits_per_pixel == 32) 63 | format = 
CAIRO_FORMAT_RGB24; 64 | /* TBD: also check grayscale, offset/length */ 65 | /* TBF: if bpp, grayscale or offset/length are not supported by 66 | cairo, try changing vinfo by FBIOPUT_VSCREENINFO */ 67 | } 68 | if (format == CAIRO_FORMAT_INVALID) { 69 | printf("ERROR: unsupported type/visual\n"); 70 | free(cairofb); 71 | return NULL; 72 | } 73 | 74 | /* from framebuffer to cairo */ 75 | 76 | cairofb->img = mmap(0, cairofb->length, 77 | PROT_READ | PROT_WRITE, MAP_SHARED, cairofb->dev, 0); 78 | if (cairofb->img == MAP_FAILED) { 79 | perror("mmap"); 80 | free(cairofb); 81 | return NULL; 82 | } 83 | cairofb->dbuf = doublebuffering ? 84 | malloc(cairofb->length) : cairofb->img; 85 | 86 | cairofb->surface = cairo_image_surface_create_for_data(cairofb->dbuf, 87 | format, cairofb->width, cairofb->height, stride); 88 | status = cairo_surface_status(cairofb->surface); 89 | if (status != CAIRO_STATUS_SUCCESS) 90 | printf("WARNING: cairo status=%d\n", status); 91 | cairofb->cr = cairo_create(cairofb->surface); 92 | 93 | /* set cairo clip */ 94 | 95 | cairo_rectangle(cairofb->cr, 0, 0, cairofb->width, cairofb->height); 96 | cairo_clip(cairofb->cr); 97 | 98 | return cairofb; 99 | } 100 | 101 | /* 102 | * clear the cairo context 103 | */ 104 | void cairofb_clear(struct cairofb *cairofb, 105 | double red, double green, double blue) { 106 | cairo_identity_matrix(cairofb->cr); 107 | cairo_set_source_rgb(cairofb->cr, red, green, blue); 108 | cairo_rectangle(cairofb->cr, 0, 0, cairofb->width, cairofb->height); 109 | cairo_fill(cairofb->cr); 110 | } 111 | 112 | /* 113 | * return whether double buffering is used 114 | */ 115 | int cairofb_doublebuffering(struct cairofb *cairofb) { 116 | return cairofb->img != cairofb->dbuf; 117 | } 118 | 119 | /* 120 | * flush output, if double buffering 121 | */ 122 | void cairofb_flush(struct cairofb *cairofb) { 123 | if (cairofb_doublebuffering(cairofb)) 124 | memcpy(cairofb->img, cairofb->dbuf, cairofb->length); 125 | } 126 | 127 | /* 128 | * 
deallocate the cairo context and related data 129 | */ 130 | void cairofb_finish(struct cairofb *cairofb) { 131 | cairo_destroy(cairofb->cr); 132 | cairo_surface_destroy(cairofb->surface); 133 | if (cairofb->img != cairofb->dbuf) 134 | free(cairofb->dbuf); 135 | munmap(cairofb->img, cairofb->length); 136 | close(cairofb->dev); 137 | free(cairofb); 138 | } 139 | 140 | -------------------------------------------------------------------------------- /cairoio.h: -------------------------------------------------------------------------------- 1 | /* 2 | * cairoio.h 3 | */ 4 | 5 | /* 6 | * the cairodevice 7 | * 8 | * the cairodevice structure allows the cairoui interface to be run whenever a 9 | * cairo context can be obtained and input is available 10 | * 11 | * void init(struct cairodevice *cairodevice, 12 | * char *device, int doublebuffering, 13 | * int argn, char *argv[], char *opts); 14 | * create the cairo context 15 | * 16 | * void finish(struct cairodevice *cairodevice); 17 | * undo what done by init 18 | * 19 | * cairo_t *context(struct cairodevice *cairodevice); 20 | * double width(struct cairodevice *cairodevice); 21 | * double height(struct cairodevice *cairodevice); 22 | * double screenwidth(struct cairodevice *cairodevice); 23 | * double screenheight(struct cairodevice *cairodevice); 24 | * return the cairo context and its size 25 | * 26 | * int doublebuffering(struct cairodevice *cairodevice); 27 | * return whether double buffering is used 28 | * 29 | * void clear(struct cairodevice *cairodevice); 30 | * void blank(struct cairodevice *cairodevice); 31 | * void flush(struct cairodevice *cairodevice); 32 | * clear and flush 33 | * 34 | * int isactive(struct cairodevice *cairodevice); 35 | * whether the output is active 36 | * do not draw on the framebuffer when the vt is switched out 37 | * 38 | * int input(struct cairodevice *cairodevice, 39 | * int timeout, struct command *command); 40 | * return a key 41 | * on external command: store it in 
command->command, return KEY_EXTERNAL 42 | * block for at most timeout milliseconds, NO_TIMEOUT=infinite 43 | */ 44 | 45 | #ifdef _CAIROOUTPUT_H 46 | #else 47 | #define _CAIROOUTPUT_H 48 | 49 | #include 50 | 51 | /* 52 | * include curses to get the key macros 53 | */ 54 | #include 55 | 56 | /* 57 | * imaginary keys 58 | */ 59 | #define KEY_NONE ((KEY_MAX) + 1) 60 | #define KEY_INIT ((KEY_MAX) + 2) 61 | #define KEY_FINISH ((KEY_MAX) + 3) 62 | #ifdef KEY_REFRESH 63 | #else 64 | #define KEY_REFRESH ((KEY_MAX) + 4) 65 | #endif 66 | #define KEY_REDRAW ((KEY_MAX) + 5) 67 | #ifdef KEY_RESIZE 68 | #else 69 | #define KEY_RESIZE ((KEY_MAX) + 6) 70 | #endif 71 | #define KEY_TIMEOUT ((KEY_MAX) + 7) 72 | #ifdef KEY_SUSPEND 73 | #else 74 | #define KEY_SUSPEND ((KEY_MAX) + 8) 75 | #endif 76 | #define KEY_SIGNAL ((KEY_MAX) + 9) 77 | #define KEY_EXTERNAL ((KEY_MAX) + 10) 78 | #define KEY_PASTE ((KEY_MAX) + 11) 79 | 80 | #define ISIMAGINARYKEY(c) ( \ 81 | ((c) == KEY_NONE) || \ 82 | ((c) == KEY_INIT) || \ 83 | ((c) == KEY_REFRESH) || \ 84 | ((c) == KEY_REDRAW) || \ 85 | ((c) == KEY_RESIZE) || \ 86 | ((c) == KEY_TIMEOUT) || \ 87 | ((c) == KEY_SUSPEND) || \ 88 | ((c) == KEY_SIGNAL) || \ 89 | ((c) == KEY_EXTERNAL) || \ 90 | ((c) == KEY_PASTE) \ 91 | ) 92 | #define ISREALKEY(c) (! 
ISIMAGINARYKEY(c)) 93 | 94 | /* 95 | * no timeout 96 | */ 97 | #define NO_TIMEOUT (-1) 98 | 99 | /* 100 | * external command 101 | */ 102 | struct command { 103 | int fd; 104 | FILE *stream; 105 | char *command; 106 | int max; 107 | }; 108 | 109 | /* 110 | * a cairo device 111 | */ 112 | struct cairodevice { 113 | char *options; 114 | char *usage; 115 | struct cairoio *cairoio; 116 | int (*init)(struct cairodevice *cairodevice, 117 | char *device, int doublebuffering, 118 | int argn, char *argv[], char *opts); 119 | void (*finish)(struct cairodevice *cairodevice); 120 | cairo_t *(*context)(struct cairodevice *cairodevice); 121 | double (*width)(struct cairodevice *cairodevice); 122 | double (*height)(struct cairodevice *cairodevice); 123 | double (*screenwidth)(struct cairodevice *cairodevice); 124 | double (*screenheight)(struct cairodevice *cairodevice); 125 | int (*doublebuffering)(struct cairodevice *cairodevice); 126 | void (*clear)(struct cairodevice *cairodevice); 127 | void (*blank)(struct cairodevice *cairodevice); 128 | void (*flush)(struct cairodevice *cairodevice); 129 | int (*isactive)(struct cairodevice *cairodevice); 130 | int (*input)(struct cairodevice *cairodevice, int timeout, 131 | struct command *command); 132 | }; 133 | 134 | #endif 135 | 136 | -------------------------------------------------------------------------------- /textarea.txt: -------------------------------------------------------------------------------- 1 | The text area 2 | ============= 3 | 4 | The text area is the part of the page taken by characters. It is represented by 5 | a set of rectangles. Characters closer to a certain amount (the text distance) 6 | are in the same rectangle. 7 | 8 | The algorithm for determining the text area is based on a double rectangle 9 | subtraction and an iterated merge of rectangles. 10 | 11 | Subtraction A-B removes a set of rectangles B from another set of rectangles A. 12 | More specifically, it subtracts every b in B from every a in A. 
This 13 | single-rectangle subtraction a-b is calculated by generating up to four new 14 | rectangles. The limit case is when b is strictly contained in a. 15 | 16 | +-----------------+ +---+-------+-----+ 17 | | | | | a1 | | 18 | | +-------+ | +---+-------+-----+ 19 | | | | | | | | | 20 | | | b | | ==> |a2 | | a3 | 21 | | | | | | | | | 22 | | +-------+ | +---+-------+-----+ 23 | | | | | a4 | | 24 | | a | | | | | 25 | +-----------------+ +---+-------+-----+ 26 | 27 | In this figure, a1 and a4 are as wide as a, while a2 and a3 are as tall as a. 28 | When b is not strictly contained in a, some of these four rectangles have 29 | width or height null or negative. These are removed. Also removed are 30 | rectangles thinner or shorter than the text distance. 31 | 32 | A single-rectangle subtraction a-b results in up to four rectangles 33 | a1,a2,a3,a4. A set collects these new rectangles: A-b = u{a-b | a in A}. 34 | Subtraction is repeated on this resulting set for another b' in B, resulting in 35 | (A-b)-b'. This is done again for all other elements of B, producing A-B. The 36 | result is the subarea of A that is not taken by B. 37 | 38 | Starting from the rectangles enclosing the characters in the page, a first step 39 | collects adjacent characters; this reduces the number of rectangles, but is not 40 | necessary for correctness. Then, the white area in the page is calculated by 41 | subtracting these rectangles from a rectangle as large as the page. The result 42 | is a representation of the area of the page where there are no characters. The 43 | text area is calculated by subtracting it from a rectangle as large as the 44 | page. 45 | 46 | - collect adjacent characters in words 47 | - white_area = page - words 48 | - black_area = page - white_area 49 | - join touching rectangles in black_area 50 | 51 | The last step is necessary because the rectangles in the black area usually 52 | overlap.
All overlapping rectangles are merged: they are replaced by the 53 | smallest rectangle including them. 54 | 55 | +---------+ +-----------------+ 56 | | | | . | 57 | | B | | . | 58 | +-------+---+ | |.............. | 59 | | | | | ==> | . . | 60 | | A +---+-----+ | A+B ........| 61 | | | | . | 62 | +-----------+ +-----------------+ 63 | 64 | This operation cannot be done only once, because the merged rectangle may 65 | include another rectangle that did not previously overlap any of the merged ones. 66 | Once rectangles are merged, they have to be checked again for overlapping. This 67 | is done over and over again until no new merge is produced. For the text area 68 | this should not be a problem because the rectangles are usually few at this 69 | point. For a large number of rectangles with few overlaps, a better 70 | solution exists [white-support.txt]. 71 | 72 | +-----+ +-----+ 73 | | C | | C | 74 | | | +---------+ | +---+-------------+ 75 | +-----+ | | +-+---+ . | 76 | | B | | . | 77 | +-------+---+ | |............ | 78 | | | | | ==> | . . | 79 | | A +---+-----+ | A+B ..........| 80 | | | | .
| 81 | +-----------+ +-----------------+ 82 | 83 | -------------------------------------------------------------------------------- /pdfrecur.c: -------------------------------------------------------------------------------- 1 | /* 2 | * pdfrecur.c 3 | * 4 | * locate or remove the recurring blocks of text in a PDF document 5 | * 6 | * aimed at removing page numbers, headers and footers 7 | */ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | #include "pdfrects.h" 18 | 19 | /* 20 | * main 21 | */ 22 | int main(int argc, char *argv[]) { 23 | int opt; 24 | gboolean usage = FALSE; 25 | char *infile, *outfile; 26 | gdouble recurheight = -1, distance = -1; 27 | gboolean draw = FALSE, clip = TRUE, noout = FALSE, usemain = FALSE; 28 | 29 | PopplerDocument *doc; 30 | PopplerPage *page; 31 | int npages, n; 32 | gdouble width, height; 33 | RectangleList *textarea, *flist; 34 | PopplerRectangle *maintext; 35 | 36 | cairo_surface_t *surface; 37 | cairo_t *cr; 38 | 39 | /* arguments */ 40 | 41 | while ((opt = getopt(argc, argv, "s:t:mcdnh")) != -1) 42 | switch(opt) { 43 | case 's': 44 | height = atof(optarg); 45 | break; 46 | case 't': 47 | distance = atof(optarg); 48 | break; 49 | case 'm': 50 | usemain = TRUE; 51 | break; 52 | case 'c': 53 | clip = FALSE; 54 | break; 55 | case 'd': 56 | draw = TRUE; 57 | break; 58 | case 'n': 59 | noout = TRUE; 60 | break; 61 | case 'h': 62 | usage = TRUE; 63 | break; 64 | } 65 | 66 | if (! 
usage && argc - 1 < optind) { 67 | printf("input file name missing\n"); 68 | usage = TRUE; 69 | } 70 | if (usage) { 71 | printf("usage:\n"); 72 | printf("\tpdfrecur "); 73 | printf("[-s height] [-t distance] [-d] [-m] [-c] [-n] "); 74 | printf("[-h] file.pdf\n"); 75 | printf("\t\t-s height\tmaximal height of recurring text\n"); 76 | printf("\t\t-t distance\ttext-to-text distance\n"); 77 | printf("\t\t-d\t\tdraw a box around removed rectangles\n"); 78 | printf("\t\t-m\t\tuse the main text block in the page\n"); 79 | printf("\t\t-c\t\tdo not remove recurring rectangles\n"); 80 | printf("\t\t-n\t\tonly print recurring rectangles\n"); 81 | printf("\t\t-h\t\tthis help\n"); 82 | exit(EXIT_FAILURE); 83 | } 84 | infile = filenametouri(argv[optind]); 85 | if (! infile) 86 | exit(EXIT_FAILURE); 87 | outfile = pdfaddsuffix(argv[optind], "norecur"); 88 | debugfrequent = 0x02 | 0x04; 89 | 90 | /* open file */ 91 | 92 | doc = poppler_document_new_from_file(infile, NULL, NULL); 93 | if (doc == NULL) { 94 | printf("error opening pdf file\n"); 95 | exit(EXIT_FAILURE); 96 | } 97 | 98 | /* pages */ 99 | 100 | npages = poppler_document_get_n_pages(doc); 101 | if (npages < 1) { 102 | printf("no page in document\n"); 103 | exit(EXIT_FAILURE); 104 | } 105 | 106 | /* find the recurring text blocks */ 107 | 108 | flist = rectanglevector_frequent(doc, recurheight, distance); 109 | if (usemain) { 110 | maintext = rectanglevector_main(doc, flist, 111 | recurheight, distance); 112 | printf("maintext:\n"); 113 | rectangle_printyaml(stdout, " - ", " ", maintext); 114 | } 115 | if (noout) 116 | return 0; 117 | 118 | /* copy to destination */ 119 | 120 | surface = cairo_pdf_surface_create(outfile, 1, 1); 121 | 122 | printf("infile: %s\n", argv[optind]); 123 | printf("outfile: %s\n", outfile); 124 | printf("pages: \n"); 125 | 126 | for (n = 0; n < npages; n++) { 127 | printf(" - page: %d\n", n); 128 | page = poppler_document_get_page(doc, n); 129 | poppler_page_get_size(page, &width, &height); 130 
| cairo_pdf_surface_set_size(surface, width, height); 131 | 132 | cr = cairo_create(surface); 133 | 134 | textarea = rectanglelist_textarea_distance(page, distance); 135 | cairo_save(cr); 136 | if (clip) { 137 | if (usemain) { 138 | rectangle_cairo(cr, maintext, 0); 139 | cairo_clip(cr); 140 | } 141 | else rectanglelist_clip_containing(cr, page, 142 | textarea, flist); 143 | } 144 | poppler_page_render_for_printing(page, cr); 145 | cairo_restore(cr); 146 | if (draw) { 147 | rectanglelist_draw(cr, flist, 148 | FALSE, FALSE, FALSE, FALSE); 149 | if (usemain) 150 | rectangle_draw(cr, maintext, 151 | FALSE, TRUE, FALSE); 152 | } 153 | 154 | cairo_destroy(cr); 155 | cairo_surface_show_page(surface); 156 | 157 | g_object_unref(page); 158 | } 159 | 160 | cairo_surface_destroy(surface); 161 | 162 | return EXIT_SUCCESS; 163 | } 164 | 165 | -------------------------------------------------------------------------------- /cairoio-fb.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "vt.h" 4 | #include "cairofb.h" 5 | #include "cairoio.h" 6 | 7 | /* 8 | * structure for init data 9 | */ 10 | struct initdata { 11 | }; 12 | 13 | /* 14 | * create a cairo context 15 | */ 16 | int cairoinit_fb(struct cairodevice *cairodevice, 17 | char *device, int doublebuffering, 18 | int argn, char *argv[], char *allopts) { 19 | struct cairofb *cairofb; 20 | WINDOW *w; 21 | 22 | (void) argn; 23 | (void) argv; 24 | (void) allopts; 25 | 26 | if (device == NULL) 27 | device = "/dev/fb0"; 28 | 29 | cairofb = cairofb_init(device, doublebuffering); 30 | if (cairofb == NULL) { 31 | printf("cannot open %s as a cairo surface\n", device); 32 | return -1; 33 | } 34 | 35 | if (getenv("ESCDELAY") == NULL) 36 | setenv("ESCDELAY", "200", 1); 37 | w = initscr(); 38 | cbreak(); 39 | noecho(); 40 | keypad(w, TRUE); 41 | curs_set(0); 42 | ungetch(KEY_INIT); 43 | getch(); 44 | timeout(0); 45 | 46 | vt_setup(NULL, NULL); 47 | 48 | 
cairodevice->cairoio = (struct cairoio *) cairofb; 49 | return 0; 50 | } 51 | 52 | /* 53 | * close a cairo context 54 | */ 55 | void cairofinish_fb(struct cairodevice *cairodevice) { 56 | if (cairodevice != NULL && cairodevice->cairoio != NULL) 57 | cairofb_finish((struct cairofb *) cairodevice->cairoio); 58 | clear(); 59 | refresh(); 60 | endwin(); 61 | } 62 | 63 | /* 64 | * get the cairo context from a cairo envelope 65 | */ 66 | cairo_t *cairocontext_fb(struct cairodevice *cairodevice) { 67 | return ((struct cairofb *) cairodevice->cairoio)->cr; 68 | } 69 | 70 | /* 71 | * get the width from a cairo envelope 72 | */ 73 | double cairowidth_fb(struct cairodevice *cairodevice) { 74 | return ((struct cairofb *) cairodevice->cairoio)->width; 75 | } 76 | 77 | /* 78 | * get the heigth from a cairo envelope 79 | */ 80 | double cairoheight_fb(struct cairodevice *cairodevice) { 81 | return ((struct cairofb *) cairodevice->cairoio)->height; 82 | } 83 | 84 | /* 85 | * return whether double buffering is used 86 | */ 87 | int cairodoublebuffering_fb(struct cairodevice *cairodevice) { 88 | return cairofb_doublebuffering((struct cairofb *) cairodevice->cairoio); 89 | } 90 | 91 | /* 92 | * clear 93 | */ 94 | void cairoclear_fb(struct cairodevice *cairodevice) { 95 | cairofb_clear((struct cairofb *) cairodevice->cairoio, 1.0, 1.0, 1.0); 96 | } 97 | 98 | /* 99 | * blank 100 | */ 101 | void cairoblank_fb(struct cairodevice *cairodevice) { 102 | cairofb_clear((struct cairofb *) cairodevice->cairoio, 0.0, 0.0, 0.0); 103 | } 104 | 105 | /* 106 | * flush 107 | */ 108 | void cairoflush_fb(struct cairodevice *cairodevice) { 109 | cairofb_flush((struct cairofb *) cairodevice->cairoio); 110 | } 111 | 112 | /* 113 | * whether the output is currently active 114 | */ 115 | int cairoisactive_fb(struct cairodevice *cairodevice) { 116 | (void) cairodevice; 117 | return ! 
vt_suspend; 118 | } 119 | 120 | /* 121 | * get a single input from a cairo envelope; timeout is in milliseconds 122 | */ 123 | int cairoinput_fb(struct cairodevice *cairodevice, int timeout, 124 | struct command *command) { 125 | fd_set fds; 126 | int max, ret; 127 | struct timeval tv; 128 | int c = KEY_NONE, l, r; /* KEY_NONE if nothing can be read */ 129 | int different; 130 | 131 | (void) cairodevice; 132 | 133 | FD_ZERO(&fds); 134 | FD_SET(STDIN_FILENO, &fds); 135 | max = STDIN_FILENO; 136 | if (command->fd != -1) { 137 | FD_SET(command->fd, &fds); 138 | max = max > command->fd ? max : command->fd; 139 | } 140 | 141 | tv.tv_sec = timeout / 1000; 142 | tv.tv_usec = (timeout % 1000) * 1000; /* milliseconds to microseconds */ 143 | 144 | ret = select(max + 1, &fds, NULL, NULL, 145 | timeout != NO_TIMEOUT ? &tv : NULL); 146 | if (ret != -1 && command->fd != -1 && FD_ISSET(command->fd, &fds)) { 147 | fgets(command->command, command->max, command->stream); 148 | return KEY_EXTERNAL; 149 | } 150 | 151 | if (vt_suspend && timeout != 0) 152 | return KEY_SUSPEND; 153 | 154 | if (vt_redraw) { 155 | vt_redraw = FALSE; 156 | return KEY_REDRAW; 157 | } 158 | 159 | if (ret == -1) 160 | return KEY_SIGNAL; 161 | 162 | if (FD_ISSET(STDIN_FILENO, &fds)) { 163 | different = 0; 164 | for (l = getch(), r = 0; 165 | l != ERR && r < command->max - 1; 166 | l = getch()) { 167 | command->command[r++] = l; 168 | if (c != l && r > 1) 169 | different = 1; 170 | c = l; 171 | } 172 | command->command[r] = '\0'; 173 | return r < 4 || ! different ?
c : KEY_PASTE; 174 | } 175 | 176 | return KEY_TIMEOUT; 177 | } 178 | 179 | /* 180 | * the cairo device for the framebuffer 181 | */ 182 | struct cairodevice cairodevicefb = { 183 | "", 184 | "", 185 | NULL, 186 | cairoinit_fb, cairofinish_fb, 187 | cairocontext_fb, 188 | cairowidth_fb, cairoheight_fb, 189 | cairowidth_fb, cairoheight_fb, 190 | cairodoublebuffering_fb, 191 | cairoclear_fb, cairoblank_fb, cairoflush_fb, 192 | cairoisactive_fb, cairoinput_fb 193 | }; 194 | 195 | -------------------------------------------------------------------------------- /pdftoroff.c: -------------------------------------------------------------------------------- 1 | /* 2 | * pdftoroff.c 3 | * 4 | * convert a pdf into various text formats, trying to undo page, column and 5 | * paragraph formatting while retaining italic and bold face 6 | * 7 | * the output format can be: 8 | * -r roff 9 | * -w html 10 | * -p plain TeX 11 | * -f text with \[fontname] for font changes and \\ for backslashes 12 | * -t text 13 | * -s fmt arbitrary struct format as fmt 14 | * 15 | * the format is a comma-separate list of strings 16 | * for example, html can also be generated by: 17 | 18 | ./pdftoroff -s ' 19 |

,

20 | ,,,,,,,,,,true,\,.,<,>,&' file.pdf 21 | 22 | * 23 | * other options: 24 | * -m met method for converting: 0-3 25 | * -o ord method for sorting: 0-2 26 | * -d dis minimal distance between blocks of text in the page 27 | * -i n:m page range 28 | * 29 | * 30 | * todo: see man page, section BUGS 31 | */ 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include "pdftext.h" 38 | 39 | /* 40 | * main: parse the options, then convert the file by showfile() 41 | */ 42 | int main(int argc, char *argv[]) { 43 | gboolean usage, opterr; 44 | int method = 1, order = 1; 45 | int first = 1, last = 0; 46 | struct measure measure = {8, 25, 80, 30, 40, 6, 20, 15, '-'}; 47 | struct format *format; 48 | PopplerRectangle *zone = NULL; 49 | 50 | /* arguments */ 51 | 52 | format = &format_roff; 53 | usage = FALSE; 54 | opterr = FALSE; 55 | while (argc > 1 && argv[1][0] == '-') { 56 | switch(argv[1][1]) { 57 | case 'r': 58 | format = &format_roff; 59 | break; 60 | case 'w': 61 | format = &format_html; 62 | break; 63 | case 'p': 64 | format = &format_tex; 65 | break; 66 | case 'f': 67 | format = &format_textfont; 68 | break; 69 | case 't': 70 | format = &format_text; 71 | break; 72 | case 's': 73 | if (argc - 1 < 2) { 74 | printf("-s requires a format\n"); 75 | usage = TRUE; 76 | opterr = TRUE; 77 | break; 78 | } 79 | format = parseformat(argv[2]); 80 | if (format == NULL) { 81 | printf("invalid format: %s\n", argv[2]); 82 | usage = TRUE; 83 | opterr = TRUE; 84 | break; 85 | } 86 | argc--; 87 | argv++; 88 | break; 89 | case 'm': 90 | method = argc - 1 < 2 ? -1 : atoi(argv[2]); /* argv[2] is NULL if missing */ 91 | if (argc - 1 < 2 || method < 0 || method > 4) { 92 | printf("-m requires a method (0-4)\n"); 93 | usage = TRUE; 94 | opterr = TRUE; 95 | break; 96 | } 97 | argc--; 98 | argv++; 99 | break; 100 | case 'd': 101 | if (argc - 1 < 2) { 102 | printf("-d requires a distance\n"); 103 | usage = TRUE; 104 | opterr = TRUE; 105 | break; 106 | } 107 | measure.blockdistance = atoi(argv[2]); 108 | argc--; 109 | argv++; 110 | break; 111 | case 'o': 112 | order = argc - 1 < 2 ? -1 : atoi(argv[2]); /* argv[2] is NULL if missing */ 113 |
if (argc - 1 < 2 || order < 0 || order > 2) { 114 | printf("-o requires an algorithm (0-2)\n"); 115 | usage = TRUE; 116 | opterr = TRUE; 117 | break; 118 | } 119 | argc--; 120 | argv++; 121 | break; 122 | case 'i': 123 | if (argc - 1 < 2 || 124 | sscanf(argv[2], "%d:%d", &first, &last) != 2) { 125 | printf("-i requires a page range (n:m)\n"); 126 | usage = TRUE; 127 | opterr = TRUE; 128 | break; 129 | } 130 | argc--; 131 | argv++; 132 | break; 133 | case 'b': 134 | if (argc - 1 < 2) { 135 | printf("-b requires a box\n"); 136 | usage = TRUE; 137 | opterr = TRUE; 138 | break; 139 | } 140 | zone = rectangle_parse(argv[2]); 141 | if (zone == NULL) { 142 | printf("error parsing box\n"); 143 | usage = TRUE; 144 | opterr = TRUE; 145 | break; 146 | } 147 | argc--; 148 | argv++; 149 | break; 150 | case 'n': 151 | zone = poppler_rectangle_new(); 152 | zone->x1 = -100; 153 | zone->x2 = -100; 154 | zone->y2 = -1; 155 | break; 156 | case 'v': 157 | debugpar = TRUE; 158 | break; 159 | case 'h': 160 | usage = TRUE; 161 | opterr = FALSE; 162 | break; 163 | default: 164 | printf("option not recognized: %s\n", argv[1]); 165 | usage = TRUE; 166 | opterr = TRUE; 167 | break; 168 | } 169 | argc--; 170 | argv++; 171 | } 172 | if (argc - 1 < 1 || usage) { 173 | printf("pdftoroff converts pdf to various text formats\n"); 174 | printf("usage:\n\tpdftoroff [-r|-w|-p|-f|-t|-s fmt]"); 175 | printf(" [-m method [-d dist] [-o order]]\n"); 176 | printf("\t [-i range] [-b box] [-n] [-v] file.pdf\n"); 177 | printf("\t\t-r\t\tconvert to roff (default)\n"); 178 | printf("\t\t-w\t\tconvert to html\n"); 179 | printf("\t\t-p\t\tconvert to plain TeX\n"); 180 | printf("\t\t-f\t\tconvert to text with font changes\n"); 181 | printf("\t\t-t\t\tconvert to text\n"); 182 | printf("\t\t-s fmt\t\toutput format strings\n"); 183 | printf("\t\t-m method\tconversion method (0-3)\n"); 184 | printf("\t\t-d distance\tminimal distance between "); 185 | printf("blocks of text\n"); 186 | printf("\t\t-o order\tblock 
sorting algorithm (0-2)\n"); 187 | printf("\t\t-i range\tpages to convert (n:m)\n"); 188 | printf("\t\t-b box\t\tonly convert characters in box\n"); 189 | printf("\t\t-n\t\tdo not convert recurring elements\n"); 190 | printf("\t\t-v\t\treason for line breaks\n"); 191 | 192 | exit(opterr || ! usage ? EXIT_FAILURE : EXIT_SUCCESS); 193 | } 194 | 195 | /* show file */ 196 | 197 | showfile(stdout, argv[1], first - 1, last - 1, zone, 198 | method, order, &measure, format); 199 | 200 | return EXIT_SUCCESS; 201 | } 202 | 203 | -------------------------------------------------------------------------------- /pdffit.1: -------------------------------------------------------------------------------- 1 | .TH pdffit 1 "October 22, 2017" 2 | 3 | . 4 | . 5 | . 6 | .SH NAME 7 | pdffit - fit pages into given paper size 8 | 9 | . 10 | . 11 | . 12 | .SH SYNOPSIS 13 | \fBpdffit\fP 14 | [\fIoptions\fP] 15 | \fIfile.pdf\fP 16 | 17 | . 18 | . 19 | . 20 | .SH DESCRIPTION 21 | 22 | Resize the pages of a pdf file to fit a given paper size. The page scaling 23 | makes the text in the original page fill the output page but for the given 24 | margin (default 40.0). 25 | 26 | . 27 | . 28 | . 29 | .SH OPTIONS 30 | 31 | The options that are presumed to be most recurring in practice are \fI-p\fP 32 | (paper size), \fI-o\fP (output file), \fI-l\fP (landscape output), \fI-m\fP 33 | (margin), \fI-e\fP (fixed scale) and \fI-w\fP (whole page). 
34 | 35 | .TP 36 | .B 37 | -p \fIpaper\fP 38 | paper size; 39 | it can be a name (case insensitive) 40 | like \fIa4\fP or \fIletter\fP, 41 | or a pair of measures in points \fIwidth,height\fP; 42 | the default is what is in \fI/etc/papersize\fP; 43 | if this file does not exist or is not readable, the default is \fIa4\fP 44 | .TP 45 | .B 46 | -o \fIoutfile\fP 47 | the output file name; 48 | default is \fIinput-paper.pdf\fP if the input is \fIinput.pdf\fP and the paper 49 | size is \fIpaper\fP 50 | .TP 51 | .B 52 | -l 53 | landscape output 54 | .TP 55 | .B 56 | -m \fImargin\fP 57 | margin in the output file; 58 | can be a single number or a quadruple \fIleft,top,right,bottom\fP; 59 | the default is \fI0.0\fP if \fI-w\fP is also given, otherwise \fI40.0\fP 60 | .TP 61 | .B 62 | -x \fIboundingbox\fP 63 | the rectangle in the page that is fit in the whole page, 64 | either \fIx1,y1,x2,y2\fP or \fI[x1,y1-x2,y2]\fP 65 | .TP 66 | .B 67 | -i 68 | scale each page separately; 69 | the default is to scale all pages the same 70 | .TP 71 | .B 72 | -c 73 | try to exclude headers and footers 74 | .TP 75 | .B 76 | -e 77 | skip empty pages 78 | .TP 79 | .B 80 | -w 81 | resize the whole page, not just its text area; 82 | this is the minimal change for modifying the paper size, 83 | like turning a letter-size pdf into a4 84 | .TP 85 | .B 86 | -r 87 | do not maintain aspect ratio 88 | .TP 89 | .B 90 | -k 91 | adapt to ebook viewing: output page 200x250, default margin 5.0 92 | .TP 93 | .B 94 | -g \fIx1,y1,x2,y2\fP 95 | the destination page; by default it is the same as the paper size 96 | (see below for an explanation) 97 | .TP 98 | .B 99 | -f 100 | draw the border of the destination page 101 | .TP 102 | .B 103 | -s 104 | draw the border of the original page 105 | .TP 106 | .B 107 | -d 108 | draw a square in a corner to check margins; 109 | this is intended for testing: it shows how large the margin is 110 | .TP 111 | .B 112 | -b 113 | draw the bounding box of each page; 114 | this is
intended for testing: it shows how large the page is considered to be 115 | .TP 116 | .B 117 | -h 118 | help 119 | . 120 | . 121 | . 122 | .SH DRAWINGS 123 | 124 | This program copies only the text of the original page into 125 | some part of the output page with the given margins. 126 | 127 | .nf 128 | ************************* 129 | * * 130 | * texttexttext * 131 | * texttexttext * 132 | * texttexttext * 133 | * texttexttext * 134 | * texttexttext * 135 | * * 136 | ************************* 137 | .fi 138 | 139 | To this end, two boxes are defined in the original page: the page size (*) 140 | and the bounding box, the latter being the box enclosing only the text in the 141 | page. 142 | 143 | Three boxes are defined in the output page: 144 | the whole page (#), 145 | the output destination page (%), 146 | and the destination box (+). 147 | The destination page is where the text with the new margins goes. 148 | The destination box is the same without margins. 149 | 150 | .nf 151 | ################################### 152 | # # 153 | # %%%%%%%%%%%%%%%%%%%%% # 154 | # % % # 155 | # % ++++++++++++ % # 156 | # % ++++++++++++ % # 157 | # % ++++++++++++ % # 158 | # % ++++++++++++ % # 159 | # % ++++++++++++ % # 160 | # % % # 161 | # %%%%%%%%%%%%%%%%%%%%% # 162 | # # 163 | ################################### 164 | .fi 165 | 166 | This program maps the bounding box of the input file into the destination box 167 | of the output. In other words, it exactly fills the destination box with the 168 | text in the input.
169 | 170 | .nf 171 | ################################### 172 | # # 173 | # %%%%%%%%%%%%%%%%%%%%% # 174 | # % % # 175 | # % texttexttext % # 176 | # % texttexttext % # 177 | # % texttexttext % # 178 | # % texttexttext % # 179 | # % texttexttext % # 180 | # % % # 181 | # %%%%%%%%%%%%%%%%%%%%% # 182 | # # 183 | ################################### 184 | .fi 185 | 186 | These boxes can be drawn on the destination page by options 187 | \fI-f\fP, \fI-s\fP, \fI-d\fP and \fI-b\fP, 188 | but of course the bounding box and the destination box should coincide. 189 | . 190 | . 191 | . 192 | .SH TODO 193 | The bounding box only includes text; it should also include images. 194 | 195 | The page could be clipped to its bounding box when copying it to the cairo 196 | surface. This could only matter when only part of the page is actually copied, 197 | not its entire bounding box. 198 | 199 | -------------------------------------------------------------------------------- /cairoui-main.c: --------------------------------------------------------------------------------
/*
 * cairoui-main.c
 *
 * a test program for cairoui
 * show a square; change color with 'c', position with 'p'
 */

/*
 * NOTE(review): the names of the system headers are missing in this copy of
 * the file; restore them from the upstream source
 */
#include
#include
#include
#include
#include "cairoio.h"
#include "cairoui.h"
#include "cairoio-fb.h"
#include "cairoio-x11.h"

/*************************** CALLBACK DATA ************************/

/*
 * callback data
 */
struct cairoui_cb {
	/* whether a change is shown at once, keeping the window open */
	int immediate;
	/* text of the help label; empty if none is to be shown */
	char help[80];

	/* index of the color of the square: 0=red, 1=green, 2=blue */
	int color;
	/* position of the top-left corner of the square */
	int x;
	int y;
};

/******************************* WINDOWS **************************/

/*
 * window names
 */
enum window {
	WINDOW_DOCUMENT,
	WINDOW_COLOR,
	WINDOW_POSITION
};

/*
 * document
 *
 * the main window: 'c' opens the color list, 'p' the position field,
 * 'q' quits; every other key leaves the document window active
 */
int document(int c, struct cairoui *cairoui) {
	(void) cairoui;
	switch (c) {
	case 'c':
		return WINDOW_COLOR;
	case 'p':
		return WINDOW_POSITION;
	case 'q':
		return CAIROUI_EXIT;
	default:
		return WINDOW_DOCUMENT;
	}
}

/*
 * color
 *
 * a list for choosing the color of the square; the first string is the
 * title of the list, so that entry n stands for color n-1
 */
int color(int c, struct cairoui *cairoui) {
	struct cairoui_cb *cairoui_cb = cairoui->cb;
	static char *colortext[] = {
		"select color",
		"red",
		"green",
		"blue",
		NULL
	};
	static int line = 0;
	static int selected = 1;
	int res;

	/* on opening the list, start from the current color */
	if (c == KEY_INIT)
		selected = cairoui_cb->color + 1;

	res = cairoui_list(c, cairoui, colortext, &line, &selected);
	if (res == CAIROUI_LEAVE)
		return WINDOW_DOCUMENT;
	if (res != CAIROUI_DONE)
		return WINDOW_COLOR;
	switch (selected) {
	case 1:
	case 2:
	case 3:
		cairoui_cb->color = selected - 1;
		break;
	}
	return cairoui_cb->immediate ? CAIROUI_REFRESH : WINDOW_DOCUMENT;
}

/*
 * position
 *
 * a numeric field for the position of the square, between 0 and 200;
 * the same value is used for both coordinates
 */
int position(int c, struct cairoui *cairoui) {
	struct cairoui_cb *cairoui_cb = cairoui->cb;
	static char positionstring[100] = "";
	static int pos = 0;
	int res;

	res = cairoui_number(c, cairoui,
		"position: ", positionstring, &pos, NULL,
		&cairoui_cb->x, 0, 200);
	cairoui_cb->y = cairoui_cb->x;
	if (res == CAIROUI_DONE)
		return cairoui_cb->immediate ?
			CAIROUI_REFRESH : WINDOW_DOCUMENT;
	if (res == CAIROUI_LEAVE)
		return WINDOW_DOCUMENT;
	/* the field stays open: remind the keys that change the value */
	cairoui_printlabel(cairoui, cairoui_cb->help,
		NO_TIMEOUT, "down=increase up=decrease");
	return WINDOW_POSITION;
}

/*
 * window list
 */
struct windowlist windowlist[] = {
{	WINDOW_DOCUMENT,	"DOCUMENT",	document	}, // always 0
{	WINDOW_COLOR,		"COLOR",	color		},
{	WINDOW_POSITION,	"POSITION",	position	},
{	0,			NULL,		NULL		}
};

/***************************** LABELS ******************************/

/*
 * help label
 *
 * show the help text, if any, then empty it so that it is shown only once
 */
void helplabel(struct cairoui *cairoui) {
	struct cairoui_cb *cairoui_cb = cairoui->cb;

	if (cairoui_cb->help[0] == '\0')
		return;

	cairoui_label(cairoui, cairoui_cb->help, 1);
	cairoui_cb->help[0] = '\0';
}

/*
 * list of labels
 */
void (*labellist[])(struct cairoui *) = {
	helplabel,
	NULL
};

/************************ CAIROUI CALLBACKS ************************/

/*
 * draw
 *
 * fill a 100x100 square of the current color at the current position
 */
void draw(struct cairoui *cairoui) {
	struct cairoui_cb *cairoui_cb = cairoui->cb;

	cairo_identity_matrix(cairoui->cr);
	switch (cairoui_cb->color) {
	case 0:
		cairo_set_source_rgb(cairoui->cr, 1.0, 0.0, 0.0);
		break;
	case 1:
		cairo_set_source_rgb(cairoui->cr, 0.0, 1.0, 0.0);
		break;
	case 2:
		cairo_set_source_rgb(cairoui->cr, 0.0, 0.0, 1.0);
		break;
	}
	cairo_rectangle(cairoui->cr, cairoui_cb->x, cairoui_cb->y, 100, 100);
	cairo_fill(cairoui->cr);
}

/******************************* MAIN ***************************/

/*
 * main
 *
 * choose the output device (x11 if DISPLAY is set, the framebuffer
 * otherwise), open it, set up the interface and run the event loop
 */
int main(int argn, char *argv[]) {
	char *mainopts = "h", *allopts;
	char *outdev = NULL;
	int doublebuffering = 1;
	int canopen;
	struct cairoui_cb cairoui_cb;
	struct cairoui cairoui;
	int firstwindow = WINDOW_DOCUMENT;

				/* select the cairo device */

	cairoui.cairodevice =
		getenv("DISPLAY") ? &cairodevicex11 : &cairodevicefb;

				/* merge general and device-specific options */

	allopts = malloc(strlen(mainopts) +
		strlen(cairoui.cairodevice->options) + 1);
	/* out of memory: nothing was opened yet, just give up */
	if (allopts == NULL)
		exit(EXIT_FAILURE);
	strcpy(allopts, mainopts);
	strcat(allopts, cairoui.cairodevice->options);

				/* open output device as cairo */

	canopen = cairoui.cairodevice->init(cairoui.cairodevice,
		outdev, doublebuffering, argn, argv, allopts);
	if (canopen == -1) {
		cairoui.cairodevice->finish(cairoui.cairodevice);
		free(allopts);
		exit(EXIT_FAILURE);
	}
	free(allopts);

				/* initialize the cairo user interface */

	cairoui_default(&cairoui);
	cairoui.cb = &cairoui_cb;

	cairoui_cb.immediate = TRUE;
	cairoui_cb.help[0] = '\0';
	cairoui_cb.color = 1;
	cairoui_cb.x = 10;
	cairoui_cb.y = 10;

	cairoui.draw = draw;
	cairoui.windowlist = windowlist;
	cairoui.labellist = labellist;

	cairoui.log = LEVEL_MAIN;

	// cairoui_cb.immediate = FALSE;
	// firstwindow = WINDOW_COLOR;

				/* event loop */

	cairoui_main(&cairoui, firstwindow);

	return EXIT_SUCCESS;
}

-------------------------------------------------------------------------------- /pdfhscript: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # external script for hovacui 4 | # 5 | # l: show annotation and actions in the current page in a browser 6 | # s: save the document 7 | # A,N: add or edit notes in filename-notes.txt 8 | # A: add a note for the current page 9 | # N: edit the notes for the current page if some exist 10 | # E: create a modified document 11 | 12 | # arguments 13 | 14 | KEY="$1" 15 | ID="$2" 16 | FILE="$3" 17 | PAGE="$4" 18
| BOXNUMBER="$5"
SCROLLX="$6"
SCROLLY="$7"
TOTPAGES="$8"
TEXTBOX="$9"
DEST="${10}"
RECT="${11}"

# actions required by multiple keystrokes
#
# EDIT, YANK, COPIED and (below) CHECK are either unset or "true"; they are
# tested as "$VAR false", which runs "false" when the variable is unset and
# "true false" (that is, true) when it is set

[ "$KEY" = 'A' ] && EDIT=true
[ "$KEY" = 'N' ] && EDIT=true
$EDIT false && [ "$RECT" != "[]" ] && YANK=true
[ "$KEY" = 'y' ] && YANK=true && COPIED=true

# test

if [ "$KEY" = '0' ];				# test echo and move
then
	if [ $PAGE = $TOTPAGES ];
	then
		echo -n "last page"
		exit 0
	else
		echo "$((PAGE+1)) 0 0 0"
		exit 1
	fi
elif echo "$KEY" | grep -q '^[123]$';		# test reload
then
	# same position
	[ "$KEY" = '1' ] && echo "$PAGE $BOXNUMBER $SCROLLX $SCROLLY"
	# new position
	[ "$KEY" = '2' ] && echo "2 $BOXNUMBER $SCROLLX $SCROLLY"
	# read position from cache file, if enabled and existing
	[ "$KEY" = '3' ] && echo "-1 0 0 0"
	# alternate between file.pdf and file-edit-1.pdf
	if echo "$FILE" | grep -q edit-1.pdf;
	then
		echo ${FILE%%-edit-1.pdf}.pdf
	else
		echo ${FILE%%.pdf}-edit-1.pdf
	fi
	exit 1
fi

# links in page

if [ "$KEY" = 'l' ];
then
	pdfannot -w -d "$FILE" $PAGE > hovacui-out.html
	[ $? = 0 ] && rm hovacui-out.html && \
		echo -n "no links in page" && exit

	# no X: pass the links to an already running elinks
	if [ "$DISPLAY" = "" ];
	then
		elinks -remote 'openUrl(file:./hovacui-out.html,new-tab)' \
			> /dev/null 2>&1
		[ $? != 0 ] && echo -n "elinks not running" && exit
		elinks -remote 'reload()'
		echo -n "passed links to elinks"
		exit
	fi

	if [ "$BROWSER" != "" ];
	then
		$BROWSER hovacui-out.html &
		echo -n "passed links to $BROWSER"
		exit
	fi

	echo -n "no browser available"
	exit
fi

# save a copy of the document

if [ "$KEY" = 's' ];
then
	# first selection-N.pdf that does not exist yet
	I=1
	while [ -e selection-$I.pdf ];
	do
		I=$((I+1))
	done

	cp "$FILE" selection-$I.pdf
	echo -n "saved as selection-$I.pdf"
	exit
fi

# interactively operate on the selected rectangle

if [ "$KEY" = 'E' ];
then
	which pdfinteractive > /dev/null || \
		{ echo -n 'pdfinteractive not installed'; exit; }
	which pdfdrawover > /dev/null || \
		{ echo -n 'pdfdrawover not installed'; exit; }
	if [ "$DISPLAY" = '' ];
	then
		pdfinteractive "$@" 3>&1 > /dev/tty
	else
		xterm -e pdfinteractive "$@" 3>&1
	fi
	# the exit status of this script is what pdfinteractive left in
	# pdfinteractive-ret.txt
	A=$(cat pdfinteractive-ret.txt)
	rm -f pdfinteractive-ret.txt
	exit $A
fi

# copy text in page or rectangle for pasting

if $YANK false;
then
	if [ "$RECT" != "[]" ];
	then
		BOX="-b $RECT"
	else
		# no rectangle selected: use the intersection of the text box
		# and the destination, with coordinates cut to their integer
		# part
		T=$(echo $TEXTBOX | tr - , | tr -d '[]')
		TX1=$(echo $T | cut -d, -f1 | cut -d. -f1)
		TY1=$(echo $T | cut -d, -f2 | cut -d. -f1)
		TX2=$(echo $T | cut -d, -f3 | cut -d. -f1)
		TY2=$(echo $T | cut -d, -f4 | cut -d. -f1)
		D=$(echo $DEST | tr - , | tr -d '[]')
		DX1=$(echo $D | cut -d, -f1 | cut -d. -f1)
		DY1=$(echo $D | cut -d, -f2 | cut -d. -f1)
		DX2=$(echo $D | cut -d, -f3 | cut -d. -f1)
		DY2=$(echo $D | cut -d, -f4 | cut -d. -f1)
		BOX="-b ["
		BOX=$BOX$([ $TX1 -gt $DX1 ] && echo $TX1 || echo $DX1),
		BOX=$BOX$([ $TY1 -gt $DY1 ] && echo $TY1 || echo $DY1)-
		BOX=$BOX$([ $TX2 -lt $DX2 ] && echo $TX2 || echo $DX2),
		BOX=$BOX$([ $TY2 -lt $DY2 ] && echo $TY2 || echo $DY2)]
	fi
	# the sed command deletes the empty lines at the beginning
	pdftoroff -t -i $PAGE:$PAGE $BOX "$FILE" | \
	sed '/./,$ ! d' | $COPIER 2>&1 > /dev/null
	[ "$?" = 0 ] && $COPIED false && echo -n "text copied" && exit;
fi

# edit the notes of the pdf file

if $EDIT false;
then
	BASE=$(echo "$FILE" | sed -e 's,.pdf,,' -e 's,$,-notes.txt,')

	# 'N' only edits notes that already exist for the current page
	[ "$KEY" = 'N' ] && CHECK=true

	if [ -e "$BASE" ];
	then
		if ! [ -f "$BASE" ];
		then
			echo -n "$BASE is not a regular file"
			exit
		fi
		# the page has no notes if its header is followed by another
		# header within two lines
		if $CHECK false && \
		   grep -A2 "^## Page $PAGE$" "$BASE" | tail -2 | grep -q "^##";
		then
			echo -n "no notes for page $PAGE, next: "
			# join each header with the text that follows it,
			# then print the number of the first page from the
			# current one on whose header has some text after it
			sed '
				: begin
				/\n##/ { P; D; b }
				s,\n, ,g
				N
				b begin
			' "$BASE" | \
			sed -n "/^## Page $PAGE /,$ p" | \
			{ grep -m1 '## Page [0-9]* .' || echo ' none'; } | \
			cut -d' ' -f3 | \
			tr -d '\n'
			exit
		fi
	else
		if $CHECK false;
		then
			echo -n "no notes for page $PAGE"
			exit
		fi
		# new notes file: a title and an empty section for each page
		echo "# $(basename "$FILE")" > "$BASE"
		echo >> "$BASE"
		for P in $(seq 1 $TOTPAGES);
		do
			echo "## Page $P" >> "$BASE"
			echo >> "$BASE"
		done
		echo '## End' >> "$BASE"
	fi

	# remove a line file left by a previous run, so that it is found
	# below only if the editor wrote it this time
	LINEFILE=$HOME/.cache/hovacui/line
	rm -f $LINEFILE
	vi +"/## Page $PAGE$" "$BASE" > /dev/tty
	setterm --cursor off > /dev/tty
	if [ -f $LINEFILE ];
	then
		# requires, in .vimrc:
		# autocmd BufUnload *-notes.txt execute ''
		# \| execute '!echo ' . line('.') .
' > 211 | # $HOME/.cache/hovacui/line' 212 | L=$(cat $LINEFILE) 213 | [ "$L" = "" ] && rm -f $LINEFILE && exit 0 214 | N=$(head -n "$L" "$BASE" | grep '^## Page [0-9][0-9]*' | \ 215 | tail -1 | cut -d' ' -f3) 216 | [ "$N" = "$PAGE" ] && exit 0 217 | [ "$N" = '' ] && N=1 218 | echo -n "$N 0 0 0" 219 | rm -f $LINEFILE 220 | exit 1 221 | fi 222 | fi 223 | 224 | -------------------------------------------------------------------------------- /cairoio-drm.c: -------------------------------------------------------------------------------- 1 | /* 2 | * cairoio-drm.c 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | #include "vt.h" 9 | #include "cairodrm.h" 10 | #include "cairoio.h" 11 | 12 | /* 13 | * structure for init data 14 | */ 15 | struct initdata { 16 | }; 17 | 18 | /* 19 | * check whether b is a prefix of a 20 | */ 21 | int _cairodrm_prefix(char *a, char *b) { 22 | return strncmp(a, b, strlen(b)); 23 | } 24 | 25 | /* 26 | * extract the last part of a string 27 | */ 28 | char *_cairodrm_second(char *a) { 29 | char *p; 30 | p = index(a, '='); 31 | return p == NULL ? p : p + 1; 32 | } 33 | 34 | /* 35 | * switch in and out the vt 36 | */ 37 | void drmswitcher(int inout, void *data) { 38 | cairodrm_switcher((struct cairodrm *) data, inout); 39 | } 40 | 41 | /* 42 | * create a cairo context 43 | */ 44 | int cairoinit_drm(struct cairodevice *cairodevice, 45 | char *device, int doublebuffering, 46 | int argn, char *argv[], char *allopts) { 47 | struct cairodrm *cairodrm; 48 | int opt; 49 | int flags; 50 | char *connectors, *size; 51 | WINDOW *w; 52 | 53 | if (device == NULL) 54 | device = "/dev/dri/card0"; 55 | 56 | connectors = "all"; 57 | size = NULL; 58 | flags = doublebuffering ? CAIRODRM_DOUBLEBUFFERING : 0; 59 | optind = 1; 60 | while (-1 != (opt = getopt(argn, argv, allopts))) { 61 | switch (opt) { 62 | case 'r': 63 | if (! strcmp(optarg, "default")) 64 | connectors = "all"; 65 | else if (! strcmp(optarg, ".")) { 66 | } 67 | else if (! 
strcmp(optarg, "all")) 68 | connectors = "all"; 69 | else if (! strcmp(optarg, "exact")) 70 | flags |= CAIRODRM_EXACT; 71 | else if (! _cairodrm_prefix(optarg, "connectors=")) 72 | connectors = _cairodrm_second(optarg); 73 | else if (! _cairodrm_prefix(optarg, "size=")) 74 | size = _cairodrm_second(optarg); 75 | else { 76 | printf("unknown -r suboption: %s\n", optarg); 77 | return -1; 78 | } 79 | break; 80 | } 81 | } 82 | 83 | cairodrm = cairodrm_init(device, connectors, size, flags); 84 | if (cairodrm == NULL) { 85 | if (! strstr(connectors, "list") && ! ! strcmp(size, "list")) 86 | printf("cannot open %s as a cairo surface\n", device); 87 | return -1; 88 | } 89 | 90 | if (getenv("ESCDELAY") == NULL) 91 | setenv("ESCDELAY", "200", 1); 92 | w = initscr(); 93 | cbreak(); 94 | noecho(); 95 | keypad(w, TRUE); 96 | curs_set(0); 97 | ungetch(KEY_INIT); 98 | getch(); 99 | timeout(0); 100 | 101 | vt_setup(drmswitcher, cairodrm); 102 | 103 | cairodevice->cairoio = (struct cairoio *) cairodrm; 104 | return 0; 105 | } 106 | 107 | /* 108 | * close a cairo context 109 | */ 110 | void cairofinish_drm(struct cairodevice *cairodevice) { 111 | if (cairodevice != NULL && cairodevice->cairoio != NULL) 112 | cairodrm_finish((struct cairodrm *) cairodevice->cairoio); 113 | clear(); 114 | refresh(); 115 | endwin(); 116 | } 117 | 118 | /* 119 | * get the cairo context from a cairo envelope 120 | */ 121 | cairo_t *cairocontext_drm(struct cairodevice *cairodevice) { 122 | return ((struct cairodrm *) cairodevice->cairoio)->cr; 123 | } 124 | 125 | /* 126 | * get the width from a cairo envelope 127 | */ 128 | double cairowidth_drm(struct cairodevice *cairodevice) { 129 | return ((struct cairodrm *) cairodevice->cairoio)->width; 130 | } 131 | 132 | /* 133 | * get the heigth from a cairo envelope 134 | */ 135 | double cairoheight_drm(struct cairodevice *cairodevice) { 136 | return ((struct cairodrm *) cairodevice->cairoio)->height; 137 | } 138 | 139 | /* 140 | * return whether double buffering 
is used 141 | */ 142 | int cairodoublebuffering_drm(struct cairodevice *cairodevice) { 143 | return cairodrm_doublebuffering( 144 | (struct cairodrm *) cairodevice->cairoio); 145 | } 146 | 147 | /* 148 | * clear 149 | */ 150 | void cairoclear_drm(struct cairodevice *cairodevice) { 151 | cairodrm_clear((struct cairodrm *) cairodevice->cairoio, 1.0, 1.0, 1.0); 152 | } 153 | 154 | /* 155 | * blank 156 | */ 157 | void cairoblank_drm(struct cairodevice *cairodevice) { 158 | cairodrm_clear((struct cairodrm *) cairodevice->cairoio, 0.0, 0.0, 0.0); 159 | } 160 | 161 | /* 162 | * flush 163 | */ 164 | void cairoflush_drm(struct cairodevice *cairodevice) { 165 | cairodrm_flush((struct cairodrm *) cairodevice->cairoio); 166 | } 167 | 168 | /* 169 | * whether the output is currently active 170 | */ 171 | int cairoisactive_drm(struct cairodevice *cairodevice) { 172 | (void) cairodevice; 173 | return ! vt_suspend; 174 | } 175 | 176 | /* 177 | * get a single input from a cairo envelope 178 | */ 179 | int cairoinput_drm(struct cairodevice *cairodevice, int timeout, 180 | struct command *command) { 181 | fd_set fds; 182 | int max, ret; 183 | struct timeval tv; 184 | int c, l, r; 185 | int different; 186 | 187 | (void) cairodevice; 188 | 189 | FD_ZERO(&fds); 190 | FD_SET(STDIN_FILENO, &fds); 191 | max = STDIN_FILENO; 192 | if (command->fd != -1) { 193 | FD_SET(command->fd, &fds); 194 | max = max > command->fd ? max : command->fd; 195 | } 196 | 197 | tv.tv_sec = timeout / 1000; 198 | tv.tv_usec = timeout % 1000; 199 | 200 | ret = select(max + 1, &fds, NULL, NULL, 201 | timeout != NO_TIMEOUT ? 
&tv : NULL); 202 | if (ret != -1 && command->fd != -1 && FD_ISSET(command->fd, &fds)) { 203 | fgets(command->command, command->max, command->stream); 204 | return KEY_EXTERNAL; 205 | } 206 | 207 | if (vt_suspend && timeout != 0) 208 | return KEY_SUSPEND; 209 | 210 | if (vt_redraw) { 211 | vt_redraw = FALSE; 212 | return KEY_REDRAW; 213 | } 214 | 215 | if (ret == -1) 216 | return KEY_SIGNAL; 217 | 218 | if (FD_ISSET(STDIN_FILENO, &fds)) { 219 | different = 0; 220 | for (l = getch(), r = 0; 221 | l != ERR && r < command->max - 1; 222 | l = getch()) { 223 | command->command[r++] = l; 224 | if (c != l && r > 1) 225 | different = 1; 226 | c = l; 227 | } 228 | command->command[r] = '\0'; 229 | return r < 4 || ! different ? c : KEY_PASTE; 230 | } 231 | 232 | return KEY_TIMEOUT; 233 | } 234 | 235 | /* 236 | * the cairo device for the framebuffer 237 | */ 238 | struct cairodevice cairodevicedrm = { 239 | "r:", 240 | "\t\t-r suboption\tdri options (connectors, size)", 241 | NULL, 242 | cairoinit_drm, cairofinish_drm, 243 | cairocontext_drm, 244 | cairowidth_drm, cairoheight_drm, 245 | cairowidth_drm, cairoheight_drm, 246 | cairodoublebuffering_drm, 247 | cairoclear_drm, cairoblank_drm, cairoflush_drm, 248 | cairoisactive_drm, cairoinput_drm 249 | }; 250 | 251 | -------------------------------------------------------------------------------- /pdfrects-main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * test program for pdfrects.c 3 | * 4 | * arguments: 5 | * -f first page 6 | * -l last page 7 | * -b bounding box 8 | * -e direction horizontal or vertical extents 9 | * -d distance minimal size of a white space 10 | * -r level the debugtextrectangles variables (-1 - 5, see below) 11 | * -n draw also the number of each rectangle 12 | * -i draw the numbers inside the box, not by its side 13 | * -s sort rectangles 14 | * -a test adding a new 100x100 rectangle in a free area 15 | * file.pdf file to read; output is always result.pdf 
16 | */ 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | #include "pdfrects.h" 27 | 28 | enum extents_e { 29 | none, 30 | vertical, 31 | horizontal 32 | }; 33 | 34 | /* 35 | * main 36 | */ 37 | int main(int argc, char *argv[]) { 38 | int opt; 39 | gboolean usage = FALSE; 40 | gdouble distance = -1; 41 | gboolean numbers = FALSE; 42 | gboolean inside = FALSE; 43 | gboolean bb = FALSE; 44 | gboolean largest = FALSE; 45 | enum extents_e extents = none; 46 | gboolean painted = FALSE; 47 | gboolean tabular = FALSE; 48 | gboolean add = FALSE; 49 | int sort = -1; 50 | int first = -1, last = -1; 51 | char *infile, *outfile; 52 | 53 | gdouble width, height; 54 | 55 | PopplerDocument *doc; 56 | PopplerPage *page; 57 | int npages, n; 58 | RectangleList *textarea = NULL, *ve, *singlechars; 59 | PopplerRectangle *box = NULL; 60 | PopplerRectangle wholepage = {0.0, 0.0, 0.0, 0.0}; 61 | void (*order[])(RectangleList *, PopplerPage *) = { 62 | rectanglelist_quicksort, 63 | rectanglelist_twosort, 64 | rectanglelist_charsort 65 | }; 66 | 67 | PopplerRectangle insert = {200.0, 200.0, 300.0, 300.0}, moved; 68 | gboolean fits = FALSE; 69 | 70 | cairo_surface_t *surface; 71 | cairo_t *cr; 72 | 73 | /* arguments */ 74 | 75 | while ((opt = getopt(argc, argv, "f:l:nipts:bme:d:r:ah")) != -1) 76 | switch(opt) { 77 | case 'f': 78 | first = atoi(optarg); 79 | break; 80 | case 'l': 81 | last = atoi(optarg); 82 | break; 83 | case 'n': 84 | numbers = TRUE; 85 | break; 86 | case 'i': 87 | inside = TRUE; 88 | break; 89 | case 'p': 90 | painted = TRUE; 91 | break; 92 | case 't': 93 | tabular = TRUE; 94 | sort = -1; 95 | break; 96 | case 's': 97 | sort = atoi(optarg); 98 | break; 99 | case 'b': 100 | bb = TRUE; 101 | break; 102 | case 'm': 103 | largest = TRUE; 104 | break; 105 | case 'e': 106 | if (! strcmp(optarg, "none")) 107 | extents = none; 108 | else if (! strcmp(optarg, "horizontal")) 109 | extents = horizontal; 110 | else if (! 
strcmp(optarg, "vertical")) 111 | extents = vertical; 112 | else { 113 | printf("unsupported direction: %s\n", optarg); 114 | exit(EXIT_FAILURE); 115 | } 116 | break; 117 | case 'd': 118 | distance = atof(optarg); 119 | break; 120 | case 'r': 121 | debugtextrectangles = atoi(optarg); 122 | break; 123 | case 'a': 124 | add = TRUE; 125 | break; 126 | case 'h': 127 | usage = TRUE; 128 | break; 129 | } 130 | 131 | if (! usage && argc - 1 < optind) { 132 | printf("input file name missing\n"); 133 | usage = TRUE; 134 | } 135 | if (usage) { 136 | printf("usage:\n"); 137 | printf("\tpdfrects [-f page] [-l page] "); 138 | printf("[-b|-m] [-e direction] [-d distance]\n"); 139 | printf("\t [-p|-t] [-n [-s n]] [-a] [-r level] [-h] "); 140 | printf("file.pdf\n"); 141 | printf("\t\t-f page\t\tfirst page\n"); 142 | printf("\t\t-l page\t\tlast page\n"); 143 | printf("\t\t-b\t\tbounding box instead of textarea\n"); 144 | printf("\t\t-m\t\tonly the largest block of text\n"); 145 | printf("\t\t-e direction\thorizontal or vertical extents\n"); 146 | printf("\t\t-d distance\tminimal distance of text boxes\n"); 147 | printf("\t\t-n\t\tnumber boxes\n"); 148 | printf("\t\t-p\t\tuse painted squares instead of text\n"); 149 | printf("\t\t-t\t\tuse text rows instead of text boxes\n"); 150 | printf("\t\t-s n\t\tsort boxes by method n\n"); 151 | printf("\t\t-a\t\tadd a test box\n"); 152 | printf("\t\t-r level\tdebug textarea algorithm\n"); 153 | printf("\t\t-h\t\tthis help\n"); 154 | exit(EXIT_FAILURE); 155 | } 156 | infile = filenametouri(argv[optind]); 157 | if (! 
infile) 158 | exit(EXIT_FAILURE); 159 | outfile = pdfaddsuffix(argv[optind], "boxes"); 160 | 161 | /* open file */ 162 | 163 | doc = poppler_document_new_from_file(infile, NULL, NULL); 164 | if (doc == NULL) { 165 | printf("error opening pdf file\n"); 166 | exit(EXIT_FAILURE); 167 | } 168 | 169 | /* pages */ 170 | 171 | npages = poppler_document_get_n_pages(doc); 172 | if (npages < 1) { 173 | printf("no page in document\n"); 174 | exit(EXIT_FAILURE); 175 | } 176 | if (first == -1) 177 | first = 0; 178 | if (first < 0 || first >= npages) { 179 | printf("no such first page: %d\n", first); 180 | printf("number of pages is %d\n", npages); 181 | exit(EXIT_FAILURE); 182 | } 183 | if (last == -1) 184 | last = npages - 1; 185 | if (last < 0 || last >= npages) { 186 | printf("no such last page: %d\n", last); 187 | printf("number of pages is %d\n", npages); 188 | exit(EXIT_FAILURE); 189 | } 190 | 191 | /* copy to destination */ 192 | 193 | surface = cairo_pdf_surface_create(outfile, 1, 1); 194 | 195 | printf("infile: %s\n", argv[optind]); 196 | printf("outfile: %s\n", outfile); 197 | printf("pages: \n"); 198 | 199 | for (n = first; n <= last; n++) { 200 | printf(" - page: %d\n", n); 201 | page = poppler_document_get_page(doc, n); 202 | poppler_page_get_size(page, &width, &height); 203 | cairo_pdf_surface_set_size(surface, width, height); 204 | 205 | if (bb || largest) { 206 | box = 207 | painted ? 208 | rectanglelist_boundingbox_painted(page, 209 | distance) : 210 | largest ? 211 | rectanglelist_pagelargest(page) : 212 | rectanglelist_boundingbox(page); 213 | printf(" %s:\n", 214 | largest ? "largest" : "boundingbox"); 215 | rectangle_printyaml(stdout, 216 | " ", " ", box); 217 | } 218 | else { 219 | textarea = tabular ? 220 | rectanglelist_rows(page, distance) : 221 | painted ? 222 | rectanglelist_paintedarea_distance(page, 223 | distance) : 224 | rectanglelist_textarea_distance(page, 225 | distance); 226 | if (extents != none) { 227 | ve = extents == horizontal ? 
228 | rectanglelist_hextents(textarea) : 229 | rectanglelist_vextents(textarea); 230 | rectanglelist_free(textarea); 231 | textarea = ve; 232 | } 233 | if (sort >= 0) 234 | order[sort](textarea, page); 235 | printf(" textarea:\n"); 236 | rectanglelist_printyaml(stdout, 237 | " - ", " ", textarea); 238 | fflush(stdout); 239 | } 240 | 241 | if (add) { 242 | singlechars = rectanglelist_characters(page); 243 | wholepage.x2 = width; 244 | wholepage.y2 = height; 245 | fits = rectanglelist_place(&wholepage, 246 | singlechars, &insert, &moved); 247 | rectanglelist_free(singlechars); 248 | } 249 | 250 | cr = cairo_create(surface); 251 | poppler_page_render_for_printing(page, cr); 252 | if (bb || largest) 253 | rectangle_draw(cr, box, TRUE, FALSE, FALSE); 254 | else 255 | rectanglelist_draw(cr, textarea, 256 | FALSE, FALSE, numbers, inside); 257 | if (fits) 258 | rectangle_draw(cr, &moved, TRUE, TRUE, FALSE); 259 | cairo_destroy(cr); 260 | cairo_surface_show_page(surface); 261 | 262 | rectanglelist_free(textarea); 263 | poppler_rectangle_free(box); 264 | 265 | g_object_unref(page); 266 | } 267 | 268 | cairo_surface_destroy(surface); 269 | 270 | return EXIT_SUCCESS; 271 | } 272 | 273 | -------------------------------------------------------------------------------- /pdffit.c: -------------------------------------------------------------------------------- 1 | /* 2 | * fit a pdf in A4 (or some other paper size) 3 | * 4 | * TODO: bounding box should include images, not only text (pdfrect) 5 | * TODO: clip before copying page onto the cairo surface 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include "pdfrects.h" 16 | 17 | /* 18 | * main 19 | */ 20 | int main(int argc, char *argv[]) { 21 | int opt; 22 | char *infile, *uri, *outfile = NULL; 23 | gboolean usage = FALSE, opterror = FALSE; 24 | gboolean landscape = FALSE, ratio = TRUE, individual = FALSE; 25 | gboolean largest = FALSE, wholepage = FALSE, emptypages = FALSE; 26 | 
gboolean givendest = FALSE, givenmargin = FALSE, givenbox = FALSE; 27 | gboolean orig = FALSE, frame = FALSE, drawbb = FALSE, debug = FALSE; 28 | gdouble w, h; 29 | char *paper = NULL; 30 | PopplerRectangle *pagesize = NULL; 31 | PopplerRectangle pagedest, outdest, dest, test; 32 | gdouble defaultmargin = 40.0; 33 | gdouble marginx1, marginy1, marginx2, marginy2; 34 | 35 | PopplerDocument *doc; 36 | PopplerPage *page; 37 | int npages, n; 38 | PopplerRectangle *boundingbox = NULL, *pageboundingbox; 39 | PopplerRectangle psize = {0.0, 0.0, 0.0, 0.0}; 40 | 41 | cairo_surface_t *surface; 42 | cairo_t *cr; 43 | 44 | /* arguments */ 45 | 46 | while ((opt = getopt(argc, argv, "hicewfskbrldm:x:p:g:o:")) != -1) 47 | switch(opt) { 48 | case 'l': 49 | landscape = TRUE; 50 | break; 51 | case 'r': 52 | ratio = FALSE; 53 | break; 54 | case 'i': 55 | individual = TRUE; 56 | break; 57 | case 'c': 58 | largest = TRUE; 59 | break; 60 | case 'e': 61 | emptypages = TRUE; 62 | break; 63 | case 'p': 64 | paper = optarg; 65 | if (2 == sscanf(optarg, "%lg,%lg", &w, &h)) { 66 | pagesize = poppler_rectangle_new(); 67 | pagesize->x1 = 0.0; 68 | pagesize->y1 = 0.0; 69 | pagesize->x2 = w; 70 | pagesize->y2 = h; 71 | } 72 | break; 73 | case 'm': 74 | givenmargin = TRUE; 75 | if (4 == sscanf(optarg, "%lg,%lg,%lg,%lg", 76 | &marginx1, &marginy1, &marginx2, &marginy2)) 77 | break; 78 | marginx1 = atof(optarg); 79 | marginy1 = atof(optarg); 80 | marginx2 = atof(optarg); 81 | marginy2 = atof(optarg); 82 | break; 83 | case 'x': 84 | givenbox = TRUE; 85 | boundingbox = poppler_rectangle_new(); 86 | if (4 == sscanf(optarg, "%lg,%lg,%lg,%lg", 87 | &boundingbox->x1, &boundingbox->y1, 88 | &boundingbox->x2, &boundingbox->y2)) 89 | break; 90 | if (4 == sscanf(optarg, "[%lg,%lg-%lg,%lg]", 91 | &boundingbox->x1, &boundingbox->y1, 92 | &boundingbox->x2, &boundingbox->y2)) 93 | break; 94 | printf("cannot parse box: %s\n", optarg); 95 | exit(EXIT_FAILURE); 96 | break; 97 | case 'o': 98 | outfile = optarg; 99 | 
break; 100 | case 'w': 101 | wholepage = TRUE; 102 | defaultmargin = 0.0; 103 | break; 104 | case 'g': 105 | if (4 != sscanf(optarg, "%lg,%lg,%lg,%lg", 106 | &outdest.x1, &outdest.y1, 107 | &outdest.x2, &outdest.y2)) { 108 | printf("cannot parse box: %s\n", optarg); 109 | exit(EXIT_FAILURE); 110 | } 111 | givendest = TRUE; 112 | break; 113 | case 'k': 114 | paper = "ebook"; 115 | pagesize = poppler_rectangle_new(); 116 | pagesize->x1 = 0.0; 117 | pagesize->y1 = 0.0; 118 | pagesize->x2 = 200; 119 | pagesize->y2 = 250; 120 | defaultmargin = 5.0; 121 | break; 122 | case 'b': 123 | drawbb = TRUE; 124 | break; 125 | case 'f': 126 | frame = TRUE; 127 | break; 128 | case 's': 129 | orig = TRUE; 130 | break; 131 | case 'd': 132 | debug = TRUE; 133 | break; 134 | case 'h': 135 | usage = TRUE; 136 | break; 137 | default: 138 | usage = TRUE; 139 | opterror = TRUE; 140 | } 141 | 142 | if (argc - 1 < optind) { 143 | printf("input file name missing\n"); 144 | usage = TRUE; 145 | opterror = TRUE; 146 | } 147 | 148 | /* usage */ 149 | 150 | if (usage) { 151 | printf("pdffit fits a pdf into an A4 page\n"); 152 | printf("usage:\n"); 153 | printf("\tpdffit [options] file.pdf\n"); 154 | printf("\t\t-l\t\tlandscape\n"); 155 | printf("\t\t-i\t\tscale each page individually\n"); 156 | printf("\t\t-c\t\ttry to exclude headers and footers\n"); 157 | printf("\t\t-e\t\tskip empty pages\n"); 158 | printf("\t\t-r\t\tdo not maintain aspect ratio\n"); 159 | printf("\t\t-p paper\tpaper size (a4, letter, 500,500...)\n"); 160 | printf("\t\t-w\t\tresize the whole page, without margins\n"); 161 | printf("\t\t-m margin\tminimal distance from border of page "); 162 | printf( "to text\n"); 163 | printf("\t\t-x box\t\tthe box that is fit into the page\n"); 164 | printf("\t\t-g box\t\tdestination box\n"); 165 | printf("\t\t-k\t\tadapt to ebook viewing\n"); 166 | printf("\t\t-f\t\tdraw the border of the destination page\n"); 167 | printf("\t\t-s\t\tdraw the border of the original page\n"); 168 | 
printf("\t\t-d\t\tdraw a square in a corner "); 169 | printf( "to check margins\n"); 170 | printf("\t\t-b\t\tdraw the bounding box of each page\n"); 171 | printf("\t\t-h\t\tthis help\n"); 172 | exit(opterror ? EXIT_FAILURE : EXIT_SUCCESS); 173 | } 174 | 175 | /* rectangles */ 176 | 177 | if (paper == NULL) 178 | paper = defaultpapersize(); 179 | if (paper == NULL) 180 | paper = "a4"; 181 | 182 | if (pagesize == NULL) 183 | pagesize = get_papersize(paper); 184 | if (pagesize == NULL) { 185 | printf("no such paper size: %s\n", paper); 186 | exit(EXIT_FAILURE); 187 | } 188 | 189 | pagedest.x1 = landscape ? pagesize->y1 : pagesize->x1; 190 | pagedest.y1 = landscape ? pagesize->x1 : pagesize->y1; 191 | pagedest.x2 = landscape ? pagesize->y2 : pagesize->x2; 192 | pagedest.y2 = landscape ? pagesize->x2 : pagesize->y2; 193 | 194 | if (! givendest) 195 | outdest = pagedest; 196 | 197 | if (! givenmargin) { 198 | marginx1 = defaultmargin; 199 | marginy1 = defaultmargin; 200 | marginx2 = defaultmargin; 201 | marginy2 = defaultmargin; 202 | } 203 | 204 | dest.x1 = outdest.x1 + marginx1; 205 | dest.y1 = outdest.y1 + marginy1; 206 | dest.x2 = outdest.x2 - marginx2; 207 | dest.y2 = outdest.y2 - marginy2; 208 | 209 | test.x1 = pagedest.x1; 210 | test.y1 = pagedest.y1; 211 | test.x2 = dest.x1; 212 | test.y2 = dest.y1; 213 | 214 | /* file names */ 215 | 216 | infile = argv[optind]; 217 | uri = filenametouri(infile); 218 | if (! 
uri) 219 | exit(EXIT_FAILURE); 220 | if (outfile == NULL) 221 | outfile = pdfaddsuffix(infile, paper); 222 | printf("%s -> %s\n", infile, outfile); 223 | 224 | /* input file */ 225 | 226 | doc = poppler_document_new_from_file(uri, NULL, NULL); 227 | if (doc == NULL) { 228 | printf("error opening pdf file\n"); 229 | exit(EXIT_FAILURE); 230 | } 231 | 232 | npages = poppler_document_get_n_pages(doc); 233 | if (npages < 1) { 234 | printf("no page in document\n"); 235 | exit(EXIT_FAILURE); 236 | } 237 | 238 | /* destination surface */ 239 | 240 | surface = cairo_pdf_surface_create(outfile, pagedest.x2, pagedest.y2); 241 | 242 | /* bounding box of all pages */ 243 | 244 | if (! individual && ! wholepage && ! givenbox) 245 | boundingbox = largest ? 246 | rectanglelist_largest_document(doc) : 247 | rectanglelist_boundingbox_document(doc); 248 | 249 | /* copy each page to destination */ 250 | 251 | for (n = 0; n < npages; n++) { 252 | printf("page %-5d ", n + 1); 253 | page = poppler_document_get_page(doc, n); 254 | poppler_page_get_size(page, &psize.x2, &psize.y2); 255 | pageboundingbox = rectanglelist_boundingbox(page); 256 | if (pageboundingbox == NULL && emptypages) { 257 | printf("\n"); 258 | continue; 259 | } 260 | 261 | if (individual && ! wholepage && ! givenbox) 262 | boundingbox = pageboundingbox; 263 | 264 | rectangle_print(stdout, wholepage ? &psize : boundingbox); 265 | printf(" -> "); 266 | rectangle_print(stdout, &dest); 267 | printf("\n"); 268 | 269 | cr = cairo_create(surface); 270 | rectangle_map_to_cairo(cr, &dest, 271 | wholepage ? 
&psize : boundingbox, 272 | FALSE, FALSE, 273 | ratio, 274 | individual && n == npages - 1, FALSE); 275 | poppler_page_render_for_printing(page, cr); 276 | 277 | if (drawbb) 278 | rectangle_draw(cr, boundingbox, TRUE, FALSE, FALSE); 279 | if (orig) 280 | rectangle_draw(cr, &psize, TRUE, FALSE, FALSE); 281 | cairo_identity_matrix(cr); 282 | if (frame) 283 | rectangle_draw(cr, &outdest, TRUE, FALSE, FALSE); 284 | if (debug) 285 | rectangle_draw(cr, &test, TRUE, TRUE, FALSE); 286 | 287 | cairo_destroy(cr); 288 | cairo_surface_show_page(surface); 289 | 290 | if (individual && ! wholepage && ! givenbox) 291 | poppler_rectangle_free(boundingbox); 292 | 293 | g_object_unref(page); 294 | } 295 | 296 | if (! individual && ! wholepage) 297 | poppler_rectangle_free(boundingbox); 298 | 299 | cairo_surface_destroy(surface); 300 | 301 | return EXIT_SUCCESS; 302 | } 303 | 304 | -------------------------------------------------------------------------------- /white-support.txt: -------------------------------------------------------------------------------- 1 | Rectangle merging by white support 2 | ================================== 3 | 4 | Currently, rectangles are merged by a loop: for every two rectangles B and C, 5 | if they touch they are merged into a new rectangle B+C. This new rectangle may 6 | include another rectangle A that touches neither B nor C. 7 | 8 | +-------+ 9 | +-------+ | A | 10 | | B | +-------+ 11 | | | 12 | | +---+-----+ 13 | | | | C | 14 | +---+---+-----+ 15 | 16 | If rectangles are checked in order A,B,C for contacts, thay have to be checked 17 | again. A does not touch anything. B touches C, so they are merged into B+C. 18 | Since A has already been checked, the only way to detect its contact with B+C 19 | is to check the new rectangle B+C against all other rectangles. A simpler but 20 | less efficient solution is to check all rectangles again until no merge is 21 | done. 
22 | 23 | +-------+ 24 | +------------++ A | 25 | | ++------+ 26 | | B+C | 27 | | | 28 | | | 29 | +-------------+ 30 | 31 | Both solutions require checking all rectangles over and over. A better method 32 | is to reduce the white rectangles instead of merging the black rectangles. The 33 | current algorithm is: 34 | 35 | white_rectangles = page - characters 36 | black_rectangles = page - white_rectangles 37 | merge_touching(black_rectangles) 38 | 39 | Instead of merging black rectangles, white rectangles are reduced before 40 | subtracting them to obtain the black rectangles: 41 | 42 | white_rectangles = page - characters 43 | reduce_unsupported(white_rectangles) 44 | black_rectangles = page - white_rectangles 45 | 46 | The area between black rectangles is filled by white rectangles. This is also 47 | the case for the space that will be taken by the merged rectangles. 48 | 49 | . +-------+ 50 | +-------+ | A | 51 | | B | D +-------+ 52 | | | . E 53 | | +---x----y+....... 54 | | | | C | 55 | +---+---+-----+ 56 | 57 | The corners of the white rectangles tell where black rectangles merge. The 58 | figure emphasizes two corners x and y of the white rectangle D. The two sides 59 | of D at corner x both begin touching a black rectangle. Therefore, D will be 60 | reduced. Instead, the vertical side of D at corner y begins touching the other 61 | white rectangle E; therefore, D is not reduced at y so far. 62 | 63 | | 64 | black | white 65 | x---- ==> reduce white rectangle 66 | black 67 | 68 | | 69 | white | white ==> do not reduce the white rectangle at this moment 70 | ----y 71 | black 72 | 73 | A corner of a white rectangle stands (is not reduced) when it is supported by 74 | another white rectangle: at least one of its two sides touches another white 75 | rectangle at the corner. Otherwise, both sides touch a black rectangle at the 76 | corner. 
In this case, the corner is unsupported; the white rectangle is reduced 77 | up to the closest points supported by another white rectangle. In the example, D is 78 | reduced from x to y horizontally and from x to the upper-right corner of A 79 | vertically. 80 | 81 | This reduction makes E unsupported at y, since D is now retracted. Technically, 82 | a corner is unsupported if neither of its two sides begins with a white rectangle; 83 | black rectangles are not used for this. E is reduced horizontally up to the 84 | upper-right corner of B and vertically up to C. The process continues until the 85 | white area retracts from all regions that would be taken by the merge of A, B 86 | and C. 87 | 88 | . D +-------+ 89 | +-------+....| A | 90 | | B | +-------+ 91 | | | . E 92 | | +---x----y+....... 93 | | | | C | 94 | +---+---+-----+ 95 | 96 | The advantage of this procedure is that white rectangles are always reduced, 97 | never enlarged. No new touching is possible: if two white rectangles do not 98 | touch, they never will. The algorithm can check all rectangles for contacts at 99 | the beginning. When a rectangle is reduced, only the rectangles touching it 100 | have to be re-checked, not all of them. 101 | 102 | In general, reducing a white rectangle may split it. In the following example, 103 | corner x is unsupported (no white rectangles at both sides of C crossing at x). 104 | Rectangle C is reduced up to z and right to y. What is left of C is a 105 | non-rectangular area. Two rectangles are required to cover it. 106 | 107 | +.............+ +.............+ 108 | . . . . 109 | . . . C1 . 110 | +----z C . +----+.....+.......+ 111 | | A | . | A | . . 112 | | | . ==> | | . C2 . 113 | | +-x-----y.......+ | +-+-----+.......+ 114 | | | | | | | | | 115 | | | | B | | | | B | 116 | | | | | | | | | 117 | +--+-+-----+ +--+-+-----+ 118 | 119 | This is not a problem, since C1 and C2 may only touch rectangles originally 120 | touched by C. 
There is no need to recheck the contacts between all rectangles, 121 | only between C1 and C2 and the rectangles that originally touched C. 122 | 123 | - one or both subrectangles C1 and C2 may be empty; this is not a problem: some 124 | or all rectangles that originally touched C do not any longer 125 | 126 | - white rectangles may overlap instead of touching; the only caveat is that 127 | rectangles overlapping at the same corner do not support each other; white 128 | support is only given by another white rectangle on the other side of a side 129 | 130 | Black rectangles joined with no white corner are not a problem. This is for 131 | example the case of two rectangles of the same y and height. They are not a 132 | problem because they will automatically be merged when white rectangles are 133 | subtracted from the page. 134 | 135 | +------+====+--------+ 136 | | | | | 137 | | A | | B | 138 | | | | | 139 | +------+====+--------+ 140 | 141 | This mechanism should work better than rechecking the contacts between all 142 | black rectangles every time if the page contains many black rectangles that 143 | only touch a few others. An example is a page with many vectorial diagrams, and 144 | the aim is to find their individual bounding boxes. Every graphical element 145 | (line, rectangle, Bezier curve) creates a rectangle. It may touch some others 146 | in the same diagram, but none in the other diagrams. 147 | 148 | Checking support 149 | ---------------- 150 | 151 | Checking for support is done on white rectangles only. A corner is supported if 152 | at least one of the two sides crossing at it begins bordering with another 153 | white rectangle. Otherwise, the corner is unsupported, and has to be reduced. 154 | Both support and the amount of reduction depend on simple conditions on the 155 | coordinates of the other white rectangles. 
156 | 157 | B | 158 | x---+----------+ 159 | | | | 160 | ----+---+ A | 161 | | | 162 | +--------------+ 163 | 164 | Corner x of A is supported by B. Since x is the upper-left corner of A, its 165 | coordinates are A.x1 and A.y1. Corner x is supported if it falls inside B or in 166 | the middle of its left or bottom side (corner-to-corner is not support). 167 | 168 | - B.x1 < A.x1 <= B.x2 and B.y1 <= A.y1 < B.y2 (left support) OR 169 | - B.x1 <= A.x1 < B.x2 and B.y1 < A.y1 <= B.y2 (upper support) 170 | 171 | Support for the other corners of A is given by similar conditions. If they are 172 | not met, the corner is unsupported. It is reduced up to the first supporting 173 | white rectangle, if any: 174 | 175 | x--------------+ 176 | | | 177 | ----+---+ A | 178 | | | | 179 | | | | 180 | B +---+----------+ 181 | | 182 | --------+ 183 | 184 | Corner x is reduced down to B.y1, since B.x1 < A.x1 <= B.x2. It is also reduced 185 | right to A.x2, since no other rectangle satisfies the condition B.y1 < A.y1 <= 186 | B.y2. In this case, one of the two new rectangles has zero width and is 187 | therefore removed. 188 | 189 | More generally, reduction is to the highest B.y1 for a rectangle satisfying 190 | A.y1 < B.y1 and B.x1 < A.x1 <= B.x2, or to A.y2 if no rectangle satisfies that. 191 | A similar rule applies for the vertical reduction. Some inequalities are 192 | inverted for the other corners of A. 193 | 194 | Contact information 195 | ------------------- 196 | 197 | Storing a list of all rectangles contacting each rectangle takes quadratic time 198 | and space. An alternative for small rectangles is to build four double lists of 199 | rectangles ordered by their x1, y1, x2 and y2, with reverse pointers from each 200 | rectangle to its position in these lists. This structure only takes linear 201 | memory, and n log(n) time to be built. 
202 | 203 | Checking support on the corner x1,y1 of a rectangle can be done in order on the 204 | x2 list: starting from position of the rectangle in the list, go back until x1, 205 | then forth looking for a supporting rectangle until x1+w, where w is the 206 | maximal width of rectangles. The mechanism is similar for the other corners. 207 | 208 | This saves checks when w is small. Rectangles wider than w may be dealt with 209 | separately, either before or after merging the other rectangles. 210 | 211 | -------------------------------------------------------------------------------- /pdfrects.h: -------------------------------------------------------------------------------- 1 | /* 2 | * pdfrects.h 3 | * 4 | * functions on poppler rectangles: 5 | * 6 | * - functions on individual rectangles (containment, join, etc.) 7 | * - functions for rectangle lists representing the union of their areas 8 | * - functions for blocks of text in a page 9 | * - functions for short, recurring blocks of text 10 | */ 11 | 12 | #ifdef _PDFRECTS_H 13 | #else 14 | #define _PDFRECTS_H 15 | 16 | /* 17 | * functions on individual rectangles 18 | */ 19 | 20 | /* print a rectangle */ 21 | void rectangle_print(FILE *, PopplerRectangle *); 22 | void rectangle_printyaml(FILE *, char *first, char *indent, PopplerRectangle *); 23 | 24 | /* parse a rectangle */ 25 | PopplerRectangle *rectangle_parse(char *s); 26 | 27 | /* normalize a rectangle: x1 <= x2 and y1 <= y2 */ 28 | void rectangle_normalize(PopplerRectangle *); 29 | 30 | /* width, height and area of a rectangle */ 31 | double rectangle_width(PopplerRectangle *); 32 | double rectangle_height(PopplerRectangle *); 33 | double rectangle_area(PopplerRectangle *r); 34 | 35 | /* check if two rectangles are the same */ 36 | gboolean rectangle_hequal(PopplerRectangle *a, PopplerRectangle *b); 37 | gboolean rectangle_vequal(PopplerRectangle *a, PopplerRectangle *b); 38 | gboolean rectangle_equal(PopplerRectangle *a, PopplerRectangle *b); 39 | 40 | 
/* check whether the first rectangle contains the second */ 41 | gboolean rectangle_hcontain(PopplerRectangle *, PopplerRectangle *); 42 | gboolean rectangle_vcontain(PopplerRectangle *, PopplerRectangle *); 43 | gboolean rectangle_contain(PopplerRectangle *, PopplerRectangle *); 44 | 45 | /* check if rectangles overlap */ 46 | gboolean rectangle_hoverlap(PopplerRectangle *, PopplerRectangle *); 47 | gboolean rectangle_voverlap(PopplerRectangle *, PopplerRectangle *); 48 | gboolean rectangle_overlap(PopplerRectangle *, PopplerRectangle *); 49 | 50 | /* check if rectangles touch (meet or overlap) */ 51 | gboolean rectangle_htouch(PopplerRectangle *a, PopplerRectangle *b); 52 | gboolean rectangle_vtouch(PopplerRectangle *a, PopplerRectangle *b); 53 | gboolean rectangle_touch(PopplerRectangle *, PopplerRectangle *); 54 | 55 | /* horizontal and vertical distance between rectangles */ 56 | gdouble rectangle_hdistance(PopplerRectangle *a, PopplerRectangle *b); 57 | gdouble rectangle_vdistance(PopplerRectangle *a, PopplerRectangle *b); 58 | 59 | /* copy and swap rectangles */ 60 | void rectangle_copy(PopplerRectangle *dest, PopplerRectangle *orig); 61 | void rectangle_swap(PopplerRectangle *a, PopplerRectangle *b); 62 | 63 | /* shift and expand a rectangle */ 64 | void rectangle_shift(PopplerRectangle *, gdouble x, gdouble y); 65 | void rectangle_expand(PopplerRectangle *, gdouble dx, gdouble dy); 66 | 67 | /* make the first rectangle the intersection of the other two */ 68 | void rectangle_intersect(PopplerRectangle *r, 69 | PopplerRectangle *a, PopplerRectangle *b); 70 | 71 | /* join rectangles: the first becomes the smallest rectangle containing both */ 72 | void rectangle_join(PopplerRectangle *, PopplerRectangle *); 73 | 74 | /* compare the position of two rectangles */ 75 | int rectangle_hcompare(PopplerRectangle *a, PopplerRectangle *b); 76 | int rectangle_vcompare(PopplerRectangle *a, PopplerRectangle *b); 77 | int rectangle_compare(PopplerRectangle *, 
PopplerRectangle *); 78 | 79 | /* compare the area of two rectangles */ 80 | int rectangle_areacompare(PopplerRectangle *a, PopplerRectangle *b); 81 | 82 | /* a rectangle as large as the page */ 83 | void rectangle_page(PopplerPage *page, PopplerRectangle *rect); 84 | 85 | /* 86 | * functions on lists of rectangles 87 | */ 88 | 89 | #define MAXRECT 4096 90 | typedef struct { 91 | /* public */ 92 | PopplerRectangle *rect; 93 | gint num; 94 | 95 | /* private */ 96 | gint max; 97 | } RectangleList; 98 | 99 | /* 100 | * minimal size for both dimensions of a rectangle and for each 101 | */ 102 | typedef struct { 103 | gdouble both; 104 | gdouble each; 105 | } RectangleBound; 106 | 107 | /* allocate a list with maximum number of elements, currently none */ 108 | RectangleList *rectanglelist_new(int); 109 | 110 | /* make a copy of a rectangle list */ 111 | RectangleList *rectanglelist_copy(RectangleList *src); 112 | 113 | /* thighten a rectangle list by deallocating the unused entries */ 114 | void rectanglelist_tighten(RectangleList *); 115 | 116 | /* free a rectangle list */ 117 | void rectanglelist_free(RectangleList *); 118 | 119 | /* print a rectangle list */ 120 | void rectanglelist_print(FILE *, RectangleList *); 121 | void rectanglelist_printyaml(FILE *, char *first, char *indent, 122 | RectangleList *); 123 | 124 | /* remove a rectangle from a list */ 125 | void rectanglelist_delete(RectangleList *, gint); 126 | 127 | /* append a rectangle to a list */ 128 | void rectanglelist_append(RectangleList *rl, PopplerRectangle *rect); 129 | 130 | /* add a rectangle to a list, if not redundant */ 131 | gboolean rectanglelist_add(RectangleList *, PopplerRectangle *); 132 | 133 | /* smallest rectangle enclosing all in a rectangle list */ 134 | PopplerRectangle *rectanglelist_joinall(RectangleList *); 135 | 136 | /* horizontal or vertical extents of a rectangle list */ 137 | RectangleList *rectanglelist_hextents(RectangleList *); 138 | RectangleList 
*rectanglelist_vextents(RectangleList *); 139 | 140 | /* total width and height of a rectangle list */ 141 | double rectanglelist_sumwidth(RectangleList *rl); 142 | double rectanglelist_sumheight(RectangleList *rl); 143 | 144 | /* average width and height of a rectangle list */ 145 | double rectanglelist_averagewidth(RectangleList *rl); 146 | double rectanglelist_averageheight(RectangleList *rl); 147 | 148 | /* index of first rectangle in list in a relation to another rectangle */ 149 | gint rectanglelist_contain(RectangleList *, PopplerRectangle *); 150 | gint rectanglelist_touch(RectangleList *, PopplerRectangle *); 151 | gint rectanglelist_overlap(RectangleList *, PopplerRectangle *); 152 | 153 | /* sort a rectangle list by position */ 154 | void rectanglelist_quicksort(RectangleList *, PopplerPage *); 155 | void rectanglelist_twosort(RectangleList *, PopplerPage *); 156 | void rectanglelist_charsort(RectangleList *, PopplerPage *); 157 | 158 | /* find the largest rectangle in a list or sort by area */ 159 | PopplerRectangle *rectanglelist_largest(RectangleList *); 160 | void rectanglelist_areasort(RectangleList *); 161 | 162 | /* position a rectangle in a page partially filled by others */ 163 | gboolean rectanglelist_place(PopplerRectangle *page, 164 | RectangleList *rl, PopplerRectangle *r, 165 | PopplerRectangle *moved); 166 | 167 | /* subtract a rectangle list from another: orig -= sub */ 168 | gboolean rectanglelist_subtract(RectangleList **orig, RectangleList *sub, 169 | PopplerRectangle *cont, RectangleBound *b); 170 | 171 | /* subtract a rectangle list from a single rectangle: res = r - rl */ 172 | RectangleList *rectanglelist_subtract1(PopplerRectangle *r, RectangleList *rl, 173 | PopplerRectangle *cont, RectangleBound *b); 174 | 175 | /* 176 | * functions on text-enclosing rectangles 177 | * rectangles of white spaces ' ' are made 0-width 178 | */ 179 | 180 | /* debug */ 181 | extern int debugtextrectangles; 182 | 183 | /* the rectangles of the single 
characters in the page */ 184 | RectangleList *rectanglelist_characters(PopplerPage *); 185 | 186 | /* area of text in a page */ 187 | RectangleList *rectanglelist_textarea(PopplerPage *); 188 | 189 | /* area of text in a page, with minimal distance considered a white space */ 190 | RectangleList *rectanglelist_textarea_distance(PopplerPage *, gdouble); 191 | 192 | /* bounding box of the page or document (NULL if no text) */ 193 | PopplerRectangle *rectanglelist_boundingbox(PopplerPage *); 194 | PopplerRectangle *rectanglelist_boundingbox_document(PopplerDocument *doc); 195 | 196 | /* largest box in a page or document (NULL if no text) */ 197 | PopplerRectangle *rectanglelist_pagelargest(PopplerPage *page); 198 | PopplerRectangle *rectanglelist_largest_document(PopplerDocument *doc); 199 | 200 | /* list of squares of a grid that are painted in a page */ 201 | RectangleList *rectanglelist_painted(PopplerPage *page, int distance); 202 | 203 | /* area of painted squares in a page, with minimal distance of white space */ 204 | RectangleList *rectanglelist_paintedarea_distance(PopplerPage *, gdouble); 205 | 206 | /* bounding box of a page, based on painted squares */ 207 | PopplerRectangle *rectanglelist_boundingbox_painted(PopplerPage *page, int d); 208 | 209 | /* list of rows in a page */ 210 | RectangleList *rectanglelist_rows(PopplerPage *page, gdouble distance); 211 | 212 | /* 213 | * functions for recurring blocks of text (page numbers, headers and footers) 214 | */ 215 | 216 | typedef struct { 217 | int num; 218 | int size; 219 | struct { 220 | int rank; 221 | PopplerRectangle rect; 222 | } rect[]; 223 | } RectangleVector; 224 | 225 | /* debug areas often taken by short blocks of text */ 226 | extern int debugfrequent; 227 | 228 | /* create an empty rectangle vector of a given size */ 229 | RectangleVector *rectanglevector_create(int size); 230 | 231 | /* print a rectangle vector */ 232 | void rectanglevector_print(FILE *fd, RectangleVector *v); 233 | void 
rectanglevector_printyaml(FILE *fd, char *first, char *indent, 234 | RectangleVector *v); 235 | 236 | /* make a rectangle list out of a rectangle vector */ 237 | RectangleList *rectanglevector_list(RectangleVector *c); 238 | 239 | /* insert a rectangle in a vector */ 240 | void rectanglevector_insert(RectangleVector *v, int rank, PopplerRectangle *r); 241 | 242 | /* add a rectangle to a frequency vector, allowing horizontal containment */ 243 | void rectanglevector_add(RectangleVector *v, PopplerRectangle *r); 244 | 245 | /* rectangles often taken by short blocks of text */ 246 | RectangleList *rectanglevector_frequent(PopplerDocument *doc, 247 | gdouble height, gdouble distance); 248 | 249 | /* a rectangle as large as the page minus headers and footers */ 250 | PopplerRectangle *rectanglevector_main(PopplerDocument *doc, 251 | RectangleList *recur, gdouble height, gdouble distance); 252 | 253 | /* 254 | * drawing-related functions 255 | */ 256 | 257 | /* use rectangle in cairo */ 258 | void rectangle_cairo(cairo_t *cr, PopplerRectangle *rect, gdouble enlarge); 259 | 260 | /* draw a rectangle, possibly filled or enclosing */ 261 | void rectangle_draw(cairo_t *, PopplerRectangle *, 262 | gboolean randomcolor, gboolean fill, gboolean enclosing); 263 | 264 | /* draw a rectangle list, possibly numbering each */ 265 | void rectanglelist_draw(cairo_t *, RectangleList *, 266 | gboolean fill, gboolean enclosing, gboolean num, gboolean inside); 267 | 268 | /* apply the current transformation to a rectangle */ 269 | void rectangle_transform(cairo_t *cr, PopplerRectangle *r); 270 | 271 | /* map a poppler rectangle into a cairo surface */ 272 | void rectangle_map_to_cairo(cairo_t *cr, 273 | PopplerRectangle *dst, PopplerRectangle *src, 274 | gboolean horizontal, gboolean vertical, 275 | gboolean ratio, gboolean topalign, gboolean leftalign); 276 | 277 | /* clip out all textarea rectangles containing any in the remove list */ 278 | void rectanglelist_clip_containing(cairo_t 
/*
 * compare the beginning of a with b
 *
 * the result follows the convention of strncmp(): it is 0 if b is a prefix of
 * a and non-zero otherwise, so a match is tested as "! prefix(a, b)"
 */
int prefix(char *a, char *b) {
	return strncmp(a, b, strlen(b));
}

/*
 * extract the part of a string that follows the first '='
 *
 * returns a pointer inside a (not a copy) to the character after the first
 * '=', or NULL if a contains no '='
 */
char *second(char *a) {
	char *p;

	/* strchr() is the standard replacement for the legacy index() */
	p = strchr(a, '=');
	return p == NULL ? NULL : p + 1;
}
getenv("DISPLAY") : device); 109 | free(xhovacui); 110 | return -1; 111 | } 112 | scr = DefaultScreenOfDisplay(xhovacui->dsp); 113 | vis = DefaultVisualOfScreen(scr); 114 | 115 | x = 200; 116 | y = 200; 117 | xhovacui->width = 600; 118 | xhovacui->height = 400; 119 | if (geometry != NULL) 120 | XParseGeometry(geometry, 121 | &x, &y, &xhovacui->width, &xhovacui->height); 122 | printf("geometry: %dx%d+%d+%d\n", 123 | xhovacui->width, xhovacui->height, x, y); 124 | 125 | xhovacui->screenwidth = WidthOfScreen(scr); 126 | xhovacui->screenheight = HeightOfScreen(scr); 127 | 128 | xhovacui->win = XCreateSimpleWindow(xhovacui->dsp, 129 | DefaultRootWindow(xhovacui->dsp), 130 | x, y, xhovacui->width, xhovacui->height, 0, 131 | BlackPixelOfScreen(scr), WhitePixelOfScreen(scr)); 132 | XSelectInput(xhovacui->dsp, xhovacui->win, EVENTMASK); 133 | 134 | xhovacui->doublebuffering = doublebuffering; 135 | xhovacui->dbuf = ! xhovacui->doublebuffering ? 136 | xhovacui->win : 137 | XCreatePixmap(xhovacui->dsp, xhovacui->win, 138 | xhovacui->width, xhovacui->height, 139 | DefaultDepth(xhovacui->dsp, 0)); 140 | xhovacui->surface = 141 | cairo_xlib_surface_create(xhovacui->dsp, xhovacui->dbuf, vis, 142 | xhovacui->width, xhovacui->height); 143 | xhovacui->cr = cairo_create(xhovacui->surface); 144 | 145 | wintitle = malloc(strlen(HOVACUI ": ") + strlen(title) + 1); 146 | strcpy(wintitle, HOVACUI ": "); 147 | strcat(wintitle, title); 148 | XStoreName(xhovacui->dsp, xhovacui->win, wintitle); 149 | utf8 = XInternAtom(xhovacui->dsp, "UTF8_STRING", False); 150 | name = XInternAtom(xhovacui->dsp, "_NET_WM_NAME", False); 151 | XChangeProperty(xhovacui->dsp, xhovacui->win, 152 | name, utf8, 8, PropModeReplace, 153 | (unsigned char *) wintitle, strlen(wintitle)); 154 | pid = XInternAtom(xhovacui->dsp, "_NET_WM_PID", False); 155 | pidn = getpid(); 156 | XChangeProperty(xhovacui->dsp, xhovacui->win, 157 | pid, XA_CARDINAL, 32, PropModeReplace, 158 | (unsigned char *) &pidn, 1); 159 | 
free(wintitle); 160 | 161 | XMapWindow(xhovacui->dsp, xhovacui->win); 162 | 163 | cairodevice->cairoio = xhovacui; 164 | return 0; 165 | } 166 | 167 | /* 168 | * close a cairo context 169 | */ 170 | void cairofinish_x11(struct cairodevice *cairodevice) { 171 | struct cairoio *xhovacui; 172 | xhovacui = cairodevice->cairoio; 173 | if (xhovacui == NULL) 174 | return; 175 | cairo_destroy(xhovacui->cr); 176 | cairo_surface_destroy(xhovacui->surface); 177 | if (xhovacui->doublebuffering) 178 | XFreePixmap(xhovacui->dsp, xhovacui->dbuf); 179 | XDestroyWindow(xhovacui->dsp, xhovacui->win); 180 | XCloseDisplay(xhovacui->dsp); 181 | free(xhovacui); 182 | } 183 | 184 | /* 185 | * get the cairo context 186 | */ 187 | cairo_t *cairocontext_x11(struct cairodevice *cairodevice) { 188 | return cairodevice->cairoio->cr; 189 | } 190 | 191 | /* 192 | * get the width of the window 193 | */ 194 | double cairowidth_x11(struct cairodevice *cairodevice) { 195 | return cairodevice->cairoio->width; 196 | } 197 | 198 | /* 199 | * get the heigth of the window 200 | */ 201 | double cairoheight_x11(struct cairodevice *cairodevice) { 202 | return cairodevice->cairoio->height; 203 | } 204 | 205 | /* 206 | * get the width of the screen 207 | */ 208 | double cairoscreenwidth_x11(struct cairodevice *cairodevice) { 209 | return cairodevice->cairoio->screenwidth; 210 | } 211 | 212 | /* 213 | * get the heigth of the screen 214 | */ 215 | double cairoscreenheight_x11(struct cairodevice *cairodevice) { 216 | return cairodevice->cairoio->screenheight; 217 | } 218 | 219 | /* 220 | * return whether double buffering is used 221 | */ 222 | int cairodoublebuffering_x11(struct cairodevice *cairodevice) { 223 | return cairodevice->cairoio->doublebuffering; 224 | } 225 | 226 | /* 227 | * clear 228 | */ 229 | void cairoclear_x11(struct cairodevice *cairodevice) { 230 | struct cairoio *xhovacui; 231 | xhovacui = cairodevice->cairoio; 232 | cairo_identity_matrix(xhovacui->cr); 233 | 
cairo_set_source_rgb(xhovacui->cr, 1.0, 1.0, 1.0); 234 | cairo_rectangle(xhovacui->cr, 0, 0, xhovacui->width, xhovacui->height); 235 | cairo_fill(xhovacui->cr); 236 | } 237 | 238 | /* 239 | * blank 240 | */ 241 | void cairoblank_x11(struct cairodevice *cairodevice) { 242 | struct cairoio *xhovacui; 243 | xhovacui = cairodevice->cairoio; 244 | cairo_identity_matrix(xhovacui->cr); 245 | cairo_set_source_rgb(xhovacui->cr, 0.0, 0.0, 0.0); 246 | cairo_rectangle(xhovacui->cr, 0, 0, xhovacui->width, xhovacui->height); 247 | cairo_fill(xhovacui->cr); 248 | } 249 | 250 | /* 251 | * flush 252 | */ 253 | void cairoflush_x11(struct cairodevice *cairodevice) { 254 | struct cairoio *xhovacui; 255 | xhovacui = cairodevice->cairoio; 256 | if (xhovacui->doublebuffering) 257 | XCopyArea(xhovacui->dsp, xhovacui->dbuf, xhovacui->win, 258 | DefaultGC(xhovacui->dsp, 0), 259 | 0, 0, xhovacui->width, xhovacui->height, 0, 0); 260 | } 261 | 262 | /* 263 | * whether the output is currently active 264 | */ 265 | int cairoisactive_x11(struct cairodevice *cairodevice) { 266 | (void) cairodevice; 267 | return TRUE; 268 | } 269 | 270 | /* 271 | * reconfigure 272 | */ 273 | void cairoreconfigure(struct cairoio *xhovacui, XConfigureEvent *xce) { 274 | xhovacui->width = xce->width; 275 | xhovacui->height = xce->height; 276 | 277 | if (! 
xhovacui->doublebuffering) { 278 | cairo_xlib_surface_set_size(xhovacui->surface, 279 | xhovacui->width, xhovacui->height); 280 | return; 281 | } 282 | 283 | XFreePixmap(xhovacui->dsp, xhovacui->dbuf); 284 | xhovacui->dbuf = XCreatePixmap(xhovacui->dsp, xhovacui->win, 285 | xhovacui->width, xhovacui->height, 286 | DefaultDepth(xhovacui->dsp, 0)); 287 | cairo_xlib_surface_set_drawable(xhovacui->surface, xhovacui->dbuf, 288 | xhovacui->width, xhovacui->height); 289 | return; 290 | } 291 | 292 | /* 293 | * all available expose events 294 | */ 295 | int cairoexpose(Display *dsp, XEvent *evt) { 296 | XExposeEvent *exp; 297 | int redraw; 298 | 299 | do { 300 | redraw = 0; 301 | switch (evt->type) { 302 | case Expose: 303 | exp = &evt->xexpose; 304 | printf("Expose %d,%d->%dx%d\n", 305 | exp->x, exp->y, 306 | exp->width, exp->height); 307 | redraw = 1; 308 | break; 309 | case GraphicsExpose: 310 | printf("GraphicsExpose\n"); 311 | break; 312 | case NoExpose: 313 | printf("NoExpose\n"); 314 | break; 315 | default: 316 | printf("event of type %d\n", evt->type); 317 | } 318 | } 319 | while (XCheckMaskEvent(dsp, ExposureMask, evt)); 320 | 321 | printf("\tend Exposure, redraw=%d\n", redraw); 322 | return redraw; 323 | } 324 | 325 | /* 326 | * next event or timeout 327 | */ 328 | int nextevent(Display *dsp, int timeout, XEvent *evt, struct command *command) { 329 | fd_set fds; 330 | int max, ret; 331 | struct timeval tv; 332 | 333 | /* the socket may be inactive but an event is already in the queue; 334 | * so: first check events, then possibly wait on the socket */ 335 | while (! XCheckMaskEvent(dsp, EVENTMASK, evt)) { 336 | FD_ZERO(&fds); 337 | FD_SET(ConnectionNumber(dsp), &fds); 338 | max = ConnectionNumber(dsp); 339 | if (command->fd != -1) { 340 | FD_SET(command->fd, &fds); 341 | max = max > command->fd ? 
max : command->fd; 342 | } 343 | 344 | tv.tv_sec = timeout / 1000; 345 | tv.tv_usec = (timeout % 1000) * 1000; 346 | 347 | ret = select(max + 1, &fds, NULL, NULL, 348 | timeout != NO_TIMEOUT ? &tv : NULL); 349 | if (ret == -1) 350 | return -1; 351 | 352 | if (command->fd != -1 && FD_ISSET(command->fd, &fds)) { 353 | fgets(command->command, command->max, command->stream); 354 | return KEY_EXTERNAL; 355 | } 356 | 357 | if (! FD_ISSET(ConnectionNumber(dsp), &fds)) 358 | return KEY_TIMEOUT; 359 | } 360 | 361 | return 0; 362 | } 363 | 364 | /* 365 | * get a single input 366 | */ 367 | int cairoinput_x11(struct cairodevice *cairodevice, int timeout, 368 | struct command *command) { 369 | struct cairoio *xhovacui; 370 | int res; 371 | XEvent evt; 372 | int key; 373 | int format; 374 | Atom type; 375 | unsigned long nitems, after; 376 | unsigned char *selection; 377 | 378 | xhovacui = cairodevice->cairoio; 379 | 380 | while (1) { 381 | res = nextevent(xhovacui->dsp, timeout, &evt, command); 382 | if (res != 0) 383 | return res; 384 | 385 | switch(evt.type) { 386 | case KeyPress: 387 | printf("Key\n"); 388 | key = XLookupKeysym(&evt.xkey, 0); 389 | switch (key) { 390 | case XK_Down: 391 | return KEY_DOWN; 392 | case XK_Up: 393 | return KEY_UP; 394 | case XK_Left: 395 | return KEY_LEFT; 396 | case XK_Right: 397 | return KEY_RIGHT; 398 | case XK_Page_Down: 399 | return KEY_NPAGE; 400 | case XK_Page_Up: 401 | return KEY_PPAGE; 402 | case XK_Escape: 403 | return 033; 404 | case XK_Home: 405 | return KEY_HOME; 406 | case XK_End: 407 | return KEY_END; 408 | case XK_Return: 409 | return '\n'; 410 | case XK_BackSpace: 411 | return KEY_BACKSPACE; 412 | case XK_slash: 413 | return '/'; 414 | case XK_space: 415 | return ' '; 416 | default: 417 | if (isalnum(key)) { 418 | if (evt.xkey.state & ShiftMask) 419 | return toupper(key); 420 | else 421 | return key; 422 | } 423 | /* finish: translate X keys to curses */ 424 | } 425 | break; 426 | case ButtonPress: 427 | printf("Button\n"); 428 
| if (evt.xbutton.button == 2) { 429 | XConvertSelection(xhovacui->dsp, 430 | XA_PRIMARY, XA_STRING, XA_PRIMARY, 431 | xhovacui->win, CurrentTime); 432 | } 433 | break; 434 | case PropertyNotify: 435 | printf("Property\n"); 436 | if (evt.xproperty.atom != XA_PRIMARY) 437 | break; 438 | res = XGetWindowProperty(xhovacui->dsp, xhovacui->win, 439 | XA_PRIMARY, 0, MAXPASTE, True, XA_STRING, 440 | &type, &format, &nitems, &after, &selection); 441 | if (res != Success) 442 | break; 443 | if (type != XA_STRING) 444 | break; 445 | if (nitems > MAXPASTE) 446 | break; 447 | if (format != 8) 448 | break; 449 | strcpy(command->command, (char *) selection); 450 | XFree(selection); 451 | return KEY_PASTE; 452 | break; 453 | case ConfigureNotify: 454 | printf("Configure\n"); 455 | cairoreconfigure(xhovacui, &evt.xconfigure); 456 | return KEY_RESIZE; 457 | case Expose: 458 | case GraphicsExpose: 459 | case NoExpose: 460 | if (cairoexpose(xhovacui->dsp, &evt)) 461 | return KEY_REDRAW; 462 | break; 463 | case MapNotify: 464 | printf("MapNotify\n"); 465 | break; 466 | case ReparentNotify: 467 | printf("ReparentNotify\n"); 468 | break; 469 | default: 470 | printf("event of type %d\n", evt.type); 471 | } 472 | } 473 | 474 | return KEY_NONE; 475 | } 476 | 477 | /* 478 | * the cairo device for X11 479 | */ 480 | struct cairodevice cairodevicex11 = { 481 | "x:", 482 | "\t\t-x suboption\tx11 options (display, geometry)", 483 | NULL, 484 | cairoinit_x11, cairofinish_x11, 485 | cairocontext_x11, 486 | cairowidth_x11, cairoheight_x11, 487 | cairoscreenwidth_x11, cairoscreenheight_x11, 488 | cairodoublebuffering_x11, 489 | cairoclear_x11, cairoblank_x11, cairoflush_x11, 490 | cairoisactive_x11, cairoinput_x11 491 | }; 492 | 493 | -------------------------------------------------------------------------------- /pdfannot.c: -------------------------------------------------------------------------------- 1 | /* 2 | * pdfannot.c 3 | * 4 | * print annotations and actions in a pdf file 5 | */ 6 | 7 
| #include 8 | #include 9 | #include 10 | #include 11 | 12 | /* 13 | * separate pages or not 14 | */ 15 | gboolean headers; 16 | 17 | /* 18 | * formatting elements 19 | */ 20 | struct outformat { 21 | char *newline; 22 | char *separator; 23 | char *startpar; 24 | char *endpar; 25 | char *startheader; 26 | char *endheader; 27 | char *startdestination; 28 | char *enddestination; 29 | } *outformat; 30 | struct outformat textformat = { 31 | "\n", 32 | "\n=================\n", 33 | "", 34 | "\n-------\n", 35 | "==================", 36 | "", 37 | "%sdestination: ", 38 | "" 39 | }; 40 | struct outformat htmlformat = { 41 | "
\n", 42 | "\n
\n", 43 | "

\n", 44 | "

\n", 45 | "

\n", 46 | "

\n", 47 | "\n
\n", 48 | "
\n", 49 | }; 50 | 51 | /* 52 | * print a string and free it 53 | */ 54 | void printfree(gchar *prefix, gchar *s, gchar *suffix) { 55 | char *p; 56 | if (s == NULL) 57 | return; 58 | for (p = strchr(s, '\r'); p != NULL; p = strchr(p + 1, '\r')) 59 | *p = '\n'; 60 | printf("%s%s%s", prefix, s, suffix); 61 | g_free(s); 62 | } 63 | 64 | /* 65 | * print the header for a page 66 | */ 67 | void printheader(gchar *title, PopplerPage *page) { 68 | if (! headers) 69 | return; 70 | fputs(outformat->startheader, stdout); 71 | printf(" %s ON PAGE %d", title, poppler_page_get_index(page) + 1); 72 | fputs(outformat->endheader, stdout); 73 | printf("\n"); 74 | } 75 | 76 | /* 77 | * print the name of an annotation 78 | */ 79 | void printannotationname(PopplerAnnot *annot) { 80 | gint type; 81 | 82 | type = poppler_annot_get_annot_type(annot); 83 | 84 | // one day... 85 | // printfree(g_enum_to_string(PopplerAnnotType, type)); 86 | 87 | switch (type) { 88 | case POPPLER_ANNOT_TEXT: 89 | printf("text:"); 90 | break; 91 | case POPPLER_ANNOT_FREE_TEXT: 92 | printf("free text:"); 93 | break; 94 | case POPPLER_ANNOT_LINE: 95 | printf("line:"); 96 | break; 97 | case POPPLER_ANNOT_SQUARE: 98 | printf("square:"); 99 | break; 100 | case POPPLER_ANNOT_CIRCLE: 101 | printf("circle:"); 102 | break; 103 | case POPPLER_ANNOT_UNDERLINE: 104 | printf("underline:"); 105 | break; 106 | case POPPLER_ANNOT_HIGHLIGHT: 107 | printf("highlight:"); 108 | break; 109 | case POPPLER_ANNOT_SQUIGGLY: 110 | printf("squiggly:"); 111 | break; 112 | case POPPLER_ANNOT_STRIKE_OUT: 113 | printf("strike out:"); 114 | break; 115 | case POPPLER_ANNOT_FILE_ATTACHMENT: 116 | printf("file attachment:"); 117 | break; 118 | case POPPLER_ANNOT_STAMP: 119 | printf("stamp:%s", outformat->newline); 120 | break; 121 | case POPPLER_ANNOT_CARET: 122 | printf("caret:%s", outformat->newline); 123 | break; 124 | case POPPLER_ANNOT_WIDGET: 125 | printf("widget (unsupported)%s", outformat->newline); 126 | break; 127 | default: 128 | 
printf("annotation (%d):", type); 129 | break; 130 | } 131 | } 132 | 133 | /* 134 | * print a markup annotation 135 | */ 136 | int printannotationmarkup(PopplerAnnotMarkup *markup) { 137 | PopplerRectangle rect; 138 | PopplerAnnotFileAttachment *att; 139 | gint type; 140 | 141 | type = poppler_annot_get_annot_type(POPPLER_ANNOT(markup)); 142 | 143 | printannotationname(POPPLER_ANNOT(markup)); 144 | printfree(" ", poppler_annot_markup_get_label(markup), ""); 145 | printfree(" ", poppler_annot_markup_get_subject(markup), ""); 146 | 147 | if (type == POPPLER_ANNOT_FILE_ATTACHMENT) { 148 | att = POPPLER_ANNOT_FILE_ATTACHMENT(markup); 149 | printfree(" ", poppler_annot_file_attachment_get_name(att), ""); 150 | } 151 | 152 | if (! poppler_annot_markup_has_popup(markup)) { 153 | printf("%s", outformat->newline); 154 | return 0; 155 | } 156 | poppler_annot_markup_get_popup_rectangle(markup, &rect); 157 | printf(" %g,%g-%g,%g", rect.x1, rect.y1, rect.x2, rect.y2); 158 | printf("%s", outformat->newline); 159 | return 0; 160 | } 161 | 162 | /* 163 | * print content in a rectangle 164 | */ 165 | int printcontent(PopplerPage *dpage, PopplerRectangle r, char *indent) { 166 | char *d; 167 | 168 | d = poppler_page_get_selected_text(dpage, POPPLER_SELECTION_LINE, &r); 169 | printf(outformat->startdestination, indent); 170 | printf("%s", d); 171 | printf(outformat->enddestination, indent); 172 | free(d); 173 | return 0; 174 | } 175 | 176 | /* 177 | * print the annotations in a page 178 | */ 179 | int printannotations(PopplerPage *page) { 180 | GList *annots, *s; 181 | int present = FALSE; 182 | PopplerAnnotMapping *m; 183 | int type; 184 | PopplerRectangle r; 185 | 186 | if (! POPPLER_IS_PAGE(page)) 187 | return FALSE; 188 | 189 | annots = poppler_page_get_annot_mapping(page); 190 | 191 | for (s = annots; s != NULL; s = s->next) { 192 | m = (PopplerAnnotMapping *) s->data; 193 | type = poppler_annot_get_annot_type(m->annot); 194 | 195 | if (! 
present && type != POPPLER_ANNOT_LINK) { 196 | printheader("ANNOTATIONS", page); 197 | present = TRUE; 198 | } 199 | 200 | r = m->area; 201 | 202 | switch (type) { 203 | case POPPLER_ANNOT_LINK: 204 | continue; // links are actions, print there 205 | case POPPLER_ANNOT_TEXT: 206 | case POPPLER_ANNOT_FREE_TEXT: 207 | case POPPLER_ANNOT_LINE: 208 | case POPPLER_ANNOT_SQUARE: 209 | case POPPLER_ANNOT_CIRCLE: 210 | case POPPLER_ANNOT_UNDERLINE: 211 | case POPPLER_ANNOT_HIGHLIGHT: 212 | case POPPLER_ANNOT_SQUIGGLY: 213 | case POPPLER_ANNOT_STRIKE_OUT: 214 | case POPPLER_ANNOT_FILE_ATTACHMENT: 215 | printannotationmarkup(POPPLER_ANNOT_MARKUP(m->annot)); 216 | break; 217 | case POPPLER_ANNOT_STAMP: 218 | case POPPLER_ANNOT_CARET: 219 | case POPPLER_ANNOT_WIDGET: 220 | printannotationname(m->annot); 221 | break; 222 | /* others */ 223 | default: 224 | printf("annotation (%d)\n", type); 225 | } 226 | 227 | printfree("\tname: ", poppler_annot_get_name(m->annot), 228 | outformat->newline); 229 | printfree("\tcontent: ", 230 | poppler_annot_get_contents(m->annot), 231 | outformat->newline); 232 | 233 | printcontent(page, r, " "); 234 | fputs(outformat->separator, stdout); 235 | } 236 | 237 | poppler_page_free_annot_mapping(annots); 238 | return present; 239 | } 240 | 241 | /* 242 | * print the links in a page 243 | */ 244 | #define DESTCONTENT 0x01 245 | int printlinks(PopplerDocument *doc, PopplerPage *page, int flags) { 246 | gdouble width, height; 247 | GList *links, *l; 248 | int present = FALSE; 249 | PopplerLinkMapping *m; 250 | PopplerRectangle r; 251 | PopplerAction *a; 252 | PopplerActionAny *any; 253 | PopplerActionGotoDest *linkdest; 254 | PopplerActionGotoRemote *remote; 255 | PopplerActionUri *uri; 256 | PopplerActionNamed *named; 257 | PopplerDest *dest, *inter; 258 | PopplerPage *dpage; 259 | char *t; 260 | 261 | poppler_page_get_size(page, &width, &height); 262 | links = poppler_page_get_link_mapping(page); 263 | 264 | for (; links != NULL && links->next != 
NULL; links = links->next) { 265 | } 266 | 267 | for (l = links; l != NULL; l = l->prev) { 268 | if (! present) { 269 | printheader("ACTIONS", page); 270 | present = TRUE; 271 | } 272 | m = (PopplerLinkMapping *) l->data; 273 | a = m->action; 274 | 275 | r.x1 = m->area.x1 - 0; 276 | r.x2 = m->area.x2 + 0; 277 | r.y1 = height - m->area.y2 - 0; 278 | r.y2 = height - m->area.y1 + 0; 279 | // printf("%g,%g - %g,%g\n", r.x1, r.y1, r.x2, r.y2); 280 | t = poppler_page_get_selected_text(page, 281 | POPPLER_SELECTION_LINE, &r); 282 | 283 | fputs(outformat->startpar, stdout); 284 | if (outformat != &htmlformat || a->type != POPPLER_ACTION_URI) 285 | printf("%s%s", t, outformat->newline); 286 | 287 | switch (a->type) { 288 | case POPPLER_ACTION_NONE: 289 | any = (PopplerActionAny *) a; 290 | printf("none: %s", any->title); 291 | break; 292 | case POPPLER_ACTION_GOTO_DEST: 293 | linkdest = (PopplerActionGotoDest *) a; 294 | dest = linkdest->dest; 295 | printf("link "); 296 | while (dest != NULL && 297 | dest->type == POPPLER_DEST_NAMED) { 298 | printf("to %s: ", dest->named_dest); 299 | inter = poppler_document_find_dest(doc, 300 | dest->named_dest); 301 | if (dest != linkdest->dest) 302 | poppler_dest_free(dest); 303 | dest = inter; 304 | } 305 | if (dest == NULL) { 306 | printf("to nowhere"); 307 | break; 308 | } 309 | printf("to page %d, ", dest->page_num); 310 | switch (dest->type) { 311 | case POPPLER_DEST_XYZ: 312 | printf("point %g,%g", dest->left, dest->top); 313 | r.x1 = dest->left - 20; 314 | r.y1 = height - dest->top - 20; 315 | r.x2 = dest->left + 20; 316 | r.y2 = height - dest->top + 20; 317 | break; 318 | case POPPLER_DEST_FIT: 319 | // whole page 320 | // todo: other fit modes 321 | break; 322 | default: 323 | printf("rectangle "); 324 | printf("%g,%g - ", dest->left, dest->top); 325 | printf("%g,%g", dest->right, dest->bottom); 326 | r.x1 = dest->left; 327 | r.y1 = height - dest->top; 328 | r.x2 = dest->right; 329 | r.y2 = height - dest->bottom; 330 | break; 
331 | } 332 | if (flags & DESTCONTENT) { 333 | dpage = poppler_document_get_page(doc, 334 | dest->page_num - 1); 335 | if (dpage != NULL) 336 | printcontent(dpage, r, "\n"); 337 | } 338 | if (dest != linkdest->dest) 339 | poppler_dest_free(dest); 340 | break; 341 | case POPPLER_ACTION_GOTO_REMOTE: 342 | remote = (PopplerActionGotoRemote *) a; 343 | printf("link to document %s", remote->file_name); 344 | break; 345 | /* POPPLER_ACTION_LAUNCH does not make sense */ 346 | case POPPLER_ACTION_URI: 347 | uri = (PopplerActionUri *) a; 348 | if (outformat == &textformat) 349 | printf("uri: %s", uri->uri); 350 | else 351 | printf("

%s

/*
 * escape filenames
 *
 * Returns a newly allocated copy of filename where the percent sign, the
 * control characters (below 32) and the bytes outside ASCII (128 and above)
 * are replaced by %XX, XX being the value of the byte as two uppercase
 * hexadecimal digits. All other characters are copied unchanged.
 *
 * Each byte is converted as an unsigned value, so that it always takes
 * exactly the three characters the buffer reserves for it, regardless of
 * whether char is signed.
 *
 * The caller frees the result. Returns NULL if memory cannot be allocated.
 */
char *filenameescape(char *filename) {
	static const char hex[] = "0123456789ABCDEF";
	unsigned char c;
	char *res;
	size_t i, j;

	res = malloc(strlen(filename) * 3 + 1);
	if (res == NULL)
		return NULL;

	for (i = 0, j = 0; filename[i] != '\0'; i++) {
		c = (unsigned char) filename[i];
		if (c >= 32 && c < 128 && c != '%')
			res[j++] = (char) c;
		else {
			res[j++] = '%';
			res[j++] = hex[c >> 4];
			res[j++] = hex[c & 0x0F];
		}
	}
	res[j] = '\0';

	return res;
}

/*
 * from file name to uri
 *
 * Returns a newly allocated string "file:" followed by the escaped file
 * name; a file name that does not begin with a slash is prefixed by the
 * current directory and a slash. The current directory is not escaped.
 *
 * The caller frees the result. On error, a message is printed on stdout and
 * NULL is returned; nothing is left allocated in this case.
 */
char *filenametouri(char *filename) {
	enum { MAXDIR = 4096 };
	char *dir, *sep, *esc, *uri;
	size_t len;

	dir = NULL;
	sep = "";
	if (filename[0] != '/') {
		dir = malloc(MAXDIR);
		if (dir == NULL) {
			printf("failed to allocate memory for directory\n");
			return NULL;
		}
		if (getcwd(dir, MAXDIR) == NULL) {
			printf("error in obtaining the current directory\n");
			free(dir);
			return NULL;
		}
		sep = "/";
	}

	esc = filenameescape(filename);
	if (esc == NULL) {
		printf("failed to allocate memory for file name\n");
		free(dir);
		return NULL;
	}

	len = strlen("file:") + (dir != NULL ? strlen(dir) : 0) +
		strlen(sep) + strlen(esc) + 1;
	uri = malloc(len);
	if (uri == NULL) {
		printf("failed to allocate memory for file name\n");
		free(esc);
		free(dir);
		return NULL;
	}
	snprintf(uri, len, "file:%s%s%s", dir != NULL ? dir : "", sep, esc);

	free(esc);
	free(dir);
	return uri;
}
EXIT_SUCCESS : EXIT_FAILURE); 504 | } 505 | filename = argv[optind]; 506 | uri = filenametouri(filename); 507 | if (argn - optind > 1) { 508 | first = atoi(argv[optind + 1]) - 1; 509 | last = atoi(argv[optind + 1]) - 1 + 1; 510 | headers = FALSE; 511 | } 512 | 513 | /* open document */ 514 | 515 | doc = poppler_document_new_from_file(uri, NULL, NULL); 516 | if (doc == NULL) { 517 | printf("cannot open %s\n", filename); 518 | exit(-EXIT_FAILURE); 519 | } 520 | 521 | /* scan pages */ 522 | 523 | npages = poppler_document_get_n_pages(doc); 524 | if (first < 0 || last > npages) { 525 | printf("no such page: %d\n", last - 1); 526 | return EXIT_FAILURE; 527 | } 528 | 529 | present = 0; 530 | for (n = first; n < (last == -1 ? npages : last); n++) { 531 | page = poppler_document_get_page(doc, n); 532 | if (annotations) 533 | present = present | (printannotations(page) << 0); 534 | if (links) 535 | present = present | (printlinks(doc, page, flags) << 1); 536 | g_object_unref(page); 537 | } 538 | 539 | return present; 540 | } 541 | 542 | -------------------------------------------------------------------------------- /pdftoroff.1: -------------------------------------------------------------------------------- 1 | .TH pdftoroff 1 "September 12, 2017" 2 | . 3 | . 4 | . 5 | .SH NAME 6 | pdftoroff - convert pdf to various text formats (roff, html, TeX, text) 7 | . 8 | . 9 | . 10 | .SH SYNOPSIS 11 | .TP 10 12 | \fBpdftoroff\fP 13 | [\fI-r\fP|\fI-w\fP|\fI-p\fP|\fI-f\fP|\fI-t\fP|\fI-s fmt\fP] 14 | [\fI-m method\fP [\fI-d distance\fP] [\fI-o order\fP]] 15 | [\fI-i range\fP] [\fI-b box\fP] [\fI-n\fP] [\fI-v\fP] 16 | \fIfile.pdf\fP 17 | . 18 | . 19 | . 20 | .SH DESCRIPTION 21 | 22 | Extract text from a pdf file undoing page, column and paragraph formatting if 23 | possible but retaining italic and bold faces. The output is in one of the 24 | following formats: groff(1), html, plain TeX, text with font changes, simple 25 | text or a user-given format. 
26 | 27 | The groff output can be used to reformat the text to a smaller page size and a 28 | different font to make it more readable on a small tablet or e-ink ebook 29 | reader, as shown in the REFORMAT section. The \fIpdftoebook\fP script does 30 | this. 31 | . 32 | . 33 | . 34 | .SH OPTIONS 35 | .TP 36 | .B 37 | -r 38 | output in groff(1) format; it can be directly compiled by a pipe like 39 | \fIpdftoroff -r file.pdf | groff -Dutf8 -Tutf8 -\fP or prepended by code for 40 | page and character formatting, like in the REFORMAT section, below 41 | 42 | .TP 43 | .B 44 | -w 45 | output in html format; only the body of the html file is generated, not the 46 | header 47 | 48 | .TP 49 | .B 50 | -p 51 | convert to plain TeX; see BUGS below 52 | 53 | .TP 54 | .B 55 | -f 56 | text format; font changes are marked \fI\\[fontname]\fP, and backslashes 57 | escaped to \fI\\\\\fP 58 | 59 | .TP 60 | .B 61 | -t 62 | text only 63 | 64 | .TP 65 | \fB-s\fP \fIfmt\fP 66 | output using the parameters in \fIfmt\fP; 67 | see OUTPUT FORMAT, below 68 | 69 | .TP 70 | \fB-m\fP \fImethod\fP 71 | conversion method: 72 | 73 | .RS 74 | .IP 0 4 75 | detect columns on the fly 76 | .IP 1 77 | use the bounding box of the page 78 | .IP 2 79 | use the blocks of text on the page 80 | .IP 3 81 | use the blocks of text on the page, sorted 82 | .IP 4 83 | use rows of text 84 | .RE 85 | 86 | .IP 87 | the default method is 1, which is fast and usually gives good results on 88 | single-column documents; methods 2 is slower, but often produces better results 89 | on multiple-column documents; method 3 is even slower, but the sorting of the 90 | blocks may be necessary when the characters in the document are not in the 91 | correct order; method 4 is for tables; see \fICONVERSION METHODS\fP, below 92 | 93 | .TP 94 | \fB-d\fP \fIdistance\fP 95 | minimal distance between blocks of text in the page; 96 | for conversion method 4 the default is 0, for all others is 15; a smaller value 97 | like 10 may be 
appropriate when the document uses small fonts or has little 98 | space between columns or between the header/footer and the text; this value 99 | only affects methods 2, 3 and 4 100 | 101 | .TP 102 | \fB-o\fP \fIorder\fP 103 | the method used for sorting the blocks of text in the page: 104 | 105 | .RS 106 | .IP 0 4 107 | by their position, quick and approximate 108 | .IP 1 109 | by their position, exact 110 | .IP 2 111 | by the occurrence of their characters in the file 112 | .RE 113 | 114 | .TP 115 | \fB-i\fP \fIrange\fP 116 | pages to convert, in the format \fIfirst:last\fP; 117 | negative or zero is from the last page backwards; 118 | for example, \fI-2:0\fP is the range for converting the last three pages 119 | 120 | .TP 121 | \fB-b\fP \fI[x1,y1-x2,y2]\fP 122 | convert only the characters that are positioned 123 | within the coordinates \fIx1,y1\fP and \fIx2,y2\fP 124 | 125 | .TP 126 | .B -n 127 | do not convert the recurring elements in the page, such as page numbers, 128 | headers and footers; locating these elements takes time, making the conversion 129 | not to start immediately; it may fail, resulting in loss of text or these 130 | elements ending up in the output; see \fIpdfrecur(1)\fP for details 131 | 132 | .TP 133 | .B -v 134 | print markers to facilitate checking that the output is correct; see 135 | \fIMARKERS\fP, below 136 | 137 | .SH REFORMAT 138 | 139 | The following script re-formats a pdf file for a 200x250 page with 5pt margins 140 | and Helvetica font, so that it reads better to a small tablet or e-ink reader. 141 | It extracts the text from the pdf file, prepends it with some groff(7) page and 142 | font code and then compiles back to pdf. This is the core of the 143 | \fIpdftoebook\fP script. 144 | 145 | .nf 146 | .ft I 147 | { 148 | cat < new.pdf 158 | .ft P 159 | .fi 160 | 161 | . 162 | . 163 | . 164 | .SH OUTPUT FORMAT 165 | 166 | The text from the pdf file is scanned for font changes and paragraph breaks. 
167 | Short lines, indents and vertical spaces are taken as the start of a new 168 | paragraph, otherwise the new line is considered the continuation of the 169 | previous. Font names are matched against "Italic" and "Bold", which indicate the 170 | beginning of an italic or bold face, and their lack as the end of the font face. 171 | 172 | The various output formats are obtained by adding the appropriate strings at 173 | paragraph breaks and font changes, and by substituting some characters (for 174 | example, a plain \fI<\fP is replaced by \fI&lt;\fP for the html format). 175 | 176 | The \fI-s fmt\fP option allows arbitrary output strings. For example, the html 177 | format can be alternatively generated by the command: 178 | 179 | .nf 180 | \fI 181 | pdftoroff -s ' 182 |

,

183 | ,,,,,,,,,,true,\\,.,<,>,&' file.pdf 184 | \fP 185 | .fi 186 | 187 | The format string is a comma-separated list of the following fields. Some may 188 | be empty and some may contain newlines. 189 | 190 | .TP 191 | .I 192 | parstart 193 | the string printed when a paragraph begins 194 | .TP 195 | .I 196 | parend 197 | the string printed when a paragraph ends 198 | .TP 199 | .I 200 | fontname 201 | the \fIprintf(3)\fP format for printing the font name; 202 | for example, the \fI-f\fP option uses \fI\\\\[%s]\fP, so that when the text 203 | begins using the font TimesNewRomanCM this is marked 204 | \fI\\[TimesNewRomanCM]\fP in the output 205 | .TP 206 | .I 207 | plain 208 | printed when the font changes to non-italic and non-bold 209 | .br 210 | (example: \fI\\fR\fP in roff) 211 | .TP 212 | .I 213 | italic 214 | printed when the font changes to italic but not bold 215 | .br 216 | (example: \fI\\fI\fP in roff) 217 | .TP 218 | .I 219 | bold 220 | printed when the font changes to bold but not italic 221 | .br 222 | (example: \fI\\fB\fP in roff) 223 | .TP 224 | .I 225 | bolditalic 226 | printed when the font changes to both italic and bold 227 | .br 228 | (example: \fI\\f[BI]\fP in roff) 229 | .TP 230 | .I 231 | italicbegin 232 | printed when the text begins using an italic font 233 | .br 234 | (example: \fI\fP in html) 235 | .TP 236 | .I 237 | italicend 238 | printed when the text ends using an italic font 239 | .br 240 | (example: \fI\fP in html) 241 | .TP 242 | .I 243 | boldbegin 244 | printed when the text begins using a bold font 245 | .br 246 | (example: \fI\fP in html) 247 | .TP 248 | .I 249 | boldend 250 | printed when the text ends using a bold font 251 | .br 252 | (example: \fI\fP in html) 253 | .TP 254 | .I 255 | reset 256 | if this is \fItrue\fP, 257 | turn off all active font faces when a paragraph ends and restore them when the 258 | new one starts; for example, if the pdf starts using a bold font and then ends 259 | it after two paragraphs, the html 
output is \fI<p><b>first paragraph</b></p> 260 | <p><b>second</b></p>
\fP 261 | .TP 262 | .I 263 | backslash 264 | replace every backslash with this string 265 | .TP 266 | .I 267 | firstdot 268 | replace a dot at the start of a line with this string 269 | (this is only useful for roff output) 270 | .TP 271 | .I 272 | less 273 | replace the less-than sign (\fI<\fP) with this 274 | .TP 275 | .I 276 | greater 277 | replace the greater-than sign (\fI>\fP) with this 278 | .TP 279 | .I 280 | and 281 | replace the ampersand (\fI&\fP) with this 282 | . 283 | . 284 | . 285 | .SH CONVERSION METHODS 286 | 287 | All conversion methods scan the characters in the page in the same order as in 288 | the pdf file. A new line is detected on: 289 | 290 | .IP \(bu 4 291 | a large vertical space from the previous character 292 | .IP \(bu 293 | a small vertical space from the previous character, if the previous character 294 | is not at the right of the column (short previous line) 295 | .IP \(bu 296 | a small vertical space from the previous character, if the current character is 297 | not at the left of the column (indented line) 298 | .RE 299 | 300 | The second and third conditions depend on the left and right borders of the 301 | current column. The conversion methods differ on how these are found: 302 | 303 | .IP 0 4 304 | The left border is the left corner of the leftmost character in the page. 305 | Column changes are detected by large decreases in the y coordinate, and 306 | cause a recalculation of the left border from the remaining characters in the 307 | page. The right border is a fixed position in the page. 308 | 309 | .IP 1 310 | The left and right borders are given by the bounding box of the page. This works 311 | on single-column pages. This is the default method. 312 | 313 | .IP 2 314 | The blocks of text in the page are determined before scanning the page. The 315 | left and right borders for each character are those of the blocks of text it is 316 | in. 317 | 318 | .IP 3 319 | This is the same as 2, but blocks are sorted before scanning the page. 
It is 320 | slower than method 2 not because of the sorting but because the whole page 321 | needs to be scanned in search of characters in the first block, again for the 322 | second, the third, etc. This may be necessary if the characters in the file are 323 | not in the order they should be printed. 324 | 325 | Three sorting algorithms can be used: the first two try to guess the order of 326 | the blocks based on their position on the page; the third does it based on the 327 | occurrence of their characters in the page. In particular, the algorithms based 328 | on the position of the box sort boxes vertically if they overlap horizontally, 329 | otherwise they order them horizontally. This usually gives reasonable results 330 | on single-column and multiple-column documents. The difference between the two 331 | is that the first is quick and approximate, the second is slower and exact. The 332 | third method scans the characters as they occur in the file; the block 333 | containing the first is the first block; the block containing the first 334 | character not in the first block is the second, and so on. 335 | 336 | .IP 4 337 | This method assumes that the document is a single table: a sequence of rows, 338 | each made of a number of cells. The rows are first located in vertical order, 339 | then each is converted to a line of text. 340 | 341 | This method allows converting tables even if their cells are ordered by columns 342 | instead of rows, which is often the case. 343 | 344 | The usual rules for line breaking and joining are ignored, and every row is 345 | output as a single line. The minimal text distance (option \fI-f\fP) is used as 346 | the minimal distance between rows; if they are very close to each other, a 347 | negative value may be used to separate them. 348 | 349 | . 350 | . 
351 | .SH MARKERS 352 | 353 | Unformatting text requires introducing line breaks in some places and not in 354 | others and removing the hyphens used to break a word between lines. 355 | This cannot in general be done uniquely. Option \fI-v\fP is for printing 356 | markers that show what has been done and why. 357 | .TP 358 | .I [] 359 | a newline was translated into a space because it was considered to 360 | separate two lines of the same paragraph 361 | .TP 362 | .I [-] 363 | a hyphen and the following newline were removed because they looked like a 364 | word broken between two lines 365 | .TP 366 | .I [S] 367 | the following line break is because the current line is short, like the only 368 | or final line of a paragraph 369 | .TP 370 | .I [E] 371 | same, but the line is also at the end of a block of text 372 | .TP 373 | .I [V] 374 | the following line break is due to vertical space between lines 375 | .TP 376 | .I [I] 377 | the following line break is because the next line is indented 378 | 379 | .P 380 | 381 | These markers are intended for debugging and checking the final result. For 382 | example, a text may look converted correctly, but two dash-separated words have 383 | been merged because the dash fell at the end of the line, and therefore looked 384 | like the hyphen of a single hyphenated word broken between two lines. Marker 385 | .I [-] 386 | helps in checking for this kind of error. Spelling the two parts that have 387 | been merged and their result may suggest whether merging was correct, but some 388 | cases cannot be automatically solved this way. For example, if the dash in the 389 | sentence "Price is not under 3, is much more -- over 10, I think." is placed at 390 | the end of a line, it looks like the word "moreover" when hyphenated to split 391 | it between two lines. 392 | 393 | . 394 | . 395 | . 396 | .SH BUGS 397 | 398 | Replacements are limited to some fixed characters (\\, ., <, > and &). 
Instead, 399 | the \fI-s\fP option should support replacing arbitrary characters (say, 400 | \fI@\fP). 401 | 402 | The plain TeX conversion is primitive: it does not convert accented characters 403 | as it should; it does not support fonts that are both bold and italic; it does 404 | not finish with \fI\\end\fP (but the latter is coherent with generating only 405 | the body of the text in the other formats). 406 | 407 | A command line option should allow specifying a number of boxes so that text is 408 | extracted from them in order rather than from the whole page. This is because 409 | the method used by pdftoroff to detect the start of a new column does not 410 | always work, and even if it does, characters in the file are not necessarily in 411 | the correct order. Such an option would also allow discarding headers and 412 | footers. As an example, \fI-b box1,box2,box3;box4;box5;2*\fP would extract text 413 | from \fIbox1,box2,box3\fP from the first page, from \fIbox4\fP from the second, 414 | from \fIbox5\fP from the third, and then repeat with \fIbox4\fP and \fIbox5\fP 415 | until the end of the document. 416 | 417 | The html output is not always correct. If the document starts with an italic 418 | font, then switches to italic and bold and then to bold only, the resulting 419 | code is \fI<i>...<b>...</i>...</b>\fP, which is not nested correctly. The 420 | right code would be \fI<i>...<b>...</b></i><b>...</b>\fP. Two solutions are 421 | possible: 422 | 423 | .IP " * " 4 424 | turn off all faces before starting a new one 425 | .IP " * " 426 | remember which of italic and bold was started first 427 | 428 | .P 429 | The numeric parameters for detecting the start of a new paragraph or column are 430 | fixed (the \fIstruct measure\fP in the code). They should be changeable by 431 | command line options. 
432 | 433 | .SH SEE ALSO 434 | pdftotext(1), pdftohtml(1), poppler (https://poppler.freedesktop.org/) 435 | 436 | -------------------------------------------------------------------------------- /pdftext.c: -------------------------------------------------------------------------------- 1 | /* 2 | * pdftext.c 3 | * 4 | * convert pdf to text or rich text (roff, html, tex) 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include "pdfrects.h" 12 | #include "pdftext.h" 13 | 14 | /* 15 | * known output formats 16 | */ 17 | struct format format_roff = { 18 | ".ti 1\n", "\n", 19 | "", 20 | "\\fR", "\\fI", "\\fB", "\\f[BI]", 21 | "", "", "", "", 22 | FALSE, 23 | "\\", "\\[char46]", "<", ">", "&" 24 | }; 25 | struct format format_html = { 26 | "\n

", "

\n", 27 | "", 28 | "", "", "", "", 29 | "", "", "", "", 30 | TRUE, 31 | "\\", ".", "<", ">", "&" 32 | }; 33 | struct format format_tex = { 34 | "", "\n\n", 35 | "", 36 | "\\rm ", "\\it ", "\\bf ", "\\bf ", /* FIXME: bold+italic */ 37 | "", "", "", "", 38 | FALSE, 39 | "\\backslash ", ".", "<", ">", "\\& " 40 | }; 41 | struct format format_textfont = { 42 | "", "\n", 43 | "\\[%s]", 44 | "", "", "", "", 45 | "", "", "", "", 46 | FALSE, 47 | "\\\\", ".", "<", ">", "&" 48 | }; 49 | struct format format_text = { 50 | "", "\n", 51 | "", 52 | "", "", "", "", 53 | "", "", "", "", 54 | FALSE, 55 | "\\", ".", "<", ">", "&" 56 | }; 57 | 58 | /* 59 | * print reason for a paragraph break 60 | */ 61 | gboolean debugpar = FALSE; 62 | void dnewpar(FILE *fd, char *why) { 63 | if (debugpar) 64 | fputs(why, fd); 65 | } 66 | void delement(FILE *fd, char *what, int num) { 67 | if (debugpar) 68 | fprintf(fd, what, num); 69 | } 70 | 71 | /* 72 | * start or end a font face 73 | * start TRUE to start the new face, FALSE to end the previous 74 | * reset TRUE to temporarily end or restore all active faces 75 | */ 76 | void face(FILE *fd, gboolean start, gboolean reset, 77 | gboolean *italic, gboolean *bold, 78 | PopplerTextAttributes *attr, struct format *format) { 79 | gboolean newitalic, newbold; 80 | 81 | if (reset && ! format->reset) 82 | return; 83 | 84 | newitalic = NULL != strstr(attr->font_name, "Italic"); 85 | newbold = NULL != strstr(attr->font_name, "Bold"); 86 | 87 | /* font name */ 88 | 89 | if (start && ! reset && *format->fontname != '\0') 90 | fprintf(fd, format->fontname, attr->font_name); 91 | 92 | /* font start, no end except for resets */ 93 | 94 | if (start) { 95 | if (! newitalic && ! newbold) 96 | fputs(format->plain, fd); 97 | else if (newitalic && ! newbold) 98 | fputs(format->italic, fd); 99 | else if (! newitalic && newbold) 100 | fputs(format->bold, fd); 101 | if (newitalic && newbold) 102 | fputs(format->bolditalic, fd); 103 | } 104 | if (! 
start && reset) 105 | fputs(format->plain, fd); 106 | 107 | /* font start-end */ 108 | 109 | if (! start) { 110 | if (*bold && newbold == reset) 111 | fputs(format->boldend, fd); 112 | if (*italic && newitalic == reset) 113 | fputs(format->italicend, fd); 114 | } 115 | else { 116 | if (*italic == reset && newitalic) 117 | fputs(format->italicbegin, fd); 118 | if (*bold == reset && newbold) 119 | fputs(format->boldbegin, fd); 120 | } 121 | 122 | /* update current font */ 123 | 124 | if (start && ! reset) { 125 | *italic = newitalic; 126 | *bold = newbold; 127 | } 128 | } 129 | 130 | /* 131 | * show a single character 132 | * rest character not written (hyphen at end of line) or NONE 133 | */ 134 | void showcharacter(FILE *fd, char *cur, char *next, char *rest, 135 | gboolean newpar, char hyphen, struct format *format) { 136 | char *ligatures[] = { 137 | "ff", "\xef\xac\x80", 138 | "fi", "\xef\xac\x81", 139 | "fl", "\xef\xac\x82", 140 | "ffi", "\xef\xac\x83", 141 | "ffl", "\xef\xac\x84", 142 | "st", "\xef\xac\x85", 143 | "st", "\xef\xac\x86", 144 | NULL, NULL 145 | }; 146 | int l; 147 | 148 | *rest = NONE; 149 | if (*cur == '\\') 150 | fputs(format->backslash, fd); 151 | else if (newpar && *cur == '.') 152 | fputs(format->firstdot, fd); 153 | else if (*cur == '<') 154 | fputs(format->less, fd); 155 | else if (*cur == '>') 156 | fputs(format->greater, fd); 157 | else if (*cur == '&') 158 | fputs(format->and, fd); 159 | else if (*cur == hyphen && (*next == '\0' || *next == '\n')) 160 | *rest = '-'; 161 | else { 162 | for (l = 0; ligatures[l] != NULL; l += 2) { 163 | if ((int) strlen(ligatures[l + 1]) != next - cur) 164 | continue; 165 | if (! 
memcmp(ligatures[l + 1], cur, next - cur)) { 166 | fputs(ligatures[l], fd); 167 | return; 168 | } 169 | } 170 | fwrite(cur, 1, next - cur, fd); 171 | } 172 | } 173 | 174 | /* 175 | * check if a line is short 176 | */ 177 | gboolean isshortline(PopplerRectangle crect, gdouble left, gdouble right, 178 | struct measure *measure) { 179 | return crect.x2 - left < (right - left) * measure->rightreturn / 100; 180 | } 181 | 182 | /* 183 | * check start of new column 184 | */ 185 | gboolean newcolumn(gdouble y, PopplerRectangle crect, 186 | gdouble left, PopplerRectangle *tr, 187 | struct measure *measure) { 188 | return 189 | crect.x1 - left > (tr->x2 - tr->x1) * measure->newcolumnx / 100 190 | && 191 | y - crect.y1 > (tr->y2 - tr->y1) * measure->newcolumny / 100; 192 | } 193 | 194 | /* 195 | * box-not-found error 196 | */ 197 | void boxnotfound(char *cur, PopplerRectangle *crect, RectangleList *textarea) { 198 | fprintf(stderr, "error: cannot find text rectangle\n"); 199 | fprintf(stderr, "character: %c (%d)\n", *cur, *cur); 200 | fprintf(stderr, "rectangle:\n"); 201 | rectangle_print(stderr, crect); 202 | fprintf(stderr, "\n"); 203 | fprintf(stderr, "text area:\n"); 204 | rectanglelist_print(stderr, textarea); 205 | exit(EXIT_FAILURE); 206 | } 207 | 208 | /* 209 | * data for processing the characters 210 | */ 211 | struct scandata { 212 | gboolean newpar; // last paragraph in previous box/page is over 213 | char prev; // unprinted char, possibly NONE or START 214 | gboolean italic; // current font is italic 215 | gboolean bold; // current font is bold 216 | gboolean newface; // font changed 217 | }; 218 | 219 | /* 220 | * start processing a page (no end needed) 221 | */ 222 | void startpage(struct scandata *scanpage) { 223 | scanpage->italic = FALSE; 224 | scanpage->bold = FALSE; 225 | scanpage->newface = TRUE; 226 | } 227 | 228 | /* 229 | * show the characters of a page contained in a box 230 | * zone only characters in this box are shown 231 | * NULL = all characters 
in page 232 | * textarea the blocks of text in the page 233 | * may also be the whole page or its bounding box 234 | * text, attrlist, rects, nrects 235 | * characters and their fonts and positions 236 | * detectcolumns whether to detect the start of a new column during the 237 | * scan, by comparing the position of the current 238 | * character with that of the current column and previous 239 | * character; only used when textarea=page 240 | */ 241 | void showregion(FILE *fd, PopplerRectangle *zone, RectangleList *textarea, 242 | char *text, GList *attrlist, 243 | PopplerRectangle *rects, guint nrects, 244 | struct measure *measure, struct format *format, 245 | struct scandata *scandata, gboolean detectcolumn) { 246 | char *cur, *next; 247 | int count; 248 | gdouble left, y; 249 | 250 | GList *attrelem; 251 | int ti = -1; 252 | PopplerRectangle *tr; 253 | PopplerTextAttributes *attr; 254 | gboolean startcolumn, shortline, newline; 255 | 256 | PopplerRectangle crect; 257 | guint r; 258 | 259 | /* cycle over (utf-8) characters in page */ 260 | 261 | shortline = FALSE; 262 | startcolumn = TRUE; 263 | 264 | attrelem = attrlist; 265 | attr = (PopplerTextAttributes *) (attrelem->data); 266 | 267 | for (cur = text, count = 0; *cur; cur = next, count++) { 268 | crect = rects[count]; 269 | next = g_utf8_next_char(cur); 270 | if (zone != NULL && ! 
rectangle_contain(zone, &crect)) 271 | continue; 272 | 273 | /* text rectangle this char is in */ 274 | 275 | if (ti != -1 && rectangle_contain(tr, &crect)) 276 | newline = FALSE; 277 | else { 278 | ti = rectanglelist_contain(textarea, &crect); 279 | delement(fd, "[BLOCK %d]", ti); 280 | if (ti != -1) 281 | tr = &textarea->rect[ti]; 282 | else if (*cur == ' ') { 283 | dnewpar(fd, "_SPACE_"); 284 | tr = &crect; 285 | } 286 | else 287 | boxnotfound(cur, &crect, textarea); 288 | left = tr->x1; 289 | y = tr->y1 - measure->newline - 1; 290 | newline = TRUE; 291 | } 292 | 293 | /* explicit end of line */ 294 | 295 | if (*cur == '\n' || newline) { 296 | if (shortline) { 297 | dnewpar(fd, "[S]"); 298 | scandata->newpar = TRUE; 299 | } 300 | else { 301 | if (scandata->prev == '-') 302 | dnewpar(fd, "[-]"); 303 | else 304 | dnewpar(fd, "[]"); 305 | scandata->prev = 306 | scandata->prev == '-' || 307 | scandata->prev == START ? 308 | NONE : ' '; 309 | } 310 | } 311 | 312 | /* real character */ 313 | 314 | if (*cur != '\n') { 315 | 316 | /* new column */ 317 | 318 | if (detectcolumn && 319 | newcolumn(y, crect, left, tr, measure)) 320 | startcolumn = TRUE; 321 | 322 | if (detectcolumn && startcolumn) { 323 | dnewpar(fd, "[COLUMN]"); 324 | left = 10000; 325 | y = 10000; 326 | for (r = MAX(measure->headfooter, count); 327 | r + measure->headfooter < nrects; 328 | r++) { 329 | left = MIN(left, rects[r].x1); 330 | y = MIN(y, rects[r].y1); 331 | } 332 | if (left == 10000) 333 | y = 0; /* few chars, force newpar */ 334 | y -= measure->newline + 1; 335 | startcolumn = FALSE; 336 | } 337 | 338 | /* y increase */ 339 | 340 | if (crect.y1 - y > measure->newline) { 341 | if (crect.y1 - y > measure->newpar) { 342 | dnewpar(fd, "[V]"); 343 | fputs(format->parend, fd); 344 | fputs(format->parstart, fd); 345 | scandata->newpar = TRUE; 346 | } 347 | y = crect.y1; 348 | if (crect.x1 - left > measure->indent) { 349 | dnewpar(fd, "[I]"); 350 | scandata->newpar = TRUE; 351 | } 352 | } 353 | 354 | 
/* new paragraph */ 355 | 356 | if (scandata->newpar) { 357 | face(fd, FALSE, TRUE, 358 | &scandata->italic, &scandata->bold, 359 | attr, format); 360 | if (scandata->prev != START) 361 | fputs(format->parend, fd); 362 | fputs(format->parstart, fd); 363 | face(fd, TRUE, TRUE, 364 | &scandata->italic, &scandata->bold, 365 | attr, format); 366 | } 367 | else if (scandata->prev > START) 368 | fprintf(fd, "%c", scandata->prev); 369 | 370 | /* start a new font face */ 371 | 372 | if (scandata->newface && *cur != ' ') { 373 | face(fd, TRUE, FALSE, 374 | &scandata->italic, &scandata->bold, 375 | attr, format); 376 | scandata->newface = FALSE; 377 | } 378 | 379 | /* print character */ 380 | 381 | showcharacter(fd, cur, next, 382 | &scandata->prev, scandata->newpar, 383 | measure->hyphen, format); 384 | 385 | /* update status variables */ 386 | 387 | shortline = isshortline(crect, left, tr->x2, measure); 388 | scandata->newpar = FALSE; 389 | } 390 | 391 | /* end of text with current font; read next */ 392 | 393 | if (count == attr->end_index - 394 | (g_unichar_isspace(*next) ? 1 : 0)) { 395 | attrelem = g_list_next(attrelem); 396 | if (! 
attrelem) { 397 | face(fd, FALSE, TRUE, 398 | &scandata->italic, &scandata->bold, 399 | attr, format); 400 | break; 401 | } 402 | attr = (PopplerTextAttributes *) (attrelem->data); 403 | face(fd, FALSE, FALSE, 404 | &scandata->italic, &scandata->bold, 405 | attr, format); 406 | scandata->newface = TRUE; 407 | } 408 | } 409 | 410 | /* shortline at end */ 411 | 412 | if (shortline) { 413 | dnewpar(fd, "[E]"); 414 | scandata->newpar = TRUE; 415 | } 416 | } 417 | 418 | /* 419 | * show the characters in a page 420 | */ 421 | void showpage(FILE *fd, PopplerPage *page, PopplerRectangle *zone, 422 | int method, int order, 423 | struct measure *measure, struct format *format, 424 | struct scandata *scandata) { 425 | char *text; 426 | GList *attrlist; 427 | PopplerRectangle *rects, *tr, *region; 428 | guint nrects; 429 | RectangleList *textarea; 430 | gint r; 431 | void (*sort[])(RectangleList *, PopplerPage *) = { 432 | rectanglelist_quicksort, 433 | rectanglelist_twosort, 434 | rectanglelist_charsort 435 | }; 436 | 437 | /* initalize output font */ 438 | 439 | startpage(scandata); 440 | 441 | /* get page content */ 442 | 443 | text = poppler_page_get_text(page); 444 | attrlist = poppler_page_get_text_attributes(page); 445 | if (! text || ! attrlist) 446 | return; /* no text in page */ 447 | if (! 
poppler_page_get_text_layout(page, &rects, &nrects)) 448 | return; /* no text in page */ 449 | 450 | /* analyze text */ 451 | 452 | switch (method) { 453 | case 0: 454 | tr = poppler_rectangle_new(); 455 | poppler_page_get_crop_box(page, tr); 456 | textarea = rectanglelist_new(1); 457 | rectanglelist_add(textarea, tr); 458 | showregion(fd, zone, textarea, text, attrlist, rects, nrects, 459 | measure, format, scandata, TRUE); 460 | poppler_rectangle_free(tr); 461 | break; 462 | case 1: 463 | tr = rectanglelist_boundingbox(page); 464 | textarea = rectanglelist_new(1); 465 | rectanglelist_add(textarea, tr); 466 | showregion(fd, zone, textarea, text, attrlist, rects, nrects, 467 | measure, format, scandata, FALSE); 468 | poppler_rectangle_free(tr); 469 | break; 470 | case 2: 471 | textarea = rectanglelist_textarea_distance(page, 472 | measure->blockdistance); 473 | showregion(fd, zone, textarea, text, attrlist, rects, nrects, 474 | measure, format, scandata, FALSE); 475 | break; 476 | case 3: 477 | textarea = rectanglelist_textarea_distance(page, 478 | measure->blockdistance); 479 | sort[order](textarea, page); 480 | region = poppler_rectangle_new(); 481 | for (r = 0; r < textarea->num; r++) { 482 | delement(fd, "[=== BLOCK %d]", r); 483 | if (zone == NULL) { 484 | showregion(fd, &textarea->rect[r], textarea, 485 | text, attrlist, rects, nrects, 486 | measure, format, scandata, FALSE); 487 | continue; 488 | } 489 | if (! 
rectangle_overlap(zone, &textarea->rect[r])) 490 | continue; 491 | rectangle_intersect(region, zone, &textarea->rect[r]); 492 | showregion(fd, region, textarea, 493 | text, attrlist, rects, nrects, 494 | measure, format, scandata, FALSE); 495 | } 496 | poppler_rectangle_free(region); 497 | break; 498 | case 4: 499 | measure->rightreturn = -1; 500 | measure->indent = 100000; 501 | measure->hyphen = '\0'; 502 | textarea = rectanglelist_rows(page, measure->blockdistance); 503 | region = poppler_rectangle_new(); 504 | for (r = 0; r < textarea->num; r++) { 505 | delement(fd, "[=== BLOCK %d]", r); 506 | if (zone == NULL) { 507 | showregion(fd, &textarea->rect[r], textarea, 508 | text, attrlist, rects, nrects, 509 | measure, format, scandata, FALSE); 510 | fprintf(fd, "\n"); 511 | continue; 512 | } 513 | if (! rectangle_overlap(zone, &textarea->rect[r])) 514 | continue; 515 | rectangle_intersect(region, zone, &textarea->rect[r]); 516 | showregion(fd, region, textarea, 517 | text, attrlist, rects, nrects, 518 | measure, format, scandata, FALSE); 519 | fprintf(fd, "\n"); 520 | } 521 | poppler_rectangle_free(region); 522 | break; 523 | default: 524 | fprintf(stderr, "no such conversion method: %d\n", method); 525 | exit(EXIT_FAILURE); 526 | } 527 | 528 | poppler_page_free_text_attributes(attrlist); 529 | g_free(rects); 530 | free(text); 531 | } 532 | 533 | /* 534 | * start processing a document 535 | */ 536 | void startdocument(FILE *fd, 537 | int method, struct measure *measure, struct format *format, 538 | struct scandata *scandata) { 539 | (void)fd; 540 | (void)method; 541 | (void)measure; 542 | (void)format; 543 | scandata->newpar = FALSE; 544 | scandata->prev = START; 545 | } 546 | 547 | /* 548 | * end a document 549 | */ 550 | void enddocument(FILE *fd, 551 | int method, struct measure *measure, struct format *format, 552 | struct scandata *scandata) { 553 | (void)method; 554 | (void)measure; 555 | if (scandata->prev != START) 556 | fputs(format->parend, fd); 557 | } 
558 | 559 | /* 560 | * show some pages of a pdf document 561 | */ 562 | void showdocumentpart(FILE *fd, PopplerDocument *doc, int first, int last, 563 | PopplerRectangle *zone, 564 | int method, int order, 565 | struct measure *measure, struct format *format) { 566 | struct scandata scandata; 567 | int npage; 568 | PopplerPage *page; 569 | gdouble h; 570 | 571 | if (first < 0) 572 | first = poppler_document_get_n_pages(doc) + first; 573 | if (last < 0) 574 | last = poppler_document_get_n_pages(doc) + last; 575 | 576 | if (first < 0) 577 | first = 0; 578 | if (last >= poppler_document_get_n_pages(doc)) 579 | last = poppler_document_get_n_pages(doc) - 1; 580 | 581 | if (zone != NULL && zone->x1 == -100 && zone->x2 == -100) { 582 | h = zone->y2; 583 | poppler_rectangle_free(zone); 584 | debugfrequent = 0; 585 | zone = rectanglevector_main(doc, NULL, 586 | h, measure->blockdistance); 587 | } 588 | 589 | startdocument(fd, method, measure, format, &scandata); 590 | for (npage = first; npage <= last; npage++) { 591 | page = poppler_document_get_page(doc, npage); 592 | delement(fd, "[PAGE %d]", npage); 593 | showpage(fd, page, zone, 594 | method, order, measure, format, &scandata); 595 | g_object_unref(page); 596 | } 597 | enddocument(fd, method, measure, format, &scandata); 598 | } 599 | 600 | /* 601 | * show a pdf document 602 | */ 603 | void showdocument(FILE *fd, PopplerDocument *doc, PopplerRectangle *zone, 604 | int method, int order, 605 | struct measure *measure, struct format *format) { 606 | showdocumentpart(fd, doc, 0, -1, zone, method, order, measure, format); 607 | } 608 | 609 | /* 610 | * show a pdf file 611 | */ 612 | void showfile(FILE *fd, char *filename, int first, int last, 613 | PopplerRectangle *zone, 614 | int method, int order, 615 | struct measure *measure, struct format *format) { 616 | char *uri; 617 | PopplerDocument *doc; 618 | 619 | uri = filenametouri(filename); 620 | 621 | doc = poppler_document_new_from_file(uri, NULL, NULL); 622 | 
free(uri); 623 | if (doc == NULL) { 624 | printf("error opening file %s\n", filename); 625 | exit(EXIT_FAILURE); 626 | } 627 | 628 | showdocumentpart(fd, doc, first, last, zone, 629 | method, order, measure, format); 630 | } 631 | 632 | /* 633 | * parse a string into a struct format 634 | */ 635 | struct format *parseformat(char *s) { 636 | struct format *f; 637 | char *c, *t; 638 | 639 | c = strdup(s); 640 | f = malloc(sizeof(struct format)); 641 | memset(f, 0, sizeof(struct format)); 642 | 643 | if (! (f->parstart = strsep(&c, ","))) 644 | goto parse_error; 645 | if (! (f->parend = strsep(&c, ","))) 646 | goto parse_error; 647 | if (! (f->fontname = strsep(&c, ","))) 648 | goto parse_error; 649 | if (! (f->plain = strsep(&c, ","))) 650 | goto parse_error; 651 | if (! (f->italic = strsep(&c, ","))) 652 | goto parse_error; 653 | if (! (f->bold = strsep(&c, ","))) 654 | goto parse_error; 655 | if (! (f->bolditalic = strsep(&c, ","))) 656 | goto parse_error; 657 | if (! (f->italicbegin = strsep(&c, ","))) 658 | goto parse_error; 659 | if (! (f->italicend = strsep(&c, ","))) 660 | goto parse_error; 661 | if (! (f->boldbegin = strsep(&c, ","))) 662 | goto parse_error; 663 | if (! (f->boldend = strsep(&c, ","))) 664 | goto parse_error; 665 | if (! (t = strsep(&c, ","))) 666 | goto parse_error; 667 | f->reset = ! strcmp(t, "true"); 668 | if (! (f->backslash = strsep(&c, ","))) 669 | goto parse_error; 670 | if (! (f->firstdot = strsep(&c, ","))) 671 | goto parse_error; 672 | if (! (f->less = strsep(&c, ","))) 673 | goto parse_error; 674 | if (! (f->greater = strsep(&c, ","))) 675 | goto parse_error; 676 | if (! 
(f->and = strsep(&c, ","))) 677 | goto parse_error; 678 | 679 | return f; 680 | 681 | parse_error: 682 | free(f); 683 | return NULL; 684 | } 685 | 686 | -------------------------------------------------------------------------------- /cairodrm.c: -------------------------------------------------------------------------------- 1 | /* 2 | * cairodrm.c 3 | * 4 | * a cairo context for drawing on the linux direct rendering infrastructure 5 | */ 6 | 7 | /* 8 | * Internals 9 | * --------- 10 | * 11 | * The framebuffers are the memory areas where the programs draw. The 12 | * connectors are the actual video outputs of the device (VGA, HDMI, AV, LVDS, 13 | * etc.). Each connector supports certain resolutions (modes), like 1280x1024, 14 | * 1024x768, 800x600, etc. In order to produce video, a framebuffer needs to be 15 | * created and linked to the connectors, each set on its mode (resolution). 16 | * 17 | * The case of a single connector is easy: choose a mode of the connector, 18 | * create a framebuffer as large as that mode and link it to the connector. 19 | * 20 | * The complication with multiple connectors is that they may support different 21 | * modes (resolutions). These may even differ in aspect, like in the following 22 | * (exaggerated) example. 23 | * 24 | * 25 | * +----------------+ <---- mode of connector 1 26 | * | | 27 | * +----|----------------|----+ <--- mode of connector 2 28 | * | | | | 29 | * | | | | 30 | * | | | | 31 | * +----|----------------|----+ 32 | * | | 33 | * +----------------+ 34 | * 35 | * A mode of a connector can be seen as the size of a possible viewport on a 36 | * framebuffer, like a camera that takes only a part of a larger image. The 37 | * video output from that connector shows only the part of the framebuffer that 38 | * is inside the viewport. 
39 | * 40 | * Linking a framebuffer to a connector requires: 41 | * 42 | * - the mode to use 43 | * this tells the resolution of the video output, 44 | * but also the size of the viewport of the connector on the framebuffer 45 | * - the position of the viewport within the framebuffer 46 | * 47 | * This allows linking the same framebuffer to connectors set to different 48 | * modes. The viewports cannot be larger than the framebuffer. Seen in the 49 | * other way, the framebuffer must be large enough to contain all these modes. 50 | * Its width must be at least the maximal among the widths of the modes and 51 | * its height the maximal of the heights. Such a framebuffer can be linked to 52 | * all connectors by placing each connector viewport at its center. 53 | * 54 | * In order for the same image to be shown in full on all connectors, it must be 55 | * drawn on the common part of these viewports. This is the intersection of the 56 | * two rectangles in the example above. This intersection is what is used for 57 | * creating a cairo context. This way, the cairo context is visualized at the 58 | * center of each connected video device, possibly leaving black bands on the 59 | * sides or at the top and bottom. 60 | * 61 | * To use the maximal possible resolution while minimizing the size of the 62 | * black bands so that the image is as large as possible on all video outputs, 63 | * the size of the framebuffer and the modes are calculated as follows. 64 | * 65 | * 1. for each connector, find its maximal resolution mode 66 | * 2. the minimal width and height of these modes are the size of the cairo 67 | * context 68 | * 3. for each connector, its mode is the one of minimal size among the ones 69 | * large enough to contain the whole cairo context 70 | * 4. 
the size of the framebuffer is the maximal width and maximal height among 71 | * all these modes 72 | * 73 | * Since the size of the modes comprises two numbers (width and height), the 74 | * maximal (in step 1) and minimal (in step 3) are not always unique. The code 75 | * here keeps the current best (maximal or minimal so far) and updates it only 76 | * when another mode is better on both dimensions. The minimization in step 2 77 | * and the maximization in step 4 are instead done separately on the widths and 78 | * the heights, and are therefore unique. 79 | * 80 | * Actually, the modes of a connector are not necessarily the only ones 81 | * supported by the connector. The code here is however designed to only employ 82 | * them. 83 | */ 84 | 85 | /* 86 | * Parameters 87 | * ---------- 88 | * 89 | * This module takes a list of connectors to use and a requested size. 90 | * 91 | * When a list of connectors is passed, only the connectors in it are used. The 92 | * others are excluded from the calculation of the size of the framebuffer and 93 | * the cairo context and are not linked to the framebuffer. They do not show 94 | * any output and do not affect the resolution and size of the output on the 95 | * others. 96 | * 97 | * When a requested size is passed, steps 1. and 2. of the calculation of the 98 | * size of the framebuffer and cairo context are skipped. Only 3. and 4. 99 | * remain: the size of the framebuffer is calculated to include a mode large 100 | * enough to contain an area of the requested size on all connectors, and the 101 | * cairo context is the intersection of them. When a flag "exact" is also 102 | * passed, the cairo context size is instead exactly the requested size. 
This 103 | * allows: 104 | * 105 | * - without "exact", a resolution lower than maximal 106 | * - with "exact", to optimize the output toward a specific connector at the 107 | * expense of the others 108 | * 109 | * The second is also obtained by passing "id" or "type" as the requested size. 110 | * The best resolution for the connector of that id or type is used as the 111 | * requested size. The pdf file is shown at full screen at the maximal 112 | * resolution on that connector; the others may show it with a black frame or 113 | * only the central part of it. 114 | */ 115 | 116 | /* 117 | * Virtual terminal switching 118 | * -------------------------- 119 | * 120 | * When the virtual terminal switches out, the original framebuffer-connector 121 | * links have to be restored. They are saved in the cairodrm->prev array before 122 | * changing them. 123 | * 124 | * The cairodrm->curr array contains the framebuffer-connector links as changed 125 | * by the initialization. They are restored when switching the virtual terminal 126 | * in. 127 | * 128 | * - init: 129 | * . cairodrm->prev = framebuffer-connector links 130 | * . change framebuffer-connector links 131 | * . cairodrm->curr = framebuffer-connector links 132 | * - switch terminal out: 133 | * framebuffer-connector links = cairodrm->prev 134 | * - switch terminal in: 135 | * framebuffer-connector links = cairodrm->curr 136 | * 137 | * Also drmDropMaster(), drmSetMaster() are called when switching the terminal 138 | * out and in, but they are only necessary when switching between drm 139 | * applications on the same virtual terminal, and fail unless called by root. 
140 | */ 141 | 142 | #define _FILE_OFFSET_BITS 64 143 | #include 144 | #include 145 | #include 146 | #include 147 | #include 148 | #include 149 | #include 150 | #include 151 | #include 152 | #include 153 | #include 154 | #include "cairodrm.h" 155 | 156 | /* 157 | * connector types 158 | */ 159 | struct { 160 | char *name; unsigned value; 161 | } connectorarray[] = { 162 | {"unknown", DRM_MODE_CONNECTOR_Unknown}, 163 | {"vga", DRM_MODE_CONNECTOR_VGA}, 164 | {"dvii", DRM_MODE_CONNECTOR_DVII}, 165 | {"dvi", DRM_MODE_CONNECTOR_DVII}, 166 | {"dvid", DRM_MODE_CONNECTOR_DVID}, 167 | {"dvi", DRM_MODE_CONNECTOR_DVID}, 168 | {"dvia", DRM_MODE_CONNECTOR_DVIA}, 169 | {"dvi", DRM_MODE_CONNECTOR_DVIA}, 170 | {"composite", DRM_MODE_CONNECTOR_Composite}, 171 | {"svideo", DRM_MODE_CONNECTOR_SVIDEO}, 172 | {"lvds", DRM_MODE_CONNECTOR_LVDS}, 173 | {"component", DRM_MODE_CONNECTOR_Component}, 174 | {"9pindin", DRM_MODE_CONNECTOR_9PinDIN}, 175 | {"displayport", DRM_MODE_CONNECTOR_DisplayPort}, 176 | {"hdmia", DRM_MODE_CONNECTOR_HDMIA}, 177 | {"hdmi", DRM_MODE_CONNECTOR_HDMIA}, 178 | {"hdmib", DRM_MODE_CONNECTOR_HDMIB}, 179 | {"hdmi", DRM_MODE_CONNECTOR_HDMIB}, 180 | {"tv", DRM_MODE_CONNECTOR_TV}, 181 | {"edp", DRM_MODE_CONNECTOR_eDP}, 182 | {"virtual", DRM_MODE_CONNECTOR_VIRTUAL}, 183 | {"dsi", DRM_MODE_CONNECTOR_DSI}, 184 | {NULL, 0} 185 | }; 186 | 187 | /* 188 | * list connector types 189 | */ 190 | void listconnectors(int drm, drmModeResPtr resptr, int *enabled, int modes) { 191 | int i, j; 192 | drmModeConnectorPtr conn; 193 | 194 | for (i = 0; i < resptr->count_connectors; i++) { 195 | if (! 
enabled[i]) 196 | continue; 197 | conn = drmModeGetConnector(drm, resptr->connectors[i]); 198 | printf("connector %d: ", conn->connector_id); 199 | for (j = 0; connectorarray[j].name; j++) 200 | if (connectorarray[j].value == conn->connector_type) { 201 | printf("%s", connectorarray[j].name); 202 | break; 203 | } 204 | if (modes) 205 | for (j = 0; j < conn->count_modes; j++) 206 | printf(" %dx%d", 207 | conn->modes[j].hdisplay, 208 | conn->modes[j].vdisplay); 209 | printf("\n"); 210 | drmModeFreeConnector(conn); 211 | } 212 | } 213 | 214 | /* 215 | * match a connector with a specification 216 | */ 217 | int matchconnector(drmModeConnectorPtr conn, char *spec) { 218 | int i; 219 | if ((unsigned) atoi(spec) == conn->connector_id) 220 | return 1; 221 | 222 | for (i = 0; connectorarray[i].name; i++) 223 | if (! strcmp(spec, connectorarray[i].name) && 224 | connectorarray[i].value == conn->connector_type) 225 | return 1; 226 | return 0; 227 | } 228 | 229 | /* 230 | * parse a connector string 231 | */ 232 | int *enabledconnectors(int drm, drmModeResPtr resptr, char *connectors) { 233 | int *enabled; 234 | char *scan, field[100], c; 235 | drmModeConnectorPtr conn; 236 | int i, res; 237 | 238 | printf("enabled connectors\n"); 239 | enabled = malloc(resptr->count_connectors * sizeof(int)); 240 | for (i = 0; i < resptr->count_connectors; i++) { 241 | if (connectors == NULL || 242 | strstr(connectors, "all") || 243 | ! strcmp(connectors, "list")) 244 | enabled[i] = 1; 245 | else { 246 | conn = drmModeGetConnector(drm, resptr->connectors[i]); 247 | enabled[i] = 0; 248 | for (scan = connectors; scan != NULL; ) { 249 | res = sscanf(scan, "%90[^,]%c", field, &c); 250 | if (res == 1 || (res == 2 && c == ',')) 251 | if (matchconnector(conn, field)) 252 | enabled[i] = 1; 253 | scan = index(scan, ','); 254 | if (scan) 255 | scan++; 256 | } 257 | drmModeFreeConnector(conn); 258 | } 259 | printf("\tconnector %d: %s\n", resptr->connectors[i], 260 | enabled[i] ? 
"enabled" : "disabled"); 261 | } 262 | 263 | return enabled; 264 | } 265 | 266 | /* 267 | * maximal-resolution mode of a connector 268 | */ 269 | int _maximalmode(drmModeConnectorPtr conn, drmModeResPtr resptr) { 270 | int i; 271 | unsigned int w, h; 272 | int max; 273 | 274 | w = resptr->max_width + 1; 275 | h = resptr->max_height + 1; 276 | max = 0; 277 | for (i = 0; i < conn->count_modes; i++) { 278 | printf("\t\t\tmode %2d: %d x %d\n", i, 279 | conn->modes[i].hdisplay, 280 | conn->modes[i].vdisplay); 281 | if (w < conn->modes[i].hdisplay && 282 | h < conn->modes[i].vdisplay) { 283 | w = conn->modes[i].hdisplay; 284 | h = conn->modes[i].vdisplay; 285 | max = i; 286 | } 287 | } 288 | printf("\t\tmode %d: %d x %d\n", max, 289 | conn->modes[max].hdisplay, conn->modes[max].vdisplay); 290 | return max; 291 | } 292 | 293 | /* 294 | * maximal resolution supported by all connectors 295 | */ 296 | int _maximalcommon(int drm, drmModeResPtr resptr, int *enabled, 297 | unsigned *width, unsigned *height) { 298 | drmModeConnectorPtr conn; 299 | int i, j; 300 | 301 | *width = resptr->max_width + 1; 302 | *height = resptr->max_height + 1; 303 | printf("determine maximal common resolution\n"); 304 | for (i = 0; i < resptr->count_connectors; i++) { 305 | conn = drmModeGetConnector(drm, resptr->connectors[i]); 306 | printf("\tconnector %d\n", conn->connector_id); 307 | if (! enabled[i] || conn->connection != DRM_MODE_CONNECTED) { 308 | puts(! enabled[i] ? 
"\t\tdisabled": "\t\tunconnected"); 309 | drmModeFreeConnector(conn); 310 | continue; 311 | } 312 | j = _maximalmode(conn, resptr); 313 | if (*width > conn->modes[j].hdisplay) 314 | *width = conn->modes[j].hdisplay; 315 | if (*height > conn->modes[j].vdisplay) 316 | *height = conn->modes[j].vdisplay; 317 | drmModeFreeConnector(conn); 318 | } 319 | if (*width == resptr->max_width + 1 || 320 | *height == resptr->max_height + 1) { 321 | printf("\tno available modes\n"); 322 | return -1; 323 | } 324 | printf("\tmaximal common size: %dx%d\n", *width, *height); 325 | return 0; 326 | } 327 | 328 | /* 329 | * minimal-resolution mode of a connector of the given size or more 330 | */ 331 | int _minimalmode(drmModeConnectorPtr conn, drmModeResPtr resptr, 332 | int width, int height) { 333 | int i; 334 | unsigned int w, h; 335 | int min; 336 | 337 | w = resptr->max_width + 1; 338 | h = resptr->max_height + 1; 339 | min = 0; 340 | for (i = 0; i < conn->count_modes; i++) { 341 | printf("\t\t\tmode %2d: %d x %d\n", i, 342 | conn->modes[i].hdisplay, 343 | conn->modes[i].vdisplay); 344 | if (conn->modes[i].hdisplay < width) 345 | continue; 346 | if (conn->modes[i].vdisplay < height) 347 | continue; 348 | if (w > conn->modes[i].hdisplay && 349 | h > conn->modes[i].vdisplay) { 350 | w = conn->modes[i].hdisplay; 351 | h = conn->modes[i].vdisplay; 352 | min = i; 353 | } 354 | } 355 | printf("\t\tmode %d: %d x %d\n", min, 356 | conn->modes[min].hdisplay, conn->modes[min].vdisplay); 357 | return min; 358 | } 359 | 360 | /* 361 | * minimal framebuffer size 362 | */ 363 | int _framebuffersize(int drm, drmModeResPtr resptr, int *enabled, 364 | int reqwidth, int reqheight, 365 | unsigned *width, unsigned *height) { 366 | drmModeConnectorPtr conn; 367 | int i, j; 368 | 369 | *width = 0; 370 | *height = 0; 371 | printf("determine framebuffer size\n"); 372 | for (i = 0; i < resptr->count_connectors; i++) { 373 | conn = drmModeGetConnector(drm, resptr->connectors[i]); 374 | printf("\tconnector 
%d\n", conn->connector_id); 375 | if (! enabled[i] || conn->connection != DRM_MODE_CONNECTED) { 376 | puts(! enabled[i] ? "\t\tdisabled": "\t\tunconnected"); 377 | drmModeFreeConnector(conn); 378 | continue; 379 | } 380 | j = _minimalmode(conn, resptr, reqwidth, reqheight); 381 | if (*width < conn->modes[j].hdisplay) 382 | *width = conn->modes[j].hdisplay; 383 | if (*height < conn->modes[j].vdisplay) 384 | *height = conn->modes[j].vdisplay; 385 | drmModeFreeConnector(conn); 386 | } 387 | if (*width == 0 || *height == 0) { 388 | printf("\tno available modes\n"); 389 | return -1; 390 | } 391 | printf("\tframebuffer size: %dx%d\n", *width, *height); 392 | return 0; 393 | } 394 | 395 | /* 396 | * create, add and map a framebuffer 397 | */ 398 | uint32_t _createframebuffer(int drm, int width, int height, int bpp, 399 | uint64_t *size, uint64_t *offset, uint32_t *stride, 400 | uint32_t *handle) { 401 | struct drm_mode_create_dumb createdumb; 402 | struct drm_mode_map_dumb mapdumb; 403 | uint32_t buf_id; 404 | int res; 405 | 406 | printf("create framebuffer\n"); 407 | memset(&createdumb, 0, sizeof(createdumb)); 408 | createdumb.width = width; 409 | createdumb.height = height; 410 | createdumb.bpp = bpp; 411 | createdumb.flags = 0; 412 | printf("\tcreate width=%d height=%d bpp=%d\n", 413 | createdumb.width, createdumb.height, createdumb.bpp); 414 | res = drmIoctl(drm, DRM_IOCTL_MODE_CREATE_DUMB, &createdumb); 415 | printf("\t\tresult: %s\n", strerror(-res)); 416 | printf("\t\tsize: %llu\n", createdumb.size); 417 | printf("\t\thandle: %d\n", createdumb.handle); 418 | 419 | printf("\tadd width=%d height=%d 24 32 pitch=%d handle=%d\n", 420 | createdumb.width, createdumb.height, 421 | createdumb.pitch, createdumb.handle); 422 | res = drmModeAddFB(drm, 423 | createdumb.width, createdumb.height, 424 | 24, 32, // createdumb.bpp, 425 | createdumb.pitch, 426 | createdumb.handle, &buf_id); 427 | printf("\t\tresult: %s\n", strerror(-res)); 428 | printf("\t\tbuf_id: %d\n", buf_id); 
429 | 430 | memset(&mapdumb, 0, sizeof(mapdumb)); 431 | mapdumb.handle = createdumb.handle; 432 | mapdumb.pad = 0; 433 | printf("\tmap handle=%d\n", mapdumb.handle); 434 | res = drmIoctl(drm, DRM_IOCTL_MODE_MAP_DUMB, &mapdumb); 435 | printf("\t\tresult: %s\n", strerror(-res)); 436 | printf("\t\toffset: %llu\n", mapdumb.offset); 437 | 438 | *size = createdumb.size; 439 | *offset = mapdumb.offset; 440 | *stride = createdumb.pitch; 441 | *handle = createdumb.handle; 442 | return buf_id; 443 | } 444 | 445 | /* 446 | * link a framebuffer to the connectors 447 | */ 448 | int _linkframebufferconnectors(int drm, 449 | drmModeResPtr resptr, int *enabled, 450 | drmModeCrtcPtr *prev, drmModeCrtcPtr *curr, 451 | int buf_id, 452 | int width, int height, 453 | int fbwidth, int fbheight, 454 | int *cwidth, int *cheight) { 455 | int i; 456 | drmModeConnectorPtr conn; 457 | drmModeEncoderPtr enc; 458 | int nmode; 459 | int res; 460 | unsigned x, y; 461 | 462 | printf("link framebuffer to connector(s)\n"); 463 | *cwidth = resptr->max_width + 1; 464 | *cheight = resptr->max_height + 1; 465 | for (i = 0; i < resptr->count_connectors; i++) { 466 | prev[i] = NULL; 467 | curr[i] = NULL; 468 | 469 | printf("\tconnector %d\n", resptr->connectors[i]); 470 | if (! 
enabled[i]) { 471 | puts("\t\tdisabled"); 472 | continue; 473 | } 474 | conn = drmModeGetConnector(drm, resptr->connectors[i]); 475 | if (conn->connection != DRM_MODE_CONNECTED) { 476 | puts("\t\tunconnected"); 477 | drmModeFreeConnector(conn); 478 | continue; 479 | } 480 | 481 | if (conn->encoder_id == 0) { 482 | printf("no encoder\n"); 483 | exit(1); 484 | // search for an encoder 485 | // also: save previous and current, 486 | // restore in _restoreframebufferconnectors() 487 | } 488 | enc = drmModeGetEncoder(drm, conn->encoder_id); 489 | 490 | if (enc->crtc_id == 0) { 491 | printf("no crtc\n"); 492 | exit(1); 493 | // search for a crtc 494 | // also: save previous and current, 495 | // restore in _restoreframebufferconnectors() 496 | } 497 | 498 | prev[i] = drmModeGetCrtc(drm, enc->crtc_id); 499 | 500 | nmode = _minimalmode(conn, resptr, width, height); 501 | x = (fbwidth - conn->modes[nmode].hdisplay) / 2; 502 | y = (fbheight - conn->modes[nmode].vdisplay) / 2; 503 | printf("\t\tdisplacement: x=%d y=%d\n", x, y); 504 | res = drmModeSetCrtc(drm, enc->crtc_id, buf_id, x, y, 505 | &conn->connector_id, 1, &conn->modes[nmode]); 506 | printf("\t\tresult: %s\n", strerror(-res)); 507 | 508 | curr[i] = drmModeGetCrtc(drm, enc->crtc_id); 509 | 510 | if (*cwidth > conn->modes[nmode].hdisplay) 511 | *cwidth = conn->modes[nmode].hdisplay; 512 | if (*cheight > conn->modes[nmode].vdisplay) 513 | *cheight = conn->modes[nmode].vdisplay; 514 | 515 | drmModeFreeEncoder(enc); 516 | drmModeFreeConnector(conn); 517 | } 518 | 519 | printf("\tintersection: %d x %d\n", *cwidth, *cheight); 520 | return 0; 521 | } 522 | 523 | /* 524 | * restore saved framebuffers-connectors links 525 | */ 526 | void _restoreframebufferconnectors(int drm, drmModeResPtr resptr, 527 | drmModeCrtcPtr *saved) { 528 | int i; 529 | int res; 530 | 531 | printf("restoring framebuffer-connector links\n"); 532 | for (i = 0; i < resptr->count_connectors; i++) { 533 | printf("\tconnector %d\n", 
resptr->connectors[i]); 534 | if (saved[i] == NULL) { 535 | printf("\t\tnot saved\n"); 536 | continue; 537 | } 538 | // restore previous encoder and its crtc if changed 539 | res = drmModeSetCrtc(drm, saved[i]->crtc_id, 540 | saved[i]->buffer_id, saved[i]->x, saved[i]->y, 541 | &resptr->connectors[i], 1, &saved[i]->mode); 542 | printf("\t\tresult: %s\n", strerror(-res)); 543 | } 544 | } 545 | 546 | /* 547 | * create a cairo context from a drm device 548 | */ 549 | struct cairodrm *cairodrm_init(char *devname, 550 | char *connectors, char *size, int flags) { 551 | unsigned width, height, bpp = 32; 552 | 553 | int drm, res; 554 | uint64_t supportdumb; 555 | drmModeResPtr resptr; 556 | int *enabled, *sizeenabled; 557 | 558 | uint64_t fbsize, offset; 559 | uint32_t pitch, handle; 560 | 561 | uint32_t buf_id; 562 | drmModeCrtcPtr *prev, *curr; 563 | 564 | unsigned char *img, *dbuf, *pos; 565 | unsigned int fbwidth, fbheight; 566 | int stride; 567 | 568 | int cwidth, cheight; 569 | unsigned x, y; 570 | cairo_format_t format; 571 | cairo_surface_t *surface; 572 | cairo_status_t status; 573 | cairo_t *cr; 574 | 575 | struct cairodrm *cairodrm; 576 | 577 | /* check availability */ 578 | 579 | res = drmAvailable(); 580 | if (! ! 
strcmp(devname, "/dev/dri/card0")) { 581 | res = 1; 582 | printf("WARNING: working around a bug in libdrm: "); 583 | printf("cannot check DRM availablity with no card0 \n"); 584 | } 585 | if (res != 1) { 586 | printf("drm not available\n"); 587 | return NULL; 588 | } 589 | 590 | /* open card */ 591 | 592 | drm = open(devname, O_RDWR); 593 | if (drm == -1) { 594 | perror(devname); 595 | return NULL; 596 | } 597 | 598 | res = drmGetCap(drm, DRM_CAP_DUMB_BUFFER, &supportdumb); 599 | // tbd: check res and supportdumb 600 | 601 | /* get resources */ 602 | 603 | resptr = drmModeGetResources(drm); 604 | 605 | /* enabled connectors */ 606 | 607 | enabled = enabledconnectors(drm, resptr, connectors); 608 | 609 | /* list connectors */ 610 | 611 | if ((connectors != NULL && strstr(connectors, "list")) || 612 | (size != NULL && ! strcmp(size, "list"))) { 613 | listconnectors(drm, resptr, enabled, 614 | size != NULL && ! strcmp(size, "list")); 615 | drmModeFreeResources(resptr); 616 | return NULL; 617 | } 618 | 619 | /* maximal shared resolution */ 620 | 621 | if (size == NULL || 2 != sscanf(size, "%dx%d", &width, &height)) { 622 | if (size == NULL) 623 | sizeenabled = enabled; 624 | else { 625 | sizeenabled = enabledconnectors(drm, resptr, size); 626 | flags |= CAIRODRM_EXACT; 627 | } 628 | res = _maximalcommon(drm, resptr, sizeenabled, &width, &height); 629 | if (sizeenabled != enabled) 630 | free(sizeenabled); 631 | if (res) { 632 | drmModeFreeResources(resptr); 633 | return NULL; 634 | } 635 | } 636 | /* size of framebuffer */ 637 | 638 | res = _framebuffersize(drm, resptr, enabled, 639 | width, height, &fbwidth, &fbheight); 640 | if (res) { 641 | drmModeFreeResources(resptr); 642 | return NULL; 643 | } 644 | 645 | /* create dumb framebuffer */ 646 | 647 | buf_id = _createframebuffer(drm, fbwidth, fbheight, bpp, 648 | &fbsize, &offset, &pitch, &handle); 649 | 650 | /* link framebuffer -> connectors */ 651 | 652 | prev = malloc(resptr->count_connectors * 
sizeof(drmModeCrtcPtr)); 653 | curr = malloc(resptr->count_connectors * sizeof(drmModeCrtcPtr)); 654 | res = _linkframebufferconnectors(drm, resptr, enabled, prev, curr, 655 | buf_id, width, height, fbwidth, fbheight, &cwidth, &cheight); 656 | if (flags & CAIRODRM_EXACT) { 657 | cwidth = width; 658 | cheight = height; 659 | } 660 | 661 | /* map framebuffer to memory */ 662 | 663 | printf("mmap size=%" PRIu64 "drm=%d offset=%" PRIu64 "\n", 664 | fbsize, drm, offset); 665 | img = mmap(NULL, fbsize, 666 | PROT_READ | PROT_WRITE, MAP_SHARED, drm, offset); 667 | if (img == MAP_FAILED) { 668 | perror("mmap"); 669 | return NULL; 670 | } 671 | dbuf = (flags & CAIRODRM_DOUBLEBUFFERING) ? malloc(fbsize) : img; 672 | 673 | /* create the cairo context */ 674 | 675 | format = CAIRO_FORMAT_RGB24; // or CAIRO_FORMAT_RGB16_565; 676 | stride = pitch; 677 | x = (fbwidth - cwidth) / 2; 678 | y = (fbheight - cheight) / 2; 679 | pos = dbuf + bpp / 8 * x + stride * y; 680 | surface = cairo_image_surface_create_for_data(pos, format, 681 | cwidth, cheight, stride); 682 | status = cairo_surface_status(surface); 683 | if (status != CAIRO_STATUS_SUCCESS) 684 | printf("WARNING: cairo status=%d\n", status); 685 | cr = cairo_create(surface); 686 | 687 | /* fill structure */ 688 | 689 | cairodrm = malloc(sizeof(struct cairodrm)); 690 | cairodrm->surface = surface; 691 | cairodrm->cr = cr; 692 | cairodrm->width = cwidth; 693 | cairodrm->height = cheight; 694 | cairodrm->dev = drm; 695 | cairodrm->handle = handle; 696 | cairodrm->buf_id = buf_id; 697 | cairodrm->img = img; 698 | cairodrm->dbuf = dbuf; 699 | cairodrm->size = fbsize; 700 | cairodrm->resptr = resptr; 701 | cairodrm->enabled = enabled; 702 | cairodrm->prev = prev; 703 | cairodrm->curr = curr; 704 | return cairodrm; 705 | } 706 | 707 | /* 708 | * switch in and out a virtual terminal 709 | */ 710 | void cairodrm_switcher(struct cairodrm *cairodrm, int inout) { 711 | int res; 712 | 713 | if (inout == 0) { 714 | printf(">>> switch vt 
out\n"); 715 | _restoreframebufferconnectors(cairodrm->dev, 716 | cairodrm->resptr, cairodrm->prev); 717 | res = drmDropMaster(cairodrm->dev); // ok if fails 718 | printf("drmDropMaster: %s\n", strerror(-res)); 719 | } 720 | else { 721 | printf(">>> switch vt in\n"); 722 | res = drmSetMaster(cairodrm->dev); // ok if fails 723 | printf("drmSetMaster: %s\n", strerror(-res)); 724 | _restoreframebufferconnectors(cairodrm->dev, 725 | cairodrm->resptr, cairodrm->curr); 726 | } 727 | } 728 | 729 | /* 730 | * clear the cairo context 731 | */ 732 | void cairodrm_clear(struct cairodrm *cairodrm, 733 | double red, double green, double blue) { 734 | cairo_identity_matrix(cairodrm->cr); 735 | cairo_set_source_rgb(cairodrm->cr, red, green, blue); 736 | cairo_rectangle(cairodrm->cr, 0, 0, cairodrm->width, cairodrm->height); 737 | cairo_fill(cairodrm->cr); 738 | cairo_stroke(cairodrm->cr); 739 | } 740 | 741 | /* 742 | * return whether double buffering is used 743 | */ 744 | int cairodrm_doublebuffering(struct cairodrm *cairodrm) { 745 | return cairodrm->img != cairodrm->dbuf; 746 | } 747 | 748 | /* 749 | * flush the cairo context 750 | */ 751 | void cairodrm_flush(struct cairodrm *cairodrm) { 752 | drmModeClip clip; 753 | int res; 754 | 755 | if (cairodrm_doublebuffering(cairodrm)) 756 | memcpy(cairodrm->img, cairodrm->dbuf, cairodrm->size); 757 | clip.x1 = 0; 758 | clip.y1 = 0; 759 | clip.x2 = cairodrm->width; 760 | clip.y2 = cairodrm->height; 761 | res = drmModeDirtyFB(cairodrm->dev, cairodrm->buf_id, &clip, 1); 762 | printf("drmModeDirtyFB: %s\n", strerror(-res)); 763 | } 764 | 765 | /* 766 | * deallocate and close 767 | */ 768 | void cairodrm_finish(struct cairodrm *cairodrm) { 769 | struct drm_mode_destroy_dumb destroydumb; 770 | int res; 771 | int i; 772 | 773 | cairo_destroy(cairodrm->cr); 774 | cairo_surface_destroy(cairodrm->surface); 775 | munmap(cairodrm->img, cairodrm->size); 776 | if (cairodrm_doublebuffering(cairodrm)) 777 | free(cairodrm->dbuf); 778 | 779 | res = 
drmModeRmFB(cairodrm->dev, cairodrm->buf_id); 780 | printf("remove framebuffer: %s\n", strerror(-res)); 781 | 782 | destroydumb.handle = cairodrm->handle; 783 | res = drmIoctl(cairodrm->dev, 784 | DRM_IOCTL_MODE_DESTROY_DUMB, &destroydumb); 785 | printf("destroy framebuffer handle=%d: %s\n", 786 | destroydumb.handle, strerror(-res)); 787 | 788 | for (i = 0; i < cairodrm->resptr->count_connectors; i++) { 789 | if (cairodrm->prev[i] != NULL) 790 | drmModeFreeCrtc(cairodrm->prev[i]); 791 | if (cairodrm->curr[i] != NULL) 792 | drmModeFreeCrtc(cairodrm->curr[i]); 793 | } 794 | free(cairodrm->prev); 795 | free(cairodrm->curr); 796 | drmModeFreeResources(cairodrm->resptr); 797 | free(cairodrm->enabled); 798 | 799 | close(cairodrm->dev); 800 | free(cairodrm); 801 | } 802 | 803 | --------------------------------------------------------------------------------