├── .gitignore ├── CHANGES.md ├── Makefile ├── Readme.md ├── abstract.pdf ├── bytepdf.ml ├── bytepdf.opam ├── dune └── dune-project /.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | .merlin 3 | *.install 4 | *.bc.js 5 | .gh-pages 6 | data/*.csv 7 | data/*.svg 8 | data/*.png 9 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | # 0.1 2 | 3 | Plop 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | default: all 2 | 3 | all: 4 | dune build 5 | test: 6 | dune runtest 7 | 8 | clean: 9 | dune clean 10 | 11 | .PHONY: all test clean 12 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # A tool to create PDFs that are also OCaml bytecodes 2 | 3 | The `bytepdf` tool allows you to take a PDF `foo.pdf` and an OCaml bytecode `foo.byte` and merge them into a file that is both a valid PDF and a valid bytecode. 4 | 5 | ``` 6 | bytepdf --bc foo.byte --pdf foo.pdf -o bar.pdf 7 | ``` 8 | 9 | The resulting file can both be read as a PDF and executed by the OCaml interpreter: 10 | 11 | ``` 12 | open bar.pdf 13 | ocamlrun bar.pdf 14 | ``` 15 | 16 | Furthermore, if you open the PDF with Acrobat Reader, the PDF will contain the OCaml bytecode as a file attachment. For more details, you can read the help. For an explanation of how this works, consider looking at [this abstract](abstract.pdf). 17 | The only current limitation is that the bytecode should not 18 | have been statically linked with C code. 
open Rresult

(** Re-serialisation of an OCaml bytecode executable, built on OByteLib.

    The functions below write the sections one after the other, then the
    section index, then the version trailer, in that order (see
    [write_oc]). *)
module BC = struct
  open OByteLib

  (** Payload of one bytecode section.  [Unknown] carries raw bytes under
      an arbitrary section name; [smash] uses it to store the tail of the
      PDF inside the bytecode file. *)
  type secdata =
    | CODE of Code.t
    | DLPT of Dlpt.t
    | DLLS of Dlls.t
    | PRIM of Prim.t
    | DATA of Data.t
    | SYMB of Symb.t
    | CRCS of Crcs.t
    | DBUG of Dbug.t
    | Unknown of {
        name : string ;
        data : string ;
      }

  (** [from_bytefile bf] splits a parsed bytefile into
      [(version, extra, sections)].  The [vmpath], [vmarg] and [index]
      fields are discarded; the index is recomputed when writing. *)
  let from_bytefile
      {Bytefile.
        version; vmpath = _; vmarg = _; index = _;
        extra; data; prim; code; dlpt; dlls;
        crcs; dbug; symb } =
    let l = [
      CODE code;
      DLPT dlpt; DLLS dlls;
      PRIM prim; DATA data;
      SYMB symb;
      CRCS crcs; DBUG dbug;
    ]
    in
    version, extra, l

  (** Write the payload of [section] on [oc] and return the section tag
      to record in the index. *)
  let write_secdata_raw v oc section : Section.t = match section with
    | CODE x -> Code.write v oc x ; CODE
    | DLPT x -> Dlpt.write oc x ; DLPT
    | DLLS x -> Dlls.write oc x ; DLLS
    | PRIM x -> Prim.write oc x ; PRIM
    | DATA x -> Data.write oc x ; DATA
    | SYMB x -> Symb.write v oc x ; SYMB
    | CRCS x -> Crcs.write oc x ; CRCS
    | DBUG x -> Dbug.write oc x ; DBUG
    | Unknown { data ; name } -> output_string oc data ; Unknown name

  (** Write one section and return its index entry.  Offset and length
      are measured with [pos_out] around the write. *)
  let write_secdata v oc secdata =
    let offset = pos_out oc in
    let section = write_secdata_raw v oc secdata in
    let length = pos_out oc - offset in
    Index.({ section; offset; length })

  (** Write every section, in list order, and return the index entries in
      the same order. *)
  let write_secdatas v oc secdatas =
    (* Explicit recursion with a [let] binding: each section must hit the
       channel before the next one starts, so the evaluation order is
       spelled out rather than left to a library iterator. *)
    let rec aux = function
      | [] -> []
      | sec :: l ->
        let i = write_secdata v oc sec in
        i :: aux l
    in
    aux secdatas

  (** Write a ["#!<s>\n"] header line. *)
  let writeshebang oc s = Printf.fprintf oc "#!%s\n" s

  (** [write_oc ~oc ~version ?shebang ?extra secdatas] writes, in order:
      the optional shebang line, [extra], the sections, the index and the
      version trailer, then flushes [oc].  The channel is not closed. *)
  let write_oc ~oc ~version ?shebang ?(extra=Extra.empty) secdatas =
    begin match shebang with
      | Some s -> writeshebang oc s
      | None -> ()
    end;
    Extra.write oc extra;
    let indices = write_secdatas version oc secdatas in
    Index.write oc indices ;
    Version.write oc version ;
    flush oc

  (** Same as [write_oc], but creates (or truncates) [filename] with mode
      [0o751] and closes it afterwards.

      @raise Failure if the file cannot be opened or written. *)
  let write ~filename ~version ?shebang ?(extra=Extra.empty) secdatas =
    let oflags = [ Open_wronly; Open_creat; Open_trunc; Open_binary ] in
    let oc =
      try open_out_gen oflags 0o751 filename
      with _ -> failwith @@ Printf.sprintf "fail to open file %S for writting" filename
    in
    try
      write_oc ~oc ~version ?shebang ~extra secdatas ;
      close_out oc ;
    with
    (* [close_out] flushes and may raise.  Inside a handler that would
       replace the error being reported (or fail a second time when the
       [close_out] above is what raised), hence [close_out_noerr]. *)
    | Failure msg ->
      close_out_noerr oc;
      failwith @@ Printf.sprintf "fail to write bytecode file %S (%s)" filename msg
    | exn ->
      close_out_noerr oc;
      failwith @@ Printf.sprintf "fail to write bytecode file %S (internal error: %s)" filename (Printexc.to_string exn)

end

(** Attaching a file to a PDF document through a hidden
    [/FileAttachment] annotation (camlpdf). *)
module PdfAnnot = struct

  (** Shorthand for a PDF dictionary object. *)
  let dict l = Pdf.Dictionary l

  (** [stream d s] is a stream object whose data is [s] and whose
      dictionary is [d] with a ["/Length"] entry prepended. *)
  let stream d s =
    let b = Pdfio.bytes_of_string s in
    let l = Pdfio.bytes_size b in
    Pdf.Stream ( ref ( dict (("/Length", Pdf.Integer l) :: d), Pdf.Got b) )

  (* let get_first_page pdf =
   *   let document_catalog =
   *     try Pdf.lookup_obj pdf pdf.Pdf.root with
   *       Not_found -> raise (Pdf.PDFError "/Root entry is incorrect")
   *   in
   *   let pages =
   *     Pdf.lookup_fail "No or malformed /Pages" pdf "/Pages" document_catalog
   *   in
   *   match Pdf.lookup_direct pdf "/Type" pages with
   *   | Some (Pdf.Name "/Pages") ->
   *     begin match Pdf.lookup_fail "No /Kids in page tree" pdf "/Kids" pages with
   *       | Pdf.Array (p :: _) -> p
   *       | _ -> raise (Pdf.PDFError "Malformed /Kids in page tree node")
   *     end
   *   | _ -> raise (Pdf.PDFError "find_pages: Not a page tree node or page object") *)

  (** [add_annotation pdf page obj] returns [page] with [obj] prepended
      to its ["/Annots"] array; the array is created when absent.

      @raise Pdf.PDFError if ["/Annots"] exists but is not an array. *)
  let add_annotation pdf page obj =
    let rest = page.Pdfpage.rest in
    let new_annots =
      match Pdf.lookup_direct pdf "/Annots" rest with
      | Some (Pdf.Array annotations) ->
        Pdf.Array (obj :: annotations)
      | Some _ -> raise (Pdf.PDFError "Bad annotation dictionary")
      | None -> Pdf.Array [obj]
    in
    {page with rest = Pdf.add_dict_entry rest "/Annots" new_annots}

  (** [attach ~pdf ~filename ~content] embeds [content] as a file named
      [filename] and references it from a hidden file-attachment
      annotation placed on the first page.  Returns the updated
      document.

      @raise Failure if the document has no pages. *)
  let attach ~pdf ~filename ~content =
    let embeddedfile = stream [("/Type", Name "/EmbeddedFile")] content in
    let id_embeddedfile = Pdf.addobj pdf embeddedfile in
    let filespec = dict [
        ("/Type", Name "/Filespec");
        ("/F", String filename);
        ("/EF", dict [("/F", Indirect id_embeddedfile)]);
      ]
    in
    let id_filespec = Pdf.addobj pdf filespec in
    (* Zero-sized rectangle: the annotation takes no room on the page. *)
    let coord =
      Pdf.(Array [Real 0.; Real 0.; Real 0.; Real 0.])
    in
    let annot = dict [
        ("/Type", Name "/Annot");
        ("/Subtype", Name "/FileAttachment");
        ("/FS", Indirect id_filespec);
        ("/Rect", coord);
        ("/F", Integer 2); (* Hidden annotation *)
      ]
    in
    let i = Pdf.addobj pdf annot in
    let annotobj = Pdf.Indirect i in
    (* Register the annotation in the /Annots array of the first page. *)
    let pages = Pdfpage.pages_of_pagetree pdf in
    match pages with
    | [] -> failwith "This PDF has no pages. Impossible to add the annotation."
    | p :: pages ->
      let newpage = add_annotation pdf p annotobj in
      let pdf = Pdfpage.change_pages true pdf (newpage :: pages) in
      Pdf.remove_unreferenced pdf;
      pdf

end

(** Borrowed from containers *)
module IO = struct

  (** [finally_ f x ~h] computes [f x] and runs [h x] exactly once,
      whether [f] returns or raises.  An exception raised by [f] is
      re-raised, with its backtrace, after [h x] has run. *)
  let finally_ f x ~h =
    (* [h x] is deliberately outside any handler that also calls [h]: if
       the cleanup itself raises after [f] succeeded, it must not be run
       a second time. *)
    match f x with
    | res ->
      h x;
      res
    | exception e ->
      let bt = Printexc.get_raw_backtrace () in
      h x;
      Printexc.raise_with_backtrace e bt

  (** [with_in filename f] opens [filename] for reading, applies [f] to
      the channel and closes it afterwards, even if [f] raises. *)
  let with_in ?(mode=0o644) ?(flags=[Open_text]) filename f =
    let ic = open_in_gen (Open_rdonly::flags) mode filename in
    finally_ f ic ~h:close_in

  (** [read_all ic] reads [ic] until end of input and returns everything
      read as a string. *)
  let read_all ic =
    let buf = ref (Bytes.create 1024) in
    let len = ref 0 in
    try
      while true do
        (* resize: a full buffer is extended by its current length *)
        if !len = Bytes.length !buf then (
          buf := Bytes.extend !buf 0 !len;
        );
        assert (Bytes.length !buf > !len);
        let n = input ic !buf !len (Bytes.length !buf - !len) in
        len := !len + n;
        if n = 0 then raise Exit; (* exhausted *)
      done;
      assert false (* never reached*)
    with Exit ->
      Bytes.sub_string !buf 0 !len
end

(** [smash filepdf filebc fileout] writes to [fileout] a file that is
    both the PDF [filepdf] (with the bytecode attached) and the bytecode
    program [filebc].

    The bytecode is first serialised with a [#!ocamlrun] header and
    embedded as a stream in the PDF.  The serialised PDF is then cut in
    three: everything up to and including that header, the embedded
    sections, and the remainder.  The output is a bytecode file whose
    leading data is the first part, followed by the real sections and a
    final ["XPDF"] section holding the remainder.

    @raise Failure if the embedded bytecode cannot be located in the
    serialised PDF, or if the output file cannot be written. *)
let smash filepdf filebc fileout =
  let version, extra, bc =
    BC.from_bytefile @@ OByteLib.Bytefile.read filebc
  in
  let shebang = "ocamlrun" in

  (* Serialise the bytecode through a temporary file and read it back. *)
  let bc_string =
    R.get_ok @@
    Bos.OS.File.with_tmp_oc "bytepdf%s"
      (fun f oc bc ->
         BC.write_oc ~oc ~shebang ~extra ~version bc;
         R.get_ok @@ Bos.OS.File.read f
      )
      bc
  in
  (* let original_bc_len = String.length bc_string in *)

  let pdf = Pdfread.pdf_of_file None None filepdf in
  let pdf = PdfAnnot.attach ~pdf ~filename:filebc ~content:bc_string in

  let pdf_string =
    let oc, br = Pdfio.input_output_of_bytes 16 in
    Pdfwrite.pdf_to_output
      ~preserve_objstm:false ~generate_objstm:false ~compress_objstm:false
      None false pdf oc;
    let b = Pdfio.extract_bytes_from_input_output oc br in
    Pdfio.string_of_bytes b
  in
  let pdf_len = String.length pdf_string in

  let search_token = "#!" ^ shebang ^ "\n" in
  let search_len = String.length search_token in

  (* First byte after the shebang line of the embedded bytecode. *)
  let bc_start =
    match Astring.String.find_sub ~sub:search_token pdf_string with
    | None ->
      failwith "bytepdf: cannot find the embedded bytecode header in the generated PDF"
    | Some i -> search_len + i
  in
  (* Start of the embedded section index.  The arithmetic below assumes
     8 bytes per index entry followed by a 4-byte entry count, right
     before the magic string. *)
  let bc_end =
    let sub = OByteLib.Version.to_magic version ^ "\nendstream" in
    match Astring.String.find_sub ~rev:true ~sub pdf_string with
    | None ->
      failwith "bytepdf: cannot find the embedded bytecode trailer in the generated PDF"
    | Some i -> i - 8 * List.length bc - 4
  in
  if bc_end < bc_start || bc_end > pdf_len then
    failwith "bytepdf: inconsistent bytecode boundaries in the generated PDF";

  (* let offset = pdf_len - bc_end in *)
  (* Format.printf "offset: %i@.%s@." offset
   *   (String.sub pdf_string bc_end (4 + 8 * List.length bc))
   * ; *)

  let pdf_prefix = String.sub pdf_string 0 bc_start in
  let bc_string2 = String.sub pdf_string bc_start (bc_end - bc_start) in
  (* The bytes between the two markers must be the ones we serialised;
     otherwise the PDF writer altered the stream. *)
  if Astring.String.find_sub ~sub:bc_string2 bc_string = None then
    failwith "bytepdf: the bytecode embedded in the PDF does not match the original";
  let xpdf = String.sub pdf_string bc_end (pdf_len - bc_end) in

  (* [bc_string2] starts with the bytefile's own [extra] data, because
     [write_oc] emits it right after the shebang.  It therefore has to be
     written back between the PDF prefix and the sections, or the output
     would be shorter than [pdf_string] and the PDF half would be
     corrupted.  With an empty [extra] this is just [pdf_prefix]. *)
  BC.write
    ~filename:fileout
    ~version
    ?shebang:None
    ~extra:(pdf_prefix ^ extra)
    (bc @ [BC.Unknown {name="XPDF"; data=xpdf}]);

  (* ignore (extra, xpdf); *)
  (* let oflags = [ Open_wronly; Open_creat; Open_trunc; Open_binary ] in
   * let oc =
   *   try open_out_gen oflags 0o751 fileout
   *   with _ -> failwith @@ Printf.sprintf "fail to open file %S for writing" fileout
   * in
   * output_string oc pdf_string; *)

  ()

(** Command-line interface: [bytepdf --bc BC --pdf PDF -o FILE]. *)
let term =
  let open Cmdliner in
  let bytecode =
    let doc =
      Arg.info ~docv:"BC" ~doc:"The OCaml bytecode file to be included in the resulting polyglot file." ["bytecode";"bc"]
    in
    Arg.(required & opt (some non_dir_file) None doc)
  in
  let pdf =
    let doc =
      Arg.info ~docv:"PDF" ~doc:"The PDF file to be included in the resulting polyglot file." ["pdf"]
    in
    Arg.(required & opt (some non_dir_file) None doc)
  in
  let output =
    let doc =
      Arg.info ~docv:"FILE" ~doc:"Output file" ["o"]
    in
    Arg.(required & opt (some string) None doc)
  in
  let info =
    Cmd.info "bytepdf"
      ~doc:"Merge an OCaml bytecode and a PDF into a file that is both."
  in
  let t =
    (* Argument order follows [smash]: PDF, bytecode, output. *)
    Term.(const smash $ pdf $ bytecode $ output)
  in
  Cmd.v info t

let () = exit @@ Cmdliner.Cmd.eval term
25 | The only current limitation is that the bytecode should not 26 | have been statically linked with C code.""" 27 | 28 | depends: [ 29 | "ocaml" {>= "4.10" & < "5.0" } 30 | "dune" {>= "1.1"} 31 | "containers" {>= "0.12"} 32 | "bos" 33 | "cmdliner" 34 | "obytelib" {>= "1.6"} 35 | "camlpdf" {>= "2.6"} 36 | ] 37 | build: [ 38 | ["dune" "subst"] {dev} 39 | ["dune" "build" "-p" name "-j" jobs] 40 | ] 41 | -------------------------------------------------------------------------------- /dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name bytepdf) 3 | (public_name bytepdf) 4 | (modules Bytepdf) 5 | (libraries obytelib camlpdf cmdliner bos astring)) 6 | 7 | -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 1.1) 2 | (name bytepdf) 3 | --------------------------------------------------------------------------------