├── README.md
├── LICENSE
├── pdfpasswd
    └── main.go
├── ps.go
├── text.go
├── lex.go
├── page.go
└── read.go


/README.md:
--------------------------------------------------------------------------------
1 | go get rsc.io/pdf
2 | 
3 | http://godoc.org/rsc.io/pdf
4 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2009 The Go Authors. All rights reserved.
 2 | 
 3 | Redistribution and use in source and binary forms, with or without
 4 | modification, are permitted provided that the following conditions are
 5 | met:
 6 | 
 7 |    * Redistributions of source code must retain the above copyright
 8 | notice, this list of conditions and the following disclaimer.
 9 |    * Redistributions in binary form must reproduce the above
10 | copyright notice, this list of conditions and the following disclaimer
11 | in the documentation and/or other materials provided with the
12 | distribution.
13 |    * Neither the name of Google Inc. nor the names of its
14 | contributors may be used to endorse or promote products derived from
15 | this software without specific prior written permission.
16 | 
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 


--------------------------------------------------------------------------------
/pdfpasswd/main.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 The Go Authors.  All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | // Pdfpasswd searches for the password for an encrypted PDF
  6 | // by trying all strings over a given alphabet up to a given length.
  7 | package main
  8 | 
  9 | import (
 10 | 	"flag"
 11 | 	"fmt"
 12 | 	"log"
 13 | 	"os"
 14 | 
 15 | 	"rsc.io/pdf"
 16 | )
 17 | 
 18 | var (
 19 | 	alphabet  = flag.String("a", "0123456789", "alphabet")
 20 | 	maxLength = flag.Int("m", 4, "max length")
 21 | )
 22 | 
 23 | func usage() {
 24 | 	fmt.Fprintf(os.Stderr, "usage: pdfpasswd [-a alphabet] [-m maxlength] file\n")
 25 | 	os.Exit(2)
 26 | }
 27 | 
 28 | func main() {
 29 | 	log.SetFlags(0)
 30 | 	log.SetPrefix("pdfpasswd: ")
 31 | 
 32 | 	flag.Usage = usage
 33 | 	flag.Parse()
 34 | 	if flag.NArg() != 1 {
 35 | 		usage()
 36 | 	}
 37 | 
 38 | 	f, err := os.Open(flag.Arg(0))
 39 | 	if err != nil {
 40 | 		log.Fatal(err)
 41 | 	}
 42 | 
 43 | 	last := ""
 44 | 	alpha := *alphabet
 45 | 	ctr := make([]int, *maxLength)
 46 | 	pw := func() string {
 47 | 		inc(ctr, len(alpha)+1)
 48 | 		for !valid(ctr) {
 49 | 			inc(ctr, len(alpha)+1)
 50 | 		}
 51 | 		if done(ctr) {
 52 | 			return ""
 53 | 		}
 54 | 		buf := make([]byte, len(ctr))
 55 | 		var i int
 56 | 		for i = 0; i < len(buf); i++ {
 57 | 			if ctr[i] == 0 {
 58 | 				break
 59 | 			}
 60 | 			buf[i] = alpha[ctr[i]-1]
 61 | 		}
 62 | 		last = string(buf[:i])
 63 | 		println(last)
 64 | 		return last
 65 | 	}
 66 | 	st, err := f.Stat()
 67 | 	if err != nil {
 68 | 		log.Fatal(err)
 69 | 	}
 70 | 	_, err = pdf.NewReaderEncrypted(f, st.Size(), pw)
 71 | 	if err != nil {
 72 | 		if err == pdf.ErrInvalidPassword {
 73 | 			log.Fatal("password not found")
 74 | 		}
 75 | 		log.Fatal("reading pdf: %v", err)
 76 | 	}
 77 | 	fmt.Printf("password: %q\n", last)
 78 | }
 79 | 
 80 | func inc(ctr []int, n int) {
 81 | 	for i := 0; i < len(ctr); i++ {
 82 | 		ctr[i]++
 83 | 		if ctr[i] < n {
 84 | 			break
 85 | 		}
 86 | 		ctr[i] = 0
 87 | 	}
 88 | }
 89 | 
 90 | func done(ctr []int) bool {
 91 | 	for _, x := range ctr {
 92 | 		if x != 0 {
 93 | 			return false
 94 | 		}
 95 | 	}
 96 | 	return true
 97 | }
 98 | 
 99 | func valid(ctr []int) bool {
100 | 	i := len(ctr)
101 | 	for i > 0 && ctr[i-1] == 0 {
102 | 		i--
103 | 	}
104 | 	for i--; i >= 0; i-- {
105 | 		if ctr[i] == 0 {
106 | 			return false
107 | 		}
108 | 	}
109 | 	return true
110 | }
111 | 


--------------------------------------------------------------------------------
/ps.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 The Go Authors.  All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | package pdf
  6 | 
  7 | import (
  8 | 	"fmt"
  9 | 	"io"
 10 | )
 11 | 
// A Stack represents a stack of values.
// The zero Stack is empty and ready to use.
type Stack struct {
	stack []Value // elements in push order; the top of the stack is the last element
}
 16 | 
 17 | func (stk *Stack) Len() int {
 18 | 	return len(stk.stack)
 19 | }
 20 | 
 21 | func (stk *Stack) Push(v Value) {
 22 | 	stk.stack = append(stk.stack, v)
 23 | }
 24 | 
 25 | func (stk *Stack) Pop() Value {
 26 | 	n := len(stk.stack)
 27 | 	if n == 0 {
 28 | 		return Value{}
 29 | 	}
 30 | 	v := stk.stack[n-1]
 31 | 	stk.stack[n-1] = Value{}
 32 | 	stk.stack = stk.stack[:n-1]
 33 | 	return v
 34 | }
 35 | 
 36 | func newDict() Value {
 37 | 	return Value{nil, objptr{}, make(dict)}
 38 | }
 39 | 
// Interpret interprets the content in a stream as a basic PostScript program,
// pushing values onto a stack and then calling the do function to execute
// operators. The do function may push or pop values from the stack as needed
// to implement op.
//
// Interpret handles the operators "dict", "currentdict", "begin", "end", "def", and "pop" itself.
// A keyword that names an entry in one of the dictionaries opened with
// "begin" (searched innermost first) pushes that entry's value instead of
// being passed to do.
//
// Interpret is not a full-blown PostScript interpreter. Its job is to handle the
// very limited PostScript found in certain supporting file formats embedded
// in PDF files, such as cmap files that describe the mapping from font code
// points to Unicode code points.
//
// There is no support for executable blocks, among other limitations.
//
// Interpret panics on programs it cannot handle: "currentdict" or "def"
// with no open dictionary, "begin" of a non-dictionary, "end" without a
// matching "begin", and "def" whose key is not a name.
func Interpret(strm Value, do func(stk *Stack, op string)) {
	rd := strm.Reader()
	b := newBuffer(rd, 0)
	// Running off the end of the stream is the normal way to finish, and
	// "N G R" references and stream bodies are not parsed here.
	b.allowEOF = true
	b.allowObjptr = false
	b.allowStream = false
	var stk Stack
	var dicts []dict // dictionaries opened by "begin"; the current one is last
Reading:
	for {
		tok := b.readToken()
		if tok == io.EOF {
			break
		}
		if kw, ok := tok.(keyword); ok {
			switch kw {
			case "null", "[", "]", "<<", ">>":
				// Not an operator: leave the switch so the token is pushed
				// back and parsed as an object below.
				break
			default:
				// Not a built-in operator. A definition in an open
				// dictionary takes precedence over the do callback.
				for i := len(dicts) - 1; i >= 0; i-- {
					if v, ok := dicts[i][name(kw)]; ok {
						stk.Push(Value{nil, objptr{}, v})
						continue Reading
					}
				}
				do(&stk, string(kw))
				continue
			case "dict":
				// The operand popped here is discarded; the new dictionary
				// always starts empty.
				stk.Pop()
				stk.Push(Value{nil, objptr{}, make(dict)})
				continue
			case "currentdict":
				if len(dicts) == 0 {
					panic("no current dictionary")
				}
				stk.Push(Value{nil, objptr{}, dicts[len(dicts)-1]})
				continue
			case "begin":
				d := stk.Pop()
				if d.Kind() != Dict {
					panic("cannot begin non-dict")
				}
				dicts = append(dicts, d.data.(dict))
				continue
			case "end":
				if len(dicts) <= 0 {
					panic("mismatched begin/end")
				}
				dicts = dicts[:len(dicts)-1]
				continue
			case "def":
				if len(dicts) <= 0 {
					panic("def without open dict")
				}
				// Operands are pushed as key then value, so the value
				// comes off the stack first.
				val := stk.Pop()
				key, ok := stk.Pop().data.(name)
				if !ok {
					panic("def of non-name")
				}
				dicts[len(dicts)-1][key] = val.data
				continue
			case "pop":
				stk.Pop()
				continue
			}
		}
		// Anything else is data: push the token back and read it as a
		// complete object, then push the object on the stack.
		b.unreadToken(tok)
		obj := b.readObject()
		stk.Push(Value{nil, objptr{}, obj})
	}
}
125 | 
// A seqReader gives a sequential io.Reader a ReadAt method. It accepts
// only reads that begin exactly where the previous one ended.
type seqReader struct {
	rd     io.Reader // underlying sequential source
	offset int64     // offset the next ReadAt call must use
}

// ReadAt fills buf from the underlying reader using io.ReadFull, provided
// offset equals the number of bytes already consumed. Any other offset is
// rejected with an error and nothing is read.
func (r *seqReader) ReadAt(buf []byte, offset int64) (int, error) {
	if r.offset == offset {
		n, err := io.ReadFull(r.rd, buf)
		r.offset += int64(n)
		return n, err
	}
	return 0, fmt.Errorf("non-sequential read of stream")
}
139 | 


--------------------------------------------------------------------------------
/text.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 The Go Authors.  All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | package pdf
  6 | 
  7 | import (
  8 | 	"unicode"
  9 | 	"unicode/utf16"
 10 | )
 11 | 
// noRune is the placeholder stored in the encoding tables for byte values
// that have no Unicode mapping.
const noRune = unicode.ReplacementChar
 13 | 
 14 | func isPDFDocEncoded(s string) bool {
 15 | 	if isUTF16(s) {
 16 | 		return false
 17 | 	}
 18 | 	for i := 0; i < len(s); i++ {
 19 | 		if pdfDocEncoding[s[i]] == noRune {
 20 | 			return false
 21 | 		}
 22 | 	}
 23 | 	return true
 24 | }
 25 | 
 26 | func pdfDocDecode(s string) string {
 27 | 	for i := 0; i < len(s); i++ {
 28 | 		if s[i] >= 0x80 || pdfDocEncoding[s[i]] != rune(s[i]) {
 29 | 			goto Decode
 30 | 		}
 31 | 	}
 32 | 	return s
 33 | 
 34 | Decode:
 35 | 	r := make([]rune, len(s))
 36 | 	for i := 0; i < len(s); i++ {
 37 | 		r[i] = pdfDocEncoding[s[i]]
 38 | 	}
 39 | 	return string(r)
 40 | }
 41 | 
 42 | func isUTF16(s string) bool {
 43 | 	return len(s) >= 2 && s[0] == 0xfe && s[1] == 0xff && len(s)%2 == 0
 44 | }
 45 | 
 46 | func utf16Decode(s string) string {
 47 | 	var u []uint16
 48 | 	for i := 0; i < len(s); i += 2 {
 49 | 		u = append(u, uint16(s[i])<<8|uint16(s[i+1]))
 50 | 	}
 51 | 	return string(utf16.Decode(u))
 52 | }
 53 | 
// pdfDocEncoding maps each byte value to the Unicode code point it
// represents in PDFDocEncoding. Entries equal to noRune have no mapping.
// See PDF 32000-1:2008, Table D.2.
var pdfDocEncoding = [256]rune{
	noRune, noRune, noRune, noRune, noRune, noRune, noRune, noRune,
	noRune, 0x0009, 0x000a, noRune, noRune, 0x000d, noRune, noRune,
	noRune, noRune, noRune, noRune, noRune, noRune, noRune, noRune,
	0x02d8, 0x02c7, 0x02c6, 0x02d9, 0x02dd, 0x02db, 0x02da, 0x02dc,
	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
	0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
	0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
	0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
	0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
	0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
	0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
	0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, noRune,
	0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044,
	0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018,
	0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160,
	0x0178, 0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, noRune,
	0x20ac, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
	0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, noRune, 0x00ae, 0x00af,
	0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
	0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
	0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
	0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
	0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
	0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
	0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
	0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
	0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
	0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
}
 89 | 
// winAnsiEncoding maps each byte value to a Unicode code point for the
// WinAnsi encoding. Bytes 0x00-0x7f and 0xa0-0xff map to themselves;
// entries equal to noRune have no mapping.
var winAnsiEncoding = [256]rune{
	0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
	0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
	0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
	0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
	0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
	0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
	0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
	0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
	0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
	0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
	0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
	0x20ac, noRune, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
	0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, noRune, 0x017d, noRune,
	noRune, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
	0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, noRune, 0x017e, 0x0178,
	0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
	0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
	0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
	0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
	0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
	0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
	0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
	0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
	0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
	0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
	0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
	0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
}
124 | 
// macRomanEncoding maps each byte value to a Unicode code point for the
// MacRoman encoding. Bytes 0x00-0x7f map to themselves; every byte has a
// mapping.
var macRomanEncoding = [256]rune{
	0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
	0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
	0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
	0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
	0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
	0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
	0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
	0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
	0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
	0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
	0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
	0x00c4, 0x00c5, 0x00c7, 0x00c9, 0x00d1, 0x00d6, 0x00dc, 0x00e1,
	0x00e0, 0x00e2, 0x00e4, 0x00e3, 0x00e5, 0x00e7, 0x00e9, 0x00e8,
	0x00ea, 0x00eb, 0x00ed, 0x00ec, 0x00ee, 0x00ef, 0x00f1, 0x00f3,
	0x00f2, 0x00f4, 0x00f6, 0x00f5, 0x00fa, 0x00f9, 0x00fb, 0x00fc,
	0x2020, 0x00b0, 0x00a2, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x00df,
	0x00ae, 0x00a9, 0x2122, 0x00b4, 0x00a8, 0x2260, 0x00c6, 0x00d8,
	0x221e, 0x00b1, 0x2264, 0x2265, 0x00a5, 0x00b5, 0x2202, 0x2211,
	0x220f, 0x03c0, 0x222b, 0x00aa, 0x00ba, 0x03a9, 0x00e6, 0x00f8,
	0x00bf, 0x00a1, 0x00ac, 0x221a, 0x0192, 0x2248, 0x2206, 0x00ab,
	0x00bb, 0x2026, 0x00a0, 0x00c0, 0x00c3, 0x00d5, 0x0152, 0x0153,
	0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0x25ca,
	0x00ff, 0x0178, 0x2044, 0x20ac, 0x2039, 0x203a, 0xfb01, 0xfb02,
	0x2021, 0x00b7, 0x201a, 0x201e, 0x2030, 0x00c2, 0x00ca, 0x00c1,
	0x00cb, 0x00c8, 0x00cd, 0x00ce, 0x00cf, 0x00cc, 0x00d3, 0x00d4,
	0xf8ff, 0x00d2, 0x00da, 0x00db, 0x00d9, 0x0131, 0x02c6, 0x02dc,
	0x00af, 0x02d8, 0x02d9, 0x02da, 0x00b8, 0x02dd, 0x02db, 0x02c7,
}
159 | 


--------------------------------------------------------------------------------
/lex.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 The Go Authors.  All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | // Reading of PDF tokens and objects from a raw byte stream.
  6 | 
  7 | package pdf
  8 | 
  9 | import (
 10 | 	"fmt"
 11 | 	"io"
 12 | 	"strconv"
 13 | )
 14 | 
 15 | // A token is a PDF token in the input stream, one of the following Go types:
 16 | //
 17 | //	bool, a PDF boolean
 18 | //	int64, a PDF integer
 19 | //	float64, a PDF real
 20 | //	string, a PDF string literal
 21 | //	keyword, a PDF keyword
 22 | //	name, a PDF name without the leading slash
 23 | //
 24 | type token interface{}
 25 | 
 26 | // A name is a PDF name, without the leading slash.
 27 | type name string
 28 | 
 29 | // A keyword is a PDF keyword.
 30 | // Delimiter tokens used in higher-level syntax,
 31 | // such as "<<", ">>", "[", "]", "{", "}", are also treated as keywords.
 32 | type keyword string
 33 | 
// A buffer holds buffered input bytes from the PDF file, together with
// the lexer state and options used while reading tokens and objects.
type buffer struct {
	r           io.Reader // source of data
	buf         []byte    // buffered data
	pos         int       // read index in buf
	offset      int64     // offset at end of buf; aka offset of next read
	tmp         []byte    // scratch space for accumulating token
	unread      []token   // tokens pushed back by unreadToken; the most recently unread is returned first
	allowEOF    bool      // if set, reload records io.EOF in eof instead of reporting an error
	allowObjptr bool      // if set, readObject recognizes "N G R" references and "N G obj" definitions
	allowStream bool      // if set, readDict looks for a "stream" keyword after a dictionary
	eof         bool      // set by reload once the source has returned io.EOF (only when allowEOF is set)
	key         []byte    // if non-nil, readObject decrypts string tokens with this key
	useAES      bool      // passed to decryptString along with key
	objptr      objptr    // indirect object currently being read; zero outside any "obj"
}
 50 | 
 51 | // newBuffer returns a new buffer reading from r at the given offset.
 52 | func newBuffer(r io.Reader, offset int64) *buffer {
 53 | 	return &buffer{
 54 | 		r:           r,
 55 | 		offset:      offset,
 56 | 		buf:         make([]byte, 0, 4096),
 57 | 		allowObjptr: true,
 58 | 		allowStream: true,
 59 | 	}
 60 | }
 61 | 
 62 | func (b *buffer) seek(offset int64) {
 63 | 	b.offset = offset
 64 | 	b.buf = b.buf[:0]
 65 | 	b.pos = 0
 66 | 	b.unread = b.unread[:0]
 67 | }
 68 | 
 69 | func (b *buffer) readByte() byte {
 70 | 	if b.pos >= len(b.buf) {
 71 | 		b.reload()
 72 | 		if b.pos >= len(b.buf) {
 73 | 			return '\n'
 74 | 		}
 75 | 	}
 76 | 	c := b.buf[b.pos]
 77 | 	b.pos++
 78 | 	return c
 79 | }
 80 | 
 81 | func (b *buffer) errorf(format string, args ...interface{}) {
 82 | 	panic(fmt.Errorf(format, args...))
 83 | }
 84 | 
 85 | func (b *buffer) reload() bool {
 86 | 	n := cap(b.buf) - int(b.offset%int64(cap(b.buf)))
 87 | 	n, err := b.r.Read(b.buf[:n])
 88 | 	if n == 0 && err != nil {
 89 | 		b.buf = b.buf[:0]
 90 | 		b.pos = 0
 91 | 		if b.allowEOF && err == io.EOF {
 92 | 			b.eof = true
 93 | 			return false
 94 | 		}
 95 | 		b.errorf("malformed PDF: reading at offset %d: %v", b.offset, err)
 96 | 		return false
 97 | 	}
 98 | 	b.offset += int64(n)
 99 | 	b.buf = b.buf[:n]
100 | 	b.pos = 0
101 | 	return true
102 | }
103 | 
104 | func (b *buffer) seekForward(offset int64) {
105 | 	for b.offset < offset {
106 | 		if !b.reload() {
107 | 			return
108 | 		}
109 | 	}
110 | 	b.pos = len(b.buf) - int(b.offset-offset)
111 | }
112 | 
113 | func (b *buffer) readOffset() int64 {
114 | 	return b.offset - int64(len(b.buf)) + int64(b.pos)
115 | }
116 | 
117 | func (b *buffer) unreadByte() {
118 | 	if b.pos > 0 {
119 | 		b.pos--
120 | 	}
121 | }
122 | 
123 | func (b *buffer) unreadToken(t token) {
124 | 	b.unread = append(b.unread, t)
125 | }
126 | 
// readToken returns the next token from the input. Tokens pushed back
// with unreadToken are returned first, most recently unread first.
//
// Whitespace and '%' comments are skipped. When whitespace is seen after
// the eof flag has been set, readToken returns io.EOF as the token.
// An unexpected delimiter is reported through errorf.
func (b *buffer) readToken() token {
	if n := len(b.unread); n > 0 {
		t := b.unread[n-1]
		b.unread = b.unread[:n-1]
		return t
	}

	// Find first non-space, non-comment byte.
	c := b.readByte()
	for {
		if isSpace(c) {
			// readByte returns '\n' once the input is exhausted, so
			// end of input shows up here as whitespace with eof set.
			if b.eof {
				return io.EOF
			}
			c = b.readByte()
		} else if c == '%' {
			// A comment runs to the end of the line.
			for c != '\r' && c != '\n' {
				c = b.readByte()
			}
		} else {
			break
		}
	}

	switch c {
	case '<':
		// "<<" opens a dictionary; a single '<' opens a hex string.
		if b.readByte() == '<' {
			return keyword("<<")
		}
		b.unreadByte()
		return b.readHexString()

	case '(':
		return b.readLiteralString()

	case '[', ']', '{', '}':
		return keyword(string(c))

	case '/':
		return b.readName()

	case '>':
		if b.readByte() == '>' {
			return keyword(">>")
		}
		// A lone '>' falls through and is rejected as a delimiter below.
		b.unreadByte()
		fallthrough

	default:
		if isDelim(c) {
			b.errorf("unexpected delimiter %#q", rune(c))
			return nil
		}
		// Put c back so readKeyword sees the whole word.
		b.unreadByte()
		return b.readKeyword()
	}
}
184 | 
185 | func (b *buffer) readHexString() token {
186 | 	tmp := b.tmp[:0]
187 | 	for {
188 | 	Loop:
189 | 		c := b.readByte()
190 | 		if c == '>' {
191 | 			break
192 | 		}
193 | 		if isSpace(c) {
194 | 			goto Loop
195 | 		}
196 | 	Loop2:
197 | 		c2 := b.readByte()
198 | 		if isSpace(c2) {
199 | 			goto Loop2
200 | 		}
201 | 		x := unhex(c)<<4 | unhex(c2)
202 | 		if x < 0 {
203 | 			b.errorf("malformed hex string %c %c %s", c, c2, b.buf[b.pos:])
204 | 			break
205 | 		}
206 | 		tmp = append(tmp, byte(x))
207 | 	}
208 | 	b.tmp = tmp
209 | 	return string(tmp)
210 | }
211 | 
// unhex returns the value (0-15) of the hexadecimal digit b, accepting
// both upper and lower case, or -1 if b is not a hexadecimal digit.
func unhex(b byte) int {
	if b >= '0' && b <= '9' {
		return int(b - '0')
	}
	if b >= 'a' && b <= 'f' {
		return int(b-'a') + 10
	}
	if b >= 'A' && b <= 'F' {
		return int(b-'A') + 10
	}
	return -1
}
223 | 
224 | func (b *buffer) readLiteralString() token {
225 | 	tmp := b.tmp[:0]
226 | 	depth := 1
227 | Loop:
228 | 	for {
229 | 		c := b.readByte()
230 | 		switch c {
231 | 		default:
232 | 			tmp = append(tmp, c)
233 | 		case '(':
234 | 			depth++
235 | 			tmp = append(tmp, c)
236 | 		case ')':
237 | 			if depth--; depth == 0 {
238 | 				break Loop
239 | 			}
240 | 			tmp = append(tmp, c)
241 | 		case '\\':
242 | 			switch c = b.readByte(); c {
243 | 			default:
244 | 				b.errorf("invalid escape sequence \\%c", c)
245 | 				tmp = append(tmp, '\\', c)
246 | 			case 'n':
247 | 				tmp = append(tmp, '\n')
248 | 			case 'r':
249 | 				tmp = append(tmp, '\r')
250 | 			case 'b':
251 | 				tmp = append(tmp, '\b')
252 | 			case 't':
253 | 				tmp = append(tmp, '\t')
254 | 			case 'f':
255 | 				tmp = append(tmp, '\f')
256 | 			case '(', ')', '\\':
257 | 				tmp = append(tmp, c)
258 | 			case '\r':
259 | 				if b.readByte() != '\n' {
260 | 					b.unreadByte()
261 | 				}
262 | 				fallthrough
263 | 			case '\n':
264 | 				// no append
265 | 			case '0', '1', '2', '3', '4', '5', '6', '7':
266 | 				x := int(c - '0')
267 | 				for i := 0; i < 2; i++ {
268 | 					c = b.readByte()
269 | 					if c < '0' || c > '7' {
270 | 						b.unreadByte()
271 | 						break
272 | 					}
273 | 					x = x*8 + int(c-'0')
274 | 				}
275 | 				if x > 255 {
276 | 					b.errorf("invalid octal escape \\%03o", x)
277 | 				}
278 | 				tmp = append(tmp, byte(x))
279 | 			}
280 | 		}
281 | 	}
282 | 	b.tmp = tmp
283 | 	return string(tmp)
284 | }
285 | 
286 | func (b *buffer) readName() token {
287 | 	tmp := b.tmp[:0]
288 | 	for {
289 | 		c := b.readByte()
290 | 		if isDelim(c) || isSpace(c) {
291 | 			b.unreadByte()
292 | 			break
293 | 		}
294 | 		if c == '#' {
295 | 			x := unhex(b.readByte())<<4 | unhex(b.readByte())
296 | 			if x < 0 {
297 | 				b.errorf("malformed name")
298 | 			}
299 | 			tmp = append(tmp, byte(x))
300 | 			continue
301 | 		}
302 | 		tmp = append(tmp, c)
303 | 	}
304 | 	b.tmp = tmp
305 | 	return name(string(tmp))
306 | }
307 | 
308 | func (b *buffer) readKeyword() token {
309 | 	tmp := b.tmp[:0]
310 | 	for {
311 | 		c := b.readByte()
312 | 		if isDelim(c) || isSpace(c) {
313 | 			b.unreadByte()
314 | 			break
315 | 		}
316 | 		tmp = append(tmp, c)
317 | 	}
318 | 	b.tmp = tmp
319 | 	s := string(tmp)
320 | 	switch {
321 | 	case s == "true":
322 | 		return true
323 | 	case s == "false":
324 | 		return false
325 | 	case isInteger(s):
326 | 		x, err := strconv.ParseInt(s, 10, 64)
327 | 		if err != nil {
328 | 			b.errorf("invalid integer %s", s)
329 | 		}
330 | 		return x
331 | 	case isReal(s):
332 | 		x, err := strconv.ParseFloat(s, 64)
333 | 		if err != nil {
334 | 			b.errorf("invalid real %s", s)
335 | 		}
336 | 		return x
337 | 	}
338 | 	return keyword(string(tmp))
339 | }
340 | 
// isInteger reports whether s is an optional '+' or '-' sign followed by
// one or more decimal digits.
func isInteger(s string) bool {
	digits := s
	if digits != "" && (digits[0] == '+' || digits[0] == '-') {
		digits = digits[1:]
	}
	if digits == "" {
		return false
	}
	for i := 0; i < len(digits); i++ {
		if d := digits[i]; d < '0' || d > '9' {
			return false
		}
	}
	return true
}
355 | 
// isReal reports whether s is an optional '+' or '-' sign followed by a
// non-empty run of decimal digits and dots containing exactly one dot.
// Digits are not required on either side of the dot, so "." is accepted.
func isReal(s string) bool {
	body := s
	if body != "" && (body[0] == '+' || body[0] == '-') {
		body = body[1:]
	}
	if body == "" {
		return false
	}
	dots := 0
	for i := 0; i < len(body); i++ {
		switch ch := body[i]; {
		case ch == '.':
			dots++
		case ch < '0' || ch > '9':
			return false
		}
	}
	return dots == 1
}
375 | 
376 | // An object is a PDF syntax object, one of the following Go types:
377 | //
378 | //	bool, a PDF boolean
379 | //	int64, a PDF integer
380 | //	float64, a PDF real
381 | //	string, a PDF string literal
382 | //	name, a PDF name without the leading slash
383 | //	dict, a PDF dictionary
384 | //	array, a PDF array
385 | //	stream, a PDF stream
386 | //	objptr, a PDF object reference
387 | //	objdef, a PDF object definition
388 | //
389 | // An object may also be nil, to represent the PDF null.
390 | type object interface{}
391 | 
// A dict is a PDF dictionary: a map from names (without the leading
// slash) to objects.
type dict map[name]object
393 | 
// An array is a PDF array: an ordered list of objects.
type array []object
395 | 
// A stream is a PDF stream as produced by readDict: the stream's header
// dictionary plus where to find its data.
type stream struct {
	hdr    dict   // the dictionary that preceded the "stream" keyword
	ptr    objptr // the indirect object being read when the stream was found
	offset int64  // input offset just past the line ending that follows "stream"
}
401 | 
// An objptr is a PDF object reference, written "id gen R" in the file.
// The same pair also identifies an object definition ("id gen obj").
type objptr struct {
	id  uint32 // first number of the pair
	gen uint16 // second number of the pair
}
406 | 
// An objdef is a PDF object definition, written "id gen obj ... endobj"
// in the file.
type objdef struct {
	ptr objptr // the id and gen given before "obj"
	obj object // the object read after "obj"
}
411 | 
// readObject reads and returns the next complete object from the input.
//
// The keyword "null" yields nil, "<<" a dictionary (or stream, see
// readDict) and "[" an array; any other keyword is reported through
// errorf. String tokens are decrypted when a key is set and an indirect
// object is being read. When allowObjptr is set, an integer that begins
// "id gen R" yields an objptr, and one that begins "id gen obj" yields an
// objdef holding the object that follows. Every other token is returned
// unchanged.
func (b *buffer) readObject() object {
	tok := b.readToken()
	if kw, ok := tok.(keyword); ok {
		switch kw {
		case "null":
			return nil
		case "<<":
			return b.readDict()
		case "[":
			return b.readArray()
		}
		b.errorf("unexpected keyword %q parsing object", kw)
		return nil
	}

	// Strings are decrypted only inside an indirect object (nonzero id),
	// whose id and gen are passed to decryptString.
	if str, ok := tok.(string); ok && b.key != nil && b.objptr.id != 0 {
		tok = decryptString(b.key, b.useAES, b.objptr, str)
	}

	if !b.allowObjptr {
		return tok
	}

	// Look ahead for "id gen R" or "id gen obj". The numbers must fit in
	// uint32 and uint16 respectively to be considered.
	if t1, ok := tok.(int64); ok && int64(uint32(t1)) == t1 {
		tok2 := b.readToken()
		if t2, ok := tok2.(int64); ok && int64(uint16(t2)) == t2 {
			tok3 := b.readToken()
			switch tok3 {
			case keyword("R"):
				return objptr{uint32(t1), uint16(t2)}
			case keyword("obj"):
				// Record which object is being read while parsing its
				// body, and restore the previous value afterwards.
				old := b.objptr
				b.objptr = objptr{uint32(t1), uint16(t2)}
				obj := b.readObject()
				// No "endobj" is expected here after a stream: readDict
				// stops at the start of the stream data.
				if _, ok := obj.(stream); !ok {
					tok4 := b.readToken()
					if tok4 != keyword("endobj") {
						b.errorf("missing endobj after indirect object definition")
						b.unreadToken(tok4)
					}
				}
				b.objptr = old
				return objdef{objptr{uint32(t1), uint16(t2)}, obj}
			}
			// Not a reference or definition. Tokens are pushed back in
			// reverse so they are read again in their original order.
			b.unreadToken(tok3)
		}
		b.unreadToken(tok2)
	}
	return tok
}
462 | 
463 | func (b *buffer) readArray() object {
464 | 	var x array
465 | 	for {
466 | 		tok := b.readToken()
467 | 		if tok == nil || tok == keyword("]") {
468 | 			break
469 | 		}
470 | 		b.unreadToken(tok)
471 | 		x = append(x, b.readObject())
472 | 	}
473 | 	return x
474 | }
475 | 
// readDict reads the entries of a dictionary up to and including the
// closing ">>"; the opening "<<" has already been consumed.
//
// Each key must be a name; anything else is reported through errorf.
// When allowStream is set and the dictionary is followed by the keyword
// "stream", readDict consumes the line ending after the keyword and
// returns a stream recording the dictionary, the object being read, and
// the offset at which the stream data begins. Otherwise it returns the
// dict.
func (b *buffer) readDict() object {
	x := make(dict)
	for {
		tok := b.readToken()
		if tok == nil || tok == keyword(">>") {
			break
		}
		n, ok := tok.(name)
		if !ok {
			b.errorf("unexpected non-name key %T(%v) parsing dictionary", tok, tok)
			continue
		}
		x[n] = b.readObject()
	}

	if !b.allowStream {
		return x
	}

	// Peek at the next token; anything other than "stream" is put back.
	tok := b.readToken()
	if tok != keyword("stream") {
		b.unreadToken(tok)
		return x
	}

	// The keyword must be followed by LF, CR LF, or a lone CR.
	switch b.readByte() {
	case '\r':
		if b.readByte() != '\n' {
			b.unreadByte()
		}
	case '\n':
		// ok
	default:
		b.errorf("stream keyword not followed by newline")
	}

	return stream{x, b.objptr, b.readOffset()}
}
514 | 
// isSpace reports whether b is one of the bytes treated as whitespace by
// the lexer: NUL, tab, line feed, form feed, carriage return, or space.
func isSpace(b byte) bool {
	return b == '\x00' || b == '\t' || b == '\n' || b == '\f' || b == '\r' || b == ' '
}
522 | 
// isDelim reports whether b is one of the delimiter bytes
// < > ( ) [ ] { } / %.
func isDelim(b byte) bool {
	const delims = "<>()[]{}/%"
	for i := 0; i < len(delims); i++ {
		if delims[i] == b {
			return true
		}
	}
	return false
}
530 | 


--------------------------------------------------------------------------------
/page.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 The Go Authors.  All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | package pdf
  6 | 
  7 | import (
  8 | 	"fmt"
  9 | 	"strings"
 10 | )
 11 | 
// A Page represents a single page in a PDF file.
// The methods interpret a Page dictionary stored in V.
type Page struct {
	V Value
}
 17 | 
 18 | // Page returns the page for the given page number.
 19 | // Page numbers are indexed starting at 1, not 0.
 20 | // If the page is not found, Page returns a Page with p.V.IsNull().
 21 | func (r *Reader) Page(num int) Page {
 22 | 	num-- // now 0-indexed
 23 | 	page := r.Trailer().Key("Root").Key("Pages")
 24 | Search:
 25 | 	for page.Key("Type").Name() == "Pages" {
 26 | 		count := int(page.Key("Count").Int64())
 27 | 		if count < num {
 28 | 			return Page{}
 29 | 		}
 30 | 		kids := page.Key("Kids")
 31 | 		for i := 0; i < kids.Len(); i++ {
 32 | 			kid := kids.Index(i)
 33 | 			if kid.Key("Type").Name() == "Pages" {
 34 | 				c := int(kid.Key("Count").Int64())
 35 | 				if num < c {
 36 | 					page = kid
 37 | 					continue Search
 38 | 				}
 39 | 				num -= c
 40 | 				continue
 41 | 			}
 42 | 			if kid.Key("Type").Name() == "Page" {
 43 | 				if num == 0 {
 44 | 					return Page{kid}
 45 | 				}
 46 | 				num--
 47 | 			}
 48 | 		}
 49 | 	}
 50 | 	return Page{}
 51 | }
 52 | 
 53 | // NumPage returns the number of pages in the PDF file.
 54 | func (r *Reader) NumPage() int {
 55 | 	return int(r.Trailer().Key("Root").Key("Pages").Key("Count").Int64())
 56 | }
 57 | 
 58 | func (p Page) findInherited(key string) Value {
 59 | 	for v := p.V; !v.IsNull(); v = v.Key("Parent") {
 60 | 		if r := v.Key(key); !r.IsNull() {
 61 | 			return r
 62 | 		}
 63 | 	}
 64 | 	return Value{}
 65 | }
 66 | 
 67 | /*
 68 | func (p Page) MediaBox() Value {
 69 | 	return p.findInherited("MediaBox")
 70 | }
 71 | 
 72 | func (p Page) CropBox() Value {
 73 | 	return p.findInherited("CropBox")
 74 | }
 75 | */
 76 | 
 77 | // Resources returns the resources dictionary associated with the page.
 78 | func (p Page) Resources() Value {
 79 | 	return p.findInherited("Resources")
 80 | }
 81 | 
 82 | // Fonts returns a list of the fonts associated with the page.
 83 | func (p Page) Fonts() []string {
 84 | 	return p.Resources().Key("Font").Keys()
 85 | }
 86 | 
 87 | // Font returns the font with the given name associated with the page.
 88 | func (p Page) Font(name string) Font {
 89 | 	return Font{p.Resources().Key("Font").Key(name)}
 90 | }
 91 | 
// A Font represents a font in a PDF file.
// The methods interpret a Font dictionary stored in V.
type Font struct {
	V Value // the font dictionary
}
 97 | 
 98 | // BaseFont returns the font's name (BaseFont property).
 99 | func (f Font) BaseFont() string {
100 | 	return f.V.Key("BaseFont").Name()
101 | }
102 | 
103 | // FirstChar returns the code point of the first character in the font.
104 | func (f Font) FirstChar() int {
105 | 	return int(f.V.Key("FirstChar").Int64())
106 | }
107 | 
108 | // LastChar returns the code point of the last character in the font.
109 | func (f Font) LastChar() int {
110 | 	return int(f.V.Key("LastChar").Int64())
111 | }
112 | 
113 | // Widths returns the widths of the glyphs in the font.
114 | // In a well-formed PDF, len(f.Widths()) == f.LastChar()+1 - f.FirstChar().
115 | func (f Font) Widths() []float64 {
116 | 	x := f.V.Key("Widths")
117 | 	var out []float64
118 | 	for i := 0; i < x.Len(); i++ {
119 | 		out = append(out, x.Index(i).Float64())
120 | 	}
121 | 	return out
122 | }
123 | 
124 | // Width returns the width of the given code point.
125 | func (f Font) Width(code int) float64 {
126 | 	first := f.FirstChar()
127 | 	last := f.LastChar()
128 | 	if code < first || last < code {
129 | 		return 0
130 | 	}
131 | 	return f.V.Key("Widths").Index(code - first).Float64()
132 | }
133 | 
134 | // Encoder returns the encoding between font code point sequences and UTF-8.
135 | func (f Font) Encoder() TextEncoding {
136 | 	enc := f.V.Key("Encoding")
137 | 	switch enc.Kind() {
138 | 	case Name:
139 | 		switch enc.Name() {
140 | 		case "WinAnsiEncoding":
141 | 			return &byteEncoder{&winAnsiEncoding}
142 | 		case "MacRomanEncoding":
143 | 			return &byteEncoder{&macRomanEncoding}
144 | 		case "Identity-H":
145 | 			// TODO: Should be big-endian UCS-2 decoder
146 | 			return &nopEncoder{}
147 | 		default:
148 | 			println("unknown encoding", enc.Name())
149 | 			return &nopEncoder{}
150 | 		}
151 | 	case Dict:
152 | 		return &dictEncoder{enc.Key("Differences")}
153 | 	case Null:
154 | 		// ok, try ToUnicode
155 | 	default:
156 | 		println("unexpected encoding", enc.String())
157 | 		return &nopEncoder{}
158 | 	}
159 | 
160 | 	toUnicode := f.V.Key("ToUnicode")
161 | 	if toUnicode.Kind() == Dict {
162 | 		m := readCmap(toUnicode)
163 | 		if m == nil {
164 | 			return &nopEncoder{}
165 | 		}
166 | 		return m
167 | 	}
168 | 
169 | 	return &byteEncoder{&pdfDocEncoding}
170 | }
171 | 
172 | type dictEncoder struct {
173 | 	v Value
174 | }
175 | 
176 | func (e *dictEncoder) Decode(raw string) (text string) {
177 | 	r := make([]rune, 0, len(raw))
178 | 	for i := 0; i < len(raw); i++ {
179 | 		ch := rune(raw[i])
180 | 		n := -1
181 | 		for j := 0; j < e.v.Len(); j++ {
182 | 			x := e.v.Index(j)
183 | 			if x.Kind() == Integer {
184 | 				n = int(x.Int64())
185 | 				continue
186 | 			}
187 | 			if x.Kind() == Name {
188 | 				if int(raw[i]) == n {
189 | 					r := nameToRune[x.Name()]
190 | 					if r != 0 {
191 | 						ch = r
192 | 						break
193 | 					}
194 | 				}
195 | 				n++
196 | 			}
197 | 		}
198 | 		r = append(r, ch)
199 | 	}
200 | 	return string(r)
201 | }
202 | 
// A TextEncoding represents a mapping between
// font code points and UTF-8 text.
// Font.Encoder returns the TextEncoding to use for a font.
type TextEncoding interface {
	// Decode returns the UTF-8 text corresponding to
	// the sequence of code points in raw.
	Decode(raw string) (text string)
}
210 | 
// A nopEncoder is the identity TextEncoding: it returns its input unchanged.
type nopEncoder struct{}

// Decode returns raw as is.
func (*nopEncoder) Decode(raw string) (text string) {
	text = raw
	return text
}
217 | 
// A byteEncoder is a TextEncoding for single-byte encodings: every byte of
// the input is translated through a 256-entry rune table.
type byteEncoder struct {
	table *[256]rune
}

// Decode looks up each byte of raw in the table and returns the resulting
// runes as a UTF-8 string.
func (e *byteEncoder) Decode(raw string) (text string) {
	runes := make([]rune, len(raw))
	for i := range runes {
		runes[i] = e.table[raw[i]]
	}
	return string(runes)
}
229 | 
// A cmap is the TextEncoding that readCmap builds from a font's ToUnicode
// program.
type cmap struct {
	space   [4][][2]string // space[n-1] holds the {lo, hi} code space ranges for n-byte codes
	bfrange []bfrange      // source code ranges and their destinations, as recorded by readCmap
}

// Decode converts raw, a sequence of character codes, to text.
//
// At each position it tries code lengths of 1 to 4 bytes and takes the
// first length for which the leading bytes fall inside a code space range.
// Those bytes are consumed and looked up in the bfrange list. A code with
// no usable mapping, and any byte that matches no code space range, yields
// noRune; diagnostics for those cases are printed.
func (m *cmap) Decode(raw string) (text string) {
	var r []rune
Parse:
	for len(raw) > 0 {
		for n := 1; n <= 4 && n <= len(raw); n++ {
			for _, space := range m.space[n-1] {
				if space[0] <= raw[:n] && raw[:n] <= space[1] {
					// raw[:n] is a code; note this text shadows the named result.
					text := raw[:n]
					raw = raw[n:]
					for _, bf := range m.bfrange {
						if len(bf.lo) == n && bf.lo <= text && text <= bf.hi {
							if bf.dst.Kind() == String {
								s := bf.dst.RawString()
								if bf.lo != text {
									// Offset the destination by the distance of the code
									// from the start of the range. Only the last byte is
									// adjusted, using byte arithmetic without carry.
									// NOTE(review): an empty destination string would make
									// len(b)-1 negative and panic here — confirm it cannot occur.
									b := []byte(s)
									b[len(b)-1] += text[len(text)-1] - bf.lo[len(bf.lo)-1]
									s = string(b)
								}
								r = append(r, []rune(utf16Decode(s))...)
								continue Parse
							}
							// Array and other destination kinds are not decoded.
							if bf.dst.Kind() == Array {
								fmt.Printf("array %v\n", bf.dst)
							} else {
								fmt.Printf("unknown dst %v\n", bf.dst)
							}
							r = append(r, noRune)
							continue Parse
						}
					}
					// The code is in a code space but in no bfrange.
					fmt.Printf("no text for %q", text)
					r = append(r, noRune)
					continue Parse
				}
			}
		}
		// No code length matched: skip one byte and resynchronize.
		println("no code space found")
		r = append(r, noRune)
		raw = raw[1:]
	}
	return string(r)
}
277 | 
// A bfrange is one entry recorded by readCmap for an endbfrange operator:
// the source codes lo through hi map to destinations starting at dst.
type bfrange struct {
	lo  string // first source code of the range
	hi  string // last source code of the range
	dst Value  // destination; cmap.Decode only handles the String kind
}
283 | 
284 | func readCmap(toUnicode Value) *cmap {
285 | 	n := -1
286 | 	var m cmap
287 | 	ok := true
288 | 	Interpret(toUnicode, func(stk *Stack, op string) {
289 | 		if !ok {
290 | 			return
291 | 		}
292 | 		switch op {
293 | 		case "findresource":
294 | 			category := stk.Pop()
295 | 			key := stk.Pop()
296 | 			fmt.Println("findresource", key, category)
297 | 			stk.Push(newDict())
298 | 		case "begincmap":
299 | 			stk.Push(newDict())
300 | 		case "endcmap":
301 | 			stk.Pop()
302 | 		case "begincodespacerange":
303 | 			n = int(stk.Pop().Int64())
304 | 		case "endcodespacerange":
305 | 			if n < 0 {
306 | 				println("missing begincodespacerange")
307 | 				ok = false
308 | 				return
309 | 			}
310 | 			for i := 0; i < n; i++ {
311 | 				hi, lo := stk.Pop().RawString(), stk.Pop().RawString()
312 | 				if len(lo) == 0 || len(lo) != len(hi) {
313 | 					println("bad codespace range")
314 | 					ok = false
315 | 					return
316 | 				}
317 | 				m.space[len(lo)-1] = append(m.space[len(lo)-1], [2]string{lo, hi})
318 | 			}
319 | 			n = -1
320 | 		case "beginbfrange":
321 | 			n = int(stk.Pop().Int64())
322 | 		case "endbfrange":
323 | 			if n < 0 {
324 | 				panic("missing beginbfrange")
325 | 			}
326 | 			for i := 0; i < n; i++ {
327 | 				dst, srcHi, srcLo := stk.Pop(), stk.Pop().RawString(), stk.Pop().RawString()
328 | 				m.bfrange = append(m.bfrange, bfrange{srcLo, srcHi, dst})
329 | 			}
330 | 		case "defineresource":
331 | 			category := stk.Pop().Name()
332 | 			value := stk.Pop()
333 | 			key := stk.Pop().Name()
334 | 			fmt.Println("defineresource", key, value, category)
335 | 			stk.Push(value)
336 | 		default:
337 | 			println("interp\t", op)
338 | 		}
339 | 	})
340 | 	if !ok {
341 | 		return nil
342 | 	}
343 | 	return &m
344 | }
345 | 
// A matrix is a 3x3 matrix of float64 values, indexed [row][column].
type matrix [3][3]float64

// ident is the identity matrix.
var ident = matrix{
	{1, 0, 0},
	{0, 1, 0},
	{0, 0, 1},
}

// mul returns the matrix product x·y.
func (x matrix) mul(y matrix) matrix {
	var z matrix
	for i, row := range x {
		for j := range z[i] {
			var sum float64
			for k, a := range row {
				sum += a * y[k][j]
			}
			z[i][j] = sum
		}
	}
	return z
}
361 | 
// A Text represents a single piece of text drawn on a page.
// Page.Content produces one Text for each non-space character it decodes.
type Text struct {
	Font     string  // the font used
	FontSize float64 // the font size, in points (1/72 of an inch)
	X        float64 // the X coordinate, in points, increasing left to right
	Y        float64 // the Y coordinate, in points, increasing bottom to top
	W        float64 // the width of the text, in points
	S        string  // the actual UTF-8 text
}
371 | 
// A Rect represents a rectangle.
// Page.Content fills Min with the x, y operands of an "re" operator
// and Max with x+w, y+h.
type Rect struct {
	Min, Max Point
}
376 | 
// A Point represents an X, Y pair.
type Point struct {
	X float64 // the X coordinate
	Y float64 // the Y coordinate
}
382 | 
// Content describes the basic content on a page: the text and any drawn rectangles.
type Content struct {
	Text []Text // text pieces, in the order they were shown
	Rect []Rect // rectangles, in the order they were appended to a path
}
388 | 
// A gstate is the part of the graphics and text state that Page.Content
// tracks while interpreting a content stream. The "q" operator saves a
// copy of it and "Q" restores the most recently saved copy.
type gstate struct {
	Tc    float64 // character spacing, set by the Tc and " operators
	Tw    float64 // word spacing, set by the Tw and " operators
	Th    float64 // horizontal scaling as a fraction (the Tz operand divided by 100)
	Tl    float64 // text leading, set by the TL and TD operators
	Tf    Font    // current font, set by the Tf operator
	Tfs   float64 // current font size, set by the Tf operator
	Tmode int     // text rendering mode, set by the Tr operator
	Trise float64 // text rise, set by the Ts operator
	Tm    matrix  // text matrix
	Tlm   matrix  // text line matrix
	Trm   matrix  // not assigned by Page.Content, which computes its rendering matrix in a local variable
	CTM   matrix  // current transformation matrix, updated by the cm operator
}
403 | 
404 | // Content returns the page's content.
405 | func (p Page) Content() Content {
406 | 	strm := p.V.Key("Contents")
407 | 	var enc TextEncoding = &nopEncoder{}
408 | 
409 | 	var g = gstate{
410 | 		Th:  1,
411 | 		CTM: ident,
412 | 	}
413 | 
414 | 	var text []Text
415 | 	showText := func(s string) {
416 | 		n := 0
417 | 		for _, ch := range enc.Decode(s) {
418 | 			Trm := matrix{{g.Tfs * g.Th, 0, 0}, {0, g.Tfs, 0}, {0, g.Trise, 1}}.mul(g.Tm).mul(g.CTM)
419 | 			w0 := g.Tf.Width(int(s[n]))
420 | 			n++
421 | 			if ch != ' ' {
422 | 				f := g.Tf.BaseFont()
423 | 				if i := strings.Index(f, "+"); i >= 0 {
424 | 					f = f[i+1:]
425 | 				}
426 | 				text = append(text, Text{f, Trm[0][0], Trm[2][0], Trm[2][1], w0 / 1000 * Trm[0][0], string(ch)})
427 | 			}
428 | 			tx := w0/1000*g.Tfs + g.Tc
429 | 			if ch == ' ' {
430 | 				tx += g.Tw
431 | 			}
432 | 			tx *= g.Th
433 | 			g.Tm = matrix{{1, 0, 0}, {0, 1, 0}, {tx, 0, 1}}.mul(g.Tm)
434 | 		}
435 | 	}
436 | 
437 | 	var rect []Rect
438 | 	var gstack []gstate
439 | 	Interpret(strm, func(stk *Stack, op string) {
440 | 		n := stk.Len()
441 | 		args := make([]Value, n)
442 | 		for i := n - 1; i >= 0; i-- {
443 | 			args[i] = stk.Pop()
444 | 		}
445 | 		switch op {
446 | 		default:
447 | 			//fmt.Println(op, args)
448 | 			return
449 | 
450 | 		case "cm": // update g.CTM
451 | 			if len(args) != 6 {
452 | 				panic("bad g.Tm")
453 | 			}
454 | 			var m matrix
455 | 			for i := 0; i < 6; i++ {
456 | 				m[i/2][i%2] = args[i].Float64()
457 | 			}
458 | 			m[2][2] = 1
459 | 			g.CTM = m.mul(g.CTM)
460 | 
461 | 		case "gs": // set parameters from graphics state resource
462 | 			gs := p.Resources().Key("ExtGState").Key(args[0].Name())
463 | 			font := gs.Key("Font")
464 | 			if font.Kind() == Array && font.Len() == 2 {
465 | 				//fmt.Println("FONT", font)
466 | 			}
467 | 
468 | 		case "f": // fill
469 | 		case "g": // setgray
470 | 		case "l": // lineto
471 | 		case "m": // moveto
472 | 
473 | 		case "cs": // set colorspace non-stroking
474 | 		case "scn": // set color non-stroking
475 | 
476 | 		case "re": // append rectangle to path
477 | 			if len(args) != 4 {
478 | 				panic("bad re")
479 | 			}
480 | 			x, y, w, h := args[0].Float64(), args[1].Float64(), args[2].Float64(), args[3].Float64()
481 | 			rect = append(rect, Rect{Point{x, y}, Point{x + w, y + h}})
482 | 
483 | 		case "q": // save graphics state
484 | 			gstack = append(gstack, g)
485 | 
486 | 		case "Q": // restore graphics state
487 | 			n := len(gstack) - 1
488 | 			g = gstack[n]
489 | 			gstack = gstack[:n]
490 | 
491 | 		case "BT": // begin text (reset text matrix and line matrix)
492 | 			g.Tm = ident
493 | 			g.Tlm = g.Tm
494 | 
495 | 		case "ET": // end text
496 | 
497 | 		case "T*": // move to start of next line
498 | 			x := matrix{{1, 0, 0}, {0, 1, 0}, {0, -g.Tl, 1}}
499 | 			g.Tlm = x.mul(g.Tlm)
500 | 			g.Tm = g.Tlm
501 | 
502 | 		case "Tc": // set character spacing
503 | 			if len(args) != 1 {
504 | 				panic("bad g.Tc")
505 | 			}
506 | 			g.Tc = args[0].Float64()
507 | 
508 | 		case "TD": // move text position and set leading
509 | 			if len(args) != 2 {
510 | 				panic("bad Td")
511 | 			}
512 | 			g.Tl = -args[1].Float64()
513 | 			fallthrough
514 | 		case "Td": // move text position
515 | 			if len(args) != 2 {
516 | 				panic("bad Td")
517 | 			}
518 | 			tx := args[0].Float64()
519 | 			ty := args[1].Float64()
520 | 			x := matrix{{1, 0, 0}, {0, 1, 0}, {tx, ty, 1}}
521 | 			g.Tlm = x.mul(g.Tlm)
522 | 			g.Tm = g.Tlm
523 | 
524 | 		case "Tf": // set text font and size
525 | 			if len(args) != 2 {
526 | 				panic("bad TL")
527 | 			}
528 | 			f := args[0].Name()
529 | 			g.Tf = p.Font(f)
530 | 			enc = g.Tf.Encoder()
531 | 			if enc == nil {
532 | 				println("no cmap for", f)
533 | 				enc = &nopEncoder{}
534 | 			}
535 | 			g.Tfs = args[1].Float64()
536 | 
537 | 		case "\"": // set spacing, move to next line, and show text
538 | 			if len(args) != 3 {
539 | 				panic("bad \" operator")
540 | 			}
541 | 			g.Tw = args[0].Float64()
542 | 			g.Tc = args[1].Float64()
543 | 			args = args[2:]
544 | 			fallthrough
545 | 		case "'": // move to next line and show text
546 | 			if len(args) != 1 {
547 | 				panic("bad ' operator")
548 | 			}
549 | 			x := matrix{{1, 0, 0}, {0, 1, 0}, {0, -g.Tl, 1}}
550 | 			g.Tlm = x.mul(g.Tlm)
551 | 			g.Tm = g.Tlm
552 | 			fallthrough
553 | 		case "Tj": // show text
554 | 			if len(args) != 1 {
555 | 				panic("bad Tj operator")
556 | 			}
557 | 			showText(args[0].RawString())
558 | 
559 | 		case "TJ": // show text, allowing individual glyph positioning
560 | 			v := args[0]
561 | 			for i := 0; i < v.Len(); i++ {
562 | 				x := v.Index(i)
563 | 				if x.Kind() == String {
564 | 					showText(x.RawString())
565 | 				} else {
566 | 					tx := -x.Float64() / 1000 * g.Tfs * g.Th
567 | 					g.Tm = matrix{{1, 0, 0}, {0, 1, 0}, {tx, 0, 1}}.mul(g.Tm)
568 | 				}
569 | 			}
570 | 
571 | 		case "TL": // set text leading
572 | 			if len(args) != 1 {
573 | 				panic("bad TL")
574 | 			}
575 | 			g.Tl = args[0].Float64()
576 | 
577 | 		case "Tm": // set text matrix and line matrix
578 | 			if len(args) != 6 {
579 | 				panic("bad g.Tm")
580 | 			}
581 | 			var m matrix
582 | 			for i := 0; i < 6; i++ {
583 | 				m[i/2][i%2] = args[i].Float64()
584 | 			}
585 | 			m[2][2] = 1
586 | 			g.Tm = m
587 | 			g.Tlm = m
588 | 
589 | 		case "Tr": // set text rendering mode
590 | 			if len(args) != 1 {
591 | 				panic("bad Tr")
592 | 			}
593 | 			g.Tmode = int(args[0].Int64())
594 | 
595 | 		case "Ts": // set text rise
596 | 			if len(args) != 1 {
597 | 				panic("bad Ts")
598 | 			}
599 | 			g.Trise = args[0].Float64()
600 | 
601 | 		case "Tw": // set word spacing
602 | 			if len(args) != 1 {
603 | 				panic("bad g.Tw")
604 | 			}
605 | 			g.Tw = args[0].Float64()
606 | 
607 | 		case "Tz": // set horizontal text scaling
608 | 			if len(args) != 1 {
609 | 				panic("bad Tz")
610 | 			}
611 | 			g.Th = args[0].Float64() / 100
612 | 		}
613 | 	})
614 | 	return Content{text, rect}
615 | }
616 | 
617 | // TextVertical implements sort.Interface for sorting
618 | // a slice of Text values in vertical order, top to bottom,
619 | // and then left to right within a line.
620 | type TextVertical []Text
621 | 
622 | func (x TextVertical) Len() int      { return len(x) }
623 | func (x TextVertical) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
624 | func (x TextVertical) Less(i, j int) bool {
625 | 	if x[i].Y != x[j].Y {
626 | 		return x[i].Y > x[j].Y
627 | 	}
628 | 	return x[i].X < x[j].X
629 | }
630 | 
631 | // TextVertical implements sort.Interface for sorting
632 | // a slice of Text values in horizontal order, left to right,
633 | // and then top to bottom within a column.
634 | type TextHorizontal []Text
635 | 
636 | func (x TextHorizontal) Len() int      { return len(x) }
637 | func (x TextHorizontal) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
638 | func (x TextHorizontal) Less(i, j int) bool {
639 | 	if x[i].X != x[j].X {
640 | 		return x[i].X < x[j].X
641 | 	}
642 | 	return x[i].Y > x[j].Y
643 | }
644 | 
// An Outline is a tree describing the outline (also known as the table of contents)
// of a document.
type Outline struct {
	Title string    // title for this element
	Child []Outline // child elements, in document order (First, then each Next)
}
651 | 
652 | // Outline returns the document outline.
653 | // The Outline returned is the root of the outline tree and typically has no Title itself.
654 | // That is, the children of the returned root are the top-level entries in the outline.
655 | func (r *Reader) Outline() Outline {
656 | 	return buildOutline(r.Trailer().Key("Root").Key("Outlines"))
657 | }
658 | 
659 | func buildOutline(entry Value) Outline {
660 | 	var x Outline
661 | 	x.Title = entry.Key("Title").Text()
662 | 	for child := entry.Key("First"); child.Kind() == Dict; child = child.Key("Next") {
663 | 		x.Child = append(x.Child, buildOutline(child))
664 | 	}
665 | 	return x
666 | }
667 | 


--------------------------------------------------------------------------------
/read.go:
--------------------------------------------------------------------------------
   1 | // Copyright 2014 The Go Authors.  All rights reserved.
   2 | // Use of this source code is governed by a BSD-style
   3 | // license that can be found in the LICENSE file.
   4 | 
   5 | // Package pdf implements reading of PDF files.
   6 | //
   7 | // Overview
   8 | //
   9 | // PDF is Adobe's Portable Document Format, ubiquitous on the internet.
  10 | // A PDF document is a complex data format built on a fairly simple structure.
  11 | // This package exposes the simple structure along with some wrappers to
  12 | // extract basic information. If more complex information is needed, it is
  13 | // possible to extract that information by interpreting the structure exposed
  14 | // by this package.
  15 | //
  16 | // Specifically, a PDF is a data structure built from Values, each of which has
  17 | // one of the following Kinds:
  18 | //
  19 | //	Null, for the null object.
  20 | //	Integer, for an integer.
  21 | //	Real, for a floating-point number.
  22 | //	Bool, for a boolean value.
  23 | //	Name, for a name constant (as in /Helvetica).
  24 | //	String, for a string constant.
  25 | //	Dict, for a dictionary of name-value pairs.
  26 | //	Array, for an array of values.
  27 | //	Stream, for an opaque data stream and associated header dictionary.
  28 | //
  29 | // The accessors on Value—Int64, Float64, Bool, Name, and so on—return
  30 | // a view of the data as the given type. When there is no appropriate view,
  31 | // the accessor returns a zero result. For example, the Name accessor returns
  32 | // the empty string if called on a Value v for which v.Kind() != Name.
  33 | // Returning zero values this way, especially from the Dict and Array accessors,
  34 | // which themselves return Values, makes it possible to traverse a PDF quickly
  35 | // without writing any error checking. On the other hand, it means that mistakes
  36 | // can go unreported.
  37 | //
  38 | // The basic structure of the PDF file is exposed as the graph of Values.
  39 | //
  40 | // Most richer data structures in a PDF file are dictionaries with specific interpretations
  41 | // of the name-value pairs. The Font and Page wrappers make the interpretation
  42 | // of a specific Value as the corresponding type easier. They are only helpers, though:
  43 | // they are implemented only in terms of the Value API and could be moved outside
  44 | // the package. Equally important, traversal of other PDF data structures can be implemented
  45 | // in other packages as needed.
  46 | //
  47 | package pdf
  48 | 
  49 | // BUG(rsc): The package is incomplete, although it has been used successfully on some
  50 | // large real-world PDF files.
  51 | 
  52 | // BUG(rsc): There is no support for closing open PDF files. If you drop all references to a Reader,
  53 | // the underlying reader will eventually be garbage collected.
  54 | 
  55 | // BUG(rsc): The library makes no attempt at efficiency. A value cache maintained in the Reader
  56 | // would probably help significantly.
  57 | 
  58 | // BUG(rsc): The support for reading encrypted files is weak.
  59 | 
  60 | // BUG(rsc): The Value API does not support error reporting. The intent is to allow users to
  61 | // set an error reporting callback in Reader, but that code has not been implemented.
  62 | 
  63 | import (
  64 | 	"bytes"
  65 | 	"compress/zlib"
  66 | 	"crypto/aes"
  67 | 	"crypto/cipher"
  68 | 	"crypto/md5"
  69 | 	"crypto/rc4"
  70 | 	"fmt"
  71 | 	"io"
  72 | 	"io/ioutil"
  73 | 	"os"
  74 | 	"sort"
  75 | 	"strconv"
  76 | )
  77 | 
// A Reader is a single PDF file open for reading.
type Reader struct {
	f          io.ReaderAt // the underlying data
	end        int64       // total size of the data in f
	xref       []xref      // cross-reference table, indexed by object number
	trailer    dict        // trailer dictionary, or the xref stream's header when the file uses xref streams
	trailerptr objptr      // object pointer of the xref stream; zero for a classic xref table
	key        []byte      // presumably the decryption key set up by initEncrypt — TODO confirm
	useAES     bool        // presumably selects AES over RC4 for decryption — TODO confirm
}
  88 | 
// An xref is one entry of the cross-reference table: it records where the
// object identified by ptr is stored.
type xref struct {
	ptr      objptr // object number and generation; zero for a slot that has not been filled
	inStream bool   // whether the object is stored inside another stream (xref stream entry type 2)
	stream   objptr // when inStream, the stream that contains the object
	offset   int64  // file offset of the object; when inStream, the third field of the type 2 entry
}
  95 | 
  96 | func (r *Reader) errorf(format string, args ...interface{}) {
  97 | 	panic(fmt.Errorf(format, args...))
  98 | }
  99 | 
 100 | // Open opens a file for reading.
 101 | func Open(file string) (*Reader, error) {
 102 | 	// TODO: Deal with closing file.
 103 | 	f, err := os.Open(file)
 104 | 	if err != nil {
 105 | 		return nil, err
 106 | 	}
 107 | 	fi, err := f.Stat()
 108 | 	if err != nil {
 109 | 		f.Close()
 110 | 		return nil, err
 111 | 	}
 112 | 	return NewReader(f, fi.Size())
 113 | }
 114 | 
 115 | // NewReader opens a file for reading, using the data in f with the given total size.
 116 | func NewReader(f io.ReaderAt, size int64) (*Reader, error) {
 117 | 	return NewReaderEncrypted(f, size, nil)
 118 | }
 119 | 
 120 | // NewReaderEncrypted opens a file for reading, using the data in f with the given total size.
 121 | // If the PDF is encrypted, NewReaderEncrypted calls pw repeatedly to obtain passwords
 122 | // to try. If pw returns the empty string, NewReaderEncrypted stops trying to decrypt
 123 | // the file and returns an error.
 124 | func NewReaderEncrypted(f io.ReaderAt, size int64, pw func() string) (*Reader, error) {
 125 | 	buf := make([]byte, 10)
 126 | 	f.ReadAt(buf, 0)
 127 | 	if !bytes.HasPrefix(buf, []byte("%PDF-1.")) || buf[7] < '0' || buf[7] > '7' || buf[8] != '\r' && buf[8] != '\n' {
 128 | 		return nil, fmt.Errorf("not a PDF file: invalid header")
 129 | 	}
 130 | 	end := size
 131 | 	const endChunk = 100
 132 | 	buf = make([]byte, endChunk)
 133 | 	f.ReadAt(buf, end-endChunk)
 134 | 	for len(buf) > 0 && buf[len(buf)-1] == '\n' || buf[len(buf)-1] == '\r' {
 135 | 		buf = buf[:len(buf)-1]
 136 | 	}
 137 | 	buf = bytes.TrimRight(buf, "\r\n\t ")
 138 | 	if !bytes.HasSuffix(buf, []byte("%%EOF")) {
 139 | 		return nil, fmt.Errorf("not a PDF file: missing %%%%EOF")
 140 | 	}
 141 | 	i := findLastLine(buf, "startxref")
 142 | 	if i < 0 {
 143 | 		return nil, fmt.Errorf("malformed PDF file: missing final startxref")
 144 | 	}
 145 | 
 146 | 	r := &Reader{
 147 | 		f:   f,
 148 | 		end: end,
 149 | 	}
 150 | 	pos := end - endChunk + int64(i)
 151 | 	b := newBuffer(io.NewSectionReader(f, pos, end-pos), pos)
 152 | 	if b.readToken() != keyword("startxref") {
 153 | 		return nil, fmt.Errorf("malformed PDF file: missing startxref")
 154 | 	}
 155 | 	startxref, ok := b.readToken().(int64)
 156 | 	if !ok {
 157 | 		return nil, fmt.Errorf("malformed PDF file: startxref not followed by integer")
 158 | 	}
 159 | 	b = newBuffer(io.NewSectionReader(r.f, startxref, r.end-startxref), startxref)
 160 | 	xref, trailerptr, trailer, err := readXref(r, b)
 161 | 	if err != nil {
 162 | 		return nil, err
 163 | 	}
 164 | 	r.xref = xref
 165 | 	r.trailer = trailer
 166 | 	r.trailerptr = trailerptr
 167 | 	if trailer["Encrypt"] == nil {
 168 | 		return r, nil
 169 | 	}
 170 | 	err = r.initEncrypt("")
 171 | 	if err == nil {
 172 | 		return r, nil
 173 | 	}
 174 | 	if pw == nil || err != ErrInvalidPassword {
 175 | 		return nil, err
 176 | 	}
 177 | 	for {
 178 | 		next := pw()
 179 | 		if next == "" {
 180 | 			break
 181 | 		}
 182 | 		if r.initEncrypt(next) == nil {
 183 | 			return r, nil
 184 | 		}
 185 | 	}
 186 | 	return nil, err
 187 | }
 188 | 
 189 | // Trailer returns the file's Trailer value.
 190 | func (r *Reader) Trailer() Value {
 191 | 	return Value{r, r.trailerptr, r.trailer}
 192 | }
 193 | 
 194 | func readXref(r *Reader, b *buffer) ([]xref, objptr, dict, error) {
 195 | 	tok := b.readToken()
 196 | 	if tok == keyword("xref") {
 197 | 		return readXrefTable(r, b)
 198 | 	}
 199 | 	if _, ok := tok.(int64); ok {
 200 | 		b.unreadToken(tok)
 201 | 		return readXrefStream(r, b)
 202 | 	}
 203 | 	return nil, objptr{}, nil, fmt.Errorf("malformed PDF: cross-reference table not found: %v", tok)
 204 | }
 205 | 
 206 | func readXrefStream(r *Reader, b *buffer) ([]xref, objptr, dict, error) {
 207 | 	obj1 := b.readObject()
 208 | 	obj, ok := obj1.(objdef)
 209 | 	if !ok {
 210 | 		return nil, objptr{}, nil, fmt.Errorf("malformed PDF: cross-reference table not found: %v", objfmt(obj1))
 211 | 	}
 212 | 	strmptr := obj.ptr
 213 | 	strm, ok := obj.obj.(stream)
 214 | 	if !ok {
 215 | 		return nil, objptr{}, nil, fmt.Errorf("malformed PDF: cross-reference table not found: %v", objfmt(obj))
 216 | 	}
 217 | 	if strm.hdr["Type"] != name("XRef") {
 218 | 		return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref stream does not have type XRef")
 219 | 	}
 220 | 	size, ok := strm.hdr["Size"].(int64)
 221 | 	if !ok {
 222 | 		return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref stream missing Size")
 223 | 	}
 224 | 	table := make([]xref, size)
 225 | 
 226 | 	table, err := readXrefStreamData(r, strm, table, size)
 227 | 	if err != nil {
 228 | 		return nil, objptr{}, nil, fmt.Errorf("malformed PDF: %v", err)
 229 | 	}
 230 | 
 231 | 	for prevoff := strm.hdr["Prev"]; prevoff != nil; {
 232 | 		off, ok := prevoff.(int64)
 233 | 		if !ok {
 234 | 			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref Prev is not integer: %v", prevoff)
 235 | 		}
 236 | 		b := newBuffer(io.NewSectionReader(r.f, off, r.end-off), off)
 237 | 		obj1 := b.readObject()
 238 | 		obj, ok := obj1.(objdef)
 239 | 		if !ok {
 240 | 			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref prev stream not found: %v", objfmt(obj1))
 241 | 		}
 242 | 		prevstrm, ok := obj.obj.(stream)
 243 | 		if !ok {
 244 | 			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref prev stream not found: %v", objfmt(obj))
 245 | 		}
 246 | 		prevoff = prevstrm.hdr["Prev"]
 247 | 		prev := Value{r, objptr{}, prevstrm}
 248 | 		if prev.Kind() != Stream {
 249 | 			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref prev stream is not stream: %v", prev)
 250 | 		}
 251 | 		if prev.Key("Type").Name() != "XRef" {
 252 | 			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref prev stream does not have type XRef")
 253 | 		}
 254 | 		psize := prev.Key("Size").Int64()
 255 | 		if psize > size {
 256 | 			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref prev stream larger than last stream")
 257 | 		}
 258 | 		if table, err = readXrefStreamData(r, prev.data.(stream), table, psize); err != nil {
 259 | 			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: reading xref prev stream: %v", err)
 260 | 		}
 261 | 	}
 262 | 
 263 | 	return table, strmptr, strm.hdr, nil
 264 | }
 265 | 
 266 | func readXrefStreamData(r *Reader, strm stream, table []xref, size int64) ([]xref, error) {
 267 | 	index, _ := strm.hdr["Index"].(array)
 268 | 	if index == nil {
 269 | 		index = array{int64(0), size}
 270 | 	}
 271 | 	if len(index)%2 != 0 {
 272 | 		return nil, fmt.Errorf("invalid Index array %v", objfmt(index))
 273 | 	}
 274 | 	ww, ok := strm.hdr["W"].(array)
 275 | 	if !ok {
 276 | 		return nil, fmt.Errorf("xref stream missing W array")
 277 | 	}
 278 | 
 279 | 	var w []int
 280 | 	for _, x := range ww {
 281 | 		i, ok := x.(int64)
 282 | 		if !ok || int64(int(i)) != i {
 283 | 			return nil, fmt.Errorf("invalid W array %v", objfmt(ww))
 284 | 		}
 285 | 		w = append(w, int(i))
 286 | 	}
 287 | 	if len(w) < 3 {
 288 | 		return nil, fmt.Errorf("invalid W array %v", objfmt(ww))
 289 | 	}
 290 | 
 291 | 	v := Value{r, objptr{}, strm}
 292 | 	wtotal := 0
 293 | 	for _, wid := range w {
 294 | 		wtotal += wid
 295 | 	}
 296 | 	buf := make([]byte, wtotal)
 297 | 	data := v.Reader()
 298 | 	for len(index) > 0 {
 299 | 		start, ok1 := index[0].(int64)
 300 | 		n, ok2 := index[1].(int64)
 301 | 		if !ok1 || !ok2 {
 302 | 			return nil, fmt.Errorf("malformed Index pair %v %v %T %T", objfmt(index[0]), objfmt(index[1]), index[0], index[1])
 303 | 		}
 304 | 		index = index[2:]
 305 | 		for i := 0; i < int(n); i++ {
 306 | 			_, err := io.ReadFull(data, buf)
 307 | 			if err != nil {
 308 | 				return nil, fmt.Errorf("error reading xref stream: %v", err)
 309 | 			}
 310 | 			v1 := decodeInt(buf[0:w[0]])
 311 | 			if w[0] == 0 {
 312 | 				v1 = 1
 313 | 			}
 314 | 			v2 := decodeInt(buf[w[0] : w[0]+w[1]])
 315 | 			v3 := decodeInt(buf[w[0]+w[1] : w[0]+w[1]+w[2]])
 316 | 			x := int(start) + i
 317 | 			for cap(table) <= x {
 318 | 				table = append(table[:cap(table)], xref{})
 319 | 			}
 320 | 			if table[x].ptr != (objptr{}) {
 321 | 				continue
 322 | 			}
 323 | 			switch v1 {
 324 | 			case 0:
 325 | 				table[x] = xref{ptr: objptr{0, 65535}}
 326 | 			case 1:
 327 | 				table[x] = xref{ptr: objptr{uint32(x), uint16(v3)}, offset: int64(v2)}
 328 | 			case 2:
 329 | 				table[x] = xref{ptr: objptr{uint32(x), 0}, inStream: true, stream: objptr{uint32(v2), 0}, offset: int64(v3)}
 330 | 			default:
 331 | 				fmt.Printf("invalid xref stream type %d: %x\n", v1, buf)
 332 | 			}
 333 | 		}
 334 | 	}
 335 | 	return table, nil
 336 | }
 337 | 
 338 | func decodeInt(b []byte) int {
 339 | 	x := 0
 340 | 	for _, c := range b {
 341 | 		x = x<<8 | int(c)
 342 | 	}
 343 | 	return x
 344 | }
 345 | 
 346 | func readXrefTable(r *Reader, b *buffer) ([]xref, objptr, dict, error) {
 347 | 	var table []xref
 348 | 
 349 | 	table, err := readXrefTableData(b, table)
 350 | 	if err != nil {
 351 | 		return nil, objptr{}, nil, fmt.Errorf("malformed PDF: %v", err)
 352 | 	}
 353 | 
 354 | 	trailer, ok := b.readObject().(dict)
 355 | 	if !ok {
 356 | 		return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref table not followed by trailer dictionary")
 357 | 	}
 358 | 
 359 | 	for prevoff := trailer["Prev"]; prevoff != nil; {
 360 | 		off, ok := prevoff.(int64)
 361 | 		if !ok {
 362 | 			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref Prev is not integer: %v", prevoff)
 363 | 		}
 364 | 		b := newBuffer(io.NewSectionReader(r.f, off, r.end-off), off)
 365 | 		tok := b.readToken()
 366 | 		if tok != keyword("xref") {
 367 | 			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref Prev does not point to xref")
 368 | 		}
 369 | 		table, err = readXrefTableData(b, table)
 370 | 		if err != nil {
 371 | 			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: %v", err)
 372 | 		}
 373 | 
 374 | 		trailer, ok := b.readObject().(dict)
 375 | 		if !ok {
 376 | 			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref Prev table not followed by trailer dictionary")
 377 | 		}
 378 | 		prevoff = trailer["Prev"]
 379 | 	}
 380 | 
 381 | 	size, ok := trailer[name("Size")].(int64)
 382 | 	if !ok {
 383 | 		return nil, objptr{}, nil, fmt.Errorf("malformed PDF: trailer missing /Size entry")
 384 | 	}
 385 | 
 386 | 	if size < int64(len(table)) {
 387 | 		table = table[:size]
 388 | 	}
 389 | 
 390 | 	return table, objptr{}, trailer, nil
 391 | }
 392 | 
 393 | func readXrefTableData(b *buffer, table []xref) ([]xref, error) {
 394 | 	for {
 395 | 		tok := b.readToken()
 396 | 		if tok == keyword("trailer") {
 397 | 			break
 398 | 		}
 399 | 		start, ok1 := tok.(int64)
 400 | 		n, ok2 := b.readToken().(int64)
 401 | 		if !ok1 || !ok2 {
 402 | 			return nil, fmt.Errorf("malformed xref table")
 403 | 		}
 404 | 		for i := 0; i < int(n); i++ {
 405 | 			off, ok1 := b.readToken().(int64)
 406 | 			gen, ok2 := b.readToken().(int64)
 407 | 			alloc, ok3 := b.readToken().(keyword)
 408 | 			if !ok1 || !ok2 || !ok3 || alloc != keyword("f") && alloc != keyword("n") {
 409 | 				return nil, fmt.Errorf("malformed xref table")
 410 | 			}
 411 | 			x := int(start) + i
 412 | 			for cap(table) <= x {
 413 | 				table = append(table[:cap(table)], xref{})
 414 | 			}
 415 | 			if len(table) <= x {
 416 | 				table = table[:x+1]
 417 | 			}
 418 | 			if alloc == "n" && table[x].offset == 0 {
 419 | 				table[x] = xref{ptr: objptr{uint32(x), uint16(gen)}, offset: int64(off)}
 420 | 			}
 421 | 		}
 422 | 	}
 423 | 	return table, nil
 424 | }
 425 | 
 426 | func findLastLine(buf []byte, s string) int {
 427 | 	bs := []byte(s)
 428 | 	max := len(buf)
 429 | 	for {
 430 | 		i := bytes.LastIndex(buf[:max], bs)
 431 | 		if i <= 0 || i+len(bs) >= len(buf) {
 432 | 			return -1
 433 | 		}
 434 | 		if (buf[i-1] == '\n' || buf[i-1] == '\r') && (buf[i+len(bs)] == '\n' || buf[i+len(bs)] == '\r') {
 435 | 			return i
 436 | 		}
 437 | 		max = i
 438 | 	}
 439 | }
 440 | 
// A Value is a single PDF value, such as an integer, dictionary, or array.
// The zero Value is a PDF null (Kind() == Null, IsNull() == true).
type Value struct {
	// r is the Reader the value belongs to; Key and Index call r.resolve to
	// load nested objects, and Reader reads stream data through it.
	r    *Reader
	// ptr is the object pointer recorded by resolve: the indirect reference
	// that was followed to load the value, or else the parent pointer that
	// was passed in for a direct object.
	ptr  objptr
	// data is the underlying parsed object; nil represents a PDF null.
	data interface{}
}
 448 | 
 449 | // IsNull reports whether the value is a null. It is equivalent to Kind() == Null.
 450 | func (v Value) IsNull() bool {
 451 | 	return v.data == nil
 452 | }
 453 | 
// A ValueKind specifies the kind of data underlying a Value.
// Kind maps the Go type of the stored data to one of the constants below.
type ValueKind int

// The PDF value kinds.
const (
	// Null is reported for a Value with no data (including the zero Value)
	// and for any data type not listed below.
	Null ValueKind = iota
	// Bool is reported when the data is a Go bool.
	Bool
	// Integer is reported when the data is an int64.
	Integer
	// Real is reported when the data is a float64.
	Real
	// String is reported when the data is a Go string.
	String
	// Name is reported when the data is a name.
	Name
	// Dict is reported when the data is a dict.
	Dict
	// Array is reported when the data is an array.
	Array
	// Stream is reported when the data is a stream.
	Stream
)
 469 | 
 470 | // Kind reports the kind of value underlying v.
 471 | func (v Value) Kind() ValueKind {
 472 | 	switch v.data.(type) {
 473 | 	default:
 474 | 		return Null
 475 | 	case bool:
 476 | 		return Bool
 477 | 	case int64:
 478 | 		return Integer
 479 | 	case float64:
 480 | 		return Real
 481 | 	case string:
 482 | 		return String
 483 | 	case name:
 484 | 		return Name
 485 | 	case dict:
 486 | 		return Dict
 487 | 	case array:
 488 | 		return Array
 489 | 	case stream:
 490 | 		return Stream
 491 | 	}
 492 | }
 493 | 
 494 | // String returns a textual representation of the value v.
 495 | // Note that String is not the accessor for values with Kind() == String.
 496 | // To access such values, see RawString, Text, and TextFromUTF16.
 497 | func (v Value) String() string {
 498 | 	return objfmt(v.data)
 499 | }
 500 | 
 501 | func objfmt(x interface{}) string {
 502 | 	switch x := x.(type) {
 503 | 	default:
 504 | 		return fmt.Sprint(x)
 505 | 	case string:
 506 | 		if isPDFDocEncoded(x) {
 507 | 			return strconv.Quote(pdfDocDecode(x))
 508 | 		}
 509 | 		if isUTF16(x) {
 510 | 			return strconv.Quote(utf16Decode(x[2:]))
 511 | 		}
 512 | 		return strconv.Quote(x)
 513 | 	case name:
 514 | 		return "/" + string(x)
 515 | 	case dict:
 516 | 		var keys []string
 517 | 		for k := range x {
 518 | 			keys = append(keys, string(k))
 519 | 		}
 520 | 		sort.Strings(keys)
 521 | 		var buf bytes.Buffer
 522 | 		buf.WriteString("<<")
 523 | 		for i, k := range keys {
 524 | 			elem := x[name(k)]
 525 | 			if i > 0 {
 526 | 				buf.WriteString(" ")
 527 | 			}
 528 | 			buf.WriteString("/")
 529 | 			buf.WriteString(k)
 530 | 			buf.WriteString(" ")
 531 | 			buf.WriteString(objfmt(elem))
 532 | 		}
 533 | 		buf.WriteString(">>")
 534 | 		return buf.String()
 535 | 
 536 | 	case array:
 537 | 		var buf bytes.Buffer
 538 | 		buf.WriteString("[")
 539 | 		for i, elem := range x {
 540 | 			if i > 0 {
 541 | 				buf.WriteString(" ")
 542 | 			}
 543 | 			buf.WriteString(objfmt(elem))
 544 | 		}
 545 | 		buf.WriteString("]")
 546 | 		return buf.String()
 547 | 
 548 | 	case stream:
 549 | 		return fmt.Sprintf("%v@%d", objfmt(x.hdr), x.offset)
 550 | 
 551 | 	case objptr:
 552 | 		return fmt.Sprintf("%d %d R", x.id, x.gen)
 553 | 
 554 | 	case objdef:
 555 | 		return fmt.Sprintf("{%d %d obj}%v", x.ptr.id, x.ptr.gen, objfmt(x.obj))
 556 | 	}
 557 | }
 558 | 
 559 | // Bool returns v's boolean value.
 560 | // If v.Kind() != Bool, Bool returns false.
 561 | func (v Value) Bool() bool {
 562 | 	x, ok := v.data.(bool)
 563 | 	if !ok {
 564 | 		return false
 565 | 	}
 566 | 	return x
 567 | }
 568 | 
 569 | // Int64 returns v's int64 value.
 570 | // If v.Kind() != Int64, Int64 returns 0.
 571 | func (v Value) Int64() int64 {
 572 | 	x, ok := v.data.(int64)
 573 | 	if !ok {
 574 | 		return 0
 575 | 	}
 576 | 	return x
 577 | }
 578 | 
 579 | // Float64 returns v's float64 value, converting from integer if necessary.
 580 | // If v.Kind() != Float64 and v.Kind() != Int64, Float64 returns 0.
 581 | func (v Value) Float64() float64 {
 582 | 	x, ok := v.data.(float64)
 583 | 	if !ok {
 584 | 		x, ok := v.data.(int64)
 585 | 		if ok {
 586 | 			return float64(x)
 587 | 		}
 588 | 		return 0
 589 | 	}
 590 | 	return x
 591 | }
 592 | 
 593 | // RawString returns v's string value.
 594 | // If v.Kind() != String, RawString returns the empty string.
 595 | func (v Value) RawString() string {
 596 | 	x, ok := v.data.(string)
 597 | 	if !ok {
 598 | 		return ""
 599 | 	}
 600 | 	return x
 601 | }
 602 | 
 603 | // Text returns v's string value interpreted as a ``text string'' (defined in the PDF spec)
 604 | // and converted to UTF-8.
 605 | // If v.Kind() != String, Text returns the empty string.
 606 | func (v Value) Text() string {
 607 | 	x, ok := v.data.(string)
 608 | 	if !ok {
 609 | 		return ""
 610 | 	}
 611 | 	if isPDFDocEncoded(x) {
 612 | 		return pdfDocDecode(x)
 613 | 	}
 614 | 	if isUTF16(x) {
 615 | 		return utf16Decode(x[2:])
 616 | 	}
 617 | 	return x
 618 | }
 619 | 
 620 | // TextFromUTF16 returns v's string value interpreted as big-endian UTF-16
 621 | // and then converted to UTF-8.
 622 | // If v.Kind() != String or if the data is not valid UTF-16, TextFromUTF16 returns
 623 | // the empty string.
 624 | func (v Value) TextFromUTF16() string {
 625 | 	x, ok := v.data.(string)
 626 | 	if !ok {
 627 | 		return ""
 628 | 	}
 629 | 	if len(x)%2 == 1 {
 630 | 		return ""
 631 | 	}
 632 | 	if x == "" {
 633 | 		return ""
 634 | 	}
 635 | 	return utf16Decode(x)
 636 | }
 637 | 
 638 | // Name returns v's name value.
 639 | // If v.Kind() != Name, Name returns the empty string.
 640 | // The returned name does not include the leading slash:
 641 | // if v corresponds to the name written using the syntax /Helvetica,
 642 | // Name() == "Helvetica".
 643 | func (v Value) Name() string {
 644 | 	x, ok := v.data.(name)
 645 | 	if !ok {
 646 | 		return ""
 647 | 	}
 648 | 	return string(x)
 649 | }
 650 | 
 651 | // Key returns the value associated with the given name key in the dictionary v.
 652 | // Like the result of the Name method, the key should not include a leading slash.
 653 | // If v is a stream, Key applies to the stream's header dictionary.
 654 | // If v.Kind() != Dict and v.Kind() != Stream, Key returns a null Value.
 655 | func (v Value) Key(key string) Value {
 656 | 	x, ok := v.data.(dict)
 657 | 	if !ok {
 658 | 		strm, ok := v.data.(stream)
 659 | 		if !ok {
 660 | 			return Value{}
 661 | 		}
 662 | 		x = strm.hdr
 663 | 	}
 664 | 	return v.r.resolve(v.ptr, x[name(key)])
 665 | }
 666 | 
 667 | // Keys returns a sorted list of the keys in the dictionary v.
 668 | // If v is a stream, Keys applies to the stream's header dictionary.
 669 | // If v.Kind() != Dict and v.Kind() != Stream, Keys returns nil.
 670 | func (v Value) Keys() []string {
 671 | 	x, ok := v.data.(dict)
 672 | 	if !ok {
 673 | 		strm, ok := v.data.(stream)
 674 | 		if !ok {
 675 | 			return nil
 676 | 		}
 677 | 		x = strm.hdr
 678 | 	}
 679 | 	keys := []string{} // not nil
 680 | 	for k := range x {
 681 | 		keys = append(keys, string(k))
 682 | 	}
 683 | 	sort.Strings(keys)
 684 | 	return keys
 685 | }
 686 | 
 687 | // Index returns the i'th element in the array v.
 688 | // If v.Kind() != Array or if i is outside the array bounds,
 689 | // Index returns a null Value.
 690 | func (v Value) Index(i int) Value {
 691 | 	x, ok := v.data.(array)
 692 | 	if !ok || i < 0 || i >= len(x) {
 693 | 		return Value{}
 694 | 	}
 695 | 	return v.r.resolve(v.ptr, x[i])
 696 | }
 697 | 
 698 | // Len returns the length of the array v.
 699 | // If v.Kind() != Array, Len returns a null Value.
 700 | func (v Value) Len() int {
 701 | 	x, ok := v.data.(array)
 702 | 	if !ok {
 703 | 		return 0
 704 | 	}
 705 | 	return len(x)
 706 | }
 707 | 
 708 | func (r *Reader) resolve(parent objptr, x interface{}) Value {
 709 | 	if ptr, ok := x.(objptr); ok {
 710 | 		if ptr.id >= uint32(len(r.xref)) {
 711 | 			return Value{}
 712 | 		}
 713 | 		xref := r.xref[ptr.id]
 714 | 		if xref.ptr != ptr || !xref.inStream && xref.offset == 0 {
 715 | 			return Value{}
 716 | 		}
 717 | 		var obj object
 718 | 		if xref.inStream {
 719 | 			strm := r.resolve(parent, xref.stream)
 720 | 		Search:
 721 | 			for {
 722 | 				if strm.Kind() != Stream {
 723 | 					panic("not a stream")
 724 | 				}
 725 | 				if strm.Key("Type").Name() != "ObjStm" {
 726 | 					panic("not an object stream")
 727 | 				}
 728 | 				n := int(strm.Key("N").Int64())
 729 | 				first := strm.Key("First").Int64()
 730 | 				if first == 0 {
 731 | 					panic("missing First")
 732 | 				}
 733 | 				b := newBuffer(strm.Reader(), 0)
 734 | 				b.allowEOF = true
 735 | 				for i := 0; i < n; i++ {
 736 | 					id, _ := b.readToken().(int64)
 737 | 					off, _ := b.readToken().(int64)
 738 | 					if uint32(id) == ptr.id {
 739 | 						b.seekForward(first + off)
 740 | 						x = b.readObject()
 741 | 						break Search
 742 | 					}
 743 | 				}
 744 | 				ext := strm.Key("Extends")
 745 | 				if ext.Kind() != Stream {
 746 | 					panic("cannot find object in stream")
 747 | 				}
 748 | 				strm = ext
 749 | 			}
 750 | 		} else {
 751 | 			b := newBuffer(io.NewSectionReader(r.f, xref.offset, r.end-xref.offset), xref.offset)
 752 | 			b.key = r.key
 753 | 			b.useAES = r.useAES
 754 | 			obj = b.readObject()
 755 | 			def, ok := obj.(objdef)
 756 | 			if !ok {
 757 | 				panic(fmt.Errorf("loading %v: found %T instead of objdef", ptr, obj))
 758 | 				return Value{}
 759 | 			}
 760 | 			if def.ptr != ptr {
 761 | 				panic(fmt.Errorf("loading %v: found %v", ptr, def.ptr))
 762 | 			}
 763 | 			x = def.obj
 764 | 		}
 765 | 		parent = ptr
 766 | 	}
 767 | 
 768 | 	switch x := x.(type) {
 769 | 	case nil, bool, int64, float64, name, dict, array, stream:
 770 | 		return Value{r, parent, x}
 771 | 	case string:
 772 | 		return Value{r, parent, x}
 773 | 	default:
 774 | 		panic(fmt.Errorf("unexpected value type %T in resolve", x))
 775 | 	}
 776 | }
 777 | 
 778 | type errorReadCloser struct {
 779 | 	err error
 780 | }
 781 | 
 782 | func (e *errorReadCloser) Read([]byte) (int, error) {
 783 | 	return 0, e.err
 784 | }
 785 | 
 786 | func (e *errorReadCloser) Close() error {
 787 | 	return e.err
 788 | }
 789 | 
 790 | // Reader returns the data contained in the stream v.
 791 | // If v.Kind() != Stream, Reader returns a ReadCloser that
 792 | // responds to all reads with a ``stream not present'' error.
 793 | func (v Value) Reader() io.ReadCloser {
 794 | 	x, ok := v.data.(stream)
 795 | 	if !ok {
 796 | 		return &errorReadCloser{fmt.Errorf("stream not present")}
 797 | 	}
 798 | 	var rd io.Reader
 799 | 	rd = io.NewSectionReader(v.r.f, x.offset, v.Key("Length").Int64())
 800 | 	if v.r.key != nil {
 801 | 		rd = decryptStream(v.r.key, v.r.useAES, x.ptr, rd)
 802 | 	}
 803 | 	filter := v.Key("Filter")
 804 | 	param := v.Key("DecodeParms")
 805 | 	switch filter.Kind() {
 806 | 	default:
 807 | 		panic(fmt.Errorf("unsupported filter %v", filter))
 808 | 	case Null:
 809 | 		// ok
 810 | 	case Name:
 811 | 		rd = applyFilter(rd, filter.Name(), param)
 812 | 	case Array:
 813 | 		for i := 0; i < filter.Len(); i++ {
 814 | 			rd = applyFilter(rd, filter.Index(i).Name(), param.Index(i))
 815 | 		}
 816 | 	}
 817 | 
 818 | 	return ioutil.NopCloser(rd)
 819 | }
 820 | 
 821 | func applyFilter(rd io.Reader, name string, param Value) io.Reader {
 822 | 	switch name {
 823 | 	default:
 824 | 		panic("unknown filter " + name)
 825 | 	case "FlateDecode":
 826 | 		zr, err := zlib.NewReader(rd)
 827 | 		if err != nil {
 828 | 			panic(err)
 829 | 		}
 830 | 		pred := param.Key("Predictor")
 831 | 		if pred.Kind() == Null {
 832 | 			return zr
 833 | 		}
 834 | 		columns := param.Key("Columns").Int64()
 835 | 		switch pred.Int64() {
 836 | 		default:
 837 | 			fmt.Println("unknown predictor", pred)
 838 | 			panic("pred")
 839 | 		case 12:
 840 | 			return &pngUpReader{r: zr, hist: make([]byte, 1+columns), tmp: make([]byte, 1+columns)}
 841 | 		}
 842 | 	}
 843 | }
 844 | 
 845 | type pngUpReader struct {
 846 | 	r    io.Reader
 847 | 	hist []byte
 848 | 	tmp  []byte
 849 | 	pend []byte
 850 | }
 851 | 
 852 | func (r *pngUpReader) Read(b []byte) (int, error) {
 853 | 	n := 0
 854 | 	for len(b) > 0 {
 855 | 		if len(r.pend) > 0 {
 856 | 			m := copy(b, r.pend)
 857 | 			n += m
 858 | 			b = b[m:]
 859 | 			r.pend = r.pend[m:]
 860 | 			continue
 861 | 		}
 862 | 		_, err := io.ReadFull(r.r, r.tmp)
 863 | 		if err != nil {
 864 | 			return n, err
 865 | 		}
 866 | 		if r.tmp[0] != 2 {
 867 | 			return n, fmt.Errorf("malformed PNG-Up encoding")
 868 | 		}
 869 | 		for i, b := range r.tmp {
 870 | 			r.hist[i] += b
 871 | 		}
 872 | 		r.pend = r.hist[1:]
 873 | 	}
 874 | 	return n, nil
 875 | }
 876 | 
// passwordPad is the fixed 32-byte padding string used by initEncrypt:
// a password shorter than 32 bytes is extended with a prefix of it before
// hashing, and the pad itself is the plaintext from which the expected
// user-password check value is computed.
var passwordPad = []byte{
	0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41, 0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
	0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80, 0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A,
}
 881 | 
// initEncrypt checks password against the document's /Encrypt dictionary
// and, on success, stores the derived encryption key in r.key and sets
// r.useAES when the encryption version V is 4.
//
// Only the /Standard filter is accepted, with V equal to 1, 2 or 4 (the
// latter subject to okayV4), a revision R between 2 and 4, and a key length
// that is a multiple of 8 between 40 and 128 bits.
//
// It returns ErrInvalidPassword when the value computed from password does
// not match the /U entry, and a "malformed PDF" or "unsupported PDF" error
// when the encryption parameters are missing, inconsistent or not handled.
func (r *Reader) initEncrypt(password string) error {
	// See PDF 32000-1:2008, §7.6.
	// A failed assertion leaves encrypt nil, so the lookups below yield nil
	// and the Filter check rejects the document.
	encrypt, _ := r.resolve(objptr{}, r.trailer["Encrypt"]).data.(dict)
	if encrypt["Filter"] != name("Standard") {
		return fmt.Errorf("unsupported PDF: encryption filter %v", objfmt(encrypt["Filter"]))
	}
	// n is the key length in bits; a missing (or zero) /Length means 40.
	n, _ := encrypt["Length"].(int64)
	if n == 0 {
		n = 40
	}
	if n%8 != 0 || n > 128 || n < 40 {
		return fmt.Errorf("malformed PDF: %d-bit encryption key", n)
	}
	V, _ := encrypt["V"].(int64)
	if V != 1 && V != 2 && (V != 4 || !okayV4(encrypt)) {
		return fmt.Errorf("unsupported PDF: encryption version V=%d; %v", V, objfmt(encrypt))
	}

	// The first element of the trailer's /ID array is mixed into the key.
	ids, ok := r.trailer["ID"].(array)
	if !ok || len(ids) < 1 {
		return fmt.Errorf("malformed PDF: missing ID in trailer")
	}
	idstr, ok := ids[0].(string)
	if !ok {
		return fmt.Errorf("malformed PDF: missing ID in trailer")
	}
	ID := []byte(idstr)

	R, _ := encrypt["R"].(int64)
	if R < 2 {
		return fmt.Errorf("malformed PDF: encryption revision R=%d", R)
	}
	if R > 4 {
		return fmt.Errorf("unsupported PDF: encryption revision R=%d", R)
	}
	O, _ := encrypt["O"].(string)
	U, _ := encrypt["U"].(string)
	if len(O) != 32 || len(U) != 32 {
		return fmt.Errorf("malformed PDF: missing O= or U= encryption parameters")
	}
	p, _ := encrypt["P"].(int64)
	P := uint32(p)

	// Derive the key: MD5 over the password padded or truncated to 32
	// bytes, then /O, then /P as four bytes (low-order byte first), then
	// the ID.
	// TODO: Password should be converted to Latin-1.
	pw := []byte(password)
	h := md5.New()
	if len(pw) >= 32 {
		h.Write(pw[:32])
	} else {
		h.Write(pw)
		h.Write(passwordPad[:32-len(pw)])
	}
	h.Write([]byte(O))
	h.Write([]byte{byte(P), byte(P >> 8), byte(P >> 16), byte(P >> 24)})
	h.Write([]byte(ID))
	key := h.Sum(nil)

	if R >= 3 {
		// Re-hash the first n/8 bytes of the digest 50 times, then keep
		// n/8 bytes as the key.
		for i := 0; i < 50; i++ {
			h.Reset()
			h.Write(key[:n/8])
			key = h.Sum(key[:0])
		}
		key = key[:n/8]
	} else {
		// Revision 2 always uses a 40-bit key, whatever /Length says.
		key = key[:40/8]
	}

	c, err := rc4.NewCipher(key)
	if err != nil {
		return fmt.Errorf("malformed PDF: invalid RC4 key: %v", err)
	}

	// Compute u, the value /U is expected to start with if password is
	// correct.
	var u []byte
	if R == 2 {
		// Revision 2: the pad encrypted with the key.
		u = make([]byte, 32)
		copy(u, passwordPad)
		c.XORKeyStream(u, u)
	} else {
		// Revision 3 and 4: MD5 of the pad and the ID, encrypted with the
		// key and then 19 more times with the key XORed with 1 through 19.
		h.Reset()
		h.Write(passwordPad)
		h.Write([]byte(ID))
		u = h.Sum(nil)
		c.XORKeyStream(u, u)

		for i := 1; i <= 19; i++ {
			key1 := make([]byte, len(key))
			copy(key1, key)
			for j := range key1 {
				key1[j] ^= byte(i)
			}
			// The error is ignored: key1 has the same length as key, which
			// rc4.NewCipher accepted above.
			c, _ = rc4.NewCipher(key1)
			c.XORKeyStream(u, u)
		}
	}

	// For revisions 3 and 4 u is 16 bytes long, so only the first half of
	// the 32-byte /U value is compared.
	if !bytes.HasPrefix([]byte(U), u) {
		return ErrInvalidPassword
	}

	r.key = key
	r.useAES = V == 4

	return nil
}
 987 | 
// ErrInvalidPassword is the error returned by initEncrypt when the value
// derived from the supplied password does not match the /U entry of the
// document's encryption dictionary.
var ErrInvalidPassword = fmt.Errorf("encrypted PDF: invalid password")
 989 | 
 990 | func okayV4(encrypt dict) bool {
 991 | 	cf, ok := encrypt["CF"].(dict)
 992 | 	if !ok {
 993 | 		return false
 994 | 	}
 995 | 	stmf, ok := encrypt["StmF"].(name)
 996 | 	if !ok {
 997 | 		return false
 998 | 	}
 999 | 	strf, ok := encrypt["StrF"].(name)
1000 | 	if !ok {
1001 | 		return false
1002 | 	}
1003 | 	if stmf != strf {
1004 | 		return false
1005 | 	}
1006 | 	cfparam, ok := cf[stmf].(dict)
1007 | 	if cfparam["AuthEvent"] != nil && cfparam["AuthEvent"] != name("DocOpen") {
1008 | 		return false
1009 | 	}
1010 | 	if cfparam["Length"] != nil && cfparam["Length"] != int64(16) {
1011 | 		return false
1012 | 	}
1013 | 	if cfparam["CFM"] != name("AESV2") {
1014 | 		return false
1015 | 	}
1016 | 	return true
1017 | }
1018 | 
1019 | func cryptKey(key []byte, useAES bool, ptr objptr) []byte {
1020 | 	h := md5.New()
1021 | 	h.Write(key)
1022 | 	h.Write([]byte{byte(ptr.id), byte(ptr.id >> 8), byte(ptr.id >> 16), byte(ptr.gen), byte(ptr.gen >> 8)})
1023 | 	if useAES {
1024 | 		h.Write([]byte("sAlT"))
1025 | 	}
1026 | 	return h.Sum(nil)
1027 | }
1028 | 
1029 | func decryptString(key []byte, useAES bool, ptr objptr, x string) string {
1030 | 	key = cryptKey(key, useAES, ptr)
1031 | 	if useAES {
1032 | 		panic("AES not implemented")
1033 | 	} else {
1034 | 		c, _ := rc4.NewCipher(key)
1035 | 		data := []byte(x)
1036 | 		c.XORKeyStream(data, data)
1037 | 		x = string(data)
1038 | 	}
1039 | 	return x
1040 | }
1041 | 
1042 | func decryptStream(key []byte, useAES bool, ptr objptr, rd io.Reader) io.Reader {
1043 | 	key = cryptKey(key, useAES, ptr)
1044 | 	if useAES {
1045 | 		cb, err := aes.NewCipher(key)
1046 | 		if err != nil {
1047 | 			panic("AES: " + err.Error())
1048 | 		}
1049 | 		iv := make([]byte, 16)
1050 | 		io.ReadFull(rd, iv)
1051 | 		cbc := cipher.NewCBCDecrypter(cb, iv)
1052 | 		rd = &cbcReader{cbc: cbc, rd: rd, buf: make([]byte, 16)}
1053 | 	} else {
1054 | 		c, _ := rc4.NewCipher(key)
1055 | 		rd = &cipher.StreamReader{c, rd}
1056 | 	}
1057 | 	return rd
1058 | }
1059 | 
// cbcReader decrypts data from rd one buffer at a time using the block mode
// cbc. Decrypted bytes are returned exactly as produced; nothing is
// stripped from the end of the data.
type cbcReader struct {
	cbc  cipher.BlockMode // block mode applied to each buffer read
	rd   io.Reader        // source of encrypted data
	buf  []byte           // holds one encrypted, then decrypted, chunk
	pend []byte           // part of buf not yet handed to the caller
}

// Read copies pending decrypted bytes into b, first reading and decrypting
// one more full buffer from the source when nothing is pending. A single
// call returns at most what is left of the current buffer. Errors from
// filling the buffer, including a short final chunk, are returned with
// n == 0.
func (r *cbcReader) Read(b []byte) (n int, err error) {
	if len(r.pend) == 0 {
		if err = r.fill(); err != nil {
			return 0, err
		}
	}
	n = copy(b, r.pend)
	r.pend = r.pend[n:]
	return n, nil
}

// fill reads exactly len(r.buf) bytes from the source, decrypts them in
// place and makes them the pending data.
func (r *cbcReader) fill() error {
	if _, err := io.ReadFull(r.rd, r.buf); err != nil {
		return err
	}
	r.cbc.CryptBlocks(r.buf, r.buf)
	r.pend = r.buf
	return nil
}
1080 | 


--------------------------------------------------------------------------------