├── README.md ├── LICENSE ├── pdfpasswd └── main.go ├── ps.go ├── text.go ├── lex.go ├── page.go └── read.go /README.md: -------------------------------------------------------------------------------- 1 | go get rsc.io/pdf 2 | 3 | http://godoc.org/rsc.io/pdf 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009 The Go Authors. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Google Inc. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /pdfpasswd/main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Pdfpasswd searches for the password for an encrypted PDF 6 | // by trying all strings over a given alphabet up to a given length. 
7 | package main 8 | 9 | import ( 10 | "flag" 11 | "fmt" 12 | "log" 13 | "os" 14 | 15 | "rsc.io/pdf" 16 | ) 17 | 18 | var ( 19 | alphabet = flag.String("a", "0123456789", "alphabet") 20 | maxLength = flag.Int("m", 4, "max length") 21 | ) 22 | 23 | func usage() { 24 | fmt.Fprintf(os.Stderr, "usage: pdfpasswd [-a alphabet] [-m maxlength] file\n") 25 | os.Exit(2) 26 | } 27 | 28 | func main() { 29 | log.SetFlags(0) 30 | log.SetPrefix("pdfpasswd: ") 31 | 32 | flag.Usage = usage 33 | flag.Parse() 34 | if flag.NArg() != 1 { 35 | usage() 36 | } 37 | 38 | f, err := os.Open(flag.Arg(0)) 39 | if err != nil { 40 | log.Fatal(err) 41 | } 42 | 43 | last := "" 44 | alpha := *alphabet 45 | ctr := make([]int, *maxLength) 46 | pw := func() string { 47 | inc(ctr, len(alpha)+1) 48 | for !valid(ctr) { 49 | inc(ctr, len(alpha)+1) 50 | } 51 | if done(ctr) { 52 | return "" 53 | } 54 | buf := make([]byte, len(ctr)) 55 | var i int 56 | for i = 0; i < len(buf); i++ { 57 | if ctr[i] == 0 { 58 | break 59 | } 60 | buf[i] = alpha[ctr[i]-1] 61 | } 62 | last = string(buf[:i]) 63 | println(last) 64 | return last 65 | } 66 | st, err := f.Stat() 67 | if err != nil { 68 | log.Fatal(err) 69 | } 70 | _, err = pdf.NewReaderEncrypted(f, st.Size(), pw) 71 | if err != nil { 72 | if err == pdf.ErrInvalidPassword { 73 | log.Fatal("password not found") 74 | } 75 | log.Fatal("reading pdf: %v", err) 76 | } 77 | fmt.Printf("password: %q\n", last) 78 | } 79 | 80 | func inc(ctr []int, n int) { 81 | for i := 0; i < len(ctr); i++ { 82 | ctr[i]++ 83 | if ctr[i] < n { 84 | break 85 | } 86 | ctr[i] = 0 87 | } 88 | } 89 | 90 | func done(ctr []int) bool { 91 | for _, x := range ctr { 92 | if x != 0 { 93 | return false 94 | } 95 | } 96 | return true 97 | } 98 | 99 | func valid(ctr []int) bool { 100 | i := len(ctr) 101 | for i > 0 && ctr[i-1] == 0 { 102 | i-- 103 | } 104 | for i--; i >= 0; i-- { 105 | if ctr[i] == 0 { 106 | return false 107 | } 108 | } 109 | return true 110 | } 111 | 
-------------------------------------------------------------------------------- /ps.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package pdf 6 | 7 | import ( 8 | "fmt" 9 | "io" 10 | ) 11 | 12 | // A Stack represents a stack of values. 13 | type Stack struct { 14 | stack []Value 15 | } 16 | 17 | func (stk *Stack) Len() int { 18 | return len(stk.stack) 19 | } 20 | 21 | func (stk *Stack) Push(v Value) { 22 | stk.stack = append(stk.stack, v) 23 | } 24 | 25 | func (stk *Stack) Pop() Value { 26 | n := len(stk.stack) 27 | if n == 0 { 28 | return Value{} 29 | } 30 | v := stk.stack[n-1] 31 | stk.stack[n-1] = Value{} 32 | stk.stack = stk.stack[:n-1] 33 | return v 34 | } 35 | 36 | func newDict() Value { 37 | return Value{nil, objptr{}, make(dict)} 38 | } 39 | 40 | // Interpret interprets the content in a stream as a basic PostScript program, 41 | // pushing values onto a stack and then calling the do function to execute 42 | // operators. The do function may push or pop values from the stack as needed 43 | // to implement op. 44 | // 45 | // Interpret handles the operators "dict", "currentdict", "begin", "end", "def", and "pop" itself. 46 | // 47 | // Interpret is not a full-blown PostScript interpreter. Its job is to handle the 48 | // very limited PostScript found in certain supporting file formats embedded 49 | // in PDF files, such as cmap files that describe the mapping from font code 50 | // points to Unicode code points. 51 | // 52 | // There is no support for executable blocks, among other limitations. 
53 | // 54 | func Interpret(strm Value, do func(stk *Stack, op string)) { 55 | rd := strm.Reader() 56 | b := newBuffer(rd, 0) 57 | b.allowEOF = true 58 | b.allowObjptr = false 59 | b.allowStream = false 60 | var stk Stack 61 | var dicts []dict 62 | Reading: 63 | for { 64 | tok := b.readToken() 65 | if tok == io.EOF { 66 | break 67 | } 68 | if kw, ok := tok.(keyword); ok { 69 | switch kw { 70 | case "null", "[", "]", "<<", ">>": 71 | break 72 | default: 73 | for i := len(dicts) - 1; i >= 0; i-- { 74 | if v, ok := dicts[i][name(kw)]; ok { 75 | stk.Push(Value{nil, objptr{}, v}) 76 | continue Reading 77 | } 78 | } 79 | do(&stk, string(kw)) 80 | continue 81 | case "dict": 82 | stk.Pop() 83 | stk.Push(Value{nil, objptr{}, make(dict)}) 84 | continue 85 | case "currentdict": 86 | if len(dicts) == 0 { 87 | panic("no current dictionary") 88 | } 89 | stk.Push(Value{nil, objptr{}, dicts[len(dicts)-1]}) 90 | continue 91 | case "begin": 92 | d := stk.Pop() 93 | if d.Kind() != Dict { 94 | panic("cannot begin non-dict") 95 | } 96 | dicts = append(dicts, d.data.(dict)) 97 | continue 98 | case "end": 99 | if len(dicts) <= 0 { 100 | panic("mismatched begin/end") 101 | } 102 | dicts = dicts[:len(dicts)-1] 103 | continue 104 | case "def": 105 | if len(dicts) <= 0 { 106 | panic("def without open dict") 107 | } 108 | val := stk.Pop() 109 | key, ok := stk.Pop().data.(name) 110 | if !ok { 111 | panic("def of non-name") 112 | } 113 | dicts[len(dicts)-1][key] = val.data 114 | continue 115 | case "pop": 116 | stk.Pop() 117 | continue 118 | } 119 | } 120 | b.unreadToken(tok) 121 | obj := b.readObject() 122 | stk.Push(Value{nil, objptr{}, obj}) 123 | } 124 | } 125 | 126 | type seqReader struct { 127 | rd io.Reader 128 | offset int64 129 | } 130 | 131 | func (r *seqReader) ReadAt(buf []byte, offset int64) (int, error) { 132 | if offset != r.offset { 133 | return 0, fmt.Errorf("non-sequential read of stream") 134 | } 135 | n, err := io.ReadFull(r.rd, buf) 136 | r.offset += int64(n) 137 | return 
n, err 138 | } 139 | -------------------------------------------------------------------------------- /text.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package pdf 6 | 7 | import ( 8 | "unicode" 9 | "unicode/utf16" 10 | ) 11 | 12 | const noRune = unicode.ReplacementChar 13 | 14 | func isPDFDocEncoded(s string) bool { 15 | if isUTF16(s) { 16 | return false 17 | } 18 | for i := 0; i < len(s); i++ { 19 | if pdfDocEncoding[s[i]] == noRune { 20 | return false 21 | } 22 | } 23 | return true 24 | } 25 | 26 | func pdfDocDecode(s string) string { 27 | for i := 0; i < len(s); i++ { 28 | if s[i] >= 0x80 || pdfDocEncoding[s[i]] != rune(s[i]) { 29 | goto Decode 30 | } 31 | } 32 | return s 33 | 34 | Decode: 35 | r := make([]rune, len(s)) 36 | for i := 0; i < len(s); i++ { 37 | r[i] = pdfDocEncoding[s[i]] 38 | } 39 | return string(r) 40 | } 41 | 42 | func isUTF16(s string) bool { 43 | return len(s) >= 2 && s[0] == 0xfe && s[1] == 0xff && len(s)%2 == 0 44 | } 45 | 46 | func utf16Decode(s string) string { 47 | var u []uint16 48 | for i := 0; i < len(s); i += 2 { 49 | u = append(u, uint16(s[i])<<8|uint16(s[i+1])) 50 | } 51 | return string(utf16.Decode(u)) 52 | } 53 | 54 | // See PDF 32000-1:2008, Table D.2 55 | var pdfDocEncoding = [256]rune{ 56 | noRune, noRune, noRune, noRune, noRune, noRune, noRune, noRune, 57 | noRune, 0x0009, 0x000a, noRune, noRune, 0x000d, noRune, noRune, 58 | noRune, noRune, noRune, noRune, noRune, noRune, noRune, noRune, 59 | 0x02d8, 0x02c7, 0x02c6, 0x02d9, 0x02dd, 0x02db, 0x02da, 0x02dc, 60 | 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 61 | 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 62 | 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 63 | 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 
0x003d, 0x003e, 0x003f, 64 | 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 65 | 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 66 | 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 67 | 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 68 | 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 69 | 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 70 | 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 71 | 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, noRune, 72 | 0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044, 73 | 0x2039, 0x203a, 0x2212, 0x2030, 0x201e, 0x201c, 0x201d, 0x2018, 74 | 0x2019, 0x201a, 0x2122, 0xfb01, 0xfb02, 0x0141, 0x0152, 0x0160, 75 | 0x0178, 0x017d, 0x0131, 0x0142, 0x0153, 0x0161, 0x017e, noRune, 76 | 0x20ac, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 77 | 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, noRune, 0x00ae, 0x00af, 78 | 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 79 | 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 80 | 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 81 | 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 82 | 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 83 | 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 84 | 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 85 | 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 86 | 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 87 | 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 88 | } 89 | 90 | var winAnsiEncoding = [256]rune{ 91 | 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 92 | 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 93 | 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 94 | 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 
0x001f, 95 | 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 96 | 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 97 | 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 98 | 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 99 | 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 100 | 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 101 | 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 102 | 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 103 | 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 104 | 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 105 | 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 106 | 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, 107 | 0x20ac, noRune, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021, 108 | 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, noRune, 0x017d, noRune, 109 | noRune, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 110 | 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, noRune, 0x017e, 0x0178, 111 | 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 112 | 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 113 | 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 114 | 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 115 | 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 116 | 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 117 | 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 118 | 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, 119 | 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 120 | 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 121 | 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 122 | 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, 123 | } 124 | 125 | var 
macRomanEncoding = [256]rune{ 126 | 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 127 | 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 128 | 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 129 | 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 130 | 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 131 | 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 132 | 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 133 | 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 134 | 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 135 | 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 136 | 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 137 | 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 138 | 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 139 | 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 140 | 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 141 | 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, 142 | 0x00c4, 0x00c5, 0x00c7, 0x00c9, 0x00d1, 0x00d6, 0x00dc, 0x00e1, 143 | 0x00e0, 0x00e2, 0x00e4, 0x00e3, 0x00e5, 0x00e7, 0x00e9, 0x00e8, 144 | 0x00ea, 0x00eb, 0x00ed, 0x00ec, 0x00ee, 0x00ef, 0x00f1, 0x00f3, 145 | 0x00f2, 0x00f4, 0x00f6, 0x00f5, 0x00fa, 0x00f9, 0x00fb, 0x00fc, 146 | 0x2020, 0x00b0, 0x00a2, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x00df, 147 | 0x00ae, 0x00a9, 0x2122, 0x00b4, 0x00a8, 0x2260, 0x00c6, 0x00d8, 148 | 0x221e, 0x00b1, 0x2264, 0x2265, 0x00a5, 0x00b5, 0x2202, 0x2211, 149 | 0x220f, 0x03c0, 0x222b, 0x00aa, 0x00ba, 0x03a9, 0x00e6, 0x00f8, 150 | 0x00bf, 0x00a1, 0x00ac, 0x221a, 0x0192, 0x2248, 0x2206, 0x00ab, 151 | 0x00bb, 0x2026, 0x00a0, 0x00c0, 0x00c3, 0x00d5, 0x0152, 0x0153, 152 | 0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0x25ca, 153 | 0x00ff, 0x0178, 0x2044, 0x20ac, 0x2039, 0x203a, 0xfb01, 0xfb02, 154 | 
0x2021, 0x00b7, 0x201a, 0x201e, 0x2030, 0x00c2, 0x00ca, 0x00c1, 155 | 0x00cb, 0x00c8, 0x00cd, 0x00ce, 0x00cf, 0x00cc, 0x00d3, 0x00d4, 156 | 0xf8ff, 0x00d2, 0x00da, 0x00db, 0x00d9, 0x0131, 0x02c6, 0x02dc, 157 | 0x00af, 0x02d8, 0x02d9, 0x02da, 0x00b8, 0x02dd, 0x02db, 0x02c7, 158 | } 159 | -------------------------------------------------------------------------------- /lex.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Reading of PDF tokens and objects from a raw byte stream. 6 | 7 | package pdf 8 | 9 | import ( 10 | "fmt" 11 | "io" 12 | "strconv" 13 | ) 14 | 15 | // A token is a PDF token in the input stream, one of the following Go types: 16 | // 17 | // bool, a PDF boolean 18 | // int64, a PDF integer 19 | // float64, a PDF real 20 | // string, a PDF string literal 21 | // keyword, a PDF keyword 22 | // name, a PDF name without the leading slash 23 | // 24 | type token interface{} 25 | 26 | // A name is a PDF name, without the leading slash. 27 | type name string 28 | 29 | // A keyword is a PDF keyword. 30 | // Delimiter tokens used in higher-level syntax, 31 | // such as "<<", ">>", "[", "]", "{", "}", are also treated as keywords. 32 | type keyword string 33 | 34 | // A buffer holds buffered input bytes from the PDF file. 35 | type buffer struct { 36 | r io.Reader // source of data 37 | buf []byte // buffered data 38 | pos int // read index in buf 39 | offset int64 // offset at end of buf; aka offset of next read 40 | tmp []byte // scratch space for accumulating token 41 | unread []token // queue of read but then unread tokens 42 | allowEOF bool 43 | allowObjptr bool 44 | allowStream bool 45 | eof bool 46 | key []byte 47 | useAES bool 48 | objptr objptr 49 | } 50 | 51 | // newBuffer returns a new buffer reading from r at the given offset. 
52 | func newBuffer(r io.Reader, offset int64) *buffer { 53 | return &buffer{ 54 | r: r, 55 | offset: offset, 56 | buf: make([]byte, 0, 4096), 57 | allowObjptr: true, 58 | allowStream: true, 59 | } 60 | } 61 | 62 | func (b *buffer) seek(offset int64) { 63 | b.offset = offset 64 | b.buf = b.buf[:0] 65 | b.pos = 0 66 | b.unread = b.unread[:0] 67 | } 68 | 69 | func (b *buffer) readByte() byte { 70 | if b.pos >= len(b.buf) { 71 | b.reload() 72 | if b.pos >= len(b.buf) { 73 | return '\n' 74 | } 75 | } 76 | c := b.buf[b.pos] 77 | b.pos++ 78 | return c 79 | } 80 | 81 | func (b *buffer) errorf(format string, args ...interface{}) { 82 | panic(fmt.Errorf(format, args...)) 83 | } 84 | 85 | func (b *buffer) reload() bool { 86 | n := cap(b.buf) - int(b.offset%int64(cap(b.buf))) 87 | n, err := b.r.Read(b.buf[:n]) 88 | if n == 0 && err != nil { 89 | b.buf = b.buf[:0] 90 | b.pos = 0 91 | if b.allowEOF && err == io.EOF { 92 | b.eof = true 93 | return false 94 | } 95 | b.errorf("malformed PDF: reading at offset %d: %v", b.offset, err) 96 | return false 97 | } 98 | b.offset += int64(n) 99 | b.buf = b.buf[:n] 100 | b.pos = 0 101 | return true 102 | } 103 | 104 | func (b *buffer) seekForward(offset int64) { 105 | for b.offset < offset { 106 | if !b.reload() { 107 | return 108 | } 109 | } 110 | b.pos = len(b.buf) - int(b.offset-offset) 111 | } 112 | 113 | func (b *buffer) readOffset() int64 { 114 | return b.offset - int64(len(b.buf)) + int64(b.pos) 115 | } 116 | 117 | func (b *buffer) unreadByte() { 118 | if b.pos > 0 { 119 | b.pos-- 120 | } 121 | } 122 | 123 | func (b *buffer) unreadToken(t token) { 124 | b.unread = append(b.unread, t) 125 | } 126 | 127 | func (b *buffer) readToken() token { 128 | if n := len(b.unread); n > 0 { 129 | t := b.unread[n-1] 130 | b.unread = b.unread[:n-1] 131 | return t 132 | } 133 | 134 | // Find first non-space, non-comment byte. 
135 | c := b.readByte() 136 | for { 137 | if isSpace(c) { 138 | if b.eof { 139 | return io.EOF 140 | } 141 | c = b.readByte() 142 | } else if c == '%' { 143 | for c != '\r' && c != '\n' { 144 | c = b.readByte() 145 | } 146 | } else { 147 | break 148 | } 149 | } 150 | 151 | switch c { 152 | case '<': 153 | if b.readByte() == '<' { 154 | return keyword("<<") 155 | } 156 | b.unreadByte() 157 | return b.readHexString() 158 | 159 | case '(': 160 | return b.readLiteralString() 161 | 162 | case '[', ']', '{', '}': 163 | return keyword(string(c)) 164 | 165 | case '/': 166 | return b.readName() 167 | 168 | case '>': 169 | if b.readByte() == '>' { 170 | return keyword(">>") 171 | } 172 | b.unreadByte() 173 | fallthrough 174 | 175 | default: 176 | if isDelim(c) { 177 | b.errorf("unexpected delimiter %#q", rune(c)) 178 | return nil 179 | } 180 | b.unreadByte() 181 | return b.readKeyword() 182 | } 183 | } 184 | 185 | func (b *buffer) readHexString() token { 186 | tmp := b.tmp[:0] 187 | for { 188 | Loop: 189 | c := b.readByte() 190 | if c == '>' { 191 | break 192 | } 193 | if isSpace(c) { 194 | goto Loop 195 | } 196 | Loop2: 197 | c2 := b.readByte() 198 | if isSpace(c2) { 199 | goto Loop2 200 | } 201 | x := unhex(c)<<4 | unhex(c2) 202 | if x < 0 { 203 | b.errorf("malformed hex string %c %c %s", c, c2, b.buf[b.pos:]) 204 | break 205 | } 206 | tmp = append(tmp, byte(x)) 207 | } 208 | b.tmp = tmp 209 | return string(tmp) 210 | } 211 | 212 | func unhex(b byte) int { 213 | switch { 214 | case '0' <= b && b <= '9': 215 | return int(b) - '0' 216 | case 'a' <= b && b <= 'f': 217 | return int(b) - 'a' + 10 218 | case 'A' <= b && b <= 'F': 219 | return int(b) - 'A' + 10 220 | } 221 | return -1 222 | } 223 | 224 | func (b *buffer) readLiteralString() token { 225 | tmp := b.tmp[:0] 226 | depth := 1 227 | Loop: 228 | for { 229 | c := b.readByte() 230 | switch c { 231 | default: 232 | tmp = append(tmp, c) 233 | case '(': 234 | depth++ 235 | tmp = append(tmp, c) 236 | case ')': 237 | if 
depth--; depth == 0 { 238 | break Loop 239 | } 240 | tmp = append(tmp, c) 241 | case '\\': 242 | switch c = b.readByte(); c { 243 | default: 244 | b.errorf("invalid escape sequence \\%c", c) 245 | tmp = append(tmp, '\\', c) 246 | case 'n': 247 | tmp = append(tmp, '\n') 248 | case 'r': 249 | tmp = append(tmp, '\r') 250 | case 'b': 251 | tmp = append(tmp, '\b') 252 | case 't': 253 | tmp = append(tmp, '\t') 254 | case 'f': 255 | tmp = append(tmp, '\f') 256 | case '(', ')', '\\': 257 | tmp = append(tmp, c) 258 | case '\r': 259 | if b.readByte() != '\n' { 260 | b.unreadByte() 261 | } 262 | fallthrough 263 | case '\n': 264 | // no append 265 | case '0', '1', '2', '3', '4', '5', '6', '7': 266 | x := int(c - '0') 267 | for i := 0; i < 2; i++ { 268 | c = b.readByte() 269 | if c < '0' || c > '7' { 270 | b.unreadByte() 271 | break 272 | } 273 | x = x*8 + int(c-'0') 274 | } 275 | if x > 255 { 276 | b.errorf("invalid octal escape \\%03o", x) 277 | } 278 | tmp = append(tmp, byte(x)) 279 | } 280 | } 281 | } 282 | b.tmp = tmp 283 | return string(tmp) 284 | } 285 | 286 | func (b *buffer) readName() token { 287 | tmp := b.tmp[:0] 288 | for { 289 | c := b.readByte() 290 | if isDelim(c) || isSpace(c) { 291 | b.unreadByte() 292 | break 293 | } 294 | if c == '#' { 295 | x := unhex(b.readByte())<<4 | unhex(b.readByte()) 296 | if x < 0 { 297 | b.errorf("malformed name") 298 | } 299 | tmp = append(tmp, byte(x)) 300 | continue 301 | } 302 | tmp = append(tmp, c) 303 | } 304 | b.tmp = tmp 305 | return name(string(tmp)) 306 | } 307 | 308 | func (b *buffer) readKeyword() token { 309 | tmp := b.tmp[:0] 310 | for { 311 | c := b.readByte() 312 | if isDelim(c) || isSpace(c) { 313 | b.unreadByte() 314 | break 315 | } 316 | tmp = append(tmp, c) 317 | } 318 | b.tmp = tmp 319 | s := string(tmp) 320 | switch { 321 | case s == "true": 322 | return true 323 | case s == "false": 324 | return false 325 | case isInteger(s): 326 | x, err := strconv.ParseInt(s, 10, 64) 327 | if err != nil { 328 | 
b.errorf("invalid integer %s", s) 329 | } 330 | return x 331 | case isReal(s): 332 | x, err := strconv.ParseFloat(s, 64) 333 | if err != nil { 334 | b.errorf("invalid real %s", s) 335 | } 336 | return x 337 | } 338 | return keyword(string(tmp)) 339 | } 340 | 341 | func isInteger(s string) bool { 342 | if len(s) > 0 && (s[0] == '+' || s[0] == '-') { 343 | s = s[1:] 344 | } 345 | if len(s) == 0 { 346 | return false 347 | } 348 | for _, c := range s { 349 | if c < '0' || '9' < c { 350 | return false 351 | } 352 | } 353 | return true 354 | } 355 | 356 | func isReal(s string) bool { 357 | if len(s) > 0 && (s[0] == '+' || s[0] == '-') { 358 | s = s[1:] 359 | } 360 | if len(s) == 0 { 361 | return false 362 | } 363 | ndot := 0 364 | for _, c := range s { 365 | if c == '.' { 366 | ndot++ 367 | continue 368 | } 369 | if c < '0' || '9' < c { 370 | return false 371 | } 372 | } 373 | return ndot == 1 374 | } 375 | 376 | // An object is a PDF syntax object, one of the following Go types: 377 | // 378 | // bool, a PDF boolean 379 | // int64, a PDF integer 380 | // float64, a PDF real 381 | // string, a PDF string literal 382 | // name, a PDF name without the leading slash 383 | // dict, a PDF dictionary 384 | // array, a PDF array 385 | // stream, a PDF stream 386 | // objptr, a PDF object reference 387 | // objdef, a PDF object definition 388 | // 389 | // An object may also be nil, to represent the PDF null. 
390 | type object interface{} 391 | 392 | type dict map[name]object 393 | 394 | type array []object 395 | 396 | type stream struct { 397 | hdr dict 398 | ptr objptr 399 | offset int64 400 | } 401 | 402 | type objptr struct { 403 | id uint32 404 | gen uint16 405 | } 406 | 407 | type objdef struct { 408 | ptr objptr 409 | obj object 410 | } 411 | 412 | func (b *buffer) readObject() object { 413 | tok := b.readToken() 414 | if kw, ok := tok.(keyword); ok { 415 | switch kw { 416 | case "null": 417 | return nil 418 | case "<<": 419 | return b.readDict() 420 | case "[": 421 | return b.readArray() 422 | } 423 | b.errorf("unexpected keyword %q parsing object", kw) 424 | return nil 425 | } 426 | 427 | if str, ok := tok.(string); ok && b.key != nil && b.objptr.id != 0 { 428 | tok = decryptString(b.key, b.useAES, b.objptr, str) 429 | } 430 | 431 | if !b.allowObjptr { 432 | return tok 433 | } 434 | 435 | if t1, ok := tok.(int64); ok && int64(uint32(t1)) == t1 { 436 | tok2 := b.readToken() 437 | if t2, ok := tok2.(int64); ok && int64(uint16(t2)) == t2 { 438 | tok3 := b.readToken() 439 | switch tok3 { 440 | case keyword("R"): 441 | return objptr{uint32(t1), uint16(t2)} 442 | case keyword("obj"): 443 | old := b.objptr 444 | b.objptr = objptr{uint32(t1), uint16(t2)} 445 | obj := b.readObject() 446 | if _, ok := obj.(stream); !ok { 447 | tok4 := b.readToken() 448 | if tok4 != keyword("endobj") { 449 | b.errorf("missing endobj after indirect object definition") 450 | b.unreadToken(tok4) 451 | } 452 | } 453 | b.objptr = old 454 | return objdef{objptr{uint32(t1), uint16(t2)}, obj} 455 | } 456 | b.unreadToken(tok3) 457 | } 458 | b.unreadToken(tok2) 459 | } 460 | return tok 461 | } 462 | 463 | func (b *buffer) readArray() object { 464 | var x array 465 | for { 466 | tok := b.readToken() 467 | if tok == nil || tok == keyword("]") { 468 | break 469 | } 470 | b.unreadToken(tok) 471 | x = append(x, b.readObject()) 472 | } 473 | return x 474 | } 475 | 476 | func (b *buffer) readDict() object 
{ 477 | x := make(dict) 478 | for { 479 | tok := b.readToken() 480 | if tok == nil || tok == keyword(">>") { 481 | break 482 | } 483 | n, ok := tok.(name) 484 | if !ok { 485 | b.errorf("unexpected non-name key %T(%v) parsing dictionary", tok, tok) 486 | continue 487 | } 488 | x[n] = b.readObject() 489 | } 490 | 491 | if !b.allowStream { 492 | return x 493 | } 494 | 495 | tok := b.readToken() 496 | if tok != keyword("stream") { 497 | b.unreadToken(tok) 498 | return x 499 | } 500 | 501 | switch b.readByte() { 502 | case '\r': 503 | if b.readByte() != '\n' { 504 | b.unreadByte() 505 | } 506 | case '\n': 507 | // ok 508 | default: 509 | b.errorf("stream keyword not followed by newline") 510 | } 511 | 512 | return stream{x, b.objptr, b.readOffset()} 513 | } 514 | 515 | func isSpace(b byte) bool { 516 | switch b { 517 | case '\x00', '\t', '\n', '\f', '\r', ' ': 518 | return true 519 | } 520 | return false 521 | } 522 | 523 | func isDelim(b byte) bool { 524 | switch b { 525 | case '<', '>', '(', ')', '[', ']', '{', '}', '/', '%': 526 | return true 527 | } 528 | return false 529 | } 530 | -------------------------------------------------------------------------------- /page.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package pdf 6 | 7 | import ( 8 | "fmt" 9 | "strings" 10 | ) 11 | 12 | // A Page represent a single page in a PDF file. 13 | // The methods interpret a Page dictionary stored in V. 14 | type Page struct { 15 | V Value 16 | } 17 | 18 | // Page returns the page for the given page number. 19 | // Page numbers are indexed starting at 1, not 0. 20 | // If the page is not found, Page returns a Page with p.V.IsNull(). 
21 | func (r *Reader) Page(num int) Page { 22 | num-- // now 0-indexed 23 | page := r.Trailer().Key("Root").Key("Pages") 24 | Search: 25 | for page.Key("Type").Name() == "Pages" { 26 | count := int(page.Key("Count").Int64()) 27 | if count < num { 28 | return Page{} 29 | } 30 | kids := page.Key("Kids") 31 | for i := 0; i < kids.Len(); i++ { 32 | kid := kids.Index(i) 33 | if kid.Key("Type").Name() == "Pages" { 34 | c := int(kid.Key("Count").Int64()) 35 | if num < c { 36 | page = kid 37 | continue Search 38 | } 39 | num -= c 40 | continue 41 | } 42 | if kid.Key("Type").Name() == "Page" { 43 | if num == 0 { 44 | return Page{kid} 45 | } 46 | num-- 47 | } 48 | } 49 | } 50 | return Page{} 51 | } 52 | 53 | // NumPage returns the number of pages in the PDF file. 54 | func (r *Reader) NumPage() int { 55 | return int(r.Trailer().Key("Root").Key("Pages").Key("Count").Int64()) 56 | } 57 | 58 | func (p Page) findInherited(key string) Value { 59 | for v := p.V; !v.IsNull(); v = v.Key("Parent") { 60 | if r := v.Key(key); !r.IsNull() { 61 | return r 62 | } 63 | } 64 | return Value{} 65 | } 66 | 67 | /* 68 | func (p Page) MediaBox() Value { 69 | return p.findInherited("MediaBox") 70 | } 71 | 72 | func (p Page) CropBox() Value { 73 | return p.findInherited("CropBox") 74 | } 75 | */ 76 | 77 | // Resources returns the resources dictionary associated with the page. 78 | func (p Page) Resources() Value { 79 | return p.findInherited("Resources") 80 | } 81 | 82 | // Fonts returns a list of the fonts associated with the page. 83 | func (p Page) Fonts() []string { 84 | return p.Resources().Key("Font").Keys() 85 | } 86 | 87 | // Font returns the font with the given name associated with the page. 88 | func (p Page) Font(name string) Font { 89 | return Font{p.Resources().Key("Font").Key(name)} 90 | } 91 | 92 | // A Font represent a font in a PDF file. 93 | // The methods interpret a Font dictionary stored in V. 
94 | type Font struct { 95 | V Value 96 | } 97 | 98 | // BaseFont returns the font's name (BaseFont property). 99 | func (f Font) BaseFont() string { 100 | return f.V.Key("BaseFont").Name() 101 | } 102 | 103 | // FirstChar returns the code point of the first character in the font. 104 | func (f Font) FirstChar() int { 105 | return int(f.V.Key("FirstChar").Int64()) 106 | } 107 | 108 | // LastChar returns the code point of the last character in the font. 109 | func (f Font) LastChar() int { 110 | return int(f.V.Key("LastChar").Int64()) 111 | } 112 | 113 | // Widths returns the widths of the glyphs in the font. 114 | // In a well-formed PDF, len(f.Widths()) == f.LastChar()+1 - f.FirstChar(). 115 | func (f Font) Widths() []float64 { 116 | x := f.V.Key("Widths") 117 | var out []float64 118 | for i := 0; i < x.Len(); i++ { 119 | out = append(out, x.Index(i).Float64()) 120 | } 121 | return out 122 | } 123 | 124 | // Width returns the width of the given code point. 125 | func (f Font) Width(code int) float64 { 126 | first := f.FirstChar() 127 | last := f.LastChar() 128 | if code < first || last < code { 129 | return 0 130 | } 131 | return f.V.Key("Widths").Index(code - first).Float64() 132 | } 133 | 134 | // Encoder returns the encoding between font code point sequences and UTF-8. 
135 | func (f Font) Encoder() TextEncoding { 136 | enc := f.V.Key("Encoding") 137 | switch enc.Kind() { 138 | case Name: 139 | switch enc.Name() { 140 | case "WinAnsiEncoding": 141 | return &byteEncoder{&winAnsiEncoding} 142 | case "MacRomanEncoding": 143 | return &byteEncoder{&macRomanEncoding} 144 | case "Identity-H": 145 | // TODO: Should be big-endian UCS-2 decoder 146 | return &nopEncoder{} 147 | default: 148 | println("unknown encoding", enc.Name()) 149 | return &nopEncoder{} 150 | } 151 | case Dict: 152 | return &dictEncoder{enc.Key("Differences")} 153 | case Null: 154 | // ok, try ToUnicode 155 | default: 156 | println("unexpected encoding", enc.String()) 157 | return &nopEncoder{} 158 | } 159 | 160 | toUnicode := f.V.Key("ToUnicode") 161 | if toUnicode.Kind() == Dict { 162 | m := readCmap(toUnicode) 163 | if m == nil { 164 | return &nopEncoder{} 165 | } 166 | return m 167 | } 168 | 169 | return &byteEncoder{&pdfDocEncoding} 170 | } 171 | 172 | type dictEncoder struct { 173 | v Value 174 | } 175 | 176 | func (e *dictEncoder) Decode(raw string) (text string) { 177 | r := make([]rune, 0, len(raw)) 178 | for i := 0; i < len(raw); i++ { 179 | ch := rune(raw[i]) 180 | n := -1 181 | for j := 0; j < e.v.Len(); j++ { 182 | x := e.v.Index(j) 183 | if x.Kind() == Integer { 184 | n = int(x.Int64()) 185 | continue 186 | } 187 | if x.Kind() == Name { 188 | if int(raw[i]) == n { 189 | r := nameToRune[x.Name()] 190 | if r != 0 { 191 | ch = r 192 | break 193 | } 194 | } 195 | n++ 196 | } 197 | } 198 | r = append(r, ch) 199 | } 200 | return string(r) 201 | } 202 | 203 | // A TextEncoding represents a mapping between 204 | // font code points and UTF-8 text. 205 | type TextEncoding interface { 206 | // Decode returns the UTF-8 text corresponding to 207 | // the sequence of code points in raw. 
208 | Decode(raw string) (text string) 209 | } 210 | 211 | type nopEncoder struct { 212 | } 213 | 214 | func (e *nopEncoder) Decode(raw string) (text string) { 215 | return raw 216 | } 217 | 218 | type byteEncoder struct { 219 | table *[256]rune 220 | } 221 | 222 | func (e *byteEncoder) Decode(raw string) (text string) { 223 | r := make([]rune, 0, len(raw)) 224 | for i := 0; i < len(raw); i++ { 225 | r = append(r, e.table[raw[i]]) 226 | } 227 | return string(r) 228 | } 229 | 230 | type cmap struct { 231 | space [4][][2]string 232 | bfrange []bfrange 233 | } 234 | 235 | func (m *cmap) Decode(raw string) (text string) { 236 | var r []rune 237 | Parse: 238 | for len(raw) > 0 { 239 | for n := 1; n <= 4 && n <= len(raw); n++ { 240 | for _, space := range m.space[n-1] { 241 | if space[0] <= raw[:n] && raw[:n] <= space[1] { 242 | text := raw[:n] 243 | raw = raw[n:] 244 | for _, bf := range m.bfrange { 245 | if len(bf.lo) == n && bf.lo <= text && text <= bf.hi { 246 | if bf.dst.Kind() == String { 247 | s := bf.dst.RawString() 248 | if bf.lo != text { 249 | b := []byte(s) 250 | b[len(b)-1] += text[len(text)-1] - bf.lo[len(bf.lo)-1] 251 | s = string(b) 252 | } 253 | r = append(r, []rune(utf16Decode(s))...) 
254 | continue Parse 255 | } 256 | if bf.dst.Kind() == Array { 257 | fmt.Printf("array %v\n", bf.dst) 258 | } else { 259 | fmt.Printf("unknown dst %v\n", bf.dst) 260 | } 261 | r = append(r, noRune) 262 | continue Parse 263 | } 264 | } 265 | fmt.Printf("no text for %q", text) 266 | r = append(r, noRune) 267 | continue Parse 268 | } 269 | } 270 | } 271 | println("no code space found") 272 | r = append(r, noRune) 273 | raw = raw[1:] 274 | } 275 | return string(r) 276 | } 277 | 278 | type bfrange struct { 279 | lo string 280 | hi string 281 | dst Value 282 | } 283 | 284 | func readCmap(toUnicode Value) *cmap { 285 | n := -1 286 | var m cmap 287 | ok := true 288 | Interpret(toUnicode, func(stk *Stack, op string) { 289 | if !ok { 290 | return 291 | } 292 | switch op { 293 | case "findresource": 294 | category := stk.Pop() 295 | key := stk.Pop() 296 | fmt.Println("findresource", key, category) 297 | stk.Push(newDict()) 298 | case "begincmap": 299 | stk.Push(newDict()) 300 | case "endcmap": 301 | stk.Pop() 302 | case "begincodespacerange": 303 | n = int(stk.Pop().Int64()) 304 | case "endcodespacerange": 305 | if n < 0 { 306 | println("missing begincodespacerange") 307 | ok = false 308 | return 309 | } 310 | for i := 0; i < n; i++ { 311 | hi, lo := stk.Pop().RawString(), stk.Pop().RawString() 312 | if len(lo) == 0 || len(lo) != len(hi) { 313 | println("bad codespace range") 314 | ok = false 315 | return 316 | } 317 | m.space[len(lo)-1] = append(m.space[len(lo)-1], [2]string{lo, hi}) 318 | } 319 | n = -1 320 | case "beginbfrange": 321 | n = int(stk.Pop().Int64()) 322 | case "endbfrange": 323 | if n < 0 { 324 | panic("missing beginbfrange") 325 | } 326 | for i := 0; i < n; i++ { 327 | dst, srcHi, srcLo := stk.Pop(), stk.Pop().RawString(), stk.Pop().RawString() 328 | m.bfrange = append(m.bfrange, bfrange{srcLo, srcHi, dst}) 329 | } 330 | case "defineresource": 331 | category := stk.Pop().Name() 332 | value := stk.Pop() 333 | key := stk.Pop().Name() 334 | 
fmt.Println("defineresource", key, value, category) 335 | stk.Push(value) 336 | default: 337 | println("interp\t", op) 338 | } 339 | }) 340 | if !ok { 341 | return nil 342 | } 343 | return &m 344 | } 345 | 346 | type matrix [3][3]float64 347 | 348 | var ident = matrix{{1, 0, 0}, {0, 1, 0}, {0, 0, 1}} 349 | 350 | func (x matrix) mul(y matrix) matrix { 351 | var z matrix 352 | for i := 0; i < 3; i++ { 353 | for j := 0; j < 3; j++ { 354 | for k := 0; k < 3; k++ { 355 | z[i][j] += x[i][k] * y[k][j] 356 | } 357 | } 358 | } 359 | return z 360 | } 361 | 362 | // A Text represents a single piece of text drawn on a page. 363 | type Text struct { 364 | Font string // the font used 365 | FontSize float64 // the font size, in points (1/72 of an inch) 366 | X float64 // the X coordinate, in points, increasing left to right 367 | Y float64 // the Y coordinate, in points, increasing bottom to top 368 | W float64 // the width of the text, in points 369 | S string // the actual UTF-8 text 370 | } 371 | 372 | // A Rect represents a rectangle. 373 | type Rect struct { 374 | Min, Max Point 375 | } 376 | 377 | // A Point represents an X, Y pair. 378 | type Point struct { 379 | X float64 380 | Y float64 381 | } 382 | 383 | // Content describes the basic content on a page: the text and any drawn rectangles. 384 | type Content struct { 385 | Text []Text 386 | Rect []Rect 387 | } 388 | 389 | type gstate struct { 390 | Tc float64 391 | Tw float64 392 | Th float64 393 | Tl float64 394 | Tf Font 395 | Tfs float64 396 | Tmode int 397 | Trise float64 398 | Tm matrix 399 | Tlm matrix 400 | Trm matrix 401 | CTM matrix 402 | } 403 | 404 | // Content returns the page's content. 
405 | func (p Page) Content() Content { 406 | strm := p.V.Key("Contents") 407 | var enc TextEncoding = &nopEncoder{} 408 | 409 | var g = gstate{ 410 | Th: 1, 411 | CTM: ident, 412 | } 413 | 414 | var text []Text 415 | showText := func(s string) { 416 | n := 0 417 | for _, ch := range enc.Decode(s) { 418 | Trm := matrix{{g.Tfs * g.Th, 0, 0}, {0, g.Tfs, 0}, {0, g.Trise, 1}}.mul(g.Tm).mul(g.CTM) 419 | w0 := g.Tf.Width(int(s[n])) 420 | n++ 421 | if ch != ' ' { 422 | f := g.Tf.BaseFont() 423 | if i := strings.Index(f, "+"); i >= 0 { 424 | f = f[i+1:] 425 | } 426 | text = append(text, Text{f, Trm[0][0], Trm[2][0], Trm[2][1], w0 / 1000 * Trm[0][0], string(ch)}) 427 | } 428 | tx := w0/1000*g.Tfs + g.Tc 429 | if ch == ' ' { 430 | tx += g.Tw 431 | } 432 | tx *= g.Th 433 | g.Tm = matrix{{1, 0, 0}, {0, 1, 0}, {tx, 0, 1}}.mul(g.Tm) 434 | } 435 | } 436 | 437 | var rect []Rect 438 | var gstack []gstate 439 | Interpret(strm, func(stk *Stack, op string) { 440 | n := stk.Len() 441 | args := make([]Value, n) 442 | for i := n - 1; i >= 0; i-- { 443 | args[i] = stk.Pop() 444 | } 445 | switch op { 446 | default: 447 | //fmt.Println(op, args) 448 | return 449 | 450 | case "cm": // update g.CTM 451 | if len(args) != 6 { 452 | panic("bad g.Tm") 453 | } 454 | var m matrix 455 | for i := 0; i < 6; i++ { 456 | m[i/2][i%2] = args[i].Float64() 457 | } 458 | m[2][2] = 1 459 | g.CTM = m.mul(g.CTM) 460 | 461 | case "gs": // set parameters from graphics state resource 462 | gs := p.Resources().Key("ExtGState").Key(args[0].Name()) 463 | font := gs.Key("Font") 464 | if font.Kind() == Array && font.Len() == 2 { 465 | //fmt.Println("FONT", font) 466 | } 467 | 468 | case "f": // fill 469 | case "g": // setgray 470 | case "l": // lineto 471 | case "m": // moveto 472 | 473 | case "cs": // set colorspace non-stroking 474 | case "scn": // set color non-stroking 475 | 476 | case "re": // append rectangle to path 477 | if len(args) != 4 { 478 | panic("bad re") 479 | } 480 | x, y, w, h := args[0].Float64(), 
args[1].Float64(), args[2].Float64(), args[3].Float64() 481 | rect = append(rect, Rect{Point{x, y}, Point{x + w, y + h}}) 482 | 483 | case "q": // save graphics state 484 | gstack = append(gstack, g) 485 | 486 | case "Q": // restore graphics state 487 | n := len(gstack) - 1 488 | g = gstack[n] 489 | gstack = gstack[:n] 490 | 491 | case "BT": // begin text (reset text matrix and line matrix) 492 | g.Tm = ident 493 | g.Tlm = g.Tm 494 | 495 | case "ET": // end text 496 | 497 | case "T*": // move to start of next line 498 | x := matrix{{1, 0, 0}, {0, 1, 0}, {0, -g.Tl, 1}} 499 | g.Tlm = x.mul(g.Tlm) 500 | g.Tm = g.Tlm 501 | 502 | case "Tc": // set character spacing 503 | if len(args) != 1 { 504 | panic("bad g.Tc") 505 | } 506 | g.Tc = args[0].Float64() 507 | 508 | case "TD": // move text position and set leading 509 | if len(args) != 2 { 510 | panic("bad Td") 511 | } 512 | g.Tl = -args[1].Float64() 513 | fallthrough 514 | case "Td": // move text position 515 | if len(args) != 2 { 516 | panic("bad Td") 517 | } 518 | tx := args[0].Float64() 519 | ty := args[1].Float64() 520 | x := matrix{{1, 0, 0}, {0, 1, 0}, {tx, ty, 1}} 521 | g.Tlm = x.mul(g.Tlm) 522 | g.Tm = g.Tlm 523 | 524 | case "Tf": // set text font and size 525 | if len(args) != 2 { 526 | panic("bad TL") 527 | } 528 | f := args[0].Name() 529 | g.Tf = p.Font(f) 530 | enc = g.Tf.Encoder() 531 | if enc == nil { 532 | println("no cmap for", f) 533 | enc = &nopEncoder{} 534 | } 535 | g.Tfs = args[1].Float64() 536 | 537 | case "\"": // set spacing, move to next line, and show text 538 | if len(args) != 3 { 539 | panic("bad \" operator") 540 | } 541 | g.Tw = args[0].Float64() 542 | g.Tc = args[1].Float64() 543 | args = args[2:] 544 | fallthrough 545 | case "'": // move to next line and show text 546 | if len(args) != 1 { 547 | panic("bad ' operator") 548 | } 549 | x := matrix{{1, 0, 0}, {0, 1, 0}, {0, -g.Tl, 1}} 550 | g.Tlm = x.mul(g.Tlm) 551 | g.Tm = g.Tlm 552 | fallthrough 553 | case "Tj": // show text 554 | if 
len(args) != 1 { 555 | panic("bad Tj operator") 556 | } 557 | showText(args[0].RawString()) 558 | 559 | case "TJ": // show text, allowing individual glyph positioning 560 | v := args[0] 561 | for i := 0; i < v.Len(); i++ { 562 | x := v.Index(i) 563 | if x.Kind() == String { 564 | showText(x.RawString()) 565 | } else { 566 | tx := -x.Float64() / 1000 * g.Tfs * g.Th 567 | g.Tm = matrix{{1, 0, 0}, {0, 1, 0}, {tx, 0, 1}}.mul(g.Tm) 568 | } 569 | } 570 | 571 | case "TL": // set text leading 572 | if len(args) != 1 { 573 | panic("bad TL") 574 | } 575 | g.Tl = args[0].Float64() 576 | 577 | case "Tm": // set text matrix and line matrix 578 | if len(args) != 6 { 579 | panic("bad g.Tm") 580 | } 581 | var m matrix 582 | for i := 0; i < 6; i++ { 583 | m[i/2][i%2] = args[i].Float64() 584 | } 585 | m[2][2] = 1 586 | g.Tm = m 587 | g.Tlm = m 588 | 589 | case "Tr": // set text rendering mode 590 | if len(args) != 1 { 591 | panic("bad Tr") 592 | } 593 | g.Tmode = int(args[0].Int64()) 594 | 595 | case "Ts": // set text rise 596 | if len(args) != 1 { 597 | panic("bad Ts") 598 | } 599 | g.Trise = args[0].Float64() 600 | 601 | case "Tw": // set word spacing 602 | if len(args) != 1 { 603 | panic("bad g.Tw") 604 | } 605 | g.Tw = args[0].Float64() 606 | 607 | case "Tz": // set horizontal text scaling 608 | if len(args) != 1 { 609 | panic("bad Tz") 610 | } 611 | g.Th = args[0].Float64() / 100 612 | } 613 | }) 614 | return Content{text, rect} 615 | } 616 | 617 | // TextVertical implements sort.Interface for sorting 618 | // a slice of Text values in vertical order, top to bottom, 619 | // and then left to right within a line. 
620 | type TextVertical []Text 621 | 622 | func (x TextVertical) Len() int { return len(x) } 623 | func (x TextVertical) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 624 | func (x TextVertical) Less(i, j int) bool { 625 | if x[i].Y != x[j].Y { 626 | return x[i].Y > x[j].Y 627 | } 628 | return x[i].X < x[j].X 629 | } 630 | 631 | // TextHorizontal implements sort.Interface for sorting 632 | // a slice of Text values in horizontal order, left to right, 633 | // and then top to bottom within a column. 634 | type TextHorizontal []Text 635 | 636 | func (x TextHorizontal) Len() int { return len(x) } 637 | func (x TextHorizontal) Swap(i, j int) { x[i], x[j] = x[j], x[i] } 638 | func (x TextHorizontal) Less(i, j int) bool { 639 | if x[i].X != x[j].X { 640 | return x[i].X < x[j].X 641 | } 642 | return x[i].Y > x[j].Y 643 | } 644 | 645 | // An Outline is a tree describing the outline (also known as the table of contents) 646 | // of a document. 647 | type Outline struct { 648 | Title string // title for this element 649 | Child []Outline // child elements 650 | } 651 | 652 | // Outline returns the document outline. 653 | // The Outline returned is the root of the outline tree and typically has no Title itself. 654 | // That is, the children of the returned root are the top-level entries in the outline. 655 | func (r *Reader) Outline() Outline { 656 | return buildOutline(r.Trailer().Key("Root").Key("Outlines")) 657 | } 658 | 659 | func buildOutline(entry Value) Outline { 660 | var x Outline 661 | x.Title = entry.Key("Title").Text() 662 | for child := entry.Key("First"); child.Kind() == Dict; child = child.Key("Next") { 663 | x.Child = append(x.Child, buildOutline(child)) 664 | } 665 | return x 666 | } 667 | -------------------------------------------------------------------------------- /read.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Go Authors. All rights reserved.
2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package pdf implements reading of PDF files. 6 | // 7 | // Overview 8 | // 9 | // PDF is Adobe's Portable Document Format, ubiquitous on the internet. 10 | // A PDF document is a complex data format built on a fairly simple structure. 11 | // This package exposes the simple structure along with some wrappers to 12 | // extract basic information. If more complex information is needed, it is 13 | // possible to extract that information by interpreting the structure exposed 14 | // by this package. 15 | // 16 | // Specifically, a PDF is a data structure built from Values, each of which has 17 | // one of the following Kinds: 18 | // 19 | // Null, for the null object. 20 | // Integer, for an integer. 21 | // Real, for a floating-point number. 22 | // Bool, for a boolean value. 23 | // Name, for a name constant (as in /Helvetica). 24 | // String, for a string constant. 25 | // Dict, for a dictionary of name-value pairs. 26 | // Array, for an array of values. 27 | // Stream, for an opaque data stream and associated header dictionary. 28 | // 29 | // The accessors on Value—Int64, Float64, Bool, Name, and so on—return 30 | // a view of the data as the given type. When there is no appropriate view, 31 | // the accessor returns a zero result. For example, the Name accessor returns 32 | // the empty string if called on a Value v for which v.Kind() != Name. 33 | // Returning zero values this way, especially from the Dict and Array accessors, 34 | // which themselves return Values, makes it possible to traverse a PDF quickly 35 | // without writing any error checking. On the other hand, it means that mistakes 36 | // can go unreported. 37 | // 38 | // The basic structure of the PDF file is exposed as the graph of Values. 39 | // 40 | // Most richer data structures in a PDF file are dictionaries with specific interpretations 41 | // of the name-value pairs. 
The Font and Page wrappers make the interpretation 42 | // of a specific Value as the corresponding type easier. They are only helpers, though: 43 | // they are implemented only in terms of the Value API and could be moved outside 44 | // the package. Equally important, traversal of other PDF data structures can be implemented 45 | // in other packages as needed. 46 | // 47 | package pdf 48 | 49 | // BUG(rsc): The package is incomplete, although it has been used successfully on some 50 | // large real-world PDF files. 51 | 52 | // BUG(rsc): There is no support for closing open PDF files. If you drop all references to a Reader, 53 | // the underlying reader will eventually be garbage collected. 54 | 55 | // BUG(rsc): The library makes no attempt at efficiency. A value cache maintained in the Reader 56 | // would probably help significantly. 57 | 58 | // BUG(rsc): The support for reading encrypted files is weak. 59 | 60 | // BUG(rsc): The Value API does not support error reporting. The intent is to allow users to 61 | // set an error reporting callback in Reader, but that code has not been implemented. 62 | 63 | import ( 64 | "bytes" 65 | "compress/zlib" 66 | "crypto/aes" 67 | "crypto/cipher" 68 | "crypto/md5" 69 | "crypto/rc4" 70 | "fmt" 71 | "io" 72 | "io/ioutil" 73 | "os" 74 | "sort" 75 | "strconv" 76 | ) 77 | 78 | // A Reader is a single PDF file open for reading. 79 | type Reader struct { 80 | f io.ReaderAt 81 | end int64 82 | xref []xref 83 | trailer dict 84 | trailerptr objptr 85 | key []byte 86 | useAES bool 87 | } 88 | 89 | type xref struct { 90 | ptr objptr 91 | inStream bool 92 | stream objptr 93 | offset int64 94 | } 95 | 96 | func (r *Reader) errorf(format string, args ...interface{}) { 97 | panic(fmt.Errorf(format, args...)) 98 | } 99 | 100 | // Open opens a file for reading. 101 | func Open(file string) (*Reader, error) { 102 | // TODO: Deal with closing file. 
103 | f, err := os.Open(file) 104 | if err != nil { 105 | return nil, err 106 | } 107 | fi, err := f.Stat() 108 | if err != nil { 109 | f.Close() 110 | return nil, err 111 | } 112 | return NewReader(f, fi.Size()) 113 | } 114 | 115 | // NewReader opens a file for reading, using the data in f with the given total size. 116 | func NewReader(f io.ReaderAt, size int64) (*Reader, error) { 117 | return NewReaderEncrypted(f, size, nil) 118 | } 119 | 120 | // NewReaderEncrypted opens a file for reading, using the data in f with the given total size. 121 | // If the PDF is encrypted, NewReaderEncrypted calls pw repeatedly to obtain passwords 122 | // to try. If pw returns the empty string, NewReaderEncrypted stops trying to decrypt 123 | // the file and returns an error. 124 | func NewReaderEncrypted(f io.ReaderAt, size int64, pw func() string) (*Reader, error) { 125 | buf := make([]byte, 10) 126 | f.ReadAt(buf, 0) 127 | if !bytes.HasPrefix(buf, []byte("%PDF-1.")) || buf[7] < '0' || buf[7] > '7' || buf[8] != '\r' && buf[8] != '\n' { 128 | return nil, fmt.Errorf("not a PDF file: invalid header") 129 | } 130 | end := size 131 | const endChunk = 100 132 | buf = make([]byte, endChunk) 133 | f.ReadAt(buf, end-endChunk) 134 | for len(buf) > 0 && buf[len(buf)-1] == '\n' || buf[len(buf)-1] == '\r' { 135 | buf = buf[:len(buf)-1] 136 | } 137 | buf = bytes.TrimRight(buf, "\r\n\t ") 138 | if !bytes.HasSuffix(buf, []byte("%%EOF")) { 139 | return nil, fmt.Errorf("not a PDF file: missing %%%%EOF") 140 | } 141 | i := findLastLine(buf, "startxref") 142 | if i < 0 { 143 | return nil, fmt.Errorf("malformed PDF file: missing final startxref") 144 | } 145 | 146 | r := &Reader{ 147 | f: f, 148 | end: end, 149 | } 150 | pos := end - endChunk + int64(i) 151 | b := newBuffer(io.NewSectionReader(f, pos, end-pos), pos) 152 | if b.readToken() != keyword("startxref") { 153 | return nil, fmt.Errorf("malformed PDF file: missing startxref") 154 | } 155 | startxref, ok := b.readToken().(int64) 156 | if !ok 
{ 157 | return nil, fmt.Errorf("malformed PDF file: startxref not followed by integer") 158 | } 159 | b = newBuffer(io.NewSectionReader(r.f, startxref, r.end-startxref), startxref) 160 | xref, trailerptr, trailer, err := readXref(r, b) 161 | if err != nil { 162 | return nil, err 163 | } 164 | r.xref = xref 165 | r.trailer = trailer 166 | r.trailerptr = trailerptr 167 | if trailer["Encrypt"] == nil { 168 | return r, nil 169 | } 170 | err = r.initEncrypt("") 171 | if err == nil { 172 | return r, nil 173 | } 174 | if pw == nil || err != ErrInvalidPassword { 175 | return nil, err 176 | } 177 | for { 178 | next := pw() 179 | if next == "" { 180 | break 181 | } 182 | if r.initEncrypt(next) == nil { 183 | return r, nil 184 | } 185 | } 186 | return nil, err 187 | } 188 | 189 | // Trailer returns the file's Trailer value. 190 | func (r *Reader) Trailer() Value { 191 | return Value{r, r.trailerptr, r.trailer} 192 | } 193 | 194 | func readXref(r *Reader, b *buffer) ([]xref, objptr, dict, error) { 195 | tok := b.readToken() 196 | if tok == keyword("xref") { 197 | return readXrefTable(r, b) 198 | } 199 | if _, ok := tok.(int64); ok { 200 | b.unreadToken(tok) 201 | return readXrefStream(r, b) 202 | } 203 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: cross-reference table not found: %v", tok) 204 | } 205 | 206 | func readXrefStream(r *Reader, b *buffer) ([]xref, objptr, dict, error) { 207 | obj1 := b.readObject() 208 | obj, ok := obj1.(objdef) 209 | if !ok { 210 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: cross-reference table not found: %v", objfmt(obj1)) 211 | } 212 | strmptr := obj.ptr 213 | strm, ok := obj.obj.(stream) 214 | if !ok { 215 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: cross-reference table not found: %v", objfmt(obj)) 216 | } 217 | if strm.hdr["Type"] != name("XRef") { 218 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref stream does not have type XRef") 219 | } 220 | size, ok := strm.hdr["Size"].(int64) 221 | if !ok { 
222 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref stream missing Size") 223 | } 224 | table := make([]xref, size) 225 | 226 | table, err := readXrefStreamData(r, strm, table, size) 227 | if err != nil { 228 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: %v", err) 229 | } 230 | 231 | for prevoff := strm.hdr["Prev"]; prevoff != nil; { 232 | off, ok := prevoff.(int64) 233 | if !ok { 234 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref Prev is not integer: %v", prevoff) 235 | } 236 | b := newBuffer(io.NewSectionReader(r.f, off, r.end-off), off) 237 | obj1 := b.readObject() 238 | obj, ok := obj1.(objdef) 239 | if !ok { 240 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref prev stream not found: %v", objfmt(obj1)) 241 | } 242 | prevstrm, ok := obj.obj.(stream) 243 | if !ok { 244 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref prev stream not found: %v", objfmt(obj)) 245 | } 246 | prevoff = prevstrm.hdr["Prev"] 247 | prev := Value{r, objptr{}, prevstrm} 248 | if prev.Kind() != Stream { 249 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref prev stream is not stream: %v", prev) 250 | } 251 | if prev.Key("Type").Name() != "XRef" { 252 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref prev stream does not have type XRef") 253 | } 254 | psize := prev.Key("Size").Int64() 255 | if psize > size { 256 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref prev stream larger than last stream") 257 | } 258 | if table, err = readXrefStreamData(r, prev.data.(stream), table, psize); err != nil { 259 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: reading xref prev stream: %v", err) 260 | } 261 | } 262 | 263 | return table, strmptr, strm.hdr, nil 264 | } 265 | 266 | func readXrefStreamData(r *Reader, strm stream, table []xref, size int64) ([]xref, error) { 267 | index, _ := strm.hdr["Index"].(array) 268 | if index == nil { 269 | index = array{int64(0), size} 270 | } 271 | if len(index)%2 != 0 
{ 272 | return nil, fmt.Errorf("invalid Index array %v", objfmt(index)) 273 | } 274 | ww, ok := strm.hdr["W"].(array) 275 | if !ok { 276 | return nil, fmt.Errorf("xref stream missing W array") 277 | } 278 | 279 | var w []int 280 | for _, x := range ww { 281 | i, ok := x.(int64) 282 | if !ok || int64(int(i)) != i { 283 | return nil, fmt.Errorf("invalid W array %v", objfmt(ww)) 284 | } 285 | w = append(w, int(i)) 286 | } 287 | if len(w) < 3 { 288 | return nil, fmt.Errorf("invalid W array %v", objfmt(ww)) 289 | } 290 | 291 | v := Value{r, objptr{}, strm} 292 | wtotal := 0 293 | for _, wid := range w { 294 | wtotal += wid 295 | } 296 | buf := make([]byte, wtotal) 297 | data := v.Reader() 298 | for len(index) > 0 { 299 | start, ok1 := index[0].(int64) 300 | n, ok2 := index[1].(int64) 301 | if !ok1 || !ok2 { 302 | return nil, fmt.Errorf("malformed Index pair %v %v %T %T", objfmt(index[0]), objfmt(index[1]), index[0], index[1]) 303 | } 304 | index = index[2:] 305 | for i := 0; i < int(n); i++ { 306 | _, err := io.ReadFull(data, buf) 307 | if err != nil { 308 | return nil, fmt.Errorf("error reading xref stream: %v", err) 309 | } 310 | v1 := decodeInt(buf[0:w[0]]) 311 | if w[0] == 0 { 312 | v1 = 1 313 | } 314 | v2 := decodeInt(buf[w[0] : w[0]+w[1]]) 315 | v3 := decodeInt(buf[w[0]+w[1] : w[0]+w[1]+w[2]]) 316 | x := int(start) + i 317 | for cap(table) <= x { 318 | table = append(table[:cap(table)], xref{}) 319 | } 320 | if table[x].ptr != (objptr{}) { 321 | continue 322 | } 323 | switch v1 { 324 | case 0: 325 | table[x] = xref{ptr: objptr{0, 65535}} 326 | case 1: 327 | table[x] = xref{ptr: objptr{uint32(x), uint16(v3)}, offset: int64(v2)} 328 | case 2: 329 | table[x] = xref{ptr: objptr{uint32(x), 0}, inStream: true, stream: objptr{uint32(v2), 0}, offset: int64(v3)} 330 | default: 331 | fmt.Printf("invalid xref stream type %d: %x\n", v1, buf) 332 | } 333 | } 334 | } 335 | return table, nil 336 | } 337 | 338 | func decodeInt(b []byte) int { 339 | x := 0 340 | for _, c := range 
b { 341 | x = x<<8 | int(c) 342 | } 343 | return x 344 | } 345 | 346 | func readXrefTable(r *Reader, b *buffer) ([]xref, objptr, dict, error) { 347 | var table []xref 348 | 349 | table, err := readXrefTableData(b, table) 350 | if err != nil { 351 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: %v", err) 352 | } 353 | 354 | trailer, ok := b.readObject().(dict) 355 | if !ok { 356 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref table not followed by trailer dictionary") 357 | } 358 | 359 | for prevoff := trailer["Prev"]; prevoff != nil; { 360 | off, ok := prevoff.(int64) 361 | if !ok { 362 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref Prev is not integer: %v", prevoff) 363 | } 364 | b := newBuffer(io.NewSectionReader(r.f, off, r.end-off), off) 365 | tok := b.readToken() 366 | if tok != keyword("xref") { 367 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref Prev does not point to xref") 368 | } 369 | table, err = readXrefTableData(b, table) 370 | if err != nil { 371 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: %v", err) 372 | } 373 | 374 | trailer, ok := b.readObject().(dict) 375 | if !ok { 376 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref Prev table not followed by trailer dictionary") 377 | } 378 | prevoff = trailer["Prev"] 379 | } 380 | 381 | size, ok := trailer[name("Size")].(int64) 382 | if !ok { 383 | return nil, objptr{}, nil, fmt.Errorf("malformed PDF: trailer missing /Size entry") 384 | } 385 | 386 | if size < int64(len(table)) { 387 | table = table[:size] 388 | } 389 | 390 | return table, objptr{}, trailer, nil 391 | } 392 | 393 | func readXrefTableData(b *buffer, table []xref) ([]xref, error) { 394 | for { 395 | tok := b.readToken() 396 | if tok == keyword("trailer") { 397 | break 398 | } 399 | start, ok1 := tok.(int64) 400 | n, ok2 := b.readToken().(int64) 401 | if !ok1 || !ok2 { 402 | return nil, fmt.Errorf("malformed xref table") 403 | } 404 | for i := 0; i < int(n); i++ { 405 | off, 
ok1 := b.readToken().(int64) 406 | gen, ok2 := b.readToken().(int64) 407 | alloc, ok3 := b.readToken().(keyword) 408 | if !ok1 || !ok2 || !ok3 || alloc != keyword("f") && alloc != keyword("n") { 409 | return nil, fmt.Errorf("malformed xref table") 410 | } 411 | x := int(start) + i 412 | for cap(table) <= x { 413 | table = append(table[:cap(table)], xref{}) 414 | } 415 | if len(table) <= x { 416 | table = table[:x+1] 417 | } 418 | if alloc == "n" && table[x].offset == 0 { 419 | table[x] = xref{ptr: objptr{uint32(x), uint16(gen)}, offset: int64(off)} 420 | } 421 | } 422 | } 423 | return table, nil 424 | } 425 | 426 | func findLastLine(buf []byte, s string) int { 427 | bs := []byte(s) 428 | max := len(buf) 429 | for { 430 | i := bytes.LastIndex(buf[:max], bs) 431 | if i <= 0 || i+len(bs) >= len(buf) { 432 | return -1 433 | } 434 | if (buf[i-1] == '\n' || buf[i-1] == '\r') && (buf[i+len(bs)] == '\n' || buf[i+len(bs)] == '\r') { 435 | return i 436 | } 437 | max = i 438 | } 439 | } 440 | 441 | // A Value is a single PDF value, such as an integer, dictionary, or array. 442 | // The zero Value is a PDF null (Kind() == Null, IsNull() = true). 443 | type Value struct { 444 | r *Reader 445 | ptr objptr 446 | data interface{} 447 | } 448 | 449 | // IsNull reports whether the value is a null. It is equivalent to Kind() == Null. 450 | func (v Value) IsNull() bool { 451 | return v.data == nil 452 | } 453 | 454 | // A ValueKind specifies the kind of data underlying a Value. 455 | type ValueKind int 456 | 457 | // The PDF value kinds. 458 | const ( 459 | Null ValueKind = iota 460 | Bool 461 | Integer 462 | Real 463 | String 464 | Name 465 | Dict 466 | Array 467 | Stream 468 | ) 469 | 470 | // Kind reports the kind of value underlying v. 
471 | func (v Value) Kind() ValueKind { 472 | switch v.data.(type) { 473 | default: 474 | return Null 475 | case bool: 476 | return Bool 477 | case int64: 478 | return Integer 479 | case float64: 480 | return Real 481 | case string: 482 | return String 483 | case name: 484 | return Name 485 | case dict: 486 | return Dict 487 | case array: 488 | return Array 489 | case stream: 490 | return Stream 491 | } 492 | } 493 | 494 | // String returns a textual representation of the value v. 495 | // Note that String is not the accessor for values with Kind() == String. 496 | // To access such values, see RawString, Text, and TextFromUTF16. 497 | func (v Value) String() string { 498 | return objfmt(v.data) 499 | } 500 | 501 | func objfmt(x interface{}) string { 502 | switch x := x.(type) { 503 | default: 504 | return fmt.Sprint(x) 505 | case string: 506 | if isPDFDocEncoded(x) { 507 | return strconv.Quote(pdfDocDecode(x)) 508 | } 509 | if isUTF16(x) { 510 | return strconv.Quote(utf16Decode(x[2:])) 511 | } 512 | return strconv.Quote(x) 513 | case name: 514 | return "/" + string(x) 515 | case dict: 516 | var keys []string 517 | for k := range x { 518 | keys = append(keys, string(k)) 519 | } 520 | sort.Strings(keys) 521 | var buf bytes.Buffer 522 | buf.WriteString("<<") 523 | for i, k := range keys { 524 | elem := x[name(k)] 525 | if i > 0 { 526 | buf.WriteString(" ") 527 | } 528 | buf.WriteString("/") 529 | buf.WriteString(k) 530 | buf.WriteString(" ") 531 | buf.WriteString(objfmt(elem)) 532 | } 533 | buf.WriteString(">>") 534 | return buf.String() 535 | 536 | case array: 537 | var buf bytes.Buffer 538 | buf.WriteString("[") 539 | for i, elem := range x { 540 | if i > 0 { 541 | buf.WriteString(" ") 542 | } 543 | buf.WriteString(objfmt(elem)) 544 | } 545 | buf.WriteString("]") 546 | return buf.String() 547 | 548 | case stream: 549 | return fmt.Sprintf("%v@%d", objfmt(x.hdr), x.offset) 550 | 551 | case objptr: 552 | return fmt.Sprintf("%d %d R", x.id, x.gen) 553 | 554 | case 
objdef: 555 | return fmt.Sprintf("{%d %d obj}%v", x.ptr.id, x.ptr.gen, objfmt(x.obj)) 556 | } 557 | } 558 | 559 | // Bool returns v's boolean value. 560 | // If v.Kind() != Bool, Bool returns false. 561 | func (v Value) Bool() bool { 562 | x, ok := v.data.(bool) 563 | if !ok { 564 | return false 565 | } 566 | return x 567 | } 568 | 569 | // Int64 returns v's int64 value. 570 | // If v.Kind() != Int64, Int64 returns 0. 571 | func (v Value) Int64() int64 { 572 | x, ok := v.data.(int64) 573 | if !ok { 574 | return 0 575 | } 576 | return x 577 | } 578 | 579 | // Float64 returns v's float64 value, converting from integer if necessary. 580 | // If v.Kind() != Float64 and v.Kind() != Int64, Float64 returns 0. 581 | func (v Value) Float64() float64 { 582 | x, ok := v.data.(float64) 583 | if !ok { 584 | x, ok := v.data.(int64) 585 | if ok { 586 | return float64(x) 587 | } 588 | return 0 589 | } 590 | return x 591 | } 592 | 593 | // RawString returns v's string value. 594 | // If v.Kind() != String, RawString returns the empty string. 595 | func (v Value) RawString() string { 596 | x, ok := v.data.(string) 597 | if !ok { 598 | return "" 599 | } 600 | return x 601 | } 602 | 603 | // Text returns v's string value interpreted as a ``text string'' (defined in the PDF spec) 604 | // and converted to UTF-8. 605 | // If v.Kind() != String, Text returns the empty string. 606 | func (v Value) Text() string { 607 | x, ok := v.data.(string) 608 | if !ok { 609 | return "" 610 | } 611 | if isPDFDocEncoded(x) { 612 | return pdfDocDecode(x) 613 | } 614 | if isUTF16(x) { 615 | return utf16Decode(x[2:]) 616 | } 617 | return x 618 | } 619 | 620 | // TextFromUTF16 returns v's string value interpreted as big-endian UTF-16 621 | // and then converted to UTF-8. 622 | // If v.Kind() != String or if the data is not valid UTF-16, TextFromUTF16 returns 623 | // the empty string. 
624 | func (v Value) TextFromUTF16() string { 625 | x, ok := v.data.(string) 626 | if !ok { 627 | return "" 628 | } 629 | if len(x)%2 == 1 { 630 | return "" 631 | } 632 | if x == "" { 633 | return "" 634 | } 635 | return utf16Decode(x) 636 | } 637 | 638 | // Name returns v's name value. 639 | // If v.Kind() != Name, Name returns the empty string. 640 | // The returned name does not include the leading slash: 641 | // if v corresponds to the name written using the syntax /Helvetica, 642 | // Name() == "Helvetica". 643 | func (v Value) Name() string { 644 | x, ok := v.data.(name) 645 | if !ok { 646 | return "" 647 | } 648 | return string(x) 649 | } 650 | 651 | // Key returns the value associated with the given name key in the dictionary v. 652 | // Like the result of the Name method, the key should not include a leading slash. 653 | // If v is a stream, Key applies to the stream's header dictionary. 654 | // If v.Kind() != Dict and v.Kind() != Stream, Key returns a null Value. 655 | func (v Value) Key(key string) Value { 656 | x, ok := v.data.(dict) 657 | if !ok { 658 | strm, ok := v.data.(stream) 659 | if !ok { 660 | return Value{} 661 | } 662 | x = strm.hdr 663 | } 664 | return v.r.resolve(v.ptr, x[name(key)]) 665 | } 666 | 667 | // Keys returns a sorted list of the keys in the dictionary v. 668 | // If v is a stream, Keys applies to the stream's header dictionary. 669 | // If v.Kind() != Dict and v.Kind() != Stream, Keys returns nil. 670 | func (v Value) Keys() []string { 671 | x, ok := v.data.(dict) 672 | if !ok { 673 | strm, ok := v.data.(stream) 674 | if !ok { 675 | return nil 676 | } 677 | x = strm.hdr 678 | } 679 | keys := []string{} // not nil 680 | for k := range x { 681 | keys = append(keys, string(k)) 682 | } 683 | sort.Strings(keys) 684 | return keys 685 | } 686 | 687 | // Index returns the i'th element in the array v. 688 | // If v.Kind() != Array or if i is outside the array bounds, 689 | // Index returns a null Value. 
690 | func (v Value) Index(i int) Value { 691 | x, ok := v.data.(array) 692 | if !ok || i < 0 || i >= len(x) { 693 | return Value{} 694 | } 695 | return v.r.resolve(v.ptr, x[i]) 696 | } 697 | 698 | // Len returns the length of the array v. 699 | // If v.Kind() != Array, Len returns a null Value. 700 | func (v Value) Len() int { 701 | x, ok := v.data.(array) 702 | if !ok { 703 | return 0 704 | } 705 | return len(x) 706 | } 707 | 708 | func (r *Reader) resolve(parent objptr, x interface{}) Value { 709 | if ptr, ok := x.(objptr); ok { 710 | if ptr.id >= uint32(len(r.xref)) { 711 | return Value{} 712 | } 713 | xref := r.xref[ptr.id] 714 | if xref.ptr != ptr || !xref.inStream && xref.offset == 0 { 715 | return Value{} 716 | } 717 | var obj object 718 | if xref.inStream { 719 | strm := r.resolve(parent, xref.stream) 720 | Search: 721 | for { 722 | if strm.Kind() != Stream { 723 | panic("not a stream") 724 | } 725 | if strm.Key("Type").Name() != "ObjStm" { 726 | panic("not an object stream") 727 | } 728 | n := int(strm.Key("N").Int64()) 729 | first := strm.Key("First").Int64() 730 | if first == 0 { 731 | panic("missing First") 732 | } 733 | b := newBuffer(strm.Reader(), 0) 734 | b.allowEOF = true 735 | for i := 0; i < n; i++ { 736 | id, _ := b.readToken().(int64) 737 | off, _ := b.readToken().(int64) 738 | if uint32(id) == ptr.id { 739 | b.seekForward(first + off) 740 | x = b.readObject() 741 | break Search 742 | } 743 | } 744 | ext := strm.Key("Extends") 745 | if ext.Kind() != Stream { 746 | panic("cannot find object in stream") 747 | } 748 | strm = ext 749 | } 750 | } else { 751 | b := newBuffer(io.NewSectionReader(r.f, xref.offset, r.end-xref.offset), xref.offset) 752 | b.key = r.key 753 | b.useAES = r.useAES 754 | obj = b.readObject() 755 | def, ok := obj.(objdef) 756 | if !ok { 757 | panic(fmt.Errorf("loading %v: found %T instead of objdef", ptr, obj)) 758 | return Value{} 759 | } 760 | if def.ptr != ptr { 761 | panic(fmt.Errorf("loading %v: found %v", ptr, def.ptr)) 
762 | } 763 | x = def.obj 764 | } 765 | parent = ptr 766 | } 767 | 768 | switch x := x.(type) { 769 | case nil, bool, int64, float64, name, dict, array, stream: 770 | return Value{r, parent, x} 771 | case string: 772 | return Value{r, parent, x} 773 | default: 774 | panic(fmt.Errorf("unexpected value type %T in resolve", x)) 775 | } 776 | } 777 | 778 | type errorReadCloser struct { 779 | err error 780 | } 781 | 782 | func (e *errorReadCloser) Read([]byte) (int, error) { 783 | return 0, e.err 784 | } 785 | 786 | func (e *errorReadCloser) Close() error { 787 | return e.err 788 | } 789 | 790 | // Reader returns the data contained in the stream v. 791 | // If v.Kind() != Stream, Reader returns a ReadCloser that 792 | // responds to all reads with a ``stream not present'' error. 793 | func (v Value) Reader() io.ReadCloser { 794 | x, ok := v.data.(stream) 795 | if !ok { 796 | return &errorReadCloser{fmt.Errorf("stream not present")} 797 | } 798 | var rd io.Reader 799 | rd = io.NewSectionReader(v.r.f, x.offset, v.Key("Length").Int64()) 800 | if v.r.key != nil { 801 | rd = decryptStream(v.r.key, v.r.useAES, x.ptr, rd) 802 | } 803 | filter := v.Key("Filter") 804 | param := v.Key("DecodeParms") 805 | switch filter.Kind() { 806 | default: 807 | panic(fmt.Errorf("unsupported filter %v", filter)) 808 | case Null: 809 | // ok 810 | case Name: 811 | rd = applyFilter(rd, filter.Name(), param) 812 | case Array: 813 | for i := 0; i < filter.Len(); i++ { 814 | rd = applyFilter(rd, filter.Index(i).Name(), param.Index(i)) 815 | } 816 | } 817 | 818 | return ioutil.NopCloser(rd) 819 | } 820 | 821 | func applyFilter(rd io.Reader, name string, param Value) io.Reader { 822 | switch name { 823 | default: 824 | panic("unknown filter " + name) 825 | case "FlateDecode": 826 | zr, err := zlib.NewReader(rd) 827 | if err != nil { 828 | panic(err) 829 | } 830 | pred := param.Key("Predictor") 831 | if pred.Kind() == Null { 832 | return zr 833 | } 834 | columns := param.Key("Columns").Int64() 835 | 
switch pred.Int64() { 836 | default: 837 | fmt.Println("unknown predictor", pred) 838 | panic("pred") 839 | case 12: 840 | return &pngUpReader{r: zr, hist: make([]byte, 1+columns), tmp: make([]byte, 1+columns)} 841 | } 842 | } 843 | } 844 | 845 | type pngUpReader struct { 846 | r io.Reader 847 | hist []byte 848 | tmp []byte 849 | pend []byte 850 | } 851 | 852 | func (r *pngUpReader) Read(b []byte) (int, error) { 853 | n := 0 854 | for len(b) > 0 { 855 | if len(r.pend) > 0 { 856 | m := copy(b, r.pend) 857 | n += m 858 | b = b[m:] 859 | r.pend = r.pend[m:] 860 | continue 861 | } 862 | _, err := io.ReadFull(r.r, r.tmp) 863 | if err != nil { 864 | return n, err 865 | } 866 | if r.tmp[0] != 2 { 867 | return n, fmt.Errorf("malformed PNG-Up encoding") 868 | } 869 | for i, b := range r.tmp { 870 | r.hist[i] += b 871 | } 872 | r.pend = r.hist[1:] 873 | } 874 | return n, nil 875 | } 876 | 877 | var passwordPad = []byte{ 878 | 0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41, 0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08, 879 | 0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80, 0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A, 880 | } 881 | 882 | func (r *Reader) initEncrypt(password string) error { 883 | // See PDF 32000-1:2008, §7.6. 
884 | encrypt, _ := r.resolve(objptr{}, r.trailer["Encrypt"]).data.(dict) 885 | if encrypt["Filter"] != name("Standard") { 886 | return fmt.Errorf("unsupported PDF: encryption filter %v", objfmt(encrypt["Filter"])) 887 | } 888 | n, _ := encrypt["Length"].(int64) 889 | if n == 0 { 890 | n = 40 891 | } 892 | if n%8 != 0 || n > 128 || n < 40 { 893 | return fmt.Errorf("malformed PDF: %d-bit encryption key", n) 894 | } 895 | V, _ := encrypt["V"].(int64) 896 | if V != 1 && V != 2 && (V != 4 || !okayV4(encrypt)) { 897 | return fmt.Errorf("unsupported PDF: encryption version V=%d; %v", V, objfmt(encrypt)) 898 | } 899 | 900 | ids, ok := r.trailer["ID"].(array) 901 | if !ok || len(ids) < 1 { 902 | return fmt.Errorf("malformed PDF: missing ID in trailer") 903 | } 904 | idstr, ok := ids[0].(string) 905 | if !ok { 906 | return fmt.Errorf("malformed PDF: missing ID in trailer") 907 | } 908 | ID := []byte(idstr) 909 | 910 | R, _ := encrypt["R"].(int64) 911 | if R < 2 { 912 | return fmt.Errorf("malformed PDF: encryption revision R=%d", R) 913 | } 914 | if R > 4 { 915 | return fmt.Errorf("unsupported PDF: encryption revision R=%d", R) 916 | } 917 | O, _ := encrypt["O"].(string) 918 | U, _ := encrypt["U"].(string) 919 | if len(O) != 32 || len(U) != 32 { 920 | return fmt.Errorf("malformed PDF: missing O= or U= encryption parameters") 921 | } 922 | p, _ := encrypt["P"].(int64) 923 | P := uint32(p) 924 | 925 | // TODO: Password should be converted to Latin-1. 
926 | pw := []byte(password) 927 | h := md5.New() 928 | if len(pw) >= 32 { 929 | h.Write(pw[:32]) 930 | } else { 931 | h.Write(pw) 932 | h.Write(passwordPad[:32-len(pw)]) 933 | } 934 | h.Write([]byte(O)) 935 | h.Write([]byte{byte(P), byte(P >> 8), byte(P >> 16), byte(P >> 24)}) 936 | h.Write([]byte(ID)) 937 | key := h.Sum(nil) 938 | 939 | if R >= 3 { 940 | for i := 0; i < 50; i++ { 941 | h.Reset() 942 | h.Write(key[:n/8]) 943 | key = h.Sum(key[:0]) 944 | } 945 | key = key[:n/8] 946 | } else { 947 | key = key[:40/8] 948 | } 949 | 950 | c, err := rc4.NewCipher(key) 951 | if err != nil { 952 | return fmt.Errorf("malformed PDF: invalid RC4 key: %v", err) 953 | } 954 | 955 | var u []byte 956 | if R == 2 { 957 | u = make([]byte, 32) 958 | copy(u, passwordPad) 959 | c.XORKeyStream(u, u) 960 | } else { 961 | h.Reset() 962 | h.Write(passwordPad) 963 | h.Write([]byte(ID)) 964 | u = h.Sum(nil) 965 | c.XORKeyStream(u, u) 966 | 967 | for i := 1; i <= 19; i++ { 968 | key1 := make([]byte, len(key)) 969 | copy(key1, key) 970 | for j := range key1 { 971 | key1[j] ^= byte(i) 972 | } 973 | c, _ = rc4.NewCipher(key1) 974 | c.XORKeyStream(u, u) 975 | } 976 | } 977 | 978 | if !bytes.HasPrefix([]byte(U), u) { 979 | return ErrInvalidPassword 980 | } 981 | 982 | r.key = key 983 | r.useAES = V == 4 984 | 985 | return nil 986 | } 987 | 988 | var ErrInvalidPassword = fmt.Errorf("encrypted PDF: invalid password") 989 | 990 | func okayV4(encrypt dict) bool { 991 | cf, ok := encrypt["CF"].(dict) 992 | if !ok { 993 | return false 994 | } 995 | stmf, ok := encrypt["StmF"].(name) 996 | if !ok { 997 | return false 998 | } 999 | strf, ok := encrypt["StrF"].(name) 1000 | if !ok { 1001 | return false 1002 | } 1003 | if stmf != strf { 1004 | return false 1005 | } 1006 | cfparam, ok := cf[stmf].(dict) 1007 | if cfparam["AuthEvent"] != nil && cfparam["AuthEvent"] != name("DocOpen") { 1008 | return false 1009 | } 1010 | if cfparam["Length"] != nil && cfparam["Length"] != int64(16) { 1011 | return false 
1012 | } 1013 | if cfparam["CFM"] != name("AESV2") { 1014 | return false 1015 | } 1016 | return true 1017 | } 1018 | 1019 | func cryptKey(key []byte, useAES bool, ptr objptr) []byte { 1020 | h := md5.New() 1021 | h.Write(key) 1022 | h.Write([]byte{byte(ptr.id), byte(ptr.id >> 8), byte(ptr.id >> 16), byte(ptr.gen), byte(ptr.gen >> 8)}) 1023 | if useAES { 1024 | h.Write([]byte("sAlT")) 1025 | } 1026 | return h.Sum(nil) 1027 | } 1028 | 1029 | func decryptString(key []byte, useAES bool, ptr objptr, x string) string { 1030 | key = cryptKey(key, useAES, ptr) 1031 | if useAES { 1032 | panic("AES not implemented") 1033 | } else { 1034 | c, _ := rc4.NewCipher(key) 1035 | data := []byte(x) 1036 | c.XORKeyStream(data, data) 1037 | x = string(data) 1038 | } 1039 | return x 1040 | } 1041 | 1042 | func decryptStream(key []byte, useAES bool, ptr objptr, rd io.Reader) io.Reader { 1043 | key = cryptKey(key, useAES, ptr) 1044 | if useAES { 1045 | cb, err := aes.NewCipher(key) 1046 | if err != nil { 1047 | panic("AES: " + err.Error()) 1048 | } 1049 | iv := make([]byte, 16) 1050 | io.ReadFull(rd, iv) 1051 | cbc := cipher.NewCBCDecrypter(cb, iv) 1052 | rd = &cbcReader{cbc: cbc, rd: rd, buf: make([]byte, 16)} 1053 | } else { 1054 | c, _ := rc4.NewCipher(key) 1055 | rd = &cipher.StreamReader{c, rd} 1056 | } 1057 | return rd 1058 | } 1059 | 1060 | type cbcReader struct { 1061 | cbc cipher.BlockMode 1062 | rd io.Reader 1063 | buf []byte 1064 | pend []byte 1065 | } 1066 | 1067 | func (r *cbcReader) Read(b []byte) (n int, err error) { 1068 | if len(r.pend) == 0 { 1069 | _, err = io.ReadFull(r.rd, r.buf) 1070 | if err != nil { 1071 | return 0, err 1072 | } 1073 | r.cbc.CryptBlocks(r.buf, r.buf) 1074 | r.pend = r.buf 1075 | } 1076 | n = copy(b, r.pend) 1077 | r.pend = r.pend[n:] 1078 | return n, nil 1079 | } 1080 | --------------------------------------------------------------------------------