├── go.mod ├── .gitignore ├── testdata ├── basic.xls ├── basic.xlsx ├── basic2.xls ├── basic2.xlsx ├── testing.xls ├── multi_test.xls ├── multi_test.xlsx ├── basic.tsv ├── testing.tsv ├── basic2.tsv └── multi_test.tsv ├── .github └── workflows │ └── go.yml ├── xls ├── comp_test.go ├── cfb │ ├── interface.go │ ├── slicereader.go │ ├── simple_test.go │ └── cfb.go ├── simple_test.go ├── structs.go ├── crypto │ ├── rc4.go │ └── crypto.go ├── hyperlinks.go ├── strings.go ├── xls.go ├── sheets.go └── records.go ├── xlsx ├── comp_test.go ├── simple_test.go ├── types.go ├── xlsx.go ├── workbook.go └── sheets.go ├── LICENSE ├── errs.go ├── simple ├── tsv.go ├── csv.go └── simple.go ├── cmd ├── grater │ └── main.go └── grate2tsv │ └── main.go ├── README.md ├── commonxl ├── frac_test.go ├── numbers.go ├── dates.go ├── fmt_test.go ├── sheet.go ├── fmt.go ├── formats.go └── cell.go └── grate.go /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/pbnjay/grate 2 | 3 | go 1.16 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | cmd/grate2tsv/results 2 | testdata 3 | 4 | *.pprof 5 | *.pdf 6 | -------------------------------------------------------------------------------- /testdata/basic.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbnjay/grate/HEAD/testdata/basic.xls -------------------------------------------------------------------------------- /testdata/basic.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbnjay/grate/HEAD/testdata/basic.xlsx -------------------------------------------------------------------------------- /testdata/basic2.xls: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pbnjay/grate/HEAD/testdata/basic2.xls -------------------------------------------------------------------------------- /testdata/basic2.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbnjay/grate/HEAD/testdata/basic2.xlsx -------------------------------------------------------------------------------- /testdata/testing.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbnjay/grate/HEAD/testdata/testing.xls -------------------------------------------------------------------------------- /testdata/multi_test.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbnjay/grate/HEAD/testdata/multi_test.xls -------------------------------------------------------------------------------- /testdata/multi_test.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbnjay/grate/HEAD/testdata/multi_test.xlsx -------------------------------------------------------------------------------- /testdata/basic.tsv: -------------------------------------------------------------------------------- 1 | a b c d 2 | 1 Hello 42.0 0.0 3 | 2 World 99.1 0.01 4 | 3 This 7e8 0.001 5 | 4 Tests 2.4e-8 0.0001 6 | 5 Text 0.0001 0.00001 -------------------------------------------------------------------------------- /testdata/testing.tsv: -------------------------------------------------------------------------------- 1 | title 1 title 2 title 3 title 4 2 | c c c c 3 | b 2 3 4 4 | b 2 j 4 5 | b 1 2 1 6 | b 4 3 2 7 | 1 1 1 1 8 | -------------------------------------------------------------------------------- /testdata/basic2.tsv: -------------------------------------------------------------------------------- 1 | a b c d 2 | 1 Hello 42.0 0.00000 3 | 2 World 99.1 0.01000 4 | 3 This 7E+08 0.00100 5 
| 4 Tests 2.4E-08 0.00010 6 | 5 Text 0.0001 0.00001 -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | 16 | - name: Set up Go 17 | uses: actions/setup-go@v2 18 | with: 19 | go-version: 1.17 20 | 21 | - name: Build 22 | run: go build -v ./... 23 | 24 | - name: Test XLS 25 | run: go test -v ./xls 26 | 27 | - name: Test XLSX 28 | run: go test -v ./xlsx 29 | 30 | - name: Test CommonXL 31 | run: go test -v ./commonxl 32 | -------------------------------------------------------------------------------- /testdata/multi_test.tsv: -------------------------------------------------------------------------------- 1 | Integers Floats Fractions Dates v1 2 | One 1 1.234 1/2 "July 11, 2004" TRUE 3 | Two 4 1.2345678 42 1/3 11 Jul 00 TRUE 4 | Three 6 1.0 1/5 9-Sep FALSE 5 | Two Words 99 -42.1 -4 1/5 Jun-42 TRUE 6 | Three Small Words 123456789 123456789.0 9999 9/10 1-1-01 FALSE 7 | This is a longer sentence that fills the cell and overflows 1000000000000 1000000000000.0 8 | Some merged cells (tall version) 123456789 7 38 2/15 "July 12, 2004" 9 | 5 123456790.234 1/5 22 Aug 00 10 | 1000000000001 1111 1/10 11 | 12 | Merged columns (single row) 13 | 14 | Merged cells again (big box) custom yes/no bools! 
15 | A yes 16 | B no 17 | C 18 | D 19 | -------------------------------------------------------------------------------- /xls/comp_test.go: -------------------------------------------------------------------------------- 1 | package xls 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "strings" 7 | "testing" 8 | ) 9 | 10 | func TestAllFiles(t *testing.T) { 11 | err := filepath.Walk("../testdata", func(p string, info os.FileInfo, err error) error { 12 | if info.IsDir() { 13 | return nil 14 | } 15 | if !strings.HasSuffix(info.Name(), ".xls") { 16 | return nil 17 | } 18 | wb, err := Open(p) 19 | if err != nil { 20 | return err 21 | } 22 | 23 | sheets, err := wb.List() 24 | if err != nil { 25 | return err 26 | } 27 | for _, s := range sheets { 28 | sheet, err := wb.Get(s) 29 | if err != nil { 30 | return err 31 | } 32 | 33 | for sheet.Next() { 34 | sheet.Strings() 35 | } 36 | } 37 | 38 | return wb.Close() 39 | }) 40 | if err != nil { 41 | t.Fatal(err) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /xlsx/comp_test.go: -------------------------------------------------------------------------------- 1 | package xlsx 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "strings" 7 | "testing" 8 | ) 9 | 10 | func TestAllFiles(t *testing.T) { 11 | err := filepath.Walk("../testdata", func(p string, info os.FileInfo, err error) error { 12 | if info.IsDir() { 13 | return nil 14 | } 15 | if !strings.HasSuffix(info.Name(), ".xlsx") { 16 | return nil 17 | } 18 | wb, err := Open(p) 19 | if err != nil { 20 | return err 21 | } 22 | 23 | sheets, err := wb.List() 24 | if err != nil { 25 | return err 26 | } 27 | for _, s := range sheets { 28 | sheet, err := wb.Get(s) 29 | if err != nil { 30 | return err 31 | } 32 | 33 | for sheet.Next() { 34 | sheet.Strings() 35 | } 36 | } 37 | 38 | return wb.Close() 39 | }) 40 | if err != nil { 41 | t.Fatal(err) 42 | } 43 | } 44 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Jeremy Jay 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /xls/cfb/interface.go: -------------------------------------------------------------------------------- 1 | package cfb 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "os" 7 | ) 8 | 9 | // Open a Compound File Binary Format document. 10 | func Open(filename string) (*Document, error) { 11 | d := &Document{} 12 | f, err := os.Open(filename) 13 | if err != nil { 14 | return nil, err 15 | } 16 | err = d.load(f) 17 | if err != nil { 18 | return nil, err 19 | } 20 | return d, nil 21 | } 22 | 23 | // List the streams contained in the document. 
24 | func (d *Document) List() ([]string, error) { 25 | var res []string 26 | for _, e := range d.dir { 27 | if e.ObjectType == typeStream { 28 | res = append(res, e.String()) 29 | } 30 | } 31 | return res, nil 32 | } 33 | 34 | // Open the named stream contained in the document. 35 | func (d *Document) Open(name string) (io.ReadSeeker, error) { 36 | for _, e := range d.dir { 37 | if e.String() == name && e.ObjectType == typeStream { 38 | if e.StreamSize < uint64(d.header.MiniStreamCutoffSize) { 39 | return d.getMiniStreamReader(uint32(e.StartingSectorLocation), e.StreamSize) 40 | } else if e.StreamSize != 0 { 41 | return d.getStreamReader(uint32(e.StartingSectorLocation), e.StreamSize) 42 | } 43 | } 44 | } 45 | return nil, fmt.Errorf("cfb: stream '%s' not found", name) 46 | } 47 | -------------------------------------------------------------------------------- /errs.go: -------------------------------------------------------------------------------- 1 | package grate 2 | 3 | import "errors" 4 | 5 | var ( 6 | // configure at build time by adding go build arguments: 7 | // -ldflags="-X github.com/pbnjay/grate.loglevel=debug" 8 | loglevel string = "warn" 9 | 10 | // Debug should be set to true to expose detailed logging. 11 | Debug bool = (loglevel == "debug") 12 | ) 13 | 14 | // ErrInvalidScanType is returned by Scan for invalid arguments. 15 | var ErrInvalidScanType = errors.New("grate: Scan only supports *bool, *int, *float64, *string, *time.Time arguments") 16 | 17 | // ErrNotInFormat is used to auto-detect file types using the defined OpenFunc 18 | // It is returned by OpenFunc when the code does not detect correct file formats. 19 | var ErrNotInFormat = errors.New("grate: file is not in this format") 20 | 21 | // ErrUnknownFormat is used when grate does not know how to open a file format. 
// errx bundles several related errors into one. The first entry supplies
// the message; every entry remains discoverable through errors.Is and
// errors.As.
type errx struct {
	errs []error
}

// Error returns the message of the first bundled error.
func (e errx) Error() string {
	if len(e.errs) == 0 {
		return ""
	}
	return e.errs[0].Error()
}

// Unwrap returns the second bundled error, or nil when there is none.
func (e errx) Unwrap() error {
	if len(e.errs) > 1 {
		return e.errs[1]
	}
	return nil
}

// Is reports whether any bundled error matches target. Without it,
// errors.Is would skip the first error and everything after the second,
// because Unwrap can only expose a single entry.
func (e errx) Is(target error) bool {
	for _, err := range e.errs {
		if errors.Is(err, target) {
			return true
		}
	}
	return false
}

// As finds the first bundled error that can be assigned to target, for the
// same reason Is exists.
func (e errx) As(target interface{}) bool {
	for _, err := range e.errs {
		if errors.As(err, target) {
			return true
		}
	}
	return false
}

// WrapErr wraps a set of errors.
//
// Nil entries are ignored. With no remaining errors the result is nil, with
// exactly one that error is returned unchanged, and otherwise the errors are
// bundled so that the first provides the message.
func WrapErr(e ...error) error {
	errs := make([]error, 0, len(e))
	for _, err := range e {
		if err != nil {
			errs = append(errs, err)
		}
	}
	switch len(errs) {
	case 0:
		return nil
	case 1:
		return errs[0]
	}
	return errx{errs: errs}
}
return t, nil 58 | } 59 | -------------------------------------------------------------------------------- /cmd/grater/main.go: -------------------------------------------------------------------------------- 1 | // Command grater extracts contents of the tabular files to stdout. 2 | package main 3 | 4 | import ( 5 | "flag" 6 | "fmt" 7 | "os" 8 | "strings" 9 | 10 | "github.com/pbnjay/grate" 11 | _ "github.com/pbnjay/grate/simple" // tsv and csv support 12 | _ "github.com/pbnjay/grate/xls" 13 | _ "github.com/pbnjay/grate/xlsx" 14 | ) 15 | 16 | func main() { 17 | flagDebug := flag.Bool("v", false, "debug log") 18 | flag.Parse() 19 | if flag.NArg() < 1 { 20 | fmt.Fprintf(os.Stderr, "USAGE: %s [file1.xls file2.xlsx file3.tsv ...]\n", os.Args[0]) 21 | fmt.Fprintf(os.Stderr, " Extracts contents of the tabular files to stdout\n") 22 | os.Exit(1) 23 | } 24 | grate.Debug = *flagDebug 25 | for _, fn := range flag.Args() { 26 | wb, err := grate.Open(fn) 27 | if err != nil { 28 | fmt.Fprintln(os.Stderr, err) 29 | continue 30 | } 31 | 32 | sheets, err := wb.List() 33 | if err != nil { 34 | wb.Close() 35 | fmt.Fprintln(os.Stderr, err) 36 | continue 37 | } 38 | 39 | for _, s := range sheets { 40 | sheet, err := wb.Get(s) 41 | if err != nil { 42 | fmt.Fprintln(os.Stderr, err) 43 | continue 44 | } 45 | 46 | for sheet.Next() { 47 | if *flagDebug { 48 | dtypes := sheet.Types() 49 | fmt.Println(strings.Join(dtypes, "\t")) 50 | } 51 | row := sheet.Strings() 52 | fmt.Println(strings.Join(row, "\t")) 53 | } 54 | } 55 | wb.Close() 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /simple/csv.go: -------------------------------------------------------------------------------- 1 | package simple 2 | 3 | import ( 4 | "encoding/csv" 5 | "os" 6 | 7 | "github.com/pbnjay/grate" 8 | ) 9 | 10 | var _ = grate.Register("csv", 15, OpenCSV) 11 | 12 | // OpenCSV defines a Source's instantiation function. 
13 | // It should return ErrNotInFormat immediately if filename is not of the correct file type. 14 | func OpenCSV(filename string) (grate.Source, error) { 15 | f, err := os.Open(filename) 16 | if err != nil { 17 | return nil, err 18 | } 19 | defer f.Close() 20 | t := &simpleFile{ 21 | filename: filename, 22 | iterRow: -1, 23 | } 24 | 25 | s := csv.NewReader(f) 26 | s.FieldsPerRecord = -1 27 | 28 | total := 0 29 | ncols := make(map[int]int) 30 | rec, err := s.Read() 31 | for ; err == nil; rec, err = s.Read() { 32 | ncols[len(rec)]++ 33 | total++ 34 | t.rows = append(t.rows, rec) 35 | } 36 | if err != nil { 37 | switch perr := err.(type) { 38 | case *csv.ParseError: 39 | return nil, grate.WrapErr(perr, grate.ErrNotInFormat) 40 | } 41 | if total < 10 { 42 | // probably? not in this format 43 | return nil, grate.WrapErr(err, grate.ErrNotInFormat) 44 | } 45 | return nil, err 46 | } 47 | 48 | // kinda arbitrary metrics for detecting CSV 49 | looksGood := 0 50 | for c, n := range ncols { 51 | if c <= 1 { 52 | continue 53 | } 54 | if n > 10 && float64(n)/float64(total) > 0.8 { 55 | // more than 80% of rows have the same number of columns, we're good 56 | looksGood = 2 57 | } else if n > 25 && looksGood == 0 { 58 | looksGood = 1 59 | } 60 | } 61 | if looksGood == 1 { 62 | return t, grate.ErrNotInFormat 63 | } 64 | 65 | return t, nil 66 | } 67 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # grate 2 | 3 | A Go native tabular data extraction package. Currently supports `.xls`, `.xlsx`, `.csv`, `.tsv` formats. 4 | 5 | # Why? 6 | 7 | Grate focuses on speed and stability first, and makes no attempt to parse charts, figures, or other content types that may be present embedded within the input files. It tries to perform as few allocations as possible and errs on the side of caution. 
8 | 9 | There are certainly still some bugs and edge cases, but we have run it successfully on a set of 400k `.xls` and `.xlsx` files to catch many bugs and error conditions. Please file an issue with any feedback and additional problem files. 10 | 11 | # Usage 12 | 13 | Grate provides a simple standard interface for all supported filetypes, allowing access to both named worksheets in spreadsheets and single tables in plaintext formats. 14 | 15 | ```go 16 | package main 17 | 18 | import ( 19 | "fmt" 20 | "os" 21 | "strings" 22 | 23 | "github.com/pbnjay/grate" 24 | _ "github.com/pbnjay/grate/simple" // tsv and csv support 25 | _ "github.com/pbnjay/grate/xls" 26 | _ "github.com/pbnjay/grate/xlsx" 27 | ) 28 | 29 | func main() { 30 | wb, _ := grate.Open(os.Args[1]) // open the file 31 | sheets, _ := wb.List() // list available sheets 32 | for _, s := range sheets { // enumerate each sheet name 33 | sheet, _ := wb.Get(s) // open the sheet 34 | for sheet.Next() { // enumerate each row of data 35 | row := sheet.Strings() // get the row's content as []string 36 | fmt.Println(strings.Join(row, "\t")) 37 | } 38 | } 39 | wb.Close() 40 | } 41 | ``` 42 | 43 | # License 44 | 45 | All source code is licensed under the [MIT License](https://raw.github.com/pbnjay/grate/master/LICENSE). 
// testcaseFrac is one entry of the table driving TestFractions.
type testcaseFrac struct {
	v float64 // value handed to the formatting function
	s string  // expected formatted text
	n int     // argument passed to fracFmtFunc to build the formatting function
}
// DecimalToWholeFraction converts a floating point value into a whole
// number and fraction approximation with at most nn digits in the numerator
// and nd digits in the denominator.
//
// The whole part keeps the sign of val; num and den describe the magnitude
// of the fractional part. When val has no fractional part, num is 0 and den
// is 1.
func DecimalToWholeFraction(val float64, nn, nd int) (whole, num, den int) {
	wholeF, part := math.Modf(val)
	if part == 0.0 {
		return int(wholeF), 0, 1
	}
	// NOTE(review): for values strictly between -1 and 0 the whole part is 0,
	// so the sign is not visible in the result; callers appear to handle the
	// sign themselves — confirm.
	num, den = DecimalToFraction(math.Abs(part), nn, nd)
	return int(wholeF), num, den
}

// DecimalToFraction converts a floating point value into a fraction
// approximation with at most nn digits in the numerator and nd
// digits in the denominator.
//
// A zero nn or nd defaults to 2 digits. The sign of val is carried by the
// returned numerator; the denominator is always positive.
func DecimalToFraction(val float64, nn, nd int) (num, den int) {
	// http://web.archive.org/web/20111027100847/http://homepage.smc.edu/kennedy_john/DEC2FRAC.PDF
	sign := 1
	if val < 0 {
		sign = -1
	}
	// All arithmetic below works on the magnitude; the sign is re-applied on
	// return. Using the signed value here made every negative non-integer
	// input hit the "tiny fraction" guard and come back as -1/1e9.
	mag := math.Abs(val)

	if nn == 0 {
		nn = 2
	}
	if nd == 0 {
		nd = 2
	}
	maxn := math.Pow(10.0, float64(nn)) // numerator with nn digits
	maxd := math.Pow(10.0, float64(nd)) // denominator with nd digits

	_, fracPart := math.Modf(mag)
	if fracPart == 0.0 {
		return int(mag) * sign, 1
	}
	if fracPart < 1e-9 {
		return sign, int(1e9)
	}
	// Clamp very large magnitudes. The previous test compared the fractional
	// part against 1e9 and therefore could never be true.
	if mag > 1e9 {
		return int(1e9) * sign, 1
	}

	// Continued-fraction expansion: each round refines the denominator and
	// derives the matching numerator, stopping once the approximation is
	// close enough or a digit limit would be exceeded.
	z := mag
	diff := 1.0
	denom := 1.0
	numer := 0.0
	var lastDenom, lastNumer float64
	for diff > 1e-10 && z != math.Floor(z) {
		z = 1 / (z - math.Floor(z))
		tmp := denom
		denom = (denom * math.Floor(z)) + lastDenom
		lastDenom = tmp
		lastNumer = numer
		numer = math.Round(mag * denom)
		if numer >= maxn || denom >= maxd {
			// too many digits: fall back to the previous approximation
			return sign * int(lastNumer), int(lastDenom)
		}
		diff = math.Abs(mag - numer/denom)
	}
	return sign * int(numer), int(denom)
}
(*commonxl.Sheet, error) { 24 | f, err := os.Open(fn) 25 | if err != nil { 26 | return nil, err 27 | } 28 | xs := &commonxl.Sheet{ 29 | Formatter: ff, 30 | } 31 | 32 | row := 0 33 | s := bufio.NewScanner(f) 34 | for s.Scan() { 35 | record := strings.Split(s.Text(), "\t") 36 | for i, val := range record { 37 | xs.Put(row, i, val, 0) 38 | } 39 | row++ 40 | } 41 | return xs, f.Close() 42 | } 43 | 44 | func TestBasic(t *testing.T) { 45 | for _, fnames := range testFilePairs { 46 | var trueData *commonxl.Sheet 47 | log.Println("Testing ", fnames[0]) 48 | 49 | wb, err := Open(fnames[0]) 50 | if err != nil { 51 | t.Fatal(err) 52 | } 53 | 54 | sheets, err := wb.List() 55 | if err != nil { 56 | t.Fatal(err) 57 | } 58 | firstLoad := true 59 | for _, s := range sheets { 60 | sheet, err := wb.Get(s) 61 | if err != nil { 62 | t.Fatal(err) 63 | } 64 | xsheet := sheet.(*commonxl.Sheet) 65 | if firstLoad { 66 | trueData, err = loadTestData(fnames[1], xsheet.Formatter) 67 | if err != nil { 68 | t.Fatal(err) 69 | } 70 | firstLoad = false 71 | } 72 | 73 | for xrow, xdata := range xsheet.Rows { 74 | for xcol, xval := range xdata { 75 | //t.Logf("at %s (%d,%d) expect '%v'", fnames[0], xrow, xcol, trueData.Rows[xrow][xcol]) 76 | if !trueData.Rows[xrow][xcol].Equal(xval) { 77 | t.Logf("mismatch at %s (%d,%d): '%v' <> '%v' expected", fnames[0], xrow, xcol, 78 | xval, trueData.Rows[xrow][xcol]) 79 | t.Fail() 80 | } 81 | } 82 | } 83 | } 84 | 85 | err = wb.Close() 86 | if err != nil { 87 | t.Fatal(err) 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /xls/simple_test.go: -------------------------------------------------------------------------------- 1 | package xls 2 | 3 | import ( 4 | "bufio" 5 | "log" 6 | "os" 7 | "strings" 8 | "testing" 9 | 10 | "github.com/pbnjay/grate/commonxl" 11 | ) 12 | 13 | var testFilePairs = [][]string{ 14 | {"../testdata/basic.xls", "../testdata/basic.tsv"}, 15 | {"../testdata/testing.xls", 
"../testdata/testing.tsv"}, 16 | 17 | // TODO: custom formatter support 18 | //{"../testdata/basic2.xls", "../testdata/basic2.tsv"}, 19 | 20 | // TODO: datetime and fraction formatter support 21 | //{"../testdata/multi_test.xls", "../testdata/multi_test.tsv"}, 22 | } 23 | 24 | func loadTestData(fn string, ff *commonxl.Formatter) (*commonxl.Sheet, error) { 25 | f, err := os.Open(fn) 26 | if err != nil { 27 | return nil, err 28 | } 29 | xs := &commonxl.Sheet{ 30 | Formatter: ff, 31 | } 32 | 33 | row := 0 34 | s := bufio.NewScanner(f) 35 | for s.Scan() { 36 | record := strings.Split(s.Text(), "\t") 37 | for i, val := range record { 38 | xs.Put(row, i, val, 0) 39 | } 40 | row++ 41 | } 42 | return xs, f.Close() 43 | } 44 | 45 | func TestBasic(t *testing.T) { 46 | for _, fnames := range testFilePairs { 47 | var trueData *commonxl.Sheet 48 | log.Println("Testing ", fnames[0]) 49 | 50 | wb, err := Open(fnames[0]) 51 | if err != nil { 52 | t.Fatal(err) 53 | } 54 | 55 | sheets, err := wb.List() 56 | if err != nil { 57 | t.Fatal(err) 58 | } 59 | firstLoad := true 60 | for _, s := range sheets { 61 | sheet, err := wb.Get(s) 62 | if err != nil { 63 | t.Fatal(err) 64 | } 65 | xsheet := sheet.(*commonxl.Sheet) 66 | if firstLoad { 67 | trueData, err = loadTestData(fnames[1], xsheet.Formatter) 68 | if err != nil { 69 | t.Fatal(err) 70 | } 71 | firstLoad = false 72 | } 73 | 74 | for xrow, xdata := range xsheet.Rows { 75 | for xcol, xval := range xdata { 76 | //t.Logf("at %s (%d,%d) expect '%v'", fnames[0], xrow, xcol, trueData.Rows[xrow][xcol]) 77 | if !trueData.Rows[xrow][xcol].Equal(xval) { 78 | t.Logf("mismatch at %s (%d,%d): '%v' <> '%v' expected", fnames[0], xrow, xcol, 79 | xval, trueData.Rows[xrow][xcol]) 80 | t.Fail() 81 | } 82 | } 83 | } 84 | } 85 | 86 | err = wb.Close() 87 | if err != nil { 88 | t.Fatal(err) 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /commonxl/dates.go: 
// ConvertToDate converts a floating-point value using the
// Excel date serialization conventions.
//
// The integer part of val is treated as a day count and the fractional part
// as a fraction of a 24-hour day. The epoch depends on the formatter's
// fMode1904 flag. The returned time is in UTC.
func (x *Formatter) ConvertToDate(val float64) time.Time {
	// http://web.archive.org/web/20190808062235/http://aa.usno.navy.mil/faq/docs/JD_Formula.php
	v := int(val)
	if v < 61 {
		// Values whose integer part is below 61 go through an integer
		// Julian-day-to-calendar conversion instead of AddDate.
		// NOTE(review): presumably this exists because of Excel's handling of
		// early 1900 dates — confirm the two offsets below against the spec.
		jdate := val + 0.5
		if (x.flags & fMode1904) != 0 {
			jdate += 2416480.5
		} else {
			jdate += 2415018.5
		}
		// split into the integer day number and the time-of-day fraction
		JD := int(jdate)
		frac := jdate - float64(JD)

		// integer arithmetic turning the day number into year/month/day
		L := JD + 68569
		N := 4 * L / 146097
		L = L - (146097*N+3)/4
		I := 4000 * (L + 1) / 1461001
		L = L - 1461*I/4 + 31
		J := 80 * L / 2447
		day := L - 2447*J/80
		L = J / 11
		month := time.Month(J + 2 - 12*L)
		year := 100*(N-49) + I + L

		// add the time of day on top of midnight of the computed date
		t := time.Duration(float64(time.Hour*24) * frac)
		return time.Date(year, month, day, 0, 0, 0, 0, time.UTC).Add(t)
	}
	frac := val - float64(v)
	// epoch is 1904-01-01 when fMode1904 is set, 1899-12-30 otherwise
	date := time.Date(1904, 1, 1, 0, 0, 0, 0, time.UTC)
	if (x.flags & fMode1904) == 0 {
		date = time.Date(1899, 12, 30, 0, 0, 0, 0, time.UTC)
	}

	// whole days are added as calendar days, the fraction as a duration
	t := time.Duration(float64(time.Hour*24) * frac)
	return date.AddDate(0, 0, v).Add(t)
}
63 | // TODO: implement others 64 | func cnTimeFmtFunc(f string) FmtFunc { 65 | return func(x *Formatter, v interface{}) string { 66 | t, ok := v.(time.Time) 67 | if !ok { 68 | fval, ok := convertToFloat64(v) 69 | if !ok { 70 | return "MUST BE time.Time OR numeric TO FORMAT CORRECTLY" 71 | } 72 | t = x.ConvertToDate(fval) 73 | } 74 | s := t.Format(f) 75 | s = strings.Replace(s, `AM`, `上午`, 1) 76 | return strings.Replace(s, `PM`, `下午`, 1) 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /xlsx/types.go: -------------------------------------------------------------------------------- 1 | package xlsx 2 | 3 | import ( 4 | "encoding/xml" 5 | "strconv" 6 | "strings" 7 | ) 8 | 9 | type CellType string 10 | 11 | // CellTypes define data type in section 18.18.11 12 | const ( 13 | BlankCellType CellType = "" 14 | BooleanCellType CellType = "b" 15 | DateCellType CellType = "d" 16 | ErrorCellType CellType = "e" 17 | NumberCellType CellType = "n" 18 | SharedStringCellType CellType = "s" 19 | FormulaStringCellType CellType = "str" 20 | InlineStringCellType CellType = "inlineStr" 21 | ) 22 | 23 | type staticCellType rune 24 | 25 | const ( 26 | staticBlank staticCellType = 0 27 | 28 | // marks a continuation column within a merged cell. 29 | continueColumnMerged staticCellType = '→' 30 | // marks the last column of a merged cell. 31 | endColumnMerged staticCellType = '⇥' 32 | 33 | // marks a continuation row within a merged cell. 34 | continueRowMerged staticCellType = '↓' 35 | // marks the last row of a merged cell. 
// returns the 0-based index of the column string:
// "A"=0, "B"=1, "AA"=26, "BB"=53
func col2int(col string) int {
	idx := 0
	for _, c := range col {
		idx *= 26
		idx += int(c - '@') // 'A' is one past '@', so letters count from 1
	}
	return idx - 1
}

// refToIndexes converts a cell reference into 0-based column and row
// indexes. Both the A1 style ("B3" -> column 1, row 2) and the R1C1 style
// ("R3C2" -> column 1, row 2) are understood. It returns -1, -1 when r is
// too short, does not start with letters followed by a digit, or is an R1C1
// reference whose numbers cannot be parsed.
func refToIndexes(r string) (column, row int) {
	if len(r) < 2 {
		return -1, -1
	}
	i1 := strings.IndexAny(r, "0123456789")
	if i1 <= 0 {
		return -1, -1
	}

	i2 := strings.IndexByte(r[i1:], 'C')
	if i2 == -1 {
		// A1 Reference mode
		rn, _ := strconv.ParseInt(r[i1:], 10, 64)
		return col2int(r[:i1]), int(rn) - 1
	}

	// R1C1 Reference Mode: "R<row>C<column>", both 1-based.
	// i2 was found relative to r[i1:], so shift it to index into r itself.
	i2 += i1
	rn, err := strconv.ParseInt(r[i1:i2], 10, 64)
	if err != nil {
		return -1, -1
	}
	cn, err := strconv.ParseInt(r[i2+1:], 10, 64)
	if err != nil {
		return -1, -1
	}
	return int(cn) - 1, int(rn) - 1
}
18 | func (s *SliceReader) Read(b []byte) (int, error) { 19 | if s.Index >= uint(len(s.Data)) { 20 | return 0, io.EOF 21 | } 22 | n := copy(b, s.Data[s.Index][s.Offset:]) 23 | if n > 0 { 24 | s.Offset += uint(n) 25 | if s.Offset == uint(len(s.Data[s.Index])) { 26 | s.Offset = 0 27 | s.Index++ 28 | } 29 | return n, nil 30 | } 31 | 32 | return 0, io.EOF 33 | } 34 | 35 | var x io.Seeker 36 | 37 | // Seek implements the io.Seeker interface. 38 | func (s *SliceReader) Seek(offset int64, whence int) (int64, error) { 39 | if len(s.CSize) != len(s.Data) { 40 | // calculate the cumulative block size cache 41 | s.CSize = make([]int64, len(s.Data)) 42 | sz := int64(0) 43 | for i, d := range s.Data { 44 | s.CSize[i] = sz 45 | sz += int64(len(d)) 46 | } 47 | } 48 | if s.Index >= uint(len(s.CSize)) { 49 | s.Index = uint(len(s.CSize) - 1) 50 | s.Offset = uint(len(s.Data[s.Index])) 51 | } 52 | // current offset in stream 53 | trueOffset := int64(s.Offset) + s.CSize[int(s.Index)] 54 | if offset == 0 && whence == io.SeekCurrent { 55 | // just asking for current position 56 | return trueOffset, nil 57 | } 58 | 59 | switch whence { 60 | case io.SeekStart: 61 | if offset < 0 { 62 | return -1, errors.New("xls: invalid seek offset") 63 | } 64 | s.Index = 0 65 | s.Offset = 0 66 | trueOffset = 0 67 | 68 | case io.SeekEnd: 69 | if offset > 0 { 70 | return -1, errors.New("xls: invalid seek offset") 71 | } 72 | 73 | s.Index = uint(len(s.Data) - 1) 74 | s.Offset = uint(len(s.Data[s.Index])) 75 | trueOffset = int64(s.Offset) + s.CSize[s.Index] 76 | 77 | default: 78 | // current position already defined 79 | } 80 | 81 | wantOffset := offset + trueOffset 82 | for trueOffset != wantOffset { 83 | loOffset := s.CSize[int(s.Index)] 84 | hiOffset := s.CSize[int(s.Index)] + int64(len(s.Data[s.Index])) 85 | if wantOffset > loOffset && wantOffset < hiOffset { 86 | s.Offset = uint(wantOffset - loOffset) 87 | return wantOffset, nil 88 | } 89 | 90 | if trueOffset > wantOffset { 91 | s.Index-- 92 | s.Offset 
= 0 93 | trueOffset = s.CSize[int(s.Index)] 94 | } else if trueOffset < wantOffset { 95 | s.Index++ 96 | s.Offset = 0 97 | trueOffset = s.CSize[int(s.Index)] 98 | } 99 | } 100 | return wantOffset, nil 101 | } 102 | -------------------------------------------------------------------------------- /xls/cfb/simple_test.go: -------------------------------------------------------------------------------- 1 | package cfb 2 | 3 | import ( 4 | "io" 5 | "io/ioutil" 6 | "log" 7 | "os" 8 | "testing" 9 | ) 10 | 11 | func TestHeader(t *testing.T) { 12 | d := &Document{} 13 | f, _ := os.Open("../../testdata/test.xls") 14 | err := d.load(f) 15 | if err != nil { 16 | t.Fatal(err) 17 | } 18 | } 19 | 20 | func TestHeader2(t *testing.T) { 21 | d := &Document{} 22 | f, _ := os.Open("../../testdata/test2.xls") 23 | err := d.load(f) 24 | if err != nil { 25 | t.Fatal(err) 26 | } 27 | } 28 | 29 | func TestHeader3(t *testing.T) { 30 | d := &Document{} 31 | f, _ := os.Open("../../testdata/test3.xls") 32 | err := d.load(f) 33 | if err != nil { 34 | t.Fatal(err) 35 | } 36 | } 37 | 38 | func TestHeader4(t *testing.T) { 39 | d := &Document{} 40 | f, _ := os.Open("../../testdata/test4.xls") 41 | err := d.load(f) 42 | if err != nil { 43 | t.Fatal(err) 44 | } 45 | 46 | log.Println(d.List()) 47 | 48 | r, err := d.Open("Workbook") 49 | if err != nil { 50 | t.Fatal(err) 51 | } 52 | book, err := ioutil.ReadAll(r) 53 | if err != nil { 54 | t.Fatal(err) 55 | } 56 | log.Println(len(book)) 57 | 58 | r, err = d.Open("\x05DocumentSummaryInformation") 59 | if err != nil { 60 | t.Fatal(err) 61 | } 62 | data, err := ioutil.ReadAll(r) 63 | if err != nil { 64 | t.Fatal(err) 65 | } 66 | log.Println(len(data)) 67 | } 68 | 69 | var testSlices = [][]byte{ 70 | {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, 71 | {10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, 72 | {20, 21, 22, 23, 24, 25, 26, 27, 28, 29}, 73 | {30, 31, 32, 33, 34, 35, 36, 37, 38, 39}, 74 | {40, 41, 42, 43, 44, 45, 46, 47, 48, 49}, 75 | } 76 | 77 | func TestSliceReader(t 
*testing.T) { 78 | sr := &SliceReader{ 79 | Data: testSlices, 80 | } 81 | var uno, old [1]byte 82 | _, err := sr.Read(uno[:]) 83 | for err == nil { 84 | old[0] = uno[0] 85 | _, err = sr.Read(uno[:]) 86 | if err == nil && uno[0] != (old[0]+1) { 87 | log.Printf("read data out of order new=%d, old=%d", old[0], uno[0]) 88 | t.Fail() 89 | } 90 | } 91 | sr.Seek(0, io.SeekStart) 92 | _, err = sr.Read(uno[:]) 93 | for err == nil { 94 | old[0] = uno[0] 95 | _, err = sr.Read(uno[:]) 96 | if err == nil && uno[0] != (old[0]+1) { 97 | log.Printf("read data out of order new=%d, old=%d", old[0], uno[0]) 98 | t.Fail() 99 | } 100 | } 101 | sr.Seek(10, io.SeekStart) 102 | _, err = sr.Read(uno[:]) 103 | if uno[0] != 10 { 104 | log.Printf("unexpected element %d (expected %d)", uno[0], 10) 105 | t.Fail() 106 | } 107 | sr.Seek(35, io.SeekStart) 108 | _, err = sr.Read(uno[:]) 109 | if uno[0] != 35 { 110 | log.Printf("unexpected element %d (expected %d)", uno[0], 35) 111 | t.Fail() 112 | } 113 | sr.Seek(7, io.SeekCurrent) 114 | _, err = sr.Read(uno[:]) 115 | if uno[0] != 43 { 116 | log.Printf("unexpected element %d (expected %d)", uno[0], 43) 117 | t.Fail() 118 | } 119 | sr.Seek(-9, io.SeekCurrent) 120 | _, err = sr.Read(uno[:]) 121 | if uno[0] != 35 { 122 | log.Printf("unexpected element %d (expected %d)", uno[0], 35) 123 | t.Fail() 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /xls/structs.go: -------------------------------------------------------------------------------- 1 | package xls 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | ) 7 | 8 | type header struct { 9 | Version uint16 // An unsigned integer that specifies the BIFF version of the file. The value MUST be 0x0600. 10 | DocType uint16 //An unsigned integer that specifies the document type of the substream of records following this record. For more information about the layout of the sub-streams in the workbook stream see File Structure. 
11 | RupBuild uint16 // An unsigned integer that specifies the build identifier. 12 | RupYear uint16 // An unsigned integer that specifies the year when this BIFF version was first created. The value MUST be 0x07CC or 0x07CD. 13 | MiscBits uint64 // lots of miscellaneous bits and flags we're not going to check 14 | } 15 | 16 | // 2.1.4 17 | type rec struct { 18 | RecType recordType // 19 | RecSize uint16 // must be between 0 and 8224 20 | Data []byte // len(rec.data) = rec.recsize 21 | } 22 | 23 | type boundSheet struct { 24 | Position uint32 // A FilePointer as specified in [MS-OSHARED] section 2.2.1.5 that specifies the stream position of the start of the BOF record for the sheet. 25 | HiddenState byte // (2 bits) An unsigned integer that specifies the hidden state of the sheet. MUST be a value from the following table: 26 | SheetType byte // An unsigned integer that specifies the sheet type. 00=worksheet 27 | Name string 28 | } 29 | 30 | /////// 31 | type shRow struct { 32 | RowIndex uint16 // 0-based 33 | FirstCol uint16 // 0-based 34 | LastCol uint16 // 1-based! 35 | Height uint16 36 | Reserved uint32 37 | Flags uint32 38 | } 39 | 40 | type shRef8 struct { 41 | FirstRow uint16 // 0-based 42 | LastRow uint16 // 0-based 43 | FirstCol uint16 // 0-based 44 | LastCol uint16 // 0-based 45 | } 46 | type shMulRK struct { 47 | RowIndex uint16 // 0-based 48 | FirstCol uint16 // 0-based 49 | Values []RkRec 50 | LastCol uint16 // 0-based? 
// RKNumber is a packed 32-bit numeric cell value. The two low bits are flags
// and the upper 30 bits carry the payload:
//
//	bit 0 set   -> the decoded value must be divided by 100
//	bit 1 set   -> the payload is a signed 30-bit integer
//	bit 1 clear -> the payload is the top 30 bits of an IEEE 754 double
type RKNumber uint32

// IsInteger reports whether the value is a plain integer, i.e. an integer
// payload that is not scaled down by 100.
func (r RKNumber) IsInteger() bool {
	if (r & 1) != 0 {
		// has 2 decimals
		return false
	}
	if (r & 2) == 0 {
		// is part of a float
		return false
	}
	return true
}

// Int returns the value of an integer-encoded RKNumber. For the "divided by
// 100" integer encoding the result is truncated toward zero. Float-encoded
// values return 0; use Float64 for those.
func (r RKNumber) Int() int {
	val := int32(r) >> 2
	if (r&1) == 0 && (r&2) != 0 {
		return int(val)
	}
	if (r&1) != 0 && (r&2) != 0 {
		return int(val / 100)
	}
	return 0
}

// Float64 returns the numeric value for every one of the four encodings.
// Integer payloads are converted to float64, and the division by 100 is
// applied whenever bit 0 is set.
func (r RKNumber) Float64() float64 {
	var v float64
	if (r & 2) != 0 {
		// signed 30-bit integer payload (arithmetic shift keeps the sign)
		v = float64(int32(r) >> 2)
	} else {
		// the payload is the 30 most significant bits of a double; the
		// remaining 34 bits are zero
		v = math.Float64frombits(uint64(r&^3) << 32)
	}
	if (r & 1) != 0 {
		v /= 100.0
	}
	return v
}

// String formats the value using fmt's default formatting: as an integer when
// IsInteger reports true, as a float64 otherwise.
func (r RKNumber) String() string {
	if r.IsInteger() {
		return fmt.Sprint(r.Int())
	}
	return fmt.Sprint(r.Float64())
}
31 | func (t *simpleFile) Get(name string) (grate.Collection, error) { 32 | return t, nil 33 | } 34 | 35 | // Next advances to the next record of content. 36 | // It MUST be called prior to any Scan(). 37 | func (t *simpleFile) Next() bool { 38 | t.iterRow++ 39 | return t.iterRow < len(t.rows) 40 | } 41 | 42 | // Strings extracts values from the current record into a list of strings. 43 | func (t *simpleFile) Strings() []string { 44 | return t.rows[t.iterRow] 45 | } 46 | 47 | // Formats extracts the format code for the current record into a list. 48 | func (t *simpleFile) Formats() []string { 49 | res := make([]string, len(t.rows[t.iterRow])) 50 | for i := range res { 51 | res[i] = "General" 52 | } 53 | return res 54 | } 55 | 56 | // Types extracts the data types from the current record into a list. 57 | // options: "boolean", "integer", "float", "string", "date", 58 | // and special cases: "blank", "hyperlink" which are string types 59 | func (t *simpleFile) Types() []string { 60 | res := make([]string, len(t.rows[t.iterRow])) 61 | for i, v := range t.rows[t.iterRow] { 62 | if v == "" { 63 | res[i] = "blank" 64 | } else { 65 | res[i] = "string" 66 | } 67 | } 68 | return res 69 | } 70 | 71 | // Scan extracts values from the current record into the provided arguments 72 | // Arguments must be pointers to one of 5 supported types: 73 | // bool, int, float64, string, or time.Time 74 | func (t *simpleFile) Scan(args ...interface{}) error { 75 | var err error 76 | row := t.rows[t.iterRow] 77 | if len(row) != len(args) { 78 | return fmt.Errorf("grate/simple: expected %d Scan destinations, got %d", len(row), len(args)) 79 | } 80 | 81 | for i, a := range args { 82 | switch v := a.(type) { 83 | case *bool: 84 | switch strings.ToLower(row[i]) { 85 | case "1", "t", "true", "y", "yes": 86 | *v = true 87 | default: 88 | *v = false 89 | } 90 | case *int: 91 | var n int64 92 | n, err = strconv.ParseInt(row[i], 10, 64) 93 | *v = int(n) 94 | case *float64: 95 | *v, err = 
strconv.ParseFloat(row[i], 64) 96 | case *string: 97 | *v = row[i] 98 | case *time.Time: 99 | return errors.New("grate/simple: time.Time not supported, you must parse date strings manually") 100 | default: 101 | return grate.ErrInvalidScanType 102 | } 103 | if err != nil { 104 | return err 105 | } 106 | } 107 | return nil 108 | } 109 | 110 | // IsEmpty returns true if there are no data values. 111 | func (t *simpleFile) IsEmpty() bool { 112 | return len(t.rows) == 0 113 | } 114 | 115 | // Err returns the last error that occured. 116 | func (t *simpleFile) Err() error { 117 | return nil 118 | } 119 | -------------------------------------------------------------------------------- /grate.go: -------------------------------------------------------------------------------- 1 | // Package grate opens tabular data files (such as spreadsheets and delimited plaintext files) 2 | // and allows programmatic access to the data contents in a consistent interface. 3 | package grate 4 | 5 | import ( 6 | "errors" 7 | "log" 8 | "sort" 9 | ) 10 | 11 | // Source represents a set of data collections. 12 | type Source interface { 13 | // List the individual data tables within this source. 14 | List() ([]string, error) 15 | 16 | // Get a Collection from the source by name. 17 | Get(name string) (Collection, error) 18 | 19 | // Close the source and discard memory. 20 | Close() error 21 | } 22 | 23 | // Collection represents an iterable collection of records. 24 | type Collection interface { 25 | // Next advances to the next record of content. 26 | // It MUST be called prior to any Scan(). 27 | Next() bool 28 | 29 | // Strings extracts values from the current record into a list of strings. 30 | Strings() []string 31 | 32 | // Types extracts the data types from the current record into a list. 
33 | // options: "boolean", "integer", "float", "string", "date", 34 | // and special cases: "blank", "hyperlink" which are string types 35 | Types() []string 36 | 37 | // Formats extracts the format codes for the current record into a list. 38 | Formats() []string 39 | 40 | // Scan extracts values from the current record into the provided arguments 41 | // Arguments must be pointers to one of 5 supported types: 42 | // bool, int64, float64, string, or time.Time 43 | // If invalid, returns ErrInvalidScanType 44 | Scan(args ...interface{}) error 45 | 46 | // IsEmpty returns true if there are no data values. 47 | IsEmpty() bool 48 | 49 | // Err returns the last error that occured. 50 | Err() error 51 | } 52 | 53 | // OpenFunc defines a Source's instantiation function. 54 | // It should return ErrNotInFormat immediately if filename is not of the correct file type. 55 | type OpenFunc func(filename string) (Source, error) 56 | 57 | // Open a tabular data file and return a Source for accessing it's contents. 58 | func Open(filename string) (Source, error) { 59 | for _, o := range srcTable { 60 | src, err := o.op(filename) 61 | if err == nil { 62 | return src, nil 63 | } 64 | if !errors.Is(err, ErrNotInFormat) { 65 | return nil, err 66 | } 67 | if Debug { 68 | log.Println(" ", filename, "is not in", o.name, "format") 69 | } 70 | } 71 | return nil, ErrUnknownFormat 72 | } 73 | 74 | type srcOpenTab struct { 75 | name string 76 | pri int 77 | op OpenFunc 78 | } 79 | 80 | var srcTable = make([]*srcOpenTab, 0, 20) 81 | 82 | // Register the named source as a grate datasource implementation. 
83 | func Register(name string, priority int, opener OpenFunc) error { 84 | if Debug { 85 | log.Println("Registering the", name, "format at priority", priority) 86 | } 87 | srcTable = append(srcTable, &srcOpenTab{name: name, pri: priority, op: opener}) 88 | sort.Slice(srcTable, func(i, j int) bool { 89 | return srcTable[i].pri < srcTable[j].pri 90 | }) 91 | return nil 92 | } 93 | 94 | const ( 95 | // ContinueColumnMerged marks a continuation column within a merged cell. 96 | ContinueColumnMerged = "→" 97 | // EndColumnMerged marks the last column of a merged cell. 98 | EndColumnMerged = "⇥" 99 | 100 | // ContinueRowMerged marks a continuation row within a merged cell. 101 | ContinueRowMerged = "↓" 102 | // EndRowMerged marks the last row of a merged cell. 103 | EndRowMerged = "⤓" 104 | ) 105 | -------------------------------------------------------------------------------- /xls/crypto/rc4.go: -------------------------------------------------------------------------------- 1 | package crypto 2 | 3 | import ( 4 | "bytes" 5 | "crypto/md5" 6 | "crypto/rc4" 7 | "encoding/binary" 8 | "fmt" 9 | ) 10 | 11 | var _ Decryptor = &rc4Writer{} 12 | 13 | func (d *rc4Writer) Write(data []byte) (n int, err error) { 14 | x := len(data) 15 | for len(data) > 0 { 16 | n := copy(d.bytes[d.offset:], data) 17 | d.offset += n 18 | if d.offset >= 1024 { 19 | if d.offset != 1024 { 20 | panic("invalid offset from write") 21 | } 22 | d.Flush() 23 | } 24 | data = data[n:] 25 | } 26 | return x, nil 27 | } 28 | 29 | func (d *rc4Writer) Read(data []byte) (n int, err error) { 30 | return d.buf.Read(data) 31 | } 32 | 33 | // Reset to block 0, and clear all written and readable data. 34 | func (d *rc4Writer) Reset() { 35 | d.block = 0 36 | d.offset = 0 37 | d.buf.Reset() 38 | } 39 | 40 | // Flush tells the decryptor to decrypt the latest block. 
41 | func (d *rc4Writer) Flush() { 42 | var zeros [1024]byte 43 | 44 | endpad := 0 45 | if d.offset < 1024 { 46 | endpad = copy(d.bytes[d.offset:], zeros[:]) 47 | d.offset += endpad 48 | } 49 | if d.offset != 1024 { 50 | panic("invalid offset fill") 51 | } 52 | 53 | // decrypt and write results to output buffer 54 | d.startBlock() 55 | d.dec.XORKeyStream(d.bytes[:], d.bytes[:]) 56 | d.buf.Write(d.bytes[:1024-endpad]) 57 | 58 | d.offset = 0 59 | d.block++ 60 | } 61 | 62 | // SetPassword for the decryption. 63 | func (d *rc4Writer) SetPassword(password []byte) { 64 | d.Password = make([]rune, len(password)) 65 | for i, p := range password { 66 | d.Password[i] = rune(p) 67 | } 68 | 69 | /// compute the first part of the encryption key 70 | result := generateStd97Key(d.Password, d.Salt) 71 | d.encKey = make([]byte, len(result)) 72 | copy(d.encKey, result) 73 | } 74 | 75 | type rc4Writer struct { 76 | block uint32 77 | offset int 78 | bytes [1024]byte 79 | 80 | // records the decrypted data 81 | buf bytes.Buffer 82 | 83 | /////// 84 | 85 | // decrypter for RC4 content streams 86 | dec *rc4.Cipher 87 | 88 | cipherKey []byte // H1 per 2.3.6.2 89 | encKey []byte // Hfinal per 2.3.6.2 90 | 91 | Salt []byte 92 | Password []rune 93 | } 94 | 95 | func (d *rc4Writer) Bytes() []byte { 96 | return d.buf.Bytes() 97 | } 98 | 99 | func (d *rc4Writer) Verify(everifier, everifierHash []byte) error { 100 | d.Reset() 101 | d.startBlock() 102 | 103 | var temp1 [16]byte 104 | var temp2 [16]byte 105 | d.dec.XORKeyStream(temp1[:], everifier) 106 | d.dec.XORKeyStream(temp2[:], everifierHash) 107 | 108 | newhash := md5.Sum(temp1[:]) 109 | for i, c := range newhash { 110 | if temp2[i] != c { 111 | return fmt.Errorf("verification failed") 112 | } 113 | } 114 | return nil 115 | } 116 | 117 | ///////////////////// 118 | 119 | func (d *rc4Writer) startBlock() { 120 | if d.encKey == nil { 121 | d.SetPassword([]byte(DefaultXLSPassword)) 122 | } 123 | 124 | d.cipherKey = make([]byte, 16) 125 | 
// generateStd97Key derives the 16-byte intermediate key from a password and a
// 16-byte salt:
//
//  1. the password is encoded as little-endian 16-bit code units and hashed
//     with MD5;
//  2. the first 5 bytes of that digest followed by the salt are concatenated
//     16 times;
//  3. the MD5 digest of that concatenation is returned.
//
// It panics when the password is empty or the salt is not exactly 16 bytes.
func generateStd97Key(passData []rune, salt []byte) []byte {
	if len(salt) != 16 || len(passData) == 0 {
		panic("invalid keygen material")
	}

	// each rune is truncated to 16 bits and written low byte first
	encoded := make([]byte, 0, 2*len(passData))
	for _, r := range passData {
		unit := uint16(r)
		encoded = append(encoded, byte(unit), byte(unit>>8))
	}
	pwHash := md5.Sum(encoded)

	// build the 16 x (truncated hash + salt) buffer and hash it in one go
	material := make([]byte, 0, 16*(5+len(salt)))
	for round := 0; round < 16; round++ {
		material = append(material, pwHash[:5]...)
		material = append(material, salt...)
	}
	key := md5.Sum(material)
	return key[:]
}
"10,000.12345"}, 39 | {-10000, "-10,000"}, 40 | {float64(-10000), "-10,000"}, 41 | {float64(-10000) + 0.12345, "-9,999.87655"}, 42 | {uint16(10000), "10,000"}, 43 | {100000, "100,000"}, 44 | {float64(100000), "100,000"}, 45 | {float64(100000) + 0.12345, "100,000.12345"}, 46 | {-100000, "-100,000"}, 47 | {float64(-100000), "-100,000"}, 48 | {float64(-100000) + 0.12345, "-99,999.87655"}, 49 | {uint64(100000), "100,000"}, 50 | {1000000, "1,000,000"}, 51 | {float64(1000000), "1e+06"}, 52 | {float64(1000000) + 0.12345, "1.00000012345e+06"}, 53 | {-1000000, "-1,000,000"}, 54 | {float64(-1000000), "-1e+06"}, 55 | {float64(-1000000) + 0.12345, "-999,999.87655"}, 56 | {uint64(1000000), "1,000,000"}, 57 | {10000000, "10,000,000"}, 58 | {float64(10000000), "1e+07"}, 59 | {float64(10000000) + 0.12345, "1.000000012345e+07"}, 60 | {-10000000, "-10,000,000"}, 61 | {float64(-10000000), "-1e+07"}, 62 | {float64(-10000000) + 0.12345, "-9.99999987655e+06"}, 63 | {uint64(10000000), "10,000,000"}, 64 | {100000000, "100,000,000"}, 65 | {float64(100000000), "1e+08"}, 66 | {float64(100000000) + 0.12345, "1.0000000012345e+08"}, 67 | {-100000000, "-100,000,000"}, 68 | {float64(-100000000), "-1e+08"}, 69 | {float64(-100000000) + 0.12345, "-9.999999987655e+07"}, 70 | {uint64(100000000), "100,000,000"}, 71 | } 72 | 73 | func TestCommas(t *testing.T) { 74 | cf := addCommas(identFunc) 75 | for _, c := range commas { 76 | fs := cf(nil, c.v) 77 | if c.s != fs { 78 | t.Fatalf("commas failed: get '%s' but expected '%s' for %T(%v)", 79 | fs, c.s, c.v, c.v) 80 | } 81 | } 82 | } 83 | 84 | func TestDateFormats(t *testing.T) { 85 | var testDates = []time.Time{ 86 | time.Date(1901, 7, 11, 1, 5, 0, 0, time.UTC), 87 | time.Date(1905, 7, 11, 4, 10, 0, 0, time.UTC), 88 | time.Date(1904, 7, 11, 8, 15, 0, 0, time.UTC), 89 | time.Date(1993, 7, 11, 12, 20, 0, 0, time.UTC), 90 | time.Date(1983, 7, 11, 16, 30, 0, 0, time.UTC), 91 | time.Date(1983, 7, 11, 20, 45, 0, 0, time.UTC), 92 | time.Date(2000, 12, 31, 23, 59, 
0, 0, time.UTC), 93 | time.Date(2002, 12, 31, 23, 59, 0, 0, time.UTC), 94 | time.Date(2012, 3, 10, 9, 30, 0, 0, time.UTC), 95 | time.Date(2014, 3, 27, 9, 37, 0, 0, time.UTC), 96 | } 97 | 98 | fx := &Formatter{} 99 | for _, t := range testDates { 100 | for fid, ctype := range builtInFormatTypes { 101 | if ctype != DateCell { 102 | continue 103 | } 104 | ff, _ := goFormatters[fid] 105 | // mainly testing these don't crash... 106 | log.Println(ff(fx, t)) 107 | } 108 | } 109 | } 110 | func TestBoolFormats(t *testing.T) { 111 | ff, _ := makeFormatter(`"yes";"yes";"no"`) 112 | 113 | if "no" != ff(nil, false) { 114 | t.Fatal(`false should be "no"`) 115 | } 116 | if "no" != ff(nil, 0) { 117 | t.Fatal(`0 should be "no"`) 118 | } 119 | if "no" != ff(nil, 0.0) { 120 | t.Fatal(`0.0 should be "no"`) 121 | } 122 | 123 | ///// 124 | 125 | if "yes" != ff(nil, true) { 126 | t.Fatal(`true should be "yes"`) 127 | } 128 | if "yes" != ff(nil, 99) { 129 | t.Fatal(`99 should be "yes"`) 130 | } 131 | if "yes" != ff(nil, -4) { 132 | t.Fatal(`-4 should be "yes"`) 133 | } 134 | 135 | if "yes" != ff(nil, 4.0) { 136 | t.Fatal(`4.0 should be "yes"`) 137 | } 138 | if "yes" != ff(nil, -99.0) { 139 | t.Fatal(`-99.0 should be "yes"`) 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /xlsx/xlsx.go: -------------------------------------------------------------------------------- 1 | package xlsx 2 | 3 | import ( 4 | "archive/zip" 5 | "encoding/xml" 6 | "errors" 7 | "io" 8 | "log" 9 | "os" 10 | "path/filepath" 11 | "strings" 12 | 13 | "github.com/pbnjay/grate" 14 | "github.com/pbnjay/grate/commonxl" 15 | ) 16 | 17 | var _ = grate.Register("xlsx", 5, Open) 18 | 19 | // Document contains an Office Open XML document. 
20 | type Document struct { 21 | filename string 22 | f *os.File 23 | r *zip.Reader 24 | primaryDoc string 25 | 26 | // type => id => filename 27 | rels map[string]map[string]string 28 | sheets []*Sheet 29 | strings []string 30 | xfs []uint16 31 | fmt commonxl.Formatter 32 | } 33 | 34 | func (d *Document) Close() error { 35 | d.xfs = d.xfs[:0] 36 | d.xfs = nil 37 | d.strings = d.strings[:0] 38 | d.strings = nil 39 | d.sheets = d.sheets[:0] 40 | d.sheets = nil 41 | return d.f.Close() 42 | } 43 | 44 | func Open(filename string) (grate.Source, error) { 45 | f, err := os.Open(filename) 46 | if err != nil { 47 | return nil, err 48 | } 49 | info, err := f.Stat() 50 | if err != nil { 51 | return nil, err 52 | } 53 | z, err := zip.NewReader(f, info.Size()) 54 | if err != nil { 55 | return nil, grate.WrapErr(err, grate.ErrNotInFormat) 56 | } 57 | d := &Document{ 58 | filename: filename, 59 | f: f, 60 | r: z, 61 | } 62 | 63 | d.rels = make(map[string]map[string]string, 4) 64 | 65 | // parse the primary relationships 66 | dec, c, err := d.openXML("_rels/.rels") 67 | if err != nil { 68 | return nil, grate.WrapErr(err, grate.ErrNotInFormat) 69 | } 70 | err = d.parseRels(dec, "") 71 | c.Close() 72 | if err != nil { 73 | return nil, grate.WrapErr(err, grate.ErrNotInFormat) 74 | } 75 | if d.primaryDoc == "" { 76 | return nil, errors.New("xlsx: invalid document") 77 | } 78 | 79 | // parse the secondary relationships to primary doc 80 | base := filepath.Base(d.primaryDoc) 81 | sub := strings.TrimSuffix(d.primaryDoc, base) 82 | relfn := filepath.Join(sub, "_rels", base+".rels") 83 | dec, c, err = d.openXML(relfn) 84 | if err != nil { 85 | return nil, err 86 | } 87 | err = d.parseRels(dec, sub) 88 | c.Close() 89 | if err != nil { 90 | return nil, err 91 | } 92 | 93 | // parse the workbook structure 94 | dec, c, err = d.openXML(d.primaryDoc) 95 | if err != nil { 96 | return nil, err 97 | } 98 | err = d.parseWorkbook(dec) 99 | c.Close() 100 | if err != nil { 101 | return nil, err 102 | 
} 103 | 104 | styn := d.rels["http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"] 105 | for _, sst := range styn { 106 | // parse the shared string table 107 | dec, c, err = d.openXML(sst) 108 | if err != nil { 109 | return nil, err 110 | } 111 | err = d.parseStyles(dec) 112 | c.Close() 113 | if err != nil { 114 | return nil, err 115 | } 116 | } 117 | 118 | ssn := d.rels["http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings"] 119 | for _, sst := range ssn { 120 | // parse the shared string table 121 | dec, c, err = d.openXML(sst) 122 | if err != nil { 123 | return nil, err 124 | } 125 | err = d.parseSharedStrings(dec) 126 | c.Close() 127 | if err != nil { 128 | return nil, err 129 | } 130 | } 131 | 132 | return d, nil 133 | } 134 | 135 | func (d *Document) openXML(name string) (*xml.Decoder, io.Closer, error) { 136 | if grate.Debug { 137 | log.Println(" openXML", name) 138 | } 139 | for _, zf := range d.r.File { 140 | if zf.Name == name { 141 | zfr, err := zf.Open() 142 | if err != nil { 143 | return nil, nil, err 144 | } 145 | dec := xml.NewDecoder(zfr) 146 | return dec, zfr, nil 147 | } 148 | } 149 | return nil, nil, io.EOF 150 | } 151 | 152 | func (d *Document) List() ([]string, error) { 153 | res := make([]string, 0, len(d.sheets)) 154 | for _, s := range d.sheets { 155 | res = append(res, s.name) 156 | } 157 | return res, nil 158 | } 159 | 160 | func (d *Document) Get(sheetName string) (grate.Collection, error) { 161 | for _, s := range d.sheets { 162 | if s.name == sheetName { 163 | if s.err == errNotLoaded { 164 | s.err = s.parseSheet() 165 | } 166 | return s.wrapped, s.err 167 | } 168 | } 169 | return nil, errors.New("xlsx: sheet not found") 170 | } 171 | -------------------------------------------------------------------------------- /xls/crypto/crypto.go: -------------------------------------------------------------------------------- 1 | // Package crypto implements excel encryption algorithms from 
the 2 | // MS-OFFCRYPTO design specs. Currently only standard/basic RC4 3 | // "obfuscation" is supported. 4 | package crypto 5 | 6 | import ( 7 | "bytes" 8 | "encoding/binary" 9 | "fmt" 10 | ) 11 | 12 | // Decryptor describes methods to decrypt an excel sheet. 13 | type Decryptor interface { 14 | // SetPassword for the decryption. 15 | SetPassword(password []byte) 16 | 17 | // Read implements the io.Reader interface. 18 | Read(p []byte) (n int, err error) 19 | 20 | // Write implements the io.Writer interface. 21 | Write(p []byte) (n int, err error) 22 | 23 | // Bytes returns the decrypted data. 24 | Bytes() []byte 25 | 26 | // Flush tells the decryptor to decrypt the latest block. 27 | Flush() 28 | 29 | // Reset the decryptor, and clear all written and readable data. 30 | Reset() 31 | } 32 | 33 | // Algorithms designed based on specs in MS-OFFCRYPTO: 34 | // https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-offcrypto/3c34d72a-1a61-4b52-a893-196f9157f083 35 | 36 | // Important notes from MS-XLS section 2.2.10: 37 | // https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/cd03cb5f-ca02-4934-a391-bb674cb8aa06 38 | 39 | // When obfuscating or encrypting BIFF records in these streams the record type and 40 | // record size components MUST NOT be obfuscated or encrypted. 41 | // In addition the following records MUST NOT be obfuscated or encrypted: 42 | // BOF (section 2.4.21), FilePass (section 2.4.117), UsrExcl (section 2.4.339), 43 | // FileLock (section 2.4.116), InterfaceHdr (section 2.4.146), RRDInfo (section 2.4.227), 44 | // and RRDHead (section 2.4.226). Additionally, the lbPlyPos field of the BoundSheet8 45 | // record (section 2.4.28) MUST NOT be encrypted. 46 | 47 | // For RC4 encryption and RC4 CryptoAPI encryption, the Unicode password string is used 48 | // to generate the encryption key as specified in [MS-OFFCRYPTO] section 2.3.6.2 or 49 | // [MS-OFFCRYPTO] section 2.3.5.2 depending on the RC4 algorithm used. 
The record data 50 | // is then encrypted by the specific RC4 algorithm in 1024-byte blocks. The block number 51 | // is set to zero at the beginning of every BIFF record stream, and incremented by one 52 | // at each 1024-byte boundary. Bytes to be encrypted are passed into the RC4 encryption 53 | // function and then written to the stream. For unencrypted records and the record 54 | // headers consisting of the record type and record size, a byte buffer of all zeros, 55 | // of the same size as the section of unencrypted bytes, is passed into the RC4 56 | // encryption function. The results are then ignored and the unencrypted bytes are 57 | // written to the stream. 58 | 59 | // DefaultXLSPassword is the default encryption password defined by note 60 | // <100> Section 2.4.191: If the value of the wPassword field of the Password record in 61 | // the Globals Substream is not 0x0000, Excel 97, Excel 2000, Excel 2002, Office Excel 62 | // 2003, Office Excel 2007, and Excel 2010 encrypt the document as specified in [MS-OFFCRYPTO], 63 | // section 2.3. If an encryption password is not specified or the workbook or sheet is only 64 | // protected, the document is encrypted with the default password of: 65 | 66 | // DefaultXLSPassword is the default Excel encryption password. 67 | var DefaultXLSPassword = "VelvetSweatshop" 68 | 69 | ///////////// 70 | 71 | // 2.3.6.1 72 | type basicRC4Encryption struct { 73 | MajorVersion uint16 74 | MinorVersion uint16 75 | Salt [16]byte 76 | Verifier [16]byte 77 | VerifierHash [16]byte 78 | } 79 | 80 | // NewBasicRC4 implements the standard RC4 decryption. 
// errTruncatedHyperlink is returned when a hyperlink record ends before all
// of the fields announced by its flags and length prefixes have been read.
var errTruncatedHyperlink = errors.New("xls: truncated hyperlink data")

// readHyperlinkString reads a length-prefixed UTF-16LE string: a 32-bit
// little-endian count of 16-bit units followed by that many units. It returns
// the decoded text and the remaining, unread bytes.
func readHyperlinkString(raw []byte) (string, []byte, error) {
	if len(raw) < 4 {
		return "", nil, errTruncatedHyperlink
	}
	count := binary.LittleEndian.Uint32(raw)
	raw = raw[4:]
	// compare in 64 bits so a hostile count cannot overflow the check
	if uint64(count)*2 > uint64(len(raw)) {
		return "", nil, errTruncatedHyperlink
	}
	us := make([]uint16, count)
	for i := range us {
		us[i] = binary.LittleEndian.Uint16(raw[2*i:])
	}
	return string(utf16.Decode(us)), raw[2*int(count):], nil
}

// decodeHyperlinks parses a serialized hyperlink object and returns its
// display text and link target, both trimmed of surrounding whitespace and
// NUL characters.
//
// The record starts with a 16-byte class id, a 32-bit version (which must be
// 2) and a 32-bit flag word; the flags select which optional parts follow: a
// display name, a frame name (skipped), a moniker (either stored as a plain
// string or as a URL/file moniker) and a location string. When a location
// string is present it replaces any link text read from the moniker.
//
// An error is returned for an unknown version, an unknown moniker class, or
// data that is shorter than its own length fields claim.
func decodeHyperlinks(raw []byte) (displayText, linkText string, err error) {
	// 16 bytes classid + 4 bytes version + 4 bytes flags
	if len(raw) < 24 {
		return "", "", errTruncatedHyperlink
	}
	raw = raw[16:] // skip classid
	if binary.LittleEndian.Uint32(raw[:4]) != 2 {
		return "", "", errors.New("xls: unknown hyperlink version")
	}

	flags := binary.LittleEndian.Uint32(raw[4:8])
	raw = raw[8:]

	if (flags & hlstmfHasDisplayName) != 0 {
		displayText, raw, err = readHyperlinkString(raw)
		if err != nil {
			return "", "", err
		}
	}

	if (flags & hlstmfHasFrameName) != 0 {
		// skip a HyperlinkString containing target Frame
		_, raw, err = readHyperlinkString(raw)
		if err != nil {
			return "", "", err
		}
	}

	if (flags & hlstmfHasMoniker) != 0 {
		if (flags & hlstmfMonikerSavedAsStr) != 0 {
			// read HyperlinkString containing the URL
			linkText, raw, err = readHyperlinkString(raw)
			if err != nil {
				return "", "", err
			}
		} else {
			var n int
			linkText, n, err = parseHyperlinkMoniker(raw)
			if err != nil {
				return "", "", err
			}
			raw = raw[n:]
		}
	}

	if (flags & hlstmfHasLocationStr) != 0 {
		linkText, _, err = readHyperlinkString(raw)
		if err != nil {
			return "", "", err
		}
	}

	linkText = strings.Trim(linkText, " \v\f\t\r\n\x00")
	displayText = strings.Trim(displayText, " \v\f\t\r\n\x00")
	return displayText, linkText, nil
}

// parseHyperlinkMoniker decodes a URL moniker or a file moniker from the
// start of raw. It returns the decoded target and the number of bytes
// consumed. Unknown moniker class ids and truncated data return an error
// (with a consumed count of 0).
func parseHyperlinkMoniker(raw []byte) (string, int, error) {
	urlMonikerClassID := [16]byte{0xE0, 0xC9, 0xEA, 0x79, 0xF9, 0xBA, 0xCE, 0x11, 0x8C, 0x82, 0x00, 0xAA, 0x00, 0x4B, 0xA9, 0x0B}
	fileMonikerClassID := [16]byte{0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46}

	if len(raw) < 16 {
		return "", 0, errTruncatedHyperlink
	}
	var classid [16]byte
	copy(classid[:], raw)
	no := 16

	switch classid {
	case urlMonikerClassID:
		if len(raw) < no+4 {
			return "", 0, errTruncatedHyperlink
		}
		// length is given in bytes; the payload is read as 16-bit units
		length := binary.LittleEndian.Uint32(raw[no:]) / 2
		no += 4
		if uint64(length)*2 > uint64(len(raw)-no) {
			return "", 0, errTruncatedHyperlink
		}
		buf := make([]uint16, length)
		for i := range buf {
			buf[i] = binary.LittleEndian.Uint16(raw[no:])
			no += 2
		}
		// When a NUL sits 13 units from the end, the final 12 units (24
		// bytes) are dropped; presumably optional trailing fields after the
		// terminated URL — confirm against the format specification.
		if length > 12 && buf[length-13] == 0 {
			buf = buf[:length-12]
		}
		return string(utf16.Decode(buf)), no, nil

	case fileMonikerClassID:
		// 2 bytes (cAnti, unused here) followed by the 4-byte ansiLength
		if len(raw) < no+6 {
			return "", 0, errTruncatedHyperlink
		}
		ansiLen := uint64(binary.LittleEndian.Uint32(raw[no+2:])) //ansiLength
		no += 6
		// the ANSI path is followed by 24 bytes of misc fixed properties and
		// the 4-byte cbUnicodePathSize
		if ansiLen+24+4 > uint64(len(raw)-no) {
			return "", 0, errTruncatedHyperlink
		}
		buf := raw[no : no+int(ansiLen)]
		no += int(ansiLen) + 24

		length := binary.LittleEndian.Uint32(raw[no:]) // cbUnicodePathSize
		no += 4
		if length == 0 {
			return string(buf), no, nil
		}

		// the size counts a 6-byte header in front of the UTF-16 path; a
		// smaller value would underflow the subtraction below
		if length < 6 {
			return "", 0, errTruncatedHyperlink
		}
		units := (length - 6) / 2
		if 6+uint64(units)*2 > uint64(len(raw)-no) {
			return "", 0, errTruncatedHyperlink
		}
		no += 6
		buf2 := make([]uint16, units)
		for i := range buf2 {
			buf2[i] = binary.LittleEndian.Uint16(raw[no:])
			no += 2
		}
		return string(utf16.Decode(buf2)), no, nil
	}

	return "", 0, fmt.Errorf("xls: unknown moniker classid")
}

// HLink flags
const (
	hlstmfHasMoniker          = uint32(0x001)
	hlstmfIsAbsolute          = uint32(0x002)
	hlstmfSiteGaveDisplayName = uint32(0x004)
	hlstmfHasLocationStr      = uint32(0x008)
	hlstmfHasDisplayName      = uint32(0x010)
	hlstmfHasGUID             = uint32(0x020)
	hlstmfHasCreationTime     = uint32(0x040)
	hlstmfHasFrameName        = uint32(0x080)
	hlstmfMonikerSavedAsStr   = uint32(0x100)
	hlstmfAbsFromGetdataRel   = uint32(0x200)
)
d.rels[vals["Type"]] = make(map[string]string) 30 | } 31 | if strings.HasPrefix(vals["Target"], "/") { 32 | // handle malformed "absolute" paths cleanly 33 | d.rels[vals["Type"]][vals["Id"]] = vals["Target"][1:] 34 | } else { 35 | d.rels[vals["Type"]][vals["Id"]] = filepath.Join(basedir, vals["Target"]) 36 | } 37 | if vals["Type"] == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" { 38 | d.primaryDoc = vals["Target"] 39 | } 40 | default: 41 | if grate.Debug { 42 | log.Println(" Unhandled relationship xml tag", v.Name.Local, v.Attr) 43 | } 44 | } 45 | case xml.EndElement: 46 | // not needed 47 | default: 48 | if grate.Debug { 49 | log.Printf(" Unhandled relationship xml tokens %T %+v", tok, tok) 50 | } 51 | } 52 | } 53 | if err == io.EOF { 54 | err = nil 55 | } 56 | return err 57 | } 58 | 59 | func (d *Document) parseWorkbook(dec *xml.Decoder) error { 60 | tok, err := dec.RawToken() 61 | for ; err == nil; tok, err = dec.RawToken() { 62 | switch v := tok.(type) { 63 | case xml.StartElement: 64 | switch v.Name.Local { 65 | case "sheet": 66 | vals := make(map[string]string, 5) 67 | for _, a := range v.Attr { 68 | vals[a.Name.Local] = a.Value 69 | } 70 | sheetID, ok1 := vals["id"] 71 | sheetName, ok2 := vals["name"] 72 | if !ok1 || !ok2 { 73 | return errors.New("xlsx: invalid sheet definition") 74 | } 75 | s := &Sheet{ 76 | d: d, 77 | relID: sheetID, 78 | name: sheetName, 79 | docname: d.rels["http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"][sheetID], 80 | err: errNotLoaded, 81 | } 82 | d.sheets = append(d.sheets, s) 83 | case "workbook", "sheets": 84 | // containers 85 | default: 86 | if grate.Debug { 87 | log.Println(" Unhandled workbook xml tag", v.Name.Local, v.Attr) 88 | } 89 | } 90 | case xml.EndElement: 91 | // not needed 92 | default: 93 | if grate.Debug { 94 | log.Printf(" Unhandled workbook xml tokens %T %+v", tok, tok) 95 | } 96 | } 97 | } 98 | if err == io.EOF { 99 | err = nil 100 | } 101 | 
return err 102 | } 103 | 104 | func (d *Document) parseStyles(dec *xml.Decoder) error { 105 | baseNumFormats := []string{} 106 | d.xfs = d.xfs[:0] 107 | 108 | section := 0 109 | tok, err := dec.RawToken() 110 | for ; err == nil; tok, err = dec.RawToken() { 111 | switch v := tok.(type) { 112 | case xml.StartElement: 113 | switch v.Name.Local { 114 | case "styleSheet": 115 | // container 116 | case "numFmt": 117 | ax := getAttrs(v.Attr, "numFmtId", "formatCode") 118 | fmtNo, _ := strconv.ParseInt(ax[0], 10, 16) 119 | d.fmt.Add(uint16(fmtNo), ax[1]) 120 | 121 | case "cellStyleXfs": 122 | section = 1 123 | case "cellXfs": 124 | section = 2 125 | ax := getAttrs(v.Attr, "count") 126 | n, _ := strconv.ParseInt(ax[0], 10, 64) 127 | d.xfs = make([]uint16, 0, n) 128 | 129 | case "xf": 130 | ax := getAttrs(v.Attr, "numFmtId", "applyNumberFormat", "xfId") 131 | if section == 1 { 132 | // load base styles, but only save number format 133 | if ax[1] == "0" { 134 | baseNumFormats = append(baseNumFormats, "0") 135 | } else { 136 | baseNumFormats = append(baseNumFormats, ax[0]) 137 | } 138 | } else if section == 2 { 139 | // actual referencable cell styles 140 | // 1) get base style so we can inherit format properly 141 | baseID, _ := strconv.ParseInt(ax[2], 10, 64) 142 | numFmtID := "0" 143 | if len(baseNumFormats) > int(baseID) { 144 | numFmtID = baseNumFormats[baseID] 145 | } 146 | 147 | // 2) check if this XF overrides the base format 148 | if ax[1] == "0" { 149 | // remove the format (if it was inherited) 150 | numFmtID = "0" 151 | } else { 152 | numFmtID = ax[0] 153 | } 154 | 155 | nfid, _ := strconv.ParseInt(numFmtID, 10, 16) 156 | d.xfs = append(d.xfs, uint16(nfid)) 157 | } else { 158 | panic("wheres is this xf??") 159 | } 160 | default: 161 | if grate.Debug { 162 | log.Println(" Unhandled style xml tag", v.Name.Local, v.Attr) 163 | } 164 | } 165 | case xml.EndElement: 166 | switch v.Name.Local { 167 | case "cellStyleXfs": 168 | section = 0 169 | case "cellXfs": 170 | 
section = 0 171 | } 172 | default: 173 | if grate.Debug { 174 | log.Printf(" Unhandled style xml tokens %T %+v", tok, tok) 175 | } 176 | } 177 | } 178 | if err == io.EOF { 179 | err = nil 180 | } 181 | return err 182 | } 183 | 184 | func (d *Document) parseSharedStrings(dec *xml.Decoder) error { 185 | val := "" 186 | tok, err := dec.RawToken() 187 | for ; err == nil; tok, err = dec.RawToken() { 188 | switch v := tok.(type) { 189 | case xml.CharData: 190 | val += string(v) 191 | case xml.StartElement: 192 | switch v.Name.Local { 193 | case "si": 194 | val = "" 195 | case "t": 196 | // no attributes to parse, we only want the CharData ... 197 | case "sst": 198 | // main container 199 | default: 200 | if grate.Debug { 201 | log.Println(" Unhandled SST xml tag", v.Name.Local, v.Attr) 202 | } 203 | } 204 | case xml.EndElement: 205 | if v.Name.Local == "si" { 206 | d.strings = append(d.strings, val) 207 | continue 208 | } 209 | default: 210 | if grate.Debug { 211 | log.Printf(" Unhandled SST xml token %T %+v", tok, tok) 212 | } 213 | } 214 | } 215 | if err == io.EOF { 216 | err = nil 217 | } 218 | return err 219 | } 220 | -------------------------------------------------------------------------------- /xlsx/sheets.go: -------------------------------------------------------------------------------- 1 | package xlsx 2 | 3 | import ( 4 | "encoding/xml" 5 | "errors" 6 | "io" 7 | "log" 8 | "path/filepath" 9 | "strconv" 10 | "strings" 11 | 12 | "github.com/pbnjay/grate" 13 | "github.com/pbnjay/grate/commonxl" 14 | ) 15 | 16 | type Sheet struct { 17 | d *Document 18 | relID string 19 | name string 20 | docname string 21 | 22 | err error 23 | 24 | wrapped *commonxl.Sheet 25 | } 26 | 27 | var errNotLoaded = errors.New("xlsx: sheet not loaded") 28 | 29 | func (s *Sheet) parseSheet() error { 30 | s.wrapped = &commonxl.Sheet{ 31 | Formatter: &s.d.fmt, 32 | } 33 | linkmap := make(map[string]string) 34 | base := filepath.Base(s.docname) 35 | sub := strings.TrimSuffix(s.docname, base) 
36 | relsname := filepath.Join(sub, "_rels", base+".rels") 37 | dec, clo, err := s.d.openXML(relsname) 38 | if err == nil { 39 | // rels might not exist for every sheet 40 | tok, err := dec.RawToken() 41 | for ; err == nil; tok, err = dec.RawToken() { 42 | if v, ok := tok.(xml.StartElement); ok && v.Name.Local == "Relationship" { 43 | ax := getAttrs(v.Attr, "Id", "Type", "Target", "TargetMode") 44 | if ax[3] == "External" && ax[1] == "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink" { 45 | linkmap[ax[0]] = ax[2] 46 | } 47 | } 48 | } 49 | clo.Close() 50 | } 51 | 52 | dec, clo, err = s.d.openXML(s.docname) 53 | if err != nil { 54 | return err 55 | } 56 | defer clo.Close() 57 | 58 | currentCellType := BlankCellType 59 | currentCell := "" 60 | var fno uint16 61 | var maxCol, maxRow int 62 | 63 | tok, err := dec.RawToken() 64 | for ; err == nil; tok, err = dec.RawToken() { 65 | switch v := tok.(type) { 66 | case xml.CharData: 67 | if currentCell == "" { 68 | continue 69 | } 70 | c, r := refToIndexes(currentCell) 71 | if c >= 0 && r >= 0 { 72 | var val interface{} = string(v) 73 | 74 | switch currentCellType { 75 | case BooleanCellType: 76 | if v[0] == '1' { 77 | val = true 78 | } else { 79 | val = false 80 | } 81 | case DateCellType: 82 | log.Println("CELL DATE", val, fno) 83 | case NumberCellType: 84 | fval, err := strconv.ParseFloat(string(v), 64) 85 | if err == nil { 86 | val = fval 87 | } 88 | //log.Println("CELL NUMBER", val, numFormat) 89 | case SharedStringCellType: 90 | //log.Println("CELL SHSTR", val, currentCellType, numFormat) 91 | si, _ := strconv.ParseInt(string(v), 10, 64) 92 | val = s.d.strings[si] 93 | case BlankCellType: 94 | //log.Println("CELL BLANK") 95 | // don't place any values 96 | continue 97 | case ErrorCellType, FormulaStringCellType, InlineStringCellType: 98 | //log.Println("CELL ERR/FORM/INLINE", val, currentCellType) 99 | default: 100 | log.Println("CELL UNKNOWN", val, currentCellType, fno) 101 | } 102 | 
s.wrapped.Put(r, c, val, fno) 103 | } else { 104 | //log.Println("FAIL row/col: ", currentCell) 105 | } 106 | case xml.StartElement: 107 | switch v.Name.Local { 108 | case "dimension": 109 | ax := getAttrs(v.Attr, "ref") 110 | if ax[0] == "A1" { 111 | maxCol, maxRow = 1, 1 112 | // short-circuit empty sheet 113 | s.wrapped.Resize(1, 1) 114 | continue 115 | } 116 | dims := strings.Split(ax[0], ":") 117 | if len(dims) == 1 { 118 | maxCol, maxRow = refToIndexes(dims[0]) 119 | } else { 120 | //minCol, minRow := refToIndexes(dims[0]) 121 | maxCol, maxRow = refToIndexes(dims[1]) 122 | } 123 | s.wrapped.Resize(maxRow, maxCol) 124 | //log.Println("DIMENSION:", s.minRow, s.minCol, ">", s.maxRow, s.maxCol) 125 | case "row": 126 | //currentRow = ax["r"] // unsigned int row index 127 | //log.Println("ROW", currentRow) 128 | case "c": 129 | ax := getAttrs(v.Attr, "t", "r", "s") 130 | currentCellType = CellType(ax[0]) 131 | if currentCellType == BlankCellType { 132 | currentCellType = NumberCellType 133 | } 134 | currentCell = ax[1] // always an A1 style reference 135 | style := ax[2] 136 | sid, _ := strconv.ParseInt(style, 10, 64) 137 | if len(s.d.xfs) > int(sid) { 138 | fno = s.d.xfs[sid] 139 | } else { 140 | fno = 0 141 | } 142 | //log.Println("CELL", currentCell, sid, numFormat, currentCellType) 143 | case "v": 144 | //log.Println("CELL VALUE", ax) 145 | 146 | case "mergeCell": 147 | ax := getAttrs(v.Attr, "ref") 148 | dims := strings.Split(ax[0], ":") 149 | startCol, startRow := refToIndexes(dims[0]) 150 | endCol, endRow := startCol, startRow 151 | if len(dims) > 1 { 152 | endCol, endRow = refToIndexes(dims[1]) 153 | } 154 | if endRow > maxRow { 155 | endRow = maxRow 156 | } 157 | if endCol > maxCol { 158 | endCol = maxCol 159 | } 160 | for r := startRow; r <= endRow; r++ { 161 | for c := startCol; c <= endCol; c++ { 162 | if r == startRow && c == startCol { 163 | // has data already! 
164 | } else if c == startCol { 165 | // first and last column MAY be the same 166 | if r == endRow { 167 | s.wrapped.Put(r, c, grate.EndRowMerged, 0) 168 | } else { 169 | s.wrapped.Put(r, c, grate.ContinueRowMerged, 0) 170 | } 171 | } else if c == endCol { 172 | // first and last column are NOT the same 173 | s.wrapped.Put(r, c, grate.EndColumnMerged, 0) 174 | } else { 175 | s.wrapped.Put(r, c, grate.ContinueColumnMerged, 0) 176 | } 177 | } 178 | } 179 | 180 | case "hyperlink": 181 | ax := getAttrs(v.Attr, "ref", "id") 182 | col, row := refToIndexes(ax[0]) 183 | link := linkmap[ax[1]] 184 | s.wrapped.Put(row, col, link, 0) 185 | s.wrapped.SetURL(row, col, link) 186 | 187 | case "worksheet", "mergeCells", "hyperlinks": 188 | // containers 189 | case "f": 190 | //log.Println("start: ", v.Name.Local, v.Attr) 191 | default: 192 | if grate.Debug { 193 | log.Println(" Unhandled sheet xml tag", v.Name.Local, v.Attr) 194 | } 195 | } 196 | case xml.EndElement: 197 | 198 | switch v.Name.Local { 199 | case "c": 200 | currentCell = "" 201 | case "row": 202 | //currentRow = "" 203 | } 204 | default: 205 | if grate.Debug { 206 | log.Printf(" Unhandled sheet xml tokens %T %+v", tok, tok) 207 | } 208 | } 209 | } 210 | if err == io.EOF { 211 | err = nil 212 | } 213 | return err 214 | } 215 | -------------------------------------------------------------------------------- /xls/strings.go: -------------------------------------------------------------------------------- 1 | package xls 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | "io" 7 | "io/ioutil" 8 | "unicode/utf16" 9 | ) 10 | 11 | // 2.5.240 12 | func decodeShortXLUnicodeString(raw []byte) (string, int, error) { 13 | // identical to decodeXLUnicodeString except for cch=8bits instead of 16 14 | cch := int(raw[0]) 15 | flags := raw[1] 16 | raw = raw[2:] 17 | 18 | content := make([]uint16, cch) 19 | if (flags & 0x1) == 0 { 20 | // 16-bit characters but only the bottom 8bits 21 | contentBytes := raw[:cch] 22 | for i, x := 
range contentBytes { 23 | content[i] = uint16(x) 24 | } 25 | cch += 2 // to return the offset 26 | } else { 27 | // 16-bit characters 28 | for i := 0; i < cch; i++ { 29 | content[i] = binary.LittleEndian.Uint16(raw[:2]) 30 | raw = raw[2:] 31 | } 32 | cch += cch + 2 // to return the offset 33 | } 34 | return string(utf16.Decode(content)), cch, nil 35 | } 36 | 37 | // 2.5.294 38 | func decodeXLUnicodeString(raw []byte) (string, int, error) { 39 | // identical to decodeShortXLUnicodeString except for cch=16bits instead of 8 40 | cch := int(binary.LittleEndian.Uint16(raw[:2])) 41 | flags := raw[2] 42 | raw = raw[3:] 43 | 44 | content := make([]uint16, cch) 45 | if (flags & 0x1) == 0 { 46 | // 16-bit characters but only the bottom 8bits 47 | contentBytes := raw[:cch] 48 | for i, x := range contentBytes { 49 | content[i] = uint16(x) 50 | } 51 | cch += 3 // to return the offset 52 | } else { 53 | // 16-bit characters 54 | for i := 0; i < cch; i++ { 55 | content[i] = binary.LittleEndian.Uint16(raw[:2]) 56 | raw = raw[2:] 57 | } 58 | cch += cch + 3 // to return the offset 59 | } 60 | return string(utf16.Decode(content)), cch, nil 61 | } 62 | 63 | // 2.5.293 64 | func decodeXLUnicodeRichExtendedString(r io.Reader) (string, error) { 65 | var cch, cRun uint16 66 | var flags uint8 67 | var cbExtRs int32 68 | err := binary.Read(r, binary.LittleEndian, &cch) 69 | if err != nil { 70 | return "", err 71 | } 72 | err = binary.Read(r, binary.LittleEndian, &flags) 73 | if err != nil { 74 | return "", err 75 | } 76 | if (flags & 0x8) != 0 { 77 | // rich formating data is present 78 | err = binary.Read(r, binary.LittleEndian, &cRun) 79 | if err != nil { 80 | return "", err 81 | } 82 | } 83 | if (flags & 0x4) != 0 { 84 | // phonetic string data is present 85 | err = binary.Read(r, binary.LittleEndian, &cbExtRs) 86 | if err != nil { 87 | return "", err 88 | } 89 | } 90 | 91 | content := make([]uint16, cch) 92 | if (flags & 0x1) == 0 { 93 | // 16-bit characters but only the bottom 8bits 94 
| contentBytes := make([]byte, cch) 95 | n, err2 := io.ReadFull(r, contentBytes) 96 | if n == 0 && err2 != io.ErrUnexpectedEOF { 97 | err = err2 98 | } 99 | if uint16(n) < cch { 100 | contentBytes = contentBytes[:n] 101 | content = content[:n] 102 | } 103 | 104 | for i, x := range contentBytes { 105 | content[i] = uint16(x) 106 | } 107 | 108 | } else { 109 | // 16-bit characters 110 | err = binary.Read(r, binary.LittleEndian, content) 111 | } 112 | if err != nil { 113 | return "", err 114 | } 115 | ////// 116 | 117 | if cRun > 0 { 118 | // rich formating data is present 119 | _, err = io.CopyN(ioutil.Discard, r, int64(cRun)*4) 120 | if err != nil { 121 | return "", err 122 | } 123 | } 124 | if cbExtRs > 0 { 125 | // phonetic string data is present 126 | _, err = io.CopyN(ioutil.Discard, r, int64(cbExtRs)) 127 | if err != nil { 128 | return "", err 129 | } 130 | } 131 | ////// 132 | 133 | return string(utf16.Decode(content)), nil 134 | } 135 | 136 | // read in an array of XLUnicodeRichExtendedString s 137 | func parseSST(recs []*rec) ([]string, error) { 138 | // The quirky thing about this code is that when strings cross a record 139 | // boundary, there's an intervening flags byte that MAY change the string 140 | // from an 8-bit encoding to 16-bit or vice versa. 
141 | 142 | //totalRefs := binary.LittleEndian.Uint32(recs[0].Data[0:4]) 143 | numStrings := binary.LittleEndian.Uint32(recs[0].Data[4:8]) 144 | 145 | all := make([]string, 0, numStrings) 146 | current := make([]uint16, 32*1024) 147 | 148 | buf := recs[0].Data[8:] 149 | for i := 0; i < len(recs); { 150 | var cRunBytes int 151 | var flags byte 152 | var cbExtRs uint32 153 | 154 | for len(buf) > 0 { 155 | slen := binary.LittleEndian.Uint16(buf) 156 | buf = buf[2:] 157 | flags = buf[0] 158 | buf = buf[1:] 159 | 160 | if (flags & 0x8) != 0 { 161 | // rich formating data is present 162 | cRun := binary.LittleEndian.Uint16(buf) 163 | cRunBytes = int(cRun) * 4 164 | buf = buf[2:] 165 | } 166 | if (flags & 0x4) != 0 { 167 | // phonetic string data is present 168 | cbExtRs = binary.LittleEndian.Uint32(buf) 169 | buf = buf[4:] 170 | } 171 | 172 | /////// 173 | blx := len(buf) 174 | bly := len(buf) - 5 175 | if blx > 5 { 176 | blx = 5 177 | } 178 | if bly < 0 { 179 | bly = 0 180 | } 181 | 182 | // this block will read the string data, but transparently 183 | // handle continuing across records 184 | if int(slen) > cap(current) { 185 | current = make([]uint16, slen) 186 | } else { 187 | current = current[:slen] 188 | } 189 | for j := 0; j < int(slen); j++ { 190 | if len(buf) == 0 { 191 | i++ 192 | if (recs[i].Data[0] & 1) == 0 { 193 | flags &= 0xFE 194 | } else { 195 | flags |= 1 196 | } 197 | buf = recs[i].Data[1:] 198 | } 199 | 200 | if (flags & 1) == 0 { //8-bit 201 | current[j] = uint16(buf[0]) 202 | buf = buf[1:] 203 | } else { //16-bit 204 | current[j] = uint16(binary.LittleEndian.Uint16(buf[:2])) 205 | buf = buf[2:] 206 | if len(buf) == 1 { 207 | return nil, errors.New("xls: off by one") 208 | } 209 | } 210 | } 211 | 212 | s := string(utf16.Decode(current)) 213 | all = append(all, s) 214 | 215 | /////// 216 | 217 | for cRunBytes > 0 { 218 | if len(buf) >= int(cRunBytes) { 219 | buf = buf[cRunBytes:] 220 | cRunBytes = 0 221 | } else { 222 | cRunBytes -= len(buf) 223 | 
i++ 224 | buf = recs[i].Data 225 | } 226 | } 227 | 228 | for cbExtRs > 0 { 229 | if len(buf) >= int(cbExtRs) { 230 | buf = buf[cbExtRs:] 231 | cbExtRs = 0 232 | } else { 233 | cbExtRs -= uint32(len(buf)) 234 | i++ 235 | buf = recs[i].Data 236 | } 237 | } 238 | } 239 | i++ 240 | if i < len(recs) { 241 | buf = recs[i].Data 242 | } 243 | } 244 | 245 | return all, nil 246 | } 247 | -------------------------------------------------------------------------------- /commonxl/sheet.go: -------------------------------------------------------------------------------- 1 | package commonxl 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "time" 7 | 8 | "github.com/pbnjay/grate" 9 | ) 10 | 11 | // Sheet holds raw and rendered values for a spreadsheet. 12 | type Sheet struct { 13 | Formatter *Formatter 14 | NumRows int 15 | NumCols int 16 | Rows [][]Cell 17 | 18 | CurRow int 19 | } 20 | 21 | // Resize the sheet for the number of rows and cols given. 22 | // Newly added cells default to blank. 23 | func (s *Sheet) Resize(rows, cols int) { 24 | for i := range s.Rows { 25 | if i > rows { 26 | break 27 | } 28 | n := cols - len(s.Rows[i]) 29 | if n <= 0 { 30 | continue 31 | } 32 | s.Rows[i] = append(s.Rows[i], make([]Cell, n)...) 33 | } 34 | 35 | if rows <= 0 { 36 | rows = 1 37 | } 38 | if cols <= 0 { 39 | cols = 1 40 | } 41 | s.CurRow = 0 42 | s.NumRows = rows 43 | s.NumCols = cols 44 | 45 | for rows >= len(s.Rows) { 46 | s.Rows = append(s.Rows, make([]Cell, cols)) 47 | } 48 | } 49 | 50 | // Put the value at the cell location given. 
51 | func (s *Sheet) Put(row, col int, value interface{}, fmtNum uint16) { 52 | //log.Println(row, col, value, fmtNum) 53 | if row >= s.NumRows || col >= s.NumCols { 54 | if grate.Debug { 55 | log.Printf("grate: cell out of bounds row %d>=%d, col %d>=%d", 56 | row, s.NumRows, col, s.NumCols) 57 | } 58 | 59 | // per the spec, this is an invalid Excel file 60 | // but we'll resize in place instead of crashing out 61 | if row >= s.NumRows { 62 | s.NumRows = row + 1 63 | } 64 | if col >= s.NumCols { 65 | s.NumCols = col + 1 66 | } 67 | s.Resize(s.NumRows, s.NumCols) 68 | } 69 | 70 | if spec, ok := value.(string); ok { 71 | if spec == grate.EndRowMerged || spec == grate.EndColumnMerged || spec == grate.ContinueRowMerged || spec == grate.ContinueColumnMerged { 72 | s.Rows[row][col] = NewCell(value) 73 | s.Rows[row][col][1] = StaticCell 74 | return 75 | } 76 | } 77 | 78 | ct, ok := s.Formatter.getCellType(fmtNum) 79 | if !ok || fmtNum == 0 { 80 | s.Rows[row][col] = NewCell(value) 81 | } else { 82 | s.Rows[row][col] = NewCellWithType(value, ct, s.Formatter) 83 | } 84 | s.Rows[row][col].SetFormatNumber(fmtNum) 85 | } 86 | 87 | // Set changes the value in an existing cell location. 88 | // NB Currently only used for populating string results for formulas. 89 | func (s *Sheet) Set(row, col int, value interface{}) { 90 | if row > s.NumRows || col > s.NumCols { 91 | log.Println("grate: cell out of bounds") 92 | return 93 | } 94 | 95 | s.Rows[row][col][0] = value 96 | s.Rows[row][col][1] = StringCell 97 | } 98 | 99 | // SetURL adds a hyperlink to an existing cell location. 100 | func (s *Sheet) SetURL(row, col int, link string) { 101 | if row > s.NumRows || col > s.NumCols { 102 | log.Println("grate: cell out of bounds") 103 | return 104 | } 105 | 106 | s.Rows[row][col].SetURL(link) 107 | } 108 | 109 | // Next advances to the next record of content. 110 | // It MUST be called prior to any Scan(). 
111 | func (s *Sheet) Next() bool { 112 | if (s.CurRow + 1) > len(s.Rows) { 113 | return false 114 | } 115 | s.CurRow++ 116 | return true 117 | } 118 | 119 | // Raw extracts the raw Cell interfaces underlying the current row. 120 | func (s *Sheet) Raw() []Cell { 121 | rr := make([]Cell, s.NumCols) 122 | for i, cell := range s.Rows[s.CurRow-1] { 123 | rr[i] = cell.Clone() 124 | } 125 | return rr 126 | } 127 | 128 | // Strings extracts values from the current record into a list of strings. 129 | func (s *Sheet) Strings() []string { 130 | res := make([]string, s.NumCols) 131 | for i, cell := range s.Rows[s.CurRow-1] { 132 | if cell.Type() == BlankCell { 133 | res[i] = "" 134 | continue 135 | } 136 | if cell.Type() == StaticCell { 137 | res[i] = cell.Value().(string) 138 | continue 139 | } 140 | val := cell.Value() 141 | fs, ok := s.Formatter.Apply(cell.FormatNo(), val) 142 | if !ok { 143 | fs = fmt.Sprint(val) 144 | } 145 | res[i] = fs 146 | } 147 | return res 148 | } 149 | 150 | // Types extracts the data types from the current record into a list. 151 | // options: "boolean", "integer", "float", "string", "date", 152 | // and special cases: "blank", "hyperlink" which are string types 153 | func (s *Sheet) Types() []string { 154 | res := make([]string, s.NumCols) 155 | for i, cell := range s.Rows[s.CurRow-1] { 156 | res[i] = cell.Type().String() 157 | } 158 | return res 159 | } 160 | 161 | // Formats extracts the format code for the current record into a list. 
162 | func (s *Sheet) Formats() []string { 163 | ok := true 164 | res := make([]string, s.NumCols) 165 | for i, cell := range s.Rows[s.CurRow-1] { 166 | res[i], ok = builtInFormats[cell.FormatNo()] 167 | if !ok { 168 | res[i] = fmt.Sprint(cell.FormatNo()) 169 | } 170 | } 171 | return res 172 | } 173 | 174 | // Scan extracts values from the current record into the provided arguments 175 | // Arguments must be pointers to one of 5 supported types: 176 | // bool, int64, float64, string, or time.Time 177 | // If invalid, returns ErrInvalidScanType 178 | func (s *Sheet) Scan(args ...interface{}) error { 179 | row := s.Rows[s.CurRow-1] 180 | 181 | for i, a := range args { 182 | val := row[i].Value() 183 | 184 | switch v := a.(type) { 185 | case bool, int64, float64, string, time.Time: 186 | return fmt.Errorf("scan destinations must be pointer (arg %d is not)", i) 187 | case *bool: 188 | if x, ok := val.(bool); ok { 189 | *v = x 190 | } else { 191 | return fmt.Errorf("scan destination %d expected *%T, not *bool", i, val) 192 | } 193 | case *int64: 194 | if x, ok := val.(int64); ok { 195 | *v = x 196 | } else { 197 | return fmt.Errorf("scan destination %d expected *%T, not *int64", i, val) 198 | } 199 | case *float64: 200 | if x, ok := val.(float64); ok { 201 | *v = x 202 | } else { 203 | return fmt.Errorf("scan destination %d expected *%T, not *float64", i, val) 204 | } 205 | case *string: 206 | if x, ok := val.(string); ok { 207 | *v = x 208 | } else { 209 | return fmt.Errorf("scan destination %d expected *%T, not *string", i, val) 210 | } 211 | case *time.Time: 212 | if x, ok := val.(time.Time); ok { 213 | *v = x 214 | } else { 215 | return fmt.Errorf("scan destination %d expected *%T, not *time.Time", i, val) 216 | } 217 | default: 218 | return fmt.Errorf("scan destination for arg %d is not supported (%T)", i, a) 219 | } 220 | } 221 | return nil 222 | } 223 | 224 | // IsEmpty returns true if there are no data values. 
225 | func (s *Sheet) IsEmpty() bool { 226 | return (s.NumCols <= 1 && s.NumRows <= 1) 227 | } 228 | 229 | // Err returns the last error that occured. 230 | func (s *Sheet) Err() error { 231 | return nil 232 | } 233 | -------------------------------------------------------------------------------- /cmd/grate2tsv/main.go: -------------------------------------------------------------------------------- 1 | // Command grate2tsv is a highly parallel tabular data extraction tool. It's 2 | // probably not necessary in your situation, but is included here since it 3 | // is a good stress test of the codebase. 4 | // 5 | // Files on the command line will be parsed and extracted to the "results" 6 | // subdirectory under a heirarchical arrangement (to make our filesystems 7 | // more responsive), and a "results.txt" file will be created logging basic 8 | // information and errors for each file. 9 | package main 10 | 11 | import ( 12 | "bufio" 13 | "crypto/md5" 14 | "flag" 15 | "fmt" 16 | "io" 17 | "io/ioutil" 18 | "log" 19 | "os" 20 | "path/filepath" 21 | "regexp" 22 | "runtime" 23 | "runtime/pprof" 24 | "strings" 25 | "sync" 26 | "time" 27 | 28 | "github.com/pbnjay/grate" 29 | _ "github.com/pbnjay/grate/simple" 30 | _ "github.com/pbnjay/grate/xls" 31 | _ "github.com/pbnjay/grate/xlsx" 32 | ) 33 | 34 | var ( 35 | logfile = flag.String("l", "", "save processing logs to `filename.txt`") 36 | pretend = flag.Bool("p", false, "pretend to output .tsv") 37 | infoFile = flag.String("i", "results.txt", "`filename` to record stats about the process") 38 | removeNewlines = flag.Bool("r", true, "remove embedded tabs, newlines, and condense spaces in cell contents") 39 | trimSpaces = flag.Bool("w", true, "trim whitespace from cell contents") 40 | skipBlanks = flag.Bool("b", true, "discard blank rows from the output") 41 | cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file") 42 | memprofile = flag.String("memprofile", "", "write memory profile to file") 43 | 44 | 
timeFormat = "2006-01-02 15:04:05" 45 | fstats *os.File 46 | 47 | procWG sync.WaitGroup 48 | cleanup = make(chan *output, 100) 49 | outpool = sync.Pool{New: func() interface{} { 50 | return &output{} 51 | }} 52 | ) 53 | 54 | type output struct { 55 | f *os.File 56 | b *bufio.Writer 57 | } 58 | 59 | func main() { 60 | flag.Parse() 61 | 62 | if *memprofile != "" { 63 | f, err := os.Create(*memprofile) 64 | if err != nil { 65 | log.Fatal(err) 66 | } 67 | defer func() { 68 | runtime.GC() 69 | pprof.WriteHeapProfile(f) 70 | f.Close() 71 | }() 72 | } 73 | 74 | if *cpuprofile != "" { 75 | f, err := os.Create(*cpuprofile) 76 | if err != nil { 77 | log.Fatal(err) 78 | } 79 | pprof.StartCPUProfile(f) 80 | defer pprof.StopCPUProfile() 81 | } 82 | 83 | if *logfile != "" { 84 | fo, err := os.Create(*logfile) 85 | if err != nil { 86 | log.Fatal(err) 87 | } 88 | defer fo.Close() 89 | log.SetOutput(fo) 90 | } 91 | 92 | done := make(chan int) 93 | go func() { 94 | for x := range cleanup { 95 | x.b.Flush() 96 | x.f.Close() 97 | outpool.Put(x) 98 | } 99 | done <- 1 100 | }() 101 | 102 | var err error 103 | fstats, err = os.OpenFile(*infoFile, os.O_CREATE|os.O_RDWR, 0644) 104 | if err != nil { 105 | log.Fatal(err) 106 | } 107 | defer fstats.Close() 108 | pos, err := fstats.Seek(0, io.SeekEnd) 109 | if err != nil { 110 | log.Fatal(err) 111 | } 112 | if pos == 0 { 113 | fmt.Fprintf(fstats, "time\tfilename\tsheet\trows\tcolumns\terrors\n") 114 | } 115 | 116 | filenameChan := make(chan string) 117 | 118 | // fan out to 1/2 of CPU cores 119 | // (e.g. 
each file-processor can use 2 cpus) 120 | outMu := &sync.Mutex{} 121 | nparallel := runtime.NumCPU() / 2 122 | procWG.Add(nparallel) 123 | for i := 0; i < nparallel; i++ { 124 | go runProcessor(filenameChan, outMu) 125 | } 126 | for _, fn := range flag.Args() { 127 | filenameChan <- fn 128 | } 129 | 130 | close(filenameChan) 131 | procWG.Wait() 132 | close(cleanup) 133 | <-done 134 | } 135 | 136 | func runProcessor(from chan string, mu *sync.Mutex) { 137 | for fn := range from { 138 | nowFmt := time.Now().Format(timeFormat) 139 | results, err := processFile(fn) 140 | mu.Lock() 141 | if err != nil { 142 | // returned errors are fatal 143 | fmt.Fprintf(fstats, "%s\t%s\t-\t-\t-\t%s\n", nowFmt, fn, err.Error()) 144 | mu.Unlock() 145 | continue 146 | } 147 | 148 | for _, res := range results { 149 | e := "-" 150 | if res.Err != nil { 151 | e = res.Err.Error() 152 | } 153 | fmt.Fprintf(fstats, "%s\t%s\t%s\t%d\t%d\t%s\n", nowFmt, res.Filename, res.SheetName, 154 | res.NumRows, res.NumCols, e) 155 | } 156 | mu.Unlock() 157 | } 158 | procWG.Done() 159 | } 160 | 161 | var ( 162 | sanitize = regexp.MustCompile("[^a-zA-Z0-9]+") 163 | newlines = regexp.MustCompile("[ \n\r\t]+") 164 | ) 165 | 166 | type stats struct { 167 | Filename string 168 | Hash string 169 | SheetName string 170 | NumRows int 171 | NumCols int 172 | Err error 173 | } 174 | 175 | func processFile(fn string) ([]stats, error) { 176 | //log.Printf("Opening file '%s' ...", fn) 177 | wb, err := grate.Open(fn) 178 | if err != nil { 179 | return nil, err 180 | } 181 | defer wb.Close() 182 | 183 | results := []stats{} 184 | 185 | ext := filepath.Ext(fn) 186 | fn2 := filepath.Base(strings.TrimSuffix(fn, ext)) 187 | subparts := fmt.Sprintf("%x", md5.Sum([]byte(fn2))) 188 | subdir := filepath.Join("results", subparts[:2], subparts[2:4]) 189 | os.MkdirAll(subdir, 0755) 190 | log.Printf(subparts[:8]+" Processing file '%s'", fn2) 191 | 192 | sheets, err := wb.List() 193 | if err != nil { 194 | return nil, err 195 | } 196 
| for _, s := range sheets { 197 | ps := stats{ 198 | Filename: fn, 199 | Hash: subparts[:8], 200 | SheetName: s, 201 | } 202 | log.Printf(subparts[:8]+" Opening Sheet '%s'...", s) 203 | sheet, err := wb.Get(s) 204 | if err != nil { 205 | ps.Err = err 206 | results = append(results, ps) 207 | continue 208 | } 209 | if sheet.IsEmpty() { 210 | log.Println(subparts[:8] + " Empty sheet. Skipping.") 211 | results = append(results, ps) 212 | continue 213 | } 214 | s2 := sanitize.ReplaceAllString(s, "_") 215 | if s == fn { 216 | s2 = "main" 217 | } 218 | var ox *output 219 | var w io.Writer = ioutil.Discard 220 | if !*pretend { 221 | f, err := os.Create(subdir + "/" + fn2 + "." + s2 + ".tsv") 222 | if err != nil { 223 | return nil, err 224 | } 225 | ox = outpool.Get().(*output) 226 | ox.f = f 227 | ox.b = bufio.NewWriter(f) 228 | w = ox.b 229 | } 230 | 231 | for sheet.Next() { 232 | row := sheet.Strings() 233 | nonblank := false 234 | for i, x := range row { 235 | if *removeNewlines { 236 | x = newlines.ReplaceAllString(x, " ") 237 | } 238 | if *trimSpaces { 239 | x = strings.TrimSpace(x) 240 | row[i] = x 241 | } 242 | if x != "" { 243 | nonblank = true 244 | if ps.NumCols < i { 245 | ps.NumCols = i 246 | } 247 | } 248 | } 249 | if nonblank || !*skipBlanks { 250 | for i, v := range row { 251 | if i != 0 { 252 | w.Write([]byte{'\t'}) 253 | } 254 | w.Write([]byte(v)) 255 | } 256 | w.Write([]byte{'\n'}) 257 | ps.NumRows++ 258 | } 259 | } 260 | results = append(results, ps) 261 | if ox != nil { 262 | cleanup <- ox 263 | } 264 | } 265 | return results, nil 266 | } 267 | -------------------------------------------------------------------------------- /commonxl/fmt.go: -------------------------------------------------------------------------------- 1 | package commonxl 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "strings" 7 | ) 8 | 9 | // FmtFunc will format a value according to the designated style. 
10 | type FmtFunc func(*Formatter, interface{}) string 11 | 12 | func staticFmtFunc(s string) FmtFunc { 13 | return func(x *Formatter, v interface{}) string { 14 | return s 15 | } 16 | } 17 | 18 | func surround(pre string, ff FmtFunc, post string) FmtFunc { 19 | return func(x *Formatter, v interface{}) string { 20 | return pre + ff(x, v) + post 21 | } 22 | } 23 | 24 | func addNegParens(ff FmtFunc) FmtFunc { 25 | return func(x *Formatter, v interface{}) string { 26 | s1 := ff(x, v) 27 | if s1[0] == '-' { 28 | return "(" + s1[1:] + ")" 29 | } 30 | return s1 31 | } 32 | } 33 | 34 | func addCommas(ff FmtFunc) FmtFunc { 35 | return func(x *Formatter, v interface{}) string { 36 | s1 := ff(x, v) 37 | isNeg := false 38 | if s1[0] == '-' { 39 | isNeg = true 40 | s1 = s1[1:] 41 | } 42 | endIndex := strings.IndexAny(s1, ".eE") 43 | if endIndex < 0 { 44 | endIndex = len(s1) 45 | } 46 | for endIndex > 3 { 47 | endIndex -= 3 48 | s1 = s1[:endIndex] + "," + s1[endIndex:] 49 | } 50 | if isNeg { 51 | return "-" + s1 52 | } 53 | return s1 54 | } 55 | } 56 | 57 | func identFunc(x *Formatter, v interface{}) string { 58 | switch x := v.(type) { 59 | case bool: 60 | if x { 61 | return "TRUE" 62 | } 63 | return "FALSE" 64 | case int64: 65 | s := strconv.FormatInt(x, 10) 66 | if len(s) <= 11 { 67 | return s 68 | } 69 | case float64: 70 | s := strconv.FormatFloat(x, 'f', -1, 64) 71 | if len(s) <= 11 || (len(s) == 12 && x < 0) { 72 | return s 73 | } 74 | s = strconv.FormatFloat(x, 'g', 6, 64) 75 | if len(s) <= 11 { 76 | return s 77 | } 78 | case string: 79 | return x 80 | case fmt.Stringer: 81 | return x.String() 82 | } 83 | return fmt.Sprint(v) 84 | } 85 | 86 | func sprintfFunc(fs string, mul int) FmtFunc { 87 | wantInt64 := strings.Contains(fs, "%d") 88 | return func(x *Formatter, v interface{}) string { 89 | switch val := v.(type) { 90 | case int, uint, int64, uint64, int32, uint32, uint16, int16: 91 | return fmt.Sprintf(fs, v) 92 | 93 | case float64: 94 | val *= float64(mul) 95 | if 
// convertToFloat64 widens v to a float64. Booleans map to 1 and 0, every
// built-in integer and float type is converted directly, and strings are
// parsed with strconv.ParseFloat. The boolean result is false when v has an
// unsupported type or is a string that does not parse.
func convertToFloat64(v interface{}) (float64, bool) {
	var f float64
	switch n := v.(type) {
	case string:
		// return ParseFloat's value as-is, even when parsing failed
		parsed, err := strconv.ParseFloat(n, 64)
		return parsed, err == nil
	case bool:
		if n {
			f = 1.0
		}
	case float64:
		f = n
	case float32:
		f = float64(n)
	case int:
		f = float64(n)
	case int8:
		f = float64(n)
	case int16:
		f = float64(n)
	case int32:
		f = float64(n)
	case int64:
		f = float64(n)
	case uint:
		f = float64(n)
	case uint8:
		f = float64(n)
	case uint16:
		f = float64(n)
	case uint32:
		f = float64(n)
	case uint64:
		f = float64(n)
	default:
		return 0.0, false
	}
	return f, true
}
} 180 | return fmt.Sprintf("%d %d/%d", w, n, d) 181 | } 182 | } 183 | 184 | // handle (up to) all four format cases: 185 | // positive;negative;zero;other 186 | func switchFmtFunc(pos FmtFunc, others ...FmtFunc) FmtFunc { 187 | stringFF := identFunc 188 | zeroFF := pos 189 | negFF := pos 190 | if len(others) > 0 { 191 | negFF = others[0] 192 | if len(others) > 1 { 193 | zeroFF = others[1] 194 | if len(others) > 2 { 195 | stringFF = others[2] 196 | } 197 | } 198 | } 199 | return func(x *Formatter, v interface{}) string { 200 | val, ok := convertToFloat64(v) 201 | if !ok { 202 | return stringFF(x, v) 203 | } 204 | if val == 0.0 { 205 | return zeroFF(x, v) 206 | } 207 | if val < 0.0 { 208 | return negFF(x, v) 209 | } 210 | return pos(x, v) 211 | } 212 | } 213 | 214 | // mapping of standard built-ins to Go date format funcs. 215 | var goFormatters = map[uint16]FmtFunc{ 216 | 0: identFunc, // FIXME: better "general" formatter 217 | 49: identFunc, 218 | 219 | 14: timeFmtFunc(`01-02-06`), 220 | 15: timeFmtFunc(`2-Jan-06`), 221 | 16: timeFmtFunc(`2-Jan`), 222 | 17: timeFmtFunc(`Jan-06`), 223 | 20: timeFmtFunc(`15:04`), 224 | 21: timeFmtFunc(`15:04:05`), 225 | 22: timeFmtFunc(`1/2/06 15:04`), 226 | 45: timeFmtFunc(`04:05`), 227 | 46: timeFmtFunc(`3:04:05`), 228 | 47: timeFmtFunc(`0405.9`), 229 | 27: timeFmtFunc(`2006"年"1"月"`), 230 | 28: timeFmtFunc(`1"月"2"日"`), 231 | 29: timeFmtFunc(`1"月"2"日"`), 232 | 30: timeFmtFunc(`1-2-06`), 233 | 31: timeFmtFunc(`2006"年"1"月"2"日"`), 234 | 32: timeFmtFunc(`15"时"04"分"`), 235 | 33: timeFmtFunc(`15"时"04"分"05"秒"`), 236 | 36: timeFmtFunc(`2006"年"2"月"`), 237 | 50: timeFmtFunc(`2006"年"2"月"`), 238 | 51: timeFmtFunc(`1"月"2"日"`), 239 | 52: timeFmtFunc(`2006"年"1"月"`), 240 | 53: timeFmtFunc(`1"月"2"日"`), 241 | 54: timeFmtFunc(`1"月"2"日"`), 242 | 57: timeFmtFunc(`2006"年"1"月"`), 243 | 58: timeFmtFunc(`1"月"2"日"`), 244 | 71: timeFmtFunc(`2/1/2006`), 245 | 72: timeFmtFunc(`2-Jan-06`), 246 | 73: timeFmtFunc(`2-Jan`), 247 | 74: timeFmtFunc(`Jan-06`), 248 | 
75: timeFmtFunc(`15:04`), 249 | 76: timeFmtFunc(`15:04:05`), 250 | 77: timeFmtFunc(`2/1/2006 15:04`), 251 | 78: timeFmtFunc(`04:05`), 252 | 79: timeFmtFunc(`15:04:05`), 253 | 80: timeFmtFunc(`04:05.9`), 254 | 81: timeFmtFunc(`2/1/06`), 255 | 18: timeFmtFunc(`3:04 PM`), 256 | 19: timeFmtFunc(`3:04:05 PM`), 257 | 258 | 34: cnTimeFmtFunc(`PM 3"时"04"分"`), 259 | 35: cnTimeFmtFunc(`PM 3"时"04"分"05"秒"`), 260 | 55: cnTimeFmtFunc(`PM 3"时"04"分"`), 261 | 56: cnTimeFmtFunc(`PM 3"时"04"分"05"秒`), 262 | 263 | 12: fracFmtFunc(1), 264 | 13: fracFmtFunc(2), 265 | 266 | 69: fracFmtFunc(1), 267 | 70: fracFmtFunc(2), 268 | 269 | 1: sprintfFunc(`%d`, 1), 270 | 2: sprintfFunc(`%4.2f`, 1), 271 | 59: sprintfFunc(`%d`, 1), 272 | 60: sprintfFunc(`%4.2f`, 1), 273 | 274 | 9: sprintfFunc(`%d%%`, 100), 275 | 10: sprintfFunc(`%4.2f%%`, 100), 276 | 67: sprintfFunc(`%d%%`, 100), 277 | 68: sprintfFunc(`%4.2f%%`, 100), 278 | 279 | 3: addCommas(sprintfFunc("%d", 1)), 280 | 61: addCommas(sprintfFunc("%d", 1)), 281 | 37: addNegParens(addCommas(sprintfFunc("%d", 1))), 282 | 38: addNegParens(addCommas(sprintfFunc("%d", 1))), 283 | 284 | 4: addCommas(sprintfFunc("%4.2f", 1)), 285 | 62: addCommas(sprintfFunc("%4.2f", 1)), 286 | 39: addNegParens(addCommas(sprintfFunc("%4.2f", 1))), 287 | 40: addNegParens(addCommas(sprintfFunc("%4.2f", 1))), 288 | 289 | 11: sprintfFunc(`%4.2E`, 1), 290 | 48: sprintfFunc(`%3.1E`, 1), 291 | 292 | 41: zeroDashFunc(addCommas(sprintfFunc("%d", 1))), 293 | 43: zeroDashFunc(addCommas(sprintfFunc("%4.2f", 1))), 294 | 295 | 42: switchFmtFunc( 296 | surround("$", addCommas(sprintfFunc("%d", 1)), ""), 297 | surround("$(", addCommas(sprintfFunc("%d", 1)), ")"), 298 | staticFmtFunc("$-")), 299 | 44: switchFmtFunc( 300 | surround("$", addCommas(sprintfFunc("%4.2f", 1)), ""), 301 | surround("$(", addCommas(sprintfFunc("%4.2f", 1)), ")"), 302 | staticFmtFunc("$-")), 303 | } 304 | -------------------------------------------------------------------------------- /commonxl/formats.go: 
-------------------------------------------------------------------------------- 1 | package commonxl 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "regexp" 7 | "strings" 8 | ) 9 | 10 | // Formatter contains formatting methods common to Excel spreadsheets. 11 | type Formatter struct { 12 | flags uint64 13 | customCodes map[uint16]FmtFunc 14 | customCodeTypes map[uint16]CellType 15 | } 16 | 17 | const ( 18 | fMode1904 uint64 = 1 19 | ) 20 | 21 | // Mode1904 indicates that dates start on Jan 1, 1904 22 | // this setting was used in early MacOS Excel applications. 23 | func (x *Formatter) Mode1904(enabled bool) { 24 | if enabled { 25 | x.flags |= fMode1904 26 | } else { 27 | x.flags = x.flags &^ fMode1904 28 | } 29 | } 30 | 31 | // Add a custom number format to the formatter. 32 | func (x *Formatter) Add(fmtID uint16, formatCode string) error { 33 | if x.customCodes == nil { 34 | x.customCodes = make(map[uint16]FmtFunc) 35 | x.customCodeTypes = make(map[uint16]CellType) 36 | } 37 | if strings.ToLower(formatCode) == "general" { 38 | x.customCodes[fmtID] = goFormatters[0] 39 | return nil 40 | } 41 | _, ok := goFormatters[fmtID] 42 | if ok { 43 | return errors.New("grate/commonxl: cannot replace default number formats") 44 | } 45 | 46 | _, ok2 := x.customCodes[fmtID] 47 | if ok2 { 48 | return errors.New("grate/commonxl: cannot replace existing number formats") 49 | } 50 | 51 | x.customCodes[fmtID], x.customCodeTypes[fmtID] = makeFormatter(formatCode) 52 | return nil 53 | } 54 | 55 | func (x *Formatter) getCellType(fmtID uint16) (CellType, bool) { 56 | if ct, ok := builtInFormatTypes[fmtID]; ok { 57 | return ct, true 58 | } 59 | if x.customCodeTypes != nil { 60 | ct, ok := x.customCodeTypes[fmtID] 61 | return ct, ok 62 | } 63 | return 0, false 64 | } 65 | 66 | var ( 67 | minsMatch = regexp.MustCompile("h.*m.*s") 68 | nonEsc = regexp.MustCompile(`([^"]|^)"`) 69 | squash = regexp.MustCompile(`[*_].`) 70 | fixEsc = regexp.MustCompile(`\\(.)`) 71 | 72 | formatMatchBrackets = 
regexp.MustCompile(`\[[^\]]*\]`) 73 | formatMatchTextLiteral = regexp.MustCompile(`"[^"]*"`) 74 | ) 75 | 76 | func makeFormatter(s string) (FmtFunc, CellType) { 77 | //log.Printf("makeFormatter('%s')", s) 78 | // remove any coloring marks 79 | s = formatMatchBrackets.ReplaceAllString(s, "") 80 | if strings.Contains(s, ";") { 81 | parts := strings.Split(s, ";") 82 | posFF, ctypePos := makeFormatter(parts[0]) 83 | rem := make([]FmtFunc, len(parts)-1) 84 | for i, ps := range parts[1:] { 85 | rem[i], _ = makeFormatter(ps) 86 | } 87 | return switchFmtFunc(posFF, rem...), ctypePos 88 | } 89 | 90 | // escaped characters, and quoted text 91 | s2 := fixEsc.ReplaceAllString(s, "") 92 | s2 = formatMatchTextLiteral.ReplaceAllString(s, "") 93 | 94 | if strings.ContainsAny(s2, "ymdhs") { 95 | // it's a date/time format 96 | 97 | if loc := minsMatch.FindStringIndex(s); loc != nil { 98 | // m or mm in loc[0]:loc[1] is a minute format 99 | inner := s[loc[0]:loc[1]] 100 | inner = strings.Replace(inner, "mm", "04", 1) 101 | inner = strings.Replace(inner, "m", "4", 1) 102 | s = s[:loc[0]] + inner + s[loc[1]:] 103 | } 104 | dfreps := [][]string{ 105 | {"hh", "15"}, {"h", "15"}, 106 | {"ss", "05"}, {"s", "5"}, 107 | {"mmmmm", "Jan"}, // super ambiguous, replace with 3-letter month 108 | {"mmmm", "January"}, {"mmm", "Jan"}, 109 | {"mm", "01"}, {"m", "1"}, 110 | {"dddd", "Monday"}, {"ddd", "Mon"}, 111 | {"dd", "02"}, {"d", "2"}, 112 | {"yyyy", "2006"}, {"yy", "06"}, 113 | } 114 | if strings.Contains(s, "AM") || strings.Contains(s, "PM") { 115 | dfreps[0][1] = "03" 116 | dfreps[1][1] = "3" 117 | } 118 | for _, dfr := range dfreps { 119 | s = strings.Replace(s, dfr[0], dfr[1], 1) 120 | } 121 | 122 | s = nonEsc.ReplaceAllString(s, `$1`) 123 | s = squash.ReplaceAllString(s, ``) 124 | s = fixEsc.ReplaceAllString(s, `$1`) 125 | 126 | //log.Printf(" made time formatter '%s'", s) 127 | return timeFmtFunc(s), DateCell 128 | } 129 | 130 | var ff FmtFunc 131 | var ctype CellType 132 | if 
strings.ContainsAny(s, ".Ee") { 133 | verb := "f" 134 | if strings.ContainsAny(s, "Ee") { 135 | verb = "E" 136 | } 137 | s = regexp.MustCompile("[eE]+[+-]0+").ReplaceAllString(s, "") 138 | s2 := strings.ReplaceAll(s, ",", "") 139 | i1 := strings.IndexAny(s2, "0") 140 | i2 := strings.IndexByte(s2, '.') 141 | i3 := strings.LastIndexAny(s2, "0.") 142 | mul := 1 143 | if strings.Contains(s2, "%") { 144 | mul = 100 145 | } 146 | sf := fmt.Sprintf("%%%d.%d%s", i3-i1, i3-i2, verb) 147 | //log.Printf(" made float formatter '%s'", sf) 148 | ff = sprintfFunc(sf, mul) 149 | ctype = FloatCell 150 | } else { 151 | s2 := strings.ReplaceAll(s, ",", "") 152 | i1 := strings.IndexAny(s2, "0") 153 | i2 := strings.LastIndexAny(s2, "0.") 154 | mul := 1 155 | if strings.Contains(s2, "%") { 156 | mul = 100 157 | } 158 | sf := fmt.Sprintf("%%%dd", i2-i1) 159 | if (i2 - i1) == 0 { 160 | sf = "%d" 161 | } 162 | //log.Printf(" made int formatter '%s'", sf) 163 | ff = sprintfFunc(sf, mul) 164 | ctype = IntegerCell 165 | } 166 | 167 | if strings.Contains(s, ",") { 168 | ff = addCommas(ff) 169 | //log.Printf(" added commas") 170 | } 171 | 172 | surReg := regexp.MustCompile(`[0#?,.]+`) 173 | prepost := surReg.Split(s, 2) 174 | if len(prepost) > 0 && len(prepost[0]) > 0 { 175 | prepost[0] = nonEsc.ReplaceAllString(prepost[0], `$1`) 176 | prepost[0] = squash.ReplaceAllString(prepost[0], ``) 177 | prepost[0] = fixEsc.ReplaceAllString(prepost[0], `$1`) 178 | } 179 | if len(prepost) == 1 { 180 | if prepost[0] == "@" { 181 | return identFunc, StringCell 182 | } 183 | //log.Printf(" added static ('%s')", prepost[0]) 184 | return staticFmtFunc(prepost[0]), StringCell 185 | } 186 | if len(prepost[0]) > 0 || len(prepost[1]) > 0 { 187 | prepost[1] = nonEsc.ReplaceAllString(prepost[1], `$1`) 188 | prepost[1] = squash.ReplaceAllString(prepost[1], ``) 189 | prepost[1] = fixEsc.ReplaceAllString(prepost[1], `$1`) 190 | 191 | ff = surround(prepost[0], ff, prepost[1]) 192 | //log.Printf(" added surround ('%s' ... 
'%s')", prepost[0], prepost[1]) 193 | } 194 | 195 | return ff, ctype 196 | } 197 | 198 | // Get the number format func to use for formatting values, 199 | // it returns false when fmtID is unknown. 200 | func (x *Formatter) Get(fmtID uint16) (FmtFunc, bool) { 201 | ff, ok := goFormatters[fmtID] 202 | if !ok { 203 | fs, ok2 := x.customCodes[fmtID] 204 | if ok2 { 205 | return fs, true 206 | } 207 | ff = identFunc 208 | } 209 | 210 | return ff, ok 211 | } 212 | 213 | // Apply the specified number format to the value. 214 | // Returns false when fmtID is unknown. 215 | func (x *Formatter) Apply(fmtID uint16, val interface{}) (string, bool) { 216 | ff, ok := goFormatters[fmtID] 217 | if !ok { 218 | fs, ok2 := x.customCodes[fmtID] 219 | if ok2 { 220 | return fs(x, val), true 221 | } 222 | } 223 | return ff(x, val), ok 224 | } 225 | 226 | // builtInFormats are all the built-in number formats for XLS/XLSX. 227 | var builtInFormats = map[uint16]string{ 228 | 0: `General`, 229 | 1: `0`, 230 | 2: `0.00`, 231 | 3: `#,##0`, 232 | 4: `#,##0.00`, 233 | 9: `0%`, 234 | 10: `0.00%`, 235 | 236 | 11: `0.00E+00`, 237 | 12: `# ?/?`, 238 | 13: `# ??/??`, 239 | 14: `mm-dd-yy`, 240 | 15: `d-mmm-yy`, 241 | 16: `d-mmm`, 242 | 17: `mmm-yy`, 243 | 18: `h:mm AM/PM`, 244 | 19: `h:mm:ss AM/PM`, 245 | 20: `h:mm`, 246 | 21: `h:mm:ss`, 247 | 22: `m/d/yy h:mm`, 248 | 37: `#,##0 ;(#,##0)`, 249 | 38: `#,##0 ;[Red](#,##0)`, 250 | 39: `#,##0.00;(#,##0.00)`, 251 | 40: `#,##0.00;[Red](#,##0.00)`, 252 | 253 | 41: `_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)`, 254 | 42: `_("$"* #,##0_);_("$"* \(#,##0\);_("$"* "-"_);_(@_)`, 255 | 43: `_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)`, 256 | 44: `_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)`, 257 | 258 | 45: `mm:ss`, 259 | 46: `[h]:mm:ss`, 260 | 47: `mmss.0`, 261 | 48: `##0.0E+0`, 262 | 49: `@`, 263 | 264 | // zh-cn format codes 265 | 27: `yyyy"年"m"月"`, 266 | 28: `m"月"d"日"`, 267 | 29: `m"月"d"日"`, 268 | 30: `m-d-yy`, 269 | 31: `yyyy"年"m"月"d"日"`, 
270 | 32: `h"时"mm"分"`, 271 | 33: `h"时"mm"分"ss"秒"`, 272 | 34: `上午/下午 h"时"mm"分"`, 273 | 35: `上午/下午 h"时"mm"分"ss"秒"`, 274 | 36: `yyyy"年"m"月"`, 275 | 50: `yyyy"年"m"月"`, 276 | 51: `m"月"d"日"`, 277 | 52: `yyyy"年"m"月"`, 278 | 53: `m"月"d"日"`, 279 | 54: `m"月"d"日"`, 280 | 55: `上午/下午 h"时"mm"分"`, 281 | 56: `上午/下午 h"时"mm"分"ss"秒`, 282 | 57: `yyyy"年"m"月"`, 283 | 58: `m"月"d"日"`, 284 | 285 | // th-th format codes (in the spec these have a "t" prefix?) 286 | 59: `0`, 287 | 60: `0.00`, 288 | 61: `#,##0`, 289 | 62: `#,##0.00`, 290 | 67: `0%`, 291 | 68: `0.00%`, 292 | 69: `# ?/?`, 293 | 70: `# ??/??`, 294 | 295 | // th format code, but translated to aid the parser 296 | 71: `d/m/yyyy`, // `ว/ด/ปปปป`, 297 | 72: `d-mmm-yy`, // `ว-ดดด-ปป`, 298 | 73: `d-mmm`, // `ว-ดดด`, 299 | 74: `mmm-yy`, // `ดดด-ปป`, 300 | 75: `h:mm`, // `ช:นน`, 301 | 76: `h:mm:ss`, // `ช:นน:ทท`, 302 | 77: `d/m/yyyy h:mm`, // `ว/ด/ปปปป ช:นน`, 303 | 78: `mm:ss`, // `นน:ทท`, 304 | 79: `[h]:mm:ss`, // `[ช]:นน:ทท`, 305 | 80: `mm:ss.0`, // `นน:ทท.0`, 306 | 81: `d/m/bb`, // `d/m/bb`, 307 | } 308 | 309 | // builtInFormatTypes are the underlying datatypes for built-in number formats in XLS/XLSX. 310 | var builtInFormatTypes = map[uint16]CellType{ 311 | // 0 has no defined type 312 | 1: IntegerCell, 313 | 2: FloatCell, 314 | 3: IntegerCell, 315 | 4: FloatCell, 316 | 9: FloatCell, 317 | 10: FloatCell, 318 | 319 | 11: FloatCell, 320 | 12: FloatCell, 321 | 13: FloatCell, 322 | 14: DateCell, 323 | 15: DateCell, 324 | 16: DateCell, 325 | 17: DateCell, 326 | 18: DateCell, 327 | 19: DateCell, 328 | 20: DateCell, 329 | 21: DateCell, 330 | 22: DateCell, 331 | 37: IntegerCell, 332 | 38: IntegerCell, 333 | 39: FloatCell, 334 | 40: FloatCell, 335 | 41: IntegerCell, 336 | 42: IntegerCell, 337 | 43: FloatCell, 338 | 44: FloatCell, 339 | 45: DateCell, // Durations? 
340 | 46: DateCell, 341 | 47: DateCell, 342 | 48: FloatCell, 343 | 49: StringCell, 344 | 27: DateCell, 345 | 28: DateCell, 346 | 29: DateCell, 347 | 30: DateCell, 348 | 31: DateCell, 349 | 32: DateCell, 350 | 33: DateCell, 351 | 34: DateCell, 352 | 35: DateCell, 353 | 36: DateCell, 354 | 50: DateCell, 355 | 51: DateCell, 356 | 52: DateCell, 357 | 53: DateCell, 358 | 54: DateCell, 359 | 55: DateCell, 360 | 56: DateCell, 361 | 57: DateCell, 362 | 58: DateCell, 363 | 59: IntegerCell, 364 | 60: FloatCell, 365 | 61: IntegerCell, 366 | 62: FloatCell, 367 | 67: FloatCell, 368 | 68: FloatCell, 369 | 69: FloatCell, 370 | 70: FloatCell, 371 | 71: DateCell, 372 | 72: DateCell, 373 | 73: DateCell, 374 | 74: DateCell, 375 | 75: DateCell, 376 | 76: DateCell, 377 | 77: DateCell, 378 | 78: DateCell, 379 | 79: DateCell, 380 | 80: DateCell, 381 | 81: DateCell, 382 | } 383 | -------------------------------------------------------------------------------- /commonxl/cell.go: -------------------------------------------------------------------------------- 1 | package commonxl 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "net/url" 7 | "strconv" 8 | "time" 9 | "unicode/utf16" 10 | ) 11 | 12 | // CellType annotates the type of data extracted in the cell. 13 | type CellType uint16 14 | 15 | // CellType annotations for various cell value types. 16 | const ( 17 | BlankCell CellType = iota 18 | IntegerCell 19 | FloatCell 20 | StringCell 21 | BooleanCell 22 | DateCell 23 | 24 | HyperlinkStringCell // internal type to separate URLs 25 | StaticCell // placeholder, internal use only 26 | ) 27 | 28 | // String returns a string description of the cell data type. 
29 | func (c CellType) String() string { 30 | switch c { 31 | case BlankCell: 32 | return "blank" 33 | case IntegerCell: 34 | return "integer" 35 | case FloatCell: 36 | return "float" 37 | case BooleanCell: 38 | return "boolean" 39 | case DateCell: 40 | return "date" 41 | case HyperlinkStringCell: 42 | return "hyperlink" 43 | case StaticCell: 44 | return "static" 45 | default: // StringCell, StaticCell 46 | return "string" 47 | } 48 | } 49 | 50 | // Cell represents a single cell value. 51 | type Cell []interface{} 52 | 53 | // internally, it is a slice sized 2 or 3 54 | // [Value, CellType] or [Value, CellType, FormatNumber] 55 | // where FormatNumber is a uint16 if not 0 56 | 57 | // Value returns the contents as a generic interface{}. 58 | func (c Cell) Value() interface{} { 59 | if len(c) == 0 { 60 | return "" 61 | } 62 | return c[0] 63 | } 64 | 65 | // SetURL adds a URL hyperlink to the cell. 66 | func (c *Cell) SetURL(link string) { 67 | (*c)[1] = HyperlinkStringCell 68 | if len(*c) == 2 { 69 | *c = append(*c, uint16(0), link) 70 | } else { // len = 3 already 71 | *c = append(*c, link) 72 | } 73 | } 74 | 75 | // URL returns the parsed URL when a cell contains a hyperlink. 76 | func (c Cell) URL() (*url.URL, bool) { 77 | if c.Type() == HyperlinkStringCell && len(c) >= 4 { 78 | u, err := url.Parse(c[3].(string)) 79 | return u, err == nil 80 | } 81 | return nil, false 82 | } 83 | 84 | // Type returns the CellType of the value. 85 | func (c Cell) Type() CellType { 86 | if len(c) < 2 { 87 | return BlankCell 88 | } 89 | return c[1].(CellType) 90 | } 91 | 92 | // FormatNo returns the NumberFormat used for display. 93 | func (c Cell) FormatNo() uint16 { 94 | if len(c) == 3 { 95 | return c[2].(uint16) 96 | } 97 | return 0 98 | } 99 | 100 | // Clone returns the new copy of this Cell. 
101 | func (c Cell) Clone() Cell { 102 | c2 := make([]interface{}, len(c)) 103 | for i, x := range c { 104 | c2[i] = x 105 | } 106 | return c2 107 | } 108 | 109 | /////// 110 | 111 | var boolStrings = map[string]bool{ 112 | "yes": true, "true": true, "t": true, "y": true, "1": true, "on": true, 113 | "no": false, "false": false, "f": false, "n": false, "0": false, "off": false, 114 | "YES": true, "TRUE": true, "T": true, "Y": true, "1.0": true, "ON": true, 115 | "NO": false, "FALSE": false, "F": false, "N": false, "0.0": false, "OFF": false, 116 | } 117 | 118 | // NewCellWithType creates a new cell value with the given type, coercing as necessary. 119 | func NewCellWithType(value interface{}, t CellType, f *Formatter) Cell { 120 | c := NewCell(value) 121 | if c[1] == t { 122 | // fast path if it was already typed correctly 123 | return c 124 | } 125 | 126 | if c[1] == BooleanCell { 127 | if t == IntegerCell { 128 | if c[0].(bool) { 129 | c[0] = int64(1) 130 | } else { 131 | c[0] = int64(0) 132 | } 133 | c[1] = IntegerCell 134 | } else if t == FloatCell { 135 | if c[0].(bool) { 136 | c[0] = float64(1.0) 137 | } else { 138 | c[0] = float64(0.0) 139 | } 140 | c[1] = FloatCell 141 | } else if t == StringCell { 142 | if c[0].(bool) { 143 | c[0] = "TRUE" 144 | } else { 145 | c[0] = "FALSE" 146 | } 147 | c[1] = FloatCell 148 | } 149 | } 150 | 151 | if c[1] == FloatCell { 152 | if t == IntegerCell { 153 | c[0] = int64(c[0].(float64)) 154 | c[1] = IntegerCell 155 | } else if t == BooleanCell { 156 | c[0] = c[0].(float64) != 0.0 157 | c[1] = BooleanCell 158 | } 159 | } 160 | if c[1] == IntegerCell { 161 | if t == FloatCell { 162 | c[0] = float64(c[0].(int64)) 163 | c[1] = FloatCell 164 | } else if t == BooleanCell { 165 | c[0] = c[0].(int64) != 0 166 | c[1] = BooleanCell 167 | } 168 | } 169 | if c[1] == StringCell { 170 | if t == IntegerCell { 171 | x, _ := strconv.ParseInt(c[0].(string), 10, 64) 172 | c[0] = x 173 | c[1] = IntegerCell 174 | } else if t == FloatCell { 175 | 
x, _ := strconv.ParseFloat(c[0].(string), 64) 176 | c[0] = x 177 | c[1] = FloatCell 178 | } else if t == BooleanCell { 179 | c[0] = boolStrings[c[0].(string)] 180 | c[1] = BooleanCell 181 | } 182 | } 183 | if t == StringCell { 184 | c[0] = fmt.Sprint(c[0]) 185 | c[1] = StringCell 186 | } 187 | if t == DateCell { 188 | if c[1] == FloatCell { 189 | c[0] = f.ConvertToDate(c[0].(float64)) 190 | } else if c[1] == IntegerCell { 191 | c[0] = f.ConvertToDate(float64(c[0].(int64))) 192 | } 193 | c[1] = DateCell 194 | } 195 | return c 196 | } 197 | 198 | // NewCell creates a new cell value from any builtin type. 199 | func NewCell(value interface{}) Cell { 200 | c := make([]interface{}, 2) 201 | switch v := value.(type) { 202 | case bool: 203 | c[0] = v 204 | c[1] = BooleanCell 205 | case int: 206 | c[0] = int64(v) 207 | c[1] = IntegerCell 208 | case int8: 209 | c[0] = int64(v) 210 | c[1] = IntegerCell 211 | case int16: 212 | c[0] = int64(v) 213 | c[1] = IntegerCell 214 | case int32: 215 | c[0] = int64(v) 216 | c[1] = IntegerCell 217 | case int64: 218 | c[0] = int64(v) 219 | c[1] = IntegerCell 220 | case uint8: 221 | c[0] = int64(v) 222 | c[1] = IntegerCell 223 | case uint16: 224 | c[0] = int64(v) 225 | c[1] = IntegerCell 226 | case uint32: 227 | c[0] = int64(v) 228 | c[1] = IntegerCell 229 | 230 | case uint: 231 | if int64(v) > int64(math.MaxInt64) { 232 | c[0] = float64(v) 233 | c[1] = FloatCell 234 | } else { 235 | c[0] = int64(v) 236 | c[1] = IntegerCell 237 | } 238 | case uint64: 239 | if v > math.MaxInt64 { 240 | c[0] = float64(v) 241 | c[1] = FloatCell 242 | } else { 243 | c[0] = int64(v) 244 | c[1] = IntegerCell 245 | } 246 | 247 | case float32: 248 | c[0] = float64(v) 249 | c[1] = FloatCell 250 | case float64: 251 | c[0] = float64(v) 252 | c[1] = FloatCell 253 | 254 | case string: 255 | if len(v) == 0 { 256 | c[0] = nil 257 | c[1] = BlankCell 258 | } else { 259 | c[0] = v 260 | c[1] = StringCell 261 | } 262 | case []byte: 263 | if len(v) == 0 { 264 | c[0] = nil 265 
| c[1] = BlankCell 266 | } else { 267 | c[0] = string(v) 268 | c[1] = StringCell 269 | } 270 | case []uint16: 271 | if len(v) == 0 { 272 | c[0] = nil 273 | c[1] = BlankCell 274 | } else { 275 | c[0] = string(utf16.Decode(v)) 276 | c[1] = StringCell 277 | } 278 | case []rune: 279 | if len(v) == 0 { 280 | c[0] = nil 281 | c[1] = BlankCell 282 | } else { 283 | c[0] = string(v) 284 | c[1] = StringCell 285 | } 286 | case time.Time: 287 | c[0] = v 288 | c[1] = DateCell 289 | 290 | case fmt.Stringer: 291 | s := v.String() 292 | if len(s) == 0 { 293 | c[0] = nil 294 | c[1] = BlankCell 295 | } else { 296 | c[0] = s 297 | c[1] = StringCell 298 | } 299 | default: 300 | panic("grate: data type not handled") 301 | } 302 | return Cell(c) 303 | } 304 | 305 | // SetFormatNumber changes the number format stored with the cell. 306 | func (c *Cell) SetFormatNumber(f uint16) { 307 | if f == 0 { 308 | *c = (*c)[:2] 309 | return 310 | } 311 | 312 | if len(*c) == 2 { 313 | *c = append(*c, f) 314 | } else { 315 | (*c)[2] = f 316 | } 317 | } 318 | 319 | func (c Cell) Equal(other Cell) bool { 320 | if c.Type() == FloatCell || other.Type() == FloatCell || 321 | c.Type() == IntegerCell || other.Type() == IntegerCell { 322 | v1, ok := c[0].(float64) 323 | v1x, okx := c[0].(int64) 324 | if okx { 325 | v1 = float64(v1x) 326 | ok = true 327 | } 328 | if !ok { 329 | fmt.Sscanf(fmt.Sprint(c[0]), "%g", &v1) 330 | } 331 | v2, ok := other[0].(float64) 332 | v2x, okx := other[0].(int64) 333 | if okx { 334 | v2 = float64(v2x) 335 | ok = true 336 | } 337 | if !ok { 338 | fmt.Sscanf(fmt.Sprint(c[0]), "%g", &v2) 339 | } 340 | return v1 == v2 341 | } 342 | 343 | return c.Less(other) == other.Less(c) 344 | } 345 | 346 | func (c Cell) Less(other Cell) bool { 347 | if len(c) == 0 { 348 | return false 349 | } 350 | switch v1 := c[0].(type) { 351 | case nil: 352 | return false 353 | case bool: 354 | // F < T = T 355 | // F < F = F 356 | // T < T = F 357 | // T < F = F 358 | if v1 { 359 | return false 360 | } 361 
| 362 | // if v2 is truthy, return true 363 | switch v2 := other[0].(type) { 364 | case nil: 365 | return false 366 | case bool: 367 | return v2 368 | case int64: 369 | return v2 != 0 370 | case float64: 371 | return v2 != 0.0 372 | case string: 373 | return boolStrings[v2] 374 | } 375 | 376 | case int64: 377 | // v1 < v2 378 | 379 | switch v2 := other[0].(type) { 380 | case nil: 381 | return false 382 | case bool: 383 | x := int64(0) 384 | if v2 { 385 | x = 1 386 | } 387 | return v1 < x 388 | case int64: 389 | return v1 < v2 390 | case float64: 391 | if v2 < math.MinInt64 { 392 | return false 393 | } 394 | if v2 > math.MaxInt64 { 395 | return true 396 | } 397 | return float64(v1) < v2 398 | case string: 399 | var x int64 400 | _, err := fmt.Sscanf(v2, "%d", &x) 401 | if err == nil { 402 | return v1 < x 403 | } 404 | return fmt.Sprint(v1) < v2 405 | } 406 | case float64: 407 | switch v2 := other[0].(type) { 408 | case nil: 409 | return false 410 | case bool: 411 | x := float64(0.0) 412 | if v2 { 413 | x = 1.0 414 | } 415 | return v1 < x 416 | case int64: 417 | if v1 < math.MinInt64 { 418 | return true 419 | } 420 | if v1 > math.MaxInt64 { 421 | return false 422 | } 423 | return v1 < float64(v2) 424 | case float64: 425 | return v1 < v2 426 | case string: 427 | var x float64 428 | _, err := fmt.Sscanf(v2, "%g", &x) 429 | if err == nil { 430 | return v1 < x 431 | } 432 | return fmt.Sprint(v1) < v2 433 | } 434 | case string: 435 | //return v1 < fmt.Sprint(other[0]) 436 | 437 | switch v2 := other[0].(type) { 438 | case nil: 439 | return false 440 | case bool: 441 | return v2 && !boolStrings[v1] 442 | case int64: 443 | var x int64 444 | _, err := fmt.Sscanf(v1, "%d", &x) 445 | if err == nil { 446 | return x < v2 447 | } 448 | return v1 < fmt.Sprint(v2) 449 | case float64: 450 | var x float64 451 | _, err := fmt.Sscanf(v1, "%g", &x) 452 | if err == nil { 453 | return x < v2 454 | } 455 | return v1 < fmt.Sprint(v2) 456 | case string: 457 | return v1 < v2 458 | } 459 | 460 
| } 461 | 462 | panic("unable to compare cells (invalid internal type)") 463 | } 464 | -------------------------------------------------------------------------------- /xls/xls.go: -------------------------------------------------------------------------------- 1 | // Package xls implements the Microsoft Excel Binary File Format (.xls) Structure. 2 | // More specifically, it contains just enough detail to extract cell contents, 3 | // data types, and last-calculated formula values. In particular, it does NOT 4 | // implement formatting or formula calculations. 5 | package xls 6 | 7 | // https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/cd03cb5f-ca02-4934-a391-bb674cb8aa06 8 | 9 | import ( 10 | "context" 11 | "encoding/binary" 12 | "errors" 13 | "io" 14 | "log" 15 | "sync" 16 | 17 | "github.com/pbnjay/grate" 18 | "github.com/pbnjay/grate/commonxl" 19 | "github.com/pbnjay/grate/xls/cfb" 20 | "github.com/pbnjay/grate/xls/crypto" 21 | ) 22 | 23 | var _ = grate.Register("xls", 1, Open) 24 | 25 | // WorkBook represents an Excel workbook containing 1 or more sheets. 
26 | type WorkBook struct { 27 | filename string 28 | ctx context.Context 29 | doc *cfb.Document 30 | 31 | prot bool 32 | h *header 33 | sheets []*boundSheet 34 | codepage uint16 35 | dateMode uint16 36 | strings []string 37 | 38 | password string 39 | substreams [][]*rec 40 | 41 | fpos int64 42 | pos2substream map[int64]int 43 | 44 | nfmt commonxl.Formatter 45 | xfs []uint16 46 | } 47 | 48 | func (b *WorkBook) IsProtected() bool { 49 | return b.prot 50 | } 51 | 52 | func Open(filename string) (grate.Source, error) { 53 | doc, err := cfb.Open(filename) 54 | if err != nil { 55 | return nil, err 56 | } 57 | 58 | b := &WorkBook{ 59 | filename: filename, 60 | doc: doc, 61 | 62 | pos2substream: make(map[int64]int, 16), 63 | xfs: make([]uint16, 0, 128), 64 | } 65 | 66 | rdr, err := doc.Open("Workbook") 67 | if err != nil { 68 | return nil, grate.WrapErr(err, grate.ErrNotInFormat) 69 | } 70 | raw, err := io.ReadAll(rdr) 71 | if err != nil { 72 | return nil, err 73 | } 74 | 75 | err = b.loadFromStream(raw) 76 | return b, err 77 | } 78 | 79 | func (b *WorkBook) loadFromStream(raw []byte) error { 80 | return b.loadFromStream2(raw, false) 81 | } 82 | 83 | func (b *WorkBook) loadFromStreamWithDecryptor(raw []byte, dec crypto.Decryptor) error { 84 | // interestingly (insecurely) BIFF8 keeps Record Types and sizes in the clear, 85 | // has a few records that are not encrypted, and has 1 record type that does 86 | // not encrypt the 32bit integer position at the beginning (while encrypting 87 | // the rest). It also resets the encryption block counter every 1024 bytes 88 | // (counting all the "skipped" bytes described above). 89 | // 90 | // So this code streams the records through the decryption, but also records 91 | // a set of overlays applied to the final result which restore the 92 | // "cleartext" contents in line with the decrypted content. 
93 | 94 | if grate.Debug { 95 | log.Println(" Decrypting xls stream with standard RC4") 96 | } 97 | 98 | pos := 0 99 | zeros := [8224]byte{} 100 | 101 | type overlay struct { 102 | Pos int 103 | 104 | RecType recordType 105 | DataBytes uint16 106 | Data []byte // NB len() not necessarily = DataBytes 107 | } 108 | replaceBlocks := []overlay{} 109 | 110 | var err error 111 | for err == nil && len(raw[pos:]) > 4 { 112 | o := overlay{} 113 | o.Pos = pos 114 | o.RecType = recordType(binary.LittleEndian.Uint16(raw[pos : pos+2])) 115 | o.DataBytes = binary.LittleEndian.Uint16(raw[pos+2 : pos+4]) 116 | pos += 4 117 | 118 | // copy to output and decryption stream 119 | binary.Write(dec, binary.LittleEndian, o.RecType) 120 | binary.Write(dec, binary.LittleEndian, o.DataBytes) 121 | tocopy := int(o.DataBytes) 122 | 123 | switch o.RecType { 124 | case RecTypeBOF, RecTypeFilePass, RecTypeUsrExcl, RecTypeFileLock, RecTypeInterfaceHdr, RecTypeRRDInfo, RecTypeRRDHead: 125 | // untouched data goes directly into output 126 | o.Data = raw[pos : pos+int(o.DataBytes)] 127 | pos += int(o.DataBytes) 128 | dec.Write(zeros[:int(o.DataBytes)]) 129 | tocopy = 0 130 | 131 | case RecTypeBoundSheet8: 132 | // copy 32-bit position to output 133 | o.Data = raw[pos : pos+4] 134 | pos += 4 135 | dec.Write(zeros[:4]) 136 | tocopy -= 4 137 | } 138 | 139 | if tocopy > 0 { 140 | _, err = dec.Write(raw[pos : pos+tocopy]) 141 | pos += tocopy 142 | } 143 | replaceBlocks = append(replaceBlocks, o) 144 | } 145 | dec.Flush() 146 | 147 | alldata := dec.Bytes() 148 | for _, o := range replaceBlocks { 149 | offs := int(o.Pos) 150 | binary.LittleEndian.PutUint16(alldata[offs:], uint16(o.RecType)) 151 | binary.LittleEndian.PutUint16(alldata[offs+2:], uint16(o.DataBytes)) 152 | if len(o.Data) > 0 { 153 | offs += 4 154 | copy(alldata[offs:], o.Data) 155 | } 156 | } 157 | 158 | // recurse into the stream parser now that things are decrypted 159 | return b.loadFromStream2(alldata, true) 160 | } 161 | 162 | func (b 
*WorkBook) Close() error { 163 | // return records to the pool for reuse 164 | for i, sub := range b.substreams { 165 | for _, r := range sub { 166 | r.Data = nil // allow GC 167 | recPool.Put(r) 168 | } 169 | b.substreams[i] = b.substreams[i][:0] 170 | } 171 | b.substreams = b.substreams[:0] 172 | return nil 173 | } 174 | 175 | func (b *WorkBook) loadFromStream2(raw []byte, isDecrypted bool) error { 176 | b.h = &header{} 177 | substr := -1 178 | nestedBOF := 0 179 | b.pos2substream = make(map[int64]int, 10) 180 | b.fpos = 0 181 | 182 | // IMPORTANT: if there are any existing records, we need to return them to the pool 183 | for i, sub := range b.substreams { 184 | for _, r := range sub { 185 | recPool.Put(r) 186 | } 187 | b.substreams[i] = b.substreams[i][:0] 188 | } 189 | b.substreams = b.substreams[:0] 190 | 191 | rawfull := raw 192 | nr, no, err := b.nextRecord(raw) 193 | for err == nil { 194 | raw = raw[no:] 195 | switch nr.RecType { 196 | case RecTypeEOF: 197 | nestedBOF-- 198 | case RecTypeBOF: 199 | // when substreams are nested, keep them in the same grouping 200 | if nestedBOF == 0 { 201 | substr = len(b.substreams) 202 | b.substreams = append(b.substreams, []*rec{}) 203 | b.pos2substream[b.fpos] = substr 204 | } 205 | nestedBOF++ 206 | } 207 | b.fpos += int64(4 + len(nr.Data)) 208 | 209 | // if there's a FilePass record, the data is encrypted 210 | if nr.RecType == RecTypeFilePass && !isDecrypted { 211 | etype := binary.LittleEndian.Uint16(nr.Data) 212 | switch etype { 213 | case 1: 214 | dec, err := crypto.NewBasicRC4(nr.Data[2:]) 215 | if err != nil { 216 | log.Println("xls: rc4 encryption failed to set up", err) 217 | return err 218 | } 219 | return b.loadFromStreamWithDecryptor(rawfull, dec) 220 | case 2, 3, 4: 221 | log.Println("need Crypto API RC4 decryptor") 222 | return errors.New("xls: unsupported Crypto API encryption method") 223 | default: 224 | return errors.New("xls: unsupported encryption method") 225 | } 226 | } 227 | 228 | 
b.substreams[substr] = append(b.substreams[substr], nr) 229 | nr, no, err = b.nextRecord(raw) 230 | } 231 | if err == io.EOF { 232 | err = nil 233 | } 234 | if err != nil { 235 | return err 236 | } 237 | 238 | for ss, records := range b.substreams { 239 | if grate.Debug { 240 | log.Printf(" Processing substream %d/%d (%d records)", ss, len(b.substreams), len(records)) 241 | } 242 | for i, nr := range records { 243 | if len(nr.Data) == 0 { 244 | continue 245 | } 246 | 247 | switch nr.RecType { 248 | case RecTypeSST: 249 | // Shared String Table is often continued across multiple records, 250 | // so we want to gather them all before starting to parse (some 251 | // strings may span the gap between records) 252 | recSet := []*rec{nr} 253 | 254 | lastIndex := i 255 | for len(records) > (lastIndex+1) && records[lastIndex+1].RecType == RecTypeContinue { 256 | lastIndex++ 257 | recSet = append(recSet, records[lastIndex]) 258 | } 259 | 260 | b.strings, err = parseSST(recSet) 261 | if err != nil { 262 | return err 263 | } 264 | 265 | case RecTypeContinue: 266 | // no-op (used above) 267 | case RecTypeEOF: 268 | // done 269 | 270 | case RecTypeBOF: 271 | b.h = &header{ 272 | Version: binary.LittleEndian.Uint16(nr.Data[0:2]), 273 | DocType: binary.LittleEndian.Uint16(nr.Data[2:4]), 274 | RupBuild: binary.LittleEndian.Uint16(nr.Data[4:6]), 275 | RupYear: binary.LittleEndian.Uint16(nr.Data[6:8]), 276 | MiscBits: binary.LittleEndian.Uint64(nr.Data[8:16]), 277 | } 278 | 279 | if b.h.Version != 0x0600 { 280 | return errors.New("xls: invalid file version") 281 | } 282 | if b.h.RupYear != 0x07CC && b.h.RupYear != 0x07CD { 283 | return errors.New("xls: unsupported biff version") 284 | } 285 | /* 286 | if b.h.DocType != 0x0005 && b.h.DocType != 0x0010 { 287 | // we only support the workbook or worksheet substreams 288 | log.Println("xls: unsupported document type") 289 | //break 290 | } 291 | */ 292 | 293 | case RecTypeCodePage: 294 | // BIFF8 is entirely UTF-16LE so this is actually 
ignored 295 | b.codepage = binary.LittleEndian.Uint16(nr.Data) 296 | 297 | case RecTypeDate1904: 298 | b.dateMode = binary.LittleEndian.Uint16(nr.Data) 299 | 300 | case RecTypeFormat: 301 | // Format maps a format ID to a code string 302 | fmtNo := binary.LittleEndian.Uint16(nr.Data) 303 | formatStr, _, err := decodeXLUnicodeString(nr.Data[2:]) 304 | if err != nil { 305 | log.Println("fail2", err) 306 | return err 307 | } 308 | b.nfmt.Add(fmtNo, formatStr) 309 | 310 | case RecTypeXF: 311 | // XF records merge multiple style and format directives to one ID 312 | // ignore font id at nr.Data[0:2] 313 | fmtNo := binary.LittleEndian.Uint16(nr.Data[2:]) 314 | b.xfs = append(b.xfs, fmtNo) 315 | 316 | case RecTypeBoundSheet8: 317 | // Identifies the postition within the stream, visibility state, 318 | // and name of a worksheet 319 | bs := &boundSheet{} 320 | bs.Position = binary.LittleEndian.Uint32(nr.Data[:4]) 321 | bs.HiddenState = nr.Data[4] 322 | bs.SheetType = nr.Data[5] 323 | 324 | bs.Name, _, err = decodeShortXLUnicodeString(nr.Data[6:]) 325 | if err != nil { 326 | return err 327 | } 328 | b.sheets = append(b.sheets, bs) 329 | default: 330 | if grate.Debug && ss == 0 { 331 | log.Println(" Unhandled record type:", nr.RecType, i) 332 | } 333 | } 334 | } 335 | } 336 | 337 | return err 338 | } 339 | 340 | var recPool = sync.Pool{ 341 | New: func() interface{} { 342 | return &rec{} 343 | }, 344 | } 345 | 346 | func (b *WorkBook) nextRecord(raw []byte) (*rec, int, error) { 347 | if len(raw) < 4 { 348 | return nil, 0, io.EOF 349 | } 350 | rec := recPool.Get().(*rec) 351 | 352 | rec.RecType = recordType(binary.LittleEndian.Uint16(raw[:2])) 353 | rec.RecSize = binary.LittleEndian.Uint16(raw[2:4]) 354 | if len(raw[4:]) < int(rec.RecSize) { 355 | recPool.Put(rec) 356 | return nil, 4, io.ErrUnexpectedEOF 357 | } 358 | rec.Data = raw[4 : 4+rec.RecSize] 359 | return rec, int(4 + rec.RecSize), nil 360 | } 361 | 
-------------------------------------------------------------------------------- /xls/cfb/cfb.go: -------------------------------------------------------------------------------- 1 | // Package cfb implements the Microsoft Compound File Binary File Format. 2 | package cfb 3 | 4 | // https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-cfb/53989ce4-7b05-4f8d-829b-d08d6148375b 5 | // Note for myself: 6 | // Storage = Directory 7 | // Stream = File 8 | 9 | import ( 10 | "bytes" 11 | "encoding/binary" 12 | "errors" 13 | "io" 14 | "io/ioutil" 15 | "log" 16 | "unicode/utf16" 17 | 18 | "github.com/pbnjay/grate" 19 | ) 20 | 21 | const fullAssertions = true 22 | 23 | const ( 24 | secFree uint32 = 0xFFFFFFFF // FREESECT 25 | secEndOfChain uint32 = 0xFFFFFFFE // ENDOFCHAIN 26 | secFAT uint32 = 0xFFFFFFFD // FATSECT 27 | secDIFAT uint32 = 0xFFFFFFFC // DIFSECT 28 | secReserved uint32 = 0xFFFFFFFB 29 | secMaxRegular uint32 = 0xFFFFFFFA // MAXREGSECT 30 | ) 31 | 32 | // Header of the Compound File MUST be at the beginning of the file (offset 0). 33 | type header struct { 34 | Signature uint64 // Identification signature for the compound file structure, and MUST be set to the value 0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1. 35 | ClassID [2]uint64 // Reserved and unused class ID that MUST be set to all zeroes (CLSID_NULL). 36 | MinorVersion uint16 // Version number for nonbreaking changes. This field SHOULD be set to 0x003E if the major version field is either 0x0003 or 0x0004. 37 | MajorVersion uint16 // Version number for breaking changes. This field MUST be set to either 0x0003 (version 3) or 0x0004 (version 4). 38 | ByteOrder uint16 // This field MUST be set to 0xFFFE. This field is a byte order mark for all integer fields, specifying little-endian byte order. 39 | SectorShift uint16 // This field MUST be set to 0x0009, or 0x000c, depending on the Major Version field. This field specifies the sector size of the compound file as a power of 2. 
40 | MiniSectorShift uint16 // This field MUST be set to 0x0006. This field specifies the sector size of the Mini Stream as a power of 2. The sector size of the Mini Stream MUST be 64 bytes. 41 | Reserved1 [6]byte // This field MUST be set to all zeroes. 42 | NumDirectorySectors int32 // This integer field contains the count of the number of directory sectors in the compound file. 43 | NumFATSectors int32 // This integer field contains the count of the number of FAT sectors in the compound file. 44 | FirstDirectorySectorLocation uint32 // This integer field contains the starting sector number for the directory stream. 45 | TransactionSignature int32 // This integer field MAY contain a sequence number that is incremented every time the compound file is saved by an implementation that supports file transactions. This is the field that MUST be set to all zeroes if file transactions are not implemented.<1> 46 | MiniStreamCutoffSize int32 // This integer field MUST be set to 0x00001000. This field specifies the maximum size of a user-defined data stream that is allocated from the mini FAT and mini stream, and that cutoff is 4,096 bytes. Any user-defined data stream that is greater than or equal to this cutoff size must be allocated as normal sectors from the FAT. 47 | FirstMiniFATSectorLocation uint32 // This integer field contains the starting sector number for the mini FAT. 48 | NumMiniFATSectors int32 // This integer field contains the count of the number of mini FAT sectors in the compound file. 49 | FirstDIFATSectorLocation uint32 // This integer field contains the starting sector number for the DIFAT. 50 | NumDIFATSectors int32 // This integer field contains the count of the number of DIFAT sectors in the compound file. 51 | DIFAT [109]uint32 // This array of 32-bit integer fields contains the first 109 FAT sector locations of the compound file. 
52 | } 53 | 54 | type objectType byte 55 | 56 | const ( 57 | typeUnknown objectType = 0x00 58 | typeStorage objectType = 0x01 59 | typeStream objectType = 0x02 60 | typeRootStorage objectType = 0x05 61 | ) 62 | 63 | type directory struct { 64 | Name [32]uint16 // 32 utf16 characters 65 | NameByteLen int16 // length of Name in bytes 66 | ObjectType objectType 67 | ColorFlag byte // 0=red, 1=black 68 | LeftSiblingID uint32 // stream ids 69 | RightSiblingID uint32 70 | ChildID uint32 71 | ClassID [2]uint64 // GUID 72 | StateBits uint32 73 | CreationTime int64 74 | ModifiedTime int64 75 | StartingSectorLocation int32 76 | StreamSize uint64 77 | } 78 | 79 | func (d *directory) String() string { 80 | if (d.NameByteLen&1) == 1 || d.NameByteLen > 64 { 81 | return "" 82 | } 83 | r16 := utf16.Decode(d.Name[:int(d.NameByteLen)/2]) 84 | // trim off null terminator 85 | return string(r16[:len(r16)-1]) 86 | } 87 | 88 | // Document represents a Compound File Binary Format document. 89 | type Document struct { 90 | // the entire file, loaded into memory 91 | data []byte 92 | 93 | // pre-parsed info 94 | header *header 95 | dir []*directory 96 | 97 | // lookup tables for all the sectors 98 | fat []uint32 99 | minifat []uint32 100 | 101 | ministreamstart uint32 102 | ministreamsize uint32 103 | } 104 | 105 | func (d *Document) load(rx io.ReadSeeker) error { 106 | var err error 107 | d.data, err = ioutil.ReadAll(rx) 108 | if err != nil { 109 | return err 110 | } 111 | br := bytes.NewReader(d.data) 112 | 113 | h := &header{} 114 | err = binary.Read(br, binary.LittleEndian, h) 115 | if h.Signature != 0xe11ab1a1e011cfd0 { 116 | return grate.ErrNotInFormat // errors.New("ole2: invalid format") 117 | } 118 | if h.ByteOrder != 0xFFFE { 119 | return grate.ErrNotInFormat //errors.New("ole2: invalid format") 120 | } 121 | if fullAssertions { 122 | if h.ClassID[0] != 0 || h.ClassID[1] != 0 { 123 | return grate.ErrNotInFormat //errors.New("ole2: invalid CLSID") 124 | } 125 | if h.MajorVersion 
!= 3 && h.MajorVersion != 4 { 126 | return errors.New("ole2: unknown major version") 127 | } 128 | if h.MinorVersion != 0x3B && h.MinorVersion != 0x3E { 129 | log.Printf("WARNING MinorVersion = 0x%02x NOT 0x3E", h.MinorVersion) 130 | //return errors.New("ole2: unknown minor version") 131 | } 132 | 133 | for _, v := range h.Reserved1 { 134 | if v != 0 { 135 | return errors.New("ole2: reserved section is non-zero") 136 | } 137 | } 138 | if h.MajorVersion == 3 { 139 | if h.SectorShift != 9 { 140 | return errors.New("ole2: invalid sector size") 141 | } 142 | if h.NumDirectorySectors != 0 { 143 | return errors.New("ole2: version 3 does not support directory sectors") 144 | } 145 | } 146 | if h.MajorVersion == 4 { 147 | if h.SectorShift != 12 { 148 | return errors.New("ole2: invalid sector size") 149 | } 150 | } 151 | if h.MiniSectorShift != 6 { 152 | return errors.New("ole2: invalid mini sector size") 153 | } 154 | if h.MiniStreamCutoffSize != 0x00001000 { 155 | return errors.New("ole2: invalid mini sector cutoff") 156 | } 157 | } 158 | d.header = h 159 | 160 | numFATentries := (1 << (h.SectorShift - 2)) 161 | le := binary.LittleEndian 162 | d.fat = make([]uint32, 0, numFATentries*int(1+d.header.NumFATSectors)) 163 | d.minifat = make([]uint32, 0, numFATentries*int(1+h.NumMiniFATSectors)) 164 | 165 | // step 1: read the DIFAT sector list 166 | for i := 0; i < 109; i++ { 167 | sid := h.DIFAT[i] 168 | if sid == secFree { 169 | break 170 | } 171 | offs := int64(1+sid) << int32(h.SectorShift) 172 | if offs >= int64(len(d.data)) { 173 | return errors.New("xls/cfb: unable to load file") 174 | } 175 | sector := d.data[offs:] 176 | for j := 0; j < numFATentries; j++ { 177 | sid2 := le.Uint32(sector) 178 | d.fat = append(d.fat, sid2) 179 | sector = sector[4:] 180 | } 181 | } 182 | if h.NumDIFATSectors > 0 { 183 | sid1 := h.FirstDIFATSectorLocation 184 | 185 | for sid1 != secEndOfChain { 186 | offs := int64(1+sid1) << int32(h.SectorShift) 187 | difatSector := d.data[offs:] 188 | 
189 | for i := 0; i < numFATentries-1; i++ { 190 | sid2 := le.Uint32(difatSector) 191 | if sid2 == secFree || sid2 == secEndOfChain { 192 | difatSector = difatSector[4:] 193 | continue 194 | } 195 | 196 | offs := int64(1+sid2) << int32(h.SectorShift) 197 | if offs >= int64(len(d.data)) { 198 | return errors.New("xls/cfb: unable to load file") 199 | } 200 | sector := d.data[offs:] 201 | for j := 0; j < numFATentries; j++ { 202 | sid3 := le.Uint32(sector) 203 | d.fat = append(d.fat, sid3) 204 | sector = sector[4:] 205 | } 206 | 207 | difatSector = difatSector[4:] 208 | } 209 | // chain the next DIFAT sector 210 | sid1 = le.Uint32(difatSector) 211 | } 212 | } 213 | 214 | // step 2: read the mini FAT 215 | sid := h.FirstMiniFATSectorLocation 216 | for sid != secEndOfChain { 217 | offs := int64(1+sid) << int32(h.SectorShift) 218 | if offs >= int64(len(d.data)) { 219 | return errors.New("xls/cfb: unable to load file") 220 | } 221 | sector := d.data[offs:] 222 | for j := 0; j < numFATentries; j++ { 223 | sid = le.Uint32(sector) 224 | d.minifat = append(d.minifat, sid) 225 | sector = sector[4:] 226 | } 227 | 228 | if len(d.minifat) >= int(h.NumMiniFATSectors) { 229 | break 230 | } 231 | 232 | // chain the next mini FAT sector 233 | sid = le.Uint32(sector) 234 | } 235 | 236 | // step 3: read the Directory Entries 237 | err = d.buildDirs(br) 238 | 239 | return err 240 | } 241 | 242 | func (d *Document) buildDirs(br *bytes.Reader) error { 243 | h := d.header 244 | le := binary.LittleEndian 245 | 246 | // step 2: read the Directory 247 | sid := h.FirstDirectorySectorLocation 248 | offs := int64(1+sid) << int64(h.SectorShift) 249 | br.Seek(offs, io.SeekStart) 250 | 251 | for j := 0; j < 4; j++ { 252 | dirent := &directory{} 253 | binary.Read(br, le, dirent) 254 | if d.header.MajorVersion == 3 { 255 | // mask out upper 32bits 256 | dirent.StreamSize = dirent.StreamSize & 0xFFFFFFFF 257 | } 258 | 259 | switch dirent.ObjectType { 260 | case typeRootStorage: 261 | d.ministreamstart 
= uint32(dirent.StartingSectorLocation) 262 | d.ministreamsize = uint32(dirent.StreamSize) 263 | case typeStorage: 264 | //log.Println("got a storage? what to do now?") 265 | case typeStream: 266 | /* 267 | var freader io.Reader 268 | if dirent.StreamSize < uint64(d.header.MiniStreamCutoffSize) { 269 | freader = d.getMiniStreamReader(uint32(dirent.StartingSectorLocation), dirent.StreamSize) 270 | } else if dirent.StreamSize != 0 { 271 | freader = d.getStreamReader(uint32(dirent.StartingSectorLocation), dirent.StreamSize) 272 | } 273 | */ 274 | case typeUnknown: 275 | return nil 276 | } 277 | d.dir = append(d.dir, dirent) 278 | } 279 | 280 | return nil 281 | } 282 | 283 | func (d *Document) getStreamReader(sid uint32, size uint64) (io.ReadSeeker, error) { 284 | // NB streamData is a slice of slices of the raw data, so this is the 285 | // only allocation - for the (much smaller) list of sector slices 286 | streamData := make([][]byte, 1+(size>>d.header.SectorShift)) 287 | 288 | x := 0 289 | secSize := int64(1) << int32(d.header.SectorShift) 290 | for sid != secEndOfChain && sid != secFree { 291 | offs := int64(1+sid) << int64(d.header.SectorShift) 292 | if offs > int64(len(d.data)) { 293 | return nil, errors.New("ole2: corrupt data format") 294 | } 295 | slice := d.data[offs : offs+secSize] 296 | if size < uint64(len(slice)) { 297 | slice = slice[:size] 298 | size = 0 299 | } else { 300 | size -= uint64(len(slice)) 301 | } 302 | streamData[x] = slice 303 | if size == 0 { 304 | break 305 | } 306 | sid = d.fat[sid] 307 | x++ 308 | } 309 | if size != 0 { 310 | return nil, errors.New("ole2: incomplete read") 311 | } 312 | 313 | return &SliceReader{Data: streamData}, nil 314 | } 315 | 316 | func (d *Document) getMiniStreamReader(sid uint32, size uint64) (io.ReadSeeker, error) { 317 | // TODO: move into a separate cache so we don't recalculate it each time 318 | fatStreamData := make([][]byte, 1+(d.ministreamsize>>d.header.SectorShift)) 319 | 320 | // NB streamData is a 
slice of slices of the raw data, so this is the 321 | // only allocation - for the (much smaller) list of sector slices 322 | streamData := make([][]byte, 1+(size>>d.header.MiniSectorShift)) 323 | 324 | x := 0 325 | fsid := d.ministreamstart 326 | fsize := uint64(d.ministreamsize) 327 | secSize := int64(1) << int64(d.header.SectorShift) 328 | for fsid != secEndOfChain && fsid != secFree { 329 | offs := int64(1+fsid) << int64(d.header.SectorShift) 330 | slice := d.data[offs : offs+secSize] 331 | if fsize < uint64(len(slice)) { 332 | slice = slice[:fsize] 333 | fsize = 0 334 | } else { 335 | fsize -= uint64(len(slice)) 336 | } 337 | fatStreamData[x] = slice 338 | x++ 339 | fsid = d.fat[fsid] 340 | } 341 | 342 | x = 0 343 | miniSecSize := int64(1) << int64(d.header.MiniSectorShift) 344 | for sid != secEndOfChain && sid != secFree { 345 | offs := int64(sid) << int64(d.header.MiniSectorShift) 346 | 347 | so, si := offs/secSize, offs%secSize 348 | data := fatStreamData[so] 349 | 350 | slice := data[si : si+miniSecSize] 351 | if size < uint64(len(slice)) { 352 | slice = slice[:size] 353 | size = 0 354 | } else { 355 | size -= uint64(len(slice)) 356 | } 357 | streamData[x] = slice 358 | x++ 359 | sid = d.minifat[sid] 360 | } 361 | 362 | return &SliceReader{Data: streamData}, nil 363 | } 364 | -------------------------------------------------------------------------------- /xls/sheets.go: -------------------------------------------------------------------------------- 1 | package xls 2 | 3 | import ( 4 | "encoding/binary" 5 | "errors" 6 | "log" 7 | "math" 8 | "unicode/utf16" 9 | 10 | "github.com/pbnjay/grate" 11 | "github.com/pbnjay/grate/commonxl" 12 | ) 13 | 14 | // List (visible) sheet names from the workbook. 
15 | func (b *WorkBook) List() ([]string, error) { 16 | res := make([]string, 0, len(b.sheets)) 17 | for _, s := range b.sheets { 18 | if (s.HiddenState & 0x03) == 0 { 19 | res = append(res, s.Name) 20 | } 21 | } 22 | return res, nil 23 | } 24 | 25 | // ListHidden sheet names in the workbook. 26 | func (b *WorkBook) ListHidden() ([]string, error) { 27 | res := make([]string, 0, len(b.sheets)) 28 | for _, s := range b.sheets { 29 | if (s.HiddenState & 0x03) != 0 { 30 | res = append(res, s.Name) 31 | } 32 | } 33 | return res, nil 34 | } 35 | 36 | // Get opens the named worksheet and return an iterator for its contents. 37 | func (b *WorkBook) Get(sheetName string) (grate.Collection, error) { 38 | for _, s := range b.sheets { 39 | if s.Name == sheetName { 40 | ss := b.pos2substream[int64(s.Position)] 41 | return b.parseSheet(s, ss) 42 | } 43 | } 44 | return nil, errors.New("xls: sheet not found") 45 | } 46 | 47 | func (b *WorkBook) parseSheet(s *boundSheet, ss int) (*commonxl.Sheet, error) { 48 | res := &commonxl.Sheet{ 49 | Formatter: &b.nfmt, 50 | } 51 | var minRow, maxRow uint32 52 | var minCol, maxCol uint16 53 | 54 | // temporary string buffer 55 | us := make([]uint16, 8224) 56 | 57 | inSubstream := 0 58 | for idx, r := range b.substreams[ss] { 59 | if inSubstream > 0 { 60 | if r.RecType == RecTypeEOF { 61 | inSubstream-- 62 | } 63 | continue 64 | } 65 | switch r.RecType { 66 | case RecTypeBOF: 67 | // a BOF inside a sheet usually means embedded content like a chart 68 | // (which we aren't interested in). So we we set a flag and wait 69 | // for the EOF for that content block. 
70 | if idx > 0 { 71 | inSubstream++ 72 | continue 73 | } 74 | case RecTypeWsBool: 75 | if (r.Data[1] & 0x10) != 0 { 76 | // it's a dialog 77 | return nil, nil 78 | } 79 | 80 | case RecTypeDimensions: 81 | // max = 0-based index of the row AFTER the last valid index 82 | minRow = binary.LittleEndian.Uint32(r.Data[:4]) 83 | maxRow = binary.LittleEndian.Uint32(r.Data[4:8]) // max = 0x010000 84 | minCol = binary.LittleEndian.Uint16(r.Data[8:10]) 85 | maxCol = binary.LittleEndian.Uint16(r.Data[10:12]) // max = 0x000100 86 | if grate.Debug { 87 | log.Printf(" Sheet dimensions (%d, %d) - (%d,%d)", 88 | minCol, minRow, maxCol, maxRow) 89 | } 90 | if minRow > 0x0000FFFF || maxRow > 0x00010000 { 91 | log.Println("invalid dimensions") 92 | } 93 | if minCol > 0x00FF || maxCol > 0x0100 { 94 | log.Println("invalid dimensions") 95 | } 96 | 97 | // pre-allocate cells 98 | res.Resize(int(maxRow), int(maxCol)) 99 | } 100 | } 101 | inSubstream = 0 102 | 103 | var formulaRow, formulaCol uint16 104 | for ridx, r := range b.substreams[ss] { 105 | if inSubstream > 0 { 106 | if r.RecType == RecTypeEOF { 107 | inSubstream-- 108 | } else if grate.Debug { 109 | log.Println(" Unhandled sheet substream record type:", r.RecType, ridx) 110 | } 111 | continue 112 | } 113 | 114 | // sec 2.1.7.20.6 Common Productions ABNF: 115 | /* 116 | CELLTABLE = 1*(1*Row *CELL 1*DBCell) *EntExU2 117 | CELL = FORMULA / Blank / MulBlank / RK / MulRk / BoolErr / Number / LabelSst 118 | FORMULA = [Uncalced] Formula [Array / Table / ShrFmla / SUB] [String *Continue] 119 | 120 | Not parsed form the list above: 121 | DBCell, EntExU2, Uncalced, Array, Table,ShrFmla 122 | NB: no idea what "SUB" is 123 | */ 124 | 125 | switch r.RecType { 126 | case RecTypeBOF: 127 | if ridx > 0 { 128 | inSubstream++ 129 | continue 130 | } 131 | 132 | case RecTypeBoolErr: 133 | rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2])) 134 | colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4])) 135 | ixfe := 
int(binary.LittleEndian.Uint16(r.Data[4:6])) 136 | if r.Data[7] == 0 { 137 | // Boolean value 138 | bv := false 139 | if r.Data[6] == 1 { 140 | bv = true 141 | } 142 | var fno uint16 143 | if ixfe < len(b.xfs) { 144 | fno = b.xfs[ixfe] 145 | } 146 | res.Put(rowIndex, colIndex, bv, fno) 147 | //log.Printf("bool/error spec: %d %d %+v", rowIndex, colIndex, bv) 148 | } else { 149 | // it's an error, load the label 150 | be, ok := berrLookup[r.Data[6]] 151 | if !ok { 152 | be = "" 153 | } 154 | res.Put(rowIndex, colIndex, be, 0) 155 | //log.Printf("bool/error spec: %d %d %s", rowIndex, colIndex, be) 156 | } 157 | 158 | case RecTypeMulRk: 159 | // MulRk encodes multiple RK values in a row 160 | nrk := int((r.RecSize - 6) / 6) 161 | rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2])) 162 | colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4])) 163 | for i := 0; i < nrk; i++ { 164 | off := 4 + i*6 165 | ixfe := int(binary.LittleEndian.Uint16(r.Data[off:])) 166 | value := RKNumber(binary.LittleEndian.Uint32(r.Data[off+2:])) 167 | 168 | var rval interface{} 169 | if value.IsInteger() { 170 | rval = value.Int() 171 | } else { 172 | rval = value.Float64() 173 | } 174 | var fno uint16 175 | if ixfe < len(b.xfs) { 176 | fno = b.xfs[ixfe] 177 | } 178 | res.Put(rowIndex, colIndex+i, rval, fno) 179 | } 180 | //log.Printf("mulrow spec: %+v", *mr) 181 | 182 | case RecTypeNumber: 183 | rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2])) 184 | colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4])) 185 | ixfe := int(binary.LittleEndian.Uint16(r.Data[4:6])) 186 | xnum := binary.LittleEndian.Uint64(r.Data[6:]) 187 | 188 | value := math.Float64frombits(xnum) 189 | var fno uint16 190 | if ixfe < len(b.xfs) { 191 | fno = b.xfs[ixfe] 192 | } 193 | res.Put(rowIndex, colIndex, value, fno) 194 | //log.Printf("Number spec: %d %d = %f", rowIndex, colIndex, value) 195 | 196 | case RecTypeRK: 197 | rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2])) 198 | colIndex := 
int(binary.LittleEndian.Uint16(r.Data[2:4])) 199 | ixfe := int(binary.LittleEndian.Uint16(r.Data[4:])) 200 | value := RKNumber(binary.LittleEndian.Uint32(r.Data[6:])) 201 | 202 | var rval interface{} 203 | if value.IsInteger() { 204 | rval = value.Int() 205 | } else { 206 | rval = value.Float64() 207 | } 208 | var fno uint16 209 | if ixfe < len(b.xfs) { 210 | fno = b.xfs[ixfe] 211 | } 212 | res.Put(rowIndex, colIndex, rval, fno) 213 | //log.Printf("RK spec: %d %d = %+v", rowIndex, colIndex, rval) 214 | 215 | case RecTypeFormula: 216 | formulaRow = binary.LittleEndian.Uint16(r.Data[:2]) 217 | formulaCol = binary.LittleEndian.Uint16(r.Data[2:4]) 218 | ixfe := int(binary.LittleEndian.Uint16(r.Data[4:6])) 219 | fdata := r.Data[6:] 220 | var fno uint16 221 | if ixfe < len(b.xfs) { 222 | fno = b.xfs[ixfe] 223 | } 224 | if fdata[6] == 0xFF && fdata[7] == 0xFF { 225 | switch fdata[0] { 226 | case 0: 227 | // string in next record 228 | // put placeholder now to record the numFmt 229 | res.Put(int(formulaRow), int(formulaCol), "", fno) 230 | case 1: 231 | // boolean 232 | bv := false 233 | if fdata[2] != 0 { 234 | bv = true 235 | } 236 | res.Put(int(formulaRow), int(formulaCol), bv, fno) 237 | case 2: 238 | // error value 239 | be, ok := berrLookup[fdata[2]] 240 | if !ok { 241 | be = "" 242 | } 243 | res.Put(int(formulaRow), int(formulaCol), be, 0) 244 | case 3: 245 | // blank string 246 | default: 247 | log.Printf("unknown formula value type %d", fdata[0]) 248 | } 249 | } else { 250 | xnum := binary.LittleEndian.Uint64(fdata) 251 | value := math.Float64frombits(xnum) 252 | res.Put(int(formulaRow), int(formulaCol), value, fno) 253 | } 254 | //log.Printf("formula spec: %d %d ~~ %+v", formulaRow, formulaCol, r.Data) 255 | 256 | case RecTypeString: 257 | // String is the previously rendered value of a formula 258 | // NB similar to the workbook SST, this can continue over 259 | // addition records up to 32k characters. 
A 1-byte flag 260 | // at each gap indicates if the encoding switches 261 | // to/from 8/16-bit characters. 262 | 263 | charCount := binary.LittleEndian.Uint16(r.Data[:2]) 264 | flags := r.Data[2] 265 | fstr := "" 266 | if (flags & 1) == 0 { 267 | fstr = string(r.Data[3:]) 268 | } else { 269 | raw := r.Data[3:] 270 | if int(charCount) > cap(us) { 271 | us = make([]uint16, charCount) 272 | } 273 | us = us[:charCount] 274 | for i := 0; i < int(charCount); i++ { 275 | us[i] = binary.LittleEndian.Uint16(raw) 276 | raw = raw[2:] 277 | } 278 | fstr = string(utf16.Decode(us)) 279 | } 280 | 281 | if (ridx + 1) < len(b.substreams[ss]) { 282 | ridx2 := ridx + 1 283 | nrecs := len(b.substreams[ss]) 284 | for ridx2 < nrecs { 285 | r2 := b.substreams[ss][ridx2] 286 | if r2.RecType != RecTypeContinue { 287 | break 288 | } 289 | if (r2.Data[0] & 1) == 0 { 290 | fstr += string(r2.Data[1:]) 291 | } else { 292 | raw := r2.Data[1:] 293 | slen := len(raw) / 2 294 | us = us[:slen] 295 | for i := 0; i < slen; i++ { 296 | us[i] = binary.LittleEndian.Uint16(raw) 297 | raw = raw[2:] 298 | } 299 | fstr += string(utf16.Decode(us)) 300 | } 301 | ridx2++ 302 | } 303 | } 304 | res.Set(int(formulaRow), int(formulaCol), fstr) 305 | //log.Printf("String direct: %d %d '%s'", int(formulaRow), int(formulaCol), fstr) 306 | 307 | case RecTypeLabelSst: 308 | rowIndex := int(binary.LittleEndian.Uint16(r.Data[:2])) 309 | colIndex := int(binary.LittleEndian.Uint16(r.Data[2:4])) 310 | ixfe := int(binary.LittleEndian.Uint16(r.Data[4:6])) 311 | sstIndex := int(binary.LittleEndian.Uint32(r.Data[6:])) 312 | if sstIndex > len(b.strings) { 313 | return nil, errors.New("xls: invalid sst index") 314 | } 315 | var fno uint16 316 | if ixfe < len(b.xfs) { 317 | fno = b.xfs[ixfe] 318 | } 319 | if b.strings[sstIndex] != "" { 320 | res.Put(rowIndex, colIndex, b.strings[sstIndex], fno) 321 | } 322 | //log.Printf("SST spec: %d %d = [%d] '%s' %d", rowIndex, colIndex, sstIndex, b.strings[sstIndex], fno) 323 | 324 | case 
RecTypeHLink: 325 | firstRow := binary.LittleEndian.Uint16(r.Data[:2]) 326 | lastRow := binary.LittleEndian.Uint16(r.Data[2:4]) 327 | firstCol := binary.LittleEndian.Uint16(r.Data[4:6]) 328 | lastCol := binary.LittleEndian.Uint16(r.Data[6:]) 329 | if int(firstCol) > int(maxCol) { 330 | //log.Println("invalid hyperlink column") 331 | continue 332 | } 333 | if int(firstRow) > int(maxRow) { 334 | //log.Println("invalid hyperlink row") 335 | continue 336 | } 337 | if lastRow == 0xFFFF { // placeholder value indicate "last" 338 | lastRow = uint16(maxRow) - 1 339 | } 340 | if lastCol == 0xFF { // placeholder value indicate "last" 341 | lastCol = uint16(maxCol) - 1 342 | } 343 | 344 | // decode the hyperlink datastructure and try to find the 345 | // display text and separate the URL itself. 346 | displayText, linkText, err := decodeHyperlinks(r.Data[8:]) 347 | if err != nil { 348 | log.Println(err) 349 | continue 350 | } 351 | 352 | // apply merge cell rules (see RecTypeMergeCells below) 353 | for rn := int(firstRow); rn <= int(lastRow); rn++ { 354 | for cn := int(firstCol); cn <= int(lastCol); cn++ { 355 | if rn == int(firstRow) && cn == int(firstCol) { 356 | // TODO: provide custom hooks for how to handle links in output 357 | res.Put(rn, cn, displayText+" <"+linkText+">", 0) 358 | } else if cn == int(firstCol) { 359 | // first and last column MAY be the same 360 | if rn == int(lastRow) { 361 | res.Put(rn, cn, grate.EndRowMerged, 0) 362 | } else { 363 | res.Put(rn, cn, grate.ContinueRowMerged, 0) 364 | } 365 | } else if cn == int(lastCol) { 366 | // first and last column are NOT the same 367 | res.Put(rn, cn, grate.EndColumnMerged, 0) 368 | } else { 369 | res.Put(rn, cn, grate.ContinueColumnMerged, 0) 370 | } 371 | } 372 | } 373 | 374 | case RecTypeMergeCells: 375 | // To keep cells aligned, Merged cells are handled by placing 376 | // special characters in each cell covered by the merge block. 
377 | // 378 | // The contents of the cell are always in the top left position. 379 | // A "down arrow" (↓) indicates the left side of the merge block, and a 380 | // "down arrow with stop line" (⤓) indicates the last row of the merge. 381 | // A "right arrow" (→) indicates that the columns span horizontally, 382 | // and a "right arrow with stop line" (⇥) indicates the rightmost 383 | // column of the merge. 384 | // 385 | 386 | cmcs := binary.LittleEndian.Uint16(r.Data[:2]) 387 | raw := r.Data[2:] 388 | for i := 0; i < int(cmcs); i++ { 389 | firstRow := binary.LittleEndian.Uint16(raw[:2]) 390 | lastRow := binary.LittleEndian.Uint16(raw[2:4]) 391 | firstCol := binary.LittleEndian.Uint16(raw[4:6]) 392 | lastCol := binary.LittleEndian.Uint16(raw[6:]) 393 | raw = raw[8:] 394 | 395 | if lastRow == 0xFFFF { // placeholder value indicate "last" 396 | lastRow = uint16(maxRow) - 1 397 | } 398 | if lastCol == 0xFF { // placeholder value indicate "last" 399 | lastCol = uint16(maxCol) - 1 400 | } 401 | for rn := int(firstRow); rn <= int(lastRow); rn++ { 402 | for cn := int(firstCol); cn <= int(lastCol); cn++ { 403 | if rn == int(firstRow) && cn == int(firstCol) { 404 | // should be a value there already! 
// berrLookup translates the one-byte error code found in a formula's cached
// result into the text shown for that error (for example "#DIV/0!").
// Codes that are not listed have no entry, so a lookup yields the empty
// string and ok == false.
var berrLookup = map[byte]string{
	0:  "#NULL!",         // 0x00
	7:  "#DIV/0!",        // 0x07
	15: "#VALUE!",        // 0x0F
	23: "#REF!",          // 0x17
	29: "#NAME?",         // 0x1D
	36: "#NUM!",          // 0x24
	42: "#N/A",           // 0x2A
	43: "#GETTING_DATA", // 0x2B
}
// https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-xls/43684742-8fcd-4fcd-92df-157d8d7241f9
//
// Each constant below is the numeric identifier of one record type. The
// trailing comment on every line names the section of the specification
// that describes that record.
const (
	RecTypeFormula recordType = 6 // per section 2.4.127
	RecTypeEOF recordType = 10 // section 2.4.103
	RecTypeCalcCount recordType = 12 // section 2.4.31
	RecTypeCalcMode recordType = 13 // section 2.4.34
	RecTypeCalcPrecision recordType = 14 // section 2.4.35
	RecTypeCalcRefMode recordType = 15 // section 2.4.36
	RecTypeCalcDelta recordType = 16 // section 2.4.32
	RecTypeCalcIter recordType = 17 // section 2.4.33
	RecTypeProtect recordType = 18 // section 2.4.207
	RecTypePassword recordType = 19 // section 2.4.191
	RecTypeHeader recordType = 20 // section 2.4.136
	RecTypeFooter recordType = 21 // section 2.4.124
	RecTypeExternSheet recordType = 23 // section 2.4.106
	RecTypeLbl recordType = 24 // section 2.4.150
	RecTypeWinProtect recordType = 25 // section 2.4.347
	RecTypeVerticalPageBreaks recordType = 26 // section 2.4.343
	RecTypeHorizontalPageBreaks recordType = 27 // section 2.4.142
	RecTypeNote recordType = 28 // section 2.4.179
	RecTypeSelection recordType = 29 // section 2.4.248
	RecTypeDate1904 recordType = 34 // section 2.4.77
	RecTypeExternName recordType = 35 // section 2.4.105
	RecTypeLeftMargin recordType = 38 // section 2.4.151
	RecTypeRightMargin recordType = 39 // section 2.4.219
	RecTypeTopMargin recordType = 40 // section 2.4.328
	RecTypeBottomMargin recordType = 41 // section 2.4.27
	RecTypePrintRowCol recordType = 42 // section 2.4.203
	RecTypePrintGrid recordType = 43 // section 2.4.202
	RecTypeFilePass recordType = 47 // section 2.4.117
	RecTypeFont recordType = 49 // section 2.4.122
	RecTypePrintSize recordType = 51 // section 2.4.204
	RecTypeContinue recordType = 60 // section 2.4.58
	RecTypeWindow1 recordType = 61 // section 2.4.345
	RecTypeBackup recordType = 64 // section 2.4.14
	RecTypePane recordType = 65 // section 2.4.189
	RecTypeCodePage recordType = 66 // section 2.4.52
	RecTypePls recordType = 77 // section 2.4.199
	RecTypeDCon recordType = 80 // section 2.4.82
	RecTypeDConRef recordType = 81 // section 2.4.86
	RecTypeDConName recordType = 82 // section 2.4.85
	RecTypeDefColWidth recordType = 85 // section 2.4.89
	RecTypeXCT recordType = 89 // section 2.4.352
	RecTypeCRN recordType = 90 // section 2.4.65
	RecTypeFileSharing recordType = 91 // section 2.4.118
	RecTypeWriteAccess recordType = 92 // section 2.4.349
	RecTypeObj recordType = 93 // section 2.4.181
	RecTypeUncalced recordType = 94 // section 2.4.331
	RecTypeCalcSaveRecalc recordType = 95 // section 2.4.37
	RecTypeTemplate recordType = 96 // section 2.4.323
	RecTypeIntl recordType = 97 // section 2.4.147
	RecTypeObjProtect recordType = 99 // section 2.4.183
	RecTypeColInfo recordType = 125 // section 2.4.53
	RecTypeGuts recordType = 128 // section 2.4.134
	RecTypeWsBool recordType = 129 // section 2.4.351
	RecTypeGridSet recordType = 130 // section 2.4.132
	RecTypeHCenter recordType = 131 // section 2.4.135
	RecTypeVCenter recordType = 132 // section 2.4.342
	RecTypeBoundSheet8 recordType = 133 // section 2.4.28
	RecTypeWriteProtect recordType = 134 // section 2.4.350
	RecTypeCountry recordType = 140 // section 2.4.63
	RecTypeHideObj recordType = 141 // section 2.4.139
	RecTypeSort recordType = 144 // section 2.4.263
	RecTypePalette recordType = 146 // section 2.4.188
	RecTypeSync recordType = 151 // section 2.4.318
	RecTypeLPr recordType = 152 // section 2.4.158
	RecTypeDxGCol recordType = 153 // section 2.4.98
	RecTypeFnGroupName recordType = 154 // section 2.4.120
	RecTypeFilterMode recordType = 155 // section 2.4.119
	RecTypeBuiltInFnGroupCount recordType = 156 // section 2.4.30
	RecTypeAutoFilterInfo recordType = 157 // section 2.4.8
	RecTypeAutoFilter recordType = 158 // section 2.4.6
	RecTypeScl recordType = 160 // section 2.4.247
	RecTypeSetup recordType = 161 // section 2.4.257
	RecTypeScenMan recordType = 174 // section 2.4.246
	RecTypeSCENARIO recordType = 175 // section 2.4.244
	RecTypeSxView recordType = 176 // section 2.4.313
	RecTypeSxvd recordType = 177 // section 2.4.309
	RecTypeSXVI recordType = 178 // section 2.4.312
	RecTypeSxIvd recordType = 180 // section 2.4.292
	RecTypeSXLI recordType = 181 // section 2.4.293
	RecTypeSXPI recordType = 182 // section 2.4.298
	RecTypeDocRoute recordType = 184 // section 2.4.91
	RecTypeRecipName recordType = 185 // section 2.4.216
	RecTypeMulRk recordType = 189 // section 2.4.175
	RecTypeMulBlank recordType = 190 // section 2.4.174
	RecTypeMms recordType = 193 // section 2.4.169
	RecTypeSXDI recordType = 197 // section 2.4.278
	RecTypeSXDB recordType = 198 // section 2.4.275
	RecTypeSXFDB recordType = 199 // section 2.4.283
	RecTypeSXDBB recordType = 200 // section 2.4.276
	RecTypeSXNum recordType = 201 // section 2.4.296
	RecTypeSxBool recordType = 202 // section 2.4.274
	RecTypeSxErr recordType = 203 // section 2.4.281
	RecTypeSXInt recordType = 204 // section 2.4.289
	RecTypeSXString recordType = 205 // section 2.4.304
	RecTypeSXDtr recordType = 206 // section 2.4.279
	RecTypeSxNil recordType = 207 // section 2.4.295
	RecTypeSXTbl recordType = 208 // section 2.4.305
	RecTypeSXTBRGIITM recordType = 209 // section 2.4.307
	RecTypeSxTbpg recordType = 210 // section 2.4.306
	RecTypeObProj recordType = 211 // section 2.4.185
	RecTypeSXStreamID recordType = 213 // section 2.4.303
	RecTypeDBCell recordType = 215 // section 2.4.78
	RecTypeSXRng recordType = 216 // section 2.4.300
	RecTypeSxIsxoper recordType = 217 // section 2.4.290
	RecTypeBookBool recordType = 218 // section 2.4.22
	RecTypeDbOrParamQry recordType = 220 // section 2.4.79
	RecTypeScenarioProtect recordType = 221 // section 2.4.245
	RecTypeOleObjectSize recordType = 222 // section 2.4.187
	RecTypeXF recordType = 224 // section 2.4.353
	RecTypeInterfaceHdr recordType = 225 // section 2.4.146
	RecTypeInterfaceEnd recordType = 226 // section 2.4.145
	RecTypeSXVS recordType = 227 // section 2.4.317
	RecTypeMergeCells recordType = 229 // section 2.4.168
	RecTypeBkHim recordType = 233 // section 2.4.19
	RecTypeMsoDrawingGroup recordType = 235 // section 2.4.171
	RecTypeMsoDrawing recordType = 236 // section 2.4.170
	RecTypeMsoDrawingSelection recordType = 237 // section 2.4.172
	RecTypePhoneticInfo recordType = 239 // section 2.4.192
	RecTypeSxRule recordType = 240 // section 2.4.301
	RecTypeSXEx recordType = 241 // section 2.4.282
	RecTypeSxFilt recordType = 242 // section 2.4.285
	RecTypeSxDXF recordType = 244 // section 2.4.280
	RecTypeSxItm recordType = 245 // section 2.4.291
	RecTypeSxName recordType = 246 // section 2.4.294
	RecTypeSxSelect recordType = 247 // section 2.4.302
	RecTypeSXPair recordType = 248 // section 2.4.297
	RecTypeSxFmla recordType = 249 // section 2.4.286
	RecTypeSxFormat recordType = 251 // section 2.4.287
	RecTypeSST recordType = 252 // section 2.4.265
	RecTypeLabelSst recordType = 253 // section 2.4.149
	RecTypeExtSST recordType = 255 // section 2.4.107
	RecTypeSXVDEx recordType = 256 // section 2.4.310
	RecTypeSXFormula recordType = 259 // section 2.4.288
	RecTypeSXDBEx recordType = 290 // section 2.4.277
	RecTypeRRDInsDel recordType = 311 // section 2.4.228
	RecTypeRRDHead recordType = 312 // section 2.4.226
	RecTypeRRDChgCell recordType = 315 // section 2.4.223
	RecTypeRRTabID recordType = 317 // section 2.4.241
	RecTypeRRDRenSheet recordType = 318 // section 2.4.234
	RecTypeRRSort recordType = 319 // section 2.4.240
	RecTypeRRDMove recordType = 320 // section 2.4.231
	RecTypeRRFormat recordType = 330 // section 2.4.238
	RecTypeRRAutoFmt recordType = 331 // section 2.4.222
	RecTypeRRInsertSh recordType = 333 // section 2.4.239
	RecTypeRRDMoveBegin recordType = 334 // section 2.4.232
	RecTypeRRDMoveEnd recordType = 335 // section 2.4.233
	RecTypeRRDInsDelBegin recordType = 336 // section 2.4.229
	RecTypeRRDInsDelEnd recordType = 337 // section 2.4.230
	RecTypeRRDConflict recordType = 338 // section 2.4.224
	RecTypeRRDDefName recordType = 339 // section 2.4.225
	RecTypeRRDRstEtxp recordType = 340 // section 2.4.235
	RecTypeLRng recordType = 351 // section 2.4.159
	RecTypeUsesELFs recordType = 352 // section 2.4.337
	RecTypeDSF recordType = 353 // section 2.4.94
	RecTypeCUsr recordType = 401 // section 2.4.72
	RecTypeCbUsr recordType = 402 // section 2.4.40
	RecTypeUsrInfo recordType = 403 // section 2.4.340
	RecTypeUsrExcl recordType = 404 // section 2.4.339
	RecTypeFileLock recordType = 405 // section 2.4.116
	RecTypeRRDInfo recordType = 406 // section 2.4.227
	RecTypeBCUsrs recordType = 407 // section 2.4.16
	RecTypeUsrChk recordType = 408 // section 2.4.338
	RecTypeUserBView recordType = 425 // section 2.4.333
	RecTypeUserSViewBegin recordType = 426 // section 2.4.334
	// NOTE: deliberately the same value (426) as RecTypeUserSViewBegin, so
	// the two names cannot both be used as cases of a single switch.
	RecTypeUserSViewBeginChart recordType = 426 // section 2.4.335
	RecTypeUserSViewEnd recordType = 427 // section 2.4.336
	RecTypeRRDUserView recordType = 428 // section 2.4.237
	RecTypeQsi recordType = 429 // section 2.4.208
	RecTypeSupBook recordType = 430 // section 2.4.271
	RecTypeProt4Rev recordType = 431 // section 2.4.205
	RecTypeCondFmt recordType = 432 // section 2.4.56
	RecTypeCF recordType = 433 // section 2.4.42
	RecTypeDVal recordType = 434 // section 2.4.96
	RecTypeDConBin recordType = 437 // section 2.4.83
	RecTypeTxO recordType = 438 // section 2.4.329
	RecTypeRefreshAll recordType = 439 // section 2.4.217
	RecTypeHLink recordType = 440 // section 2.4.140
	RecTypeLel recordType = 441 // section 2.4.154
	RecTypeCodeName recordType = 442 // section 2.4.51
	RecTypeSXFDBType recordType = 443 // section 2.4.284
	RecTypeProt4RevPass recordType = 444 // section 2.4.206
	RecTypeObNoMacros recordType = 445 // section 2.4.184
	RecTypeDv recordType = 446 // section 2.4.95
	RecTypeExcel9File recordType = 448 // section 2.4.104
	RecTypeRecalcID recordType = 449 // section 2.4.215
	RecTypeEntExU2 recordType = 450 // section 2.4.102
	RecTypeDimensions recordType = 512 // section 2.4.90
	RecTypeBlank recordType = 513 // section 2.4.20
	RecTypeNumber recordType = 515 // section 2.4.180
	RecTypeLabel recordType = 516 // section 2.4.148
	RecTypeBoolErr recordType = 517 // section 2.4.24
	RecTypeString recordType = 519 // section 2.4.268
	RecTypeRow recordType = 520 // section 2.4.221
	RecTypeIndex recordType = 523 // section 2.4.144
	RecTypeArray recordType = 545 // section 2.4.4
	RecTypeDefaultRowHeight recordType = 549 // section 2.4.87
	RecTypeTable recordType = 566 // section 2.4.319
	RecTypeWindow2 recordType = 574 // section 2.4.346
	RecTypeRK recordType = 638 // section 2.4.220
	RecTypeStyle recordType = 659 // section 2.4.269
	RecTypeBigName recordType = 1048 // section 2.4.18
	RecTypeFormat recordType = 1054 // section 2.4.126
	RecTypeContinueBigName recordType = 1084 // section 2.4.59
	RecTypeShrFmla recordType = 1212 // section 2.4.260
	RecTypeHLinkTooltip recordType = 2048 // section 2.4.141
	RecTypeWebPub recordType = 2049 // section 2.4.344
	RecTypeQsiSXTag recordType = 2050 // section 2.4.211
	RecTypeDBQueryExt recordType = 2051 // section 2.4.81
	RecTypeExtString recordType = 2052 // section 2.4.108
	RecTypeTxtQry recordType = 2053 // section 2.4.330
	RecTypeQsir recordType = 2054 // section 2.4.210
	RecTypeQsif recordType = 2055 // section 2.4.209
	RecTypeRRDTQSIF recordType = 2056 // section 2.4.236
	RecTypeBOF recordType = 2057 // section 2.4.21
	RecTypeOleDbConn recordType = 2058 // section 2.4.186
	RecTypeWOpt recordType = 2059 // section 2.4.348
	RecTypeSXViewEx recordType = 2060 // section 2.4.314
	RecTypeSXTH recordType = 2061 // section 2.4.308
	RecTypeSXPIEx recordType = 2062 // section 2.4.299
	RecTypeSXVDTEx recordType = 2063 // section 2.4.311
	RecTypeSXViewEx9 recordType = 2064 // section 2.4.315
	RecTypeContinueFrt recordType = 2066 // section 2.4.60
	RecTypeRealTimeData recordType = 2067 // section 2.4.214
	RecTypeChartFrtInfo recordType = 2128 // section 2.4.49
	RecTypeFrtWrapper recordType = 2129 // section 2.4.130
	RecTypeStartBlock recordType = 2130 // section 2.4.266
	RecTypeEndBlock recordType = 2131 // section 2.4.100
	RecTypeStartObject recordType = 2132 // section 2.4.267
	RecTypeEndObject recordType = 2133 // section 2.4.101
	RecTypeCatLab recordType = 2134 // section 2.4.38
	RecTypeYMult recordType = 2135 // section 2.4.356
	RecTypeSXViewLink recordType = 2136 // section 2.4.316
	RecTypePivotChartBits recordType = 2137 // section 2.4.196
	RecTypeFrtFontList recordType = 2138 // section 2.4.129
	RecTypeSheetExt recordType = 2146 // section 2.4.259
	RecTypeBookExt recordType = 2147 // section 2.4.23
	RecTypeSXAddl recordType = 2148 // section 2.4.273.2
	RecTypeCrErr recordType = 2149 // section 2.4.64
	RecTypeHFPicture recordType = 2150 // section 2.4.138
	RecTypeFeatHdr recordType = 2151 // section 2.4.112
	RecTypeFeat recordType = 2152 // section 2.4.111
	RecTypeDataLabExt recordType = 2154 // section 2.4.75
	RecTypeDataLabExtContents recordType = 2155 // section 2.4.76
	RecTypeCellWatch recordType = 2156 // section 2.4.41
	RecTypeFeatHdr11 recordType = 2161 // section 2.4.113
	RecTypeFeature11 recordType = 2162 // section 2.4.114
	RecTypeDropDownObjIds recordType = 2164 // section 2.4.93
	RecTypeContinueFrt11 recordType = 2165 // section 2.4.61
	RecTypeDConn recordType = 2166 // section 2.4.84
	RecTypeList12 recordType = 2167 // section 2.4.157
	RecTypeFeature12 recordType = 2168 // section 2.4.115
	RecTypeCondFmt12 recordType = 2169 // section 2.4.57
	RecTypeCF12 recordType = 2170 // section 2.4.43
	RecTypeCFEx recordType = 2171 // section 2.4.44
	RecTypeXFCRC recordType = 2172 // section 2.4.354
	RecTypeXFExt recordType = 2173 // section 2.4.355
	RecTypeAutoFilter12 recordType = 2174 // section 2.4.7
	RecTypeContinueFrt12 recordType = 2175 // section 2.4.62
	RecTypeMDTInfo recordType = 2180 // section 2.4.162
	RecTypeMDXStr recordType = 2181 // section 2.4.166
	RecTypeMDXTuple recordType = 2182 // section 2.4.167
	RecTypeMDXSet recordType = 2183 // section 2.4.165
	RecTypeMDXProp recordType = 2184 // section 2.4.164
	RecTypeMDXKPI recordType = 2185 // section 2.4.163
	RecTypeMDB recordType = 2186 // section 2.4.161
	RecTypePLV recordType = 2187 // section 2.4.200
	RecTypeCompat12 recordType = 2188 // section 2.4.54
	RecTypeDXF recordType = 2189 // section 2.4.97
	RecTypeTableStyles recordType = 2190 // section 2.4.322
	RecTypeTableStyle recordType = 2191 // section 2.4.320
	RecTypeTableStyleElement recordType = 2192 // section 2.4.321
	RecTypeStyleExt recordType = 2194 // section 2.4.270
	RecTypeNamePublish recordType = 2195 // section 2.4.178
	RecTypeNameCmt recordType = 2196 // section 2.4.176
	RecTypeSortData recordType = 2197 // section 2.4.264
	RecTypeTheme recordType = 2198 // section 2.4.326
	RecTypeGUIDTypeLib recordType = 2199 // section 2.4.133
	RecTypeFnGrp12 recordType = 2200 // section 2.4.121
	RecTypeNameFnGrp12 recordType = 2201 // section 2.4.177
	RecTypeMTRSettings recordType = 2202 // section 2.4.173
	RecTypeCompressPictures recordType = 2203 // section 2.4.55
	RecTypeHeaderFooter recordType = 2204 // section 2.4.137
	RecTypeCrtLayout12 recordType = 2205 // section 2.4.66
	RecTypeCrtMlFrt recordType = 2206 // section 2.4.70
	RecTypeCrtMlFrtContinue recordType = 2207 // section 2.4.71
	RecTypeForceFullCalculation recordType = 2211 // section 2.4.125
	RecTypeShapePropsStream recordType = 2212 // section 2.4.258
	RecTypeTextPropsStream recordType = 2213 // section 2.4.325
	RecTypeRichTextStream recordType = 2214 // section 2.4.218
	RecTypeCrtLayout12A recordType = 2215 // section 2.4.67
	RecTypeUnits recordType = 4097 // section 2.4.332
	RecTypeChart recordType = 4098 // section 2.4.45
	RecTypeSeries recordType = 4099 // section 2.4.252
	RecTypeDataFormat recordType = 4102 // section 2.4.74
	RecTypeLineFormat recordType = 4103 // section 2.4.156
	RecTypeMarkerFormat recordType = 4105 // section 2.4.160
	RecTypeAreaFormat recordType = 4106 // section 2.4.3
	RecTypePieFormat recordType = 4107 // section 2.4.195
	RecTypeAttachedLabel recordType = 4108 // section 2.4.5
	RecTypeSeriesText recordType = 4109 // section 2.4.254
	RecTypeChartFormat recordType = 4116 // section 2.4.48
	RecTypeLegend recordType = 4117 // section 2.4.152
	RecTypeSeriesList recordType = 4118 // section 2.4.253
	RecTypeBar recordType = 4119 // section 2.4.15
	RecTypeLine recordType = 4120 // section 2.4.155
	RecTypePie recordType = 4121 // section 2.4.194
	RecTypeArea recordType = 4122 // section 2.4.2
	RecTypeScatter recordType = 4123 // section 2.4.243
	RecTypeCrtLine recordType = 4124 // section 2.4.68
	RecTypeAxis recordType = 4125 // section 2.4.11
	RecTypeTick recordType = 4126 // section 2.4.327
	RecTypeValueRange recordType = 4127 // section 2.4.341
	RecTypeCatSerRange recordType = 4128 // section 2.4.39
	RecTypeAxisLine recordType = 4129 // section 2.4.12
	RecTypeCrtLink recordType = 4130 // section 2.4.69
	RecTypeDefaultText recordType = 4132 // section 2.4.88
	RecTypeText recordType = 4133 // section 2.4.324
	RecTypeFontX recordType = 4134 // section 2.4.123
	RecTypeObjectLink recordType = 4135 // section 2.4.182
	RecTypeFrame recordType = 4146 // section 2.4.128
	RecTypeBegin recordType = 4147 // section 2.4.17
	RecTypeEnd recordType = 4148 // section 2.4.99
	RecTypePlotArea recordType = 4149 // section 2.4.197
	RecTypeChart3d recordType = 4154 // section 2.4.46
	RecTypePicF recordType = 4156 // section 2.4.193
	RecTypeDropBar recordType = 4157 // section 2.4.92
	RecTypeRadar recordType = 4158 // section 2.4.212
	RecTypeSurf recordType = 4159 // section 2.4.272
	RecTypeRadarArea recordType = 4160 // section 2.4.213
	RecTypeAxisParent recordType = 4161 // section 2.4.13
	RecTypeLegendException recordType = 4163 // section 2.4.153
	RecTypeShtProps recordType = 4164 // section 2.4.261
	RecTypeSerToCrt recordType = 4165 // section 2.4.256
	RecTypeAxesUsed recordType = 4166 // section 2.4.10
	RecTypeSBaseRef recordType = 4168 // section 2.4.242
	RecTypeSerParent recordType = 4170 // section 2.4.255
	RecTypeSerAuxTrend recordType = 4171 // section 2.4.250
	RecTypeIFmtRecord recordType = 4174 // section 2.4.143
	RecTypePos recordType = 4175 // section 2.4.201
	RecTypeAlRuns recordType = 4176 // section 2.4.1
	RecTypeBRAI recordType = 4177 // section 2.4.29
	RecTypeSerAuxErrBar recordType = 4187 // section 2.4.249
	RecTypeClrtClient recordType = 4188 // section 2.4.50
	RecTypeSerFmt recordType = 4189 // section 2.4.251
	RecTypeChart3DBarShape recordType = 4191 // section 2.4.47
	RecTypeFbi recordType = 4192 // section 2.4.109
	RecTypeBopPop recordType = 4193 // section 2.4.25
	RecTypeAxcExt recordType = 4194 // section 2.4.9
	RecTypeDat recordType = 4195 // section 2.4.73
	RecTypePlotGrowth recordType = 4196 // section 2.4.198
	RecTypeSIIndex recordType = 4197 // section 2.4.262
	RecTypeGelFrame recordType = 4198 // section 2.4.131
	RecTypeBopPopCustom recordType = 4199 // section 2.4.26
	RecTypeFbi2 recordType = 4200 // section 2.4.110
)
"RightMargin (39)" 414 | case RecTypeTopMargin: 415 | return "TopMargin (40)" 416 | case RecTypeBottomMargin: 417 | return "BottomMargin (41)" 418 | case RecTypePrintRowCol: 419 | return "PrintRowCol (42)" 420 | case RecTypePrintGrid: 421 | return "PrintGrid (43)" 422 | case RecTypeFilePass: 423 | return "FilePass (47)" 424 | case RecTypeFont: 425 | return "Font (49)" 426 | case RecTypePrintSize: 427 | return "PrintSize (51)" 428 | case RecTypeContinue: 429 | return "Continue (60)" 430 | case RecTypeWindow1: 431 | return "Window1 (61)" 432 | case RecTypeBackup: 433 | return "Backup (64)" 434 | case RecTypePane: 435 | return "Pane (65)" 436 | case RecTypeCodePage: 437 | return "CodePage (66)" 438 | case RecTypePls: 439 | return "Pls (77)" 440 | case RecTypeDCon: 441 | return "DCon (80)" 442 | case RecTypeDConRef: 443 | return "DConRef (81)" 444 | case RecTypeDConName: 445 | return "DConName (82)" 446 | case RecTypeDefColWidth: 447 | return "DefColWidth (85)" 448 | case RecTypeXCT: 449 | return "XCT (89)" 450 | case RecTypeCRN: 451 | return "CRN (90)" 452 | case RecTypeFileSharing: 453 | return "FileSharing (91)" 454 | case RecTypeWriteAccess: 455 | return "WriteAccess (92)" 456 | case RecTypeObj: 457 | return "Obj (93)" 458 | case RecTypeUncalced: 459 | return "Uncalced (94)" 460 | case RecTypeCalcSaveRecalc: 461 | return "CalcSaveRecalc (95)" 462 | case RecTypeTemplate: 463 | return "Template (96)" 464 | case RecTypeIntl: 465 | return "Intl (97)" 466 | case RecTypeObjProtect: 467 | return "ObjProtect (99)" 468 | case RecTypeColInfo: 469 | return "ColInfo (125)" 470 | case RecTypeGuts: 471 | return "Guts (128)" 472 | case RecTypeWsBool: 473 | return "WsBool (129)" 474 | case RecTypeGridSet: 475 | return "GridSet (130)" 476 | case RecTypeHCenter: 477 | return "HCenter (131)" 478 | case RecTypeVCenter: 479 | return "VCenter (132)" 480 | case RecTypeBoundSheet8: 481 | return "BoundSheet8 (133)" 482 | case RecTypeWriteProtect: 483 | return "WriteProtect (134)" 484 | 
case RecTypeCountry: 485 | return "Country (140)" 486 | case RecTypeHideObj: 487 | return "HideObj (141)" 488 | case RecTypeSort: 489 | return "Sort (144)" 490 | case RecTypePalette: 491 | return "Palette (146)" 492 | case RecTypeSync: 493 | return "Sync (151)" 494 | case RecTypeLPr: 495 | return "LPr (152)" 496 | case RecTypeDxGCol: 497 | return "DxGCol (153)" 498 | case RecTypeFnGroupName: 499 | return "FnGroupName (154)" 500 | case RecTypeFilterMode: 501 | return "FilterMode (155)" 502 | case RecTypeBuiltInFnGroupCount: 503 | return "BuiltInFnGroupCount (156)" 504 | case RecTypeAutoFilterInfo: 505 | return "AutoFilterInfo (157)" 506 | case RecTypeAutoFilter: 507 | return "AutoFilter (158)" 508 | case RecTypeScl: 509 | return "Scl (160)" 510 | case RecTypeSetup: 511 | return "Setup (161)" 512 | case RecTypeScenMan: 513 | return "ScenMan (174)" 514 | case RecTypeSCENARIO: 515 | return "SCENARIO (175)" 516 | case RecTypeSxView: 517 | return "SxView (176)" 518 | case RecTypeSxvd: 519 | return "Sxvd (177)" 520 | case RecTypeSXVI: 521 | return "SXVI (178)" 522 | case RecTypeSxIvd: 523 | return "SxIvd (180)" 524 | case RecTypeSXLI: 525 | return "SXLI (181)" 526 | case RecTypeSXPI: 527 | return "SXPI (182)" 528 | case RecTypeDocRoute: 529 | return "DocRoute (184)" 530 | case RecTypeRecipName: 531 | return "RecipName (185)" 532 | case RecTypeMulRk: 533 | return "MulRk (189)" 534 | case RecTypeMulBlank: 535 | return "MulBlank (190)" 536 | case RecTypeMms: 537 | return "Mms (193)" 538 | case RecTypeSXDI: 539 | return "SXDI (197)" 540 | case RecTypeSXDB: 541 | return "SXDB (198)" 542 | case RecTypeSXFDB: 543 | return "SXFDB (199)" 544 | case RecTypeSXDBB: 545 | return "SXDBB (200)" 546 | case RecTypeSXNum: 547 | return "SXNum (201)" 548 | case RecTypeSxBool: 549 | return "SxBool (202)" 550 | case RecTypeSxErr: 551 | return "SxErr (203)" 552 | case RecTypeSXInt: 553 | return "SXInt (204)" 554 | case RecTypeSXString: 555 | return "SXString (205)" 556 | case RecTypeSXDtr: 557 
| return "SXDtr (206)" 558 | case RecTypeSxNil: 559 | return "SxNil (207)" 560 | case RecTypeSXTbl: 561 | return "SXTbl (208)" 562 | case RecTypeSXTBRGIITM: 563 | return "SXTBRGIITM (209)" 564 | case RecTypeSxTbpg: 565 | return "SxTbpg (210)" 566 | case RecTypeObProj: 567 | return "ObProj (211)" 568 | case RecTypeSXStreamID: 569 | return "SXStreamID (213)" 570 | case RecTypeDBCell: 571 | return "DBCell (215)" 572 | case RecTypeSXRng: 573 | return "SXRng (216)" 574 | case RecTypeSxIsxoper: 575 | return "SxIsxoper (217)" 576 | case RecTypeBookBool: 577 | return "BookBool (218)" 578 | case RecTypeDbOrParamQry: 579 | return "DbOrParamQry (220)" 580 | case RecTypeScenarioProtect: 581 | return "ScenarioProtect (221)" 582 | case RecTypeOleObjectSize: 583 | return "OleObjectSize (222)" 584 | case RecTypeXF: 585 | return "XF (224)" 586 | case RecTypeInterfaceHdr: 587 | return "InterfaceHdr (225)" 588 | case RecTypeInterfaceEnd: 589 | return "InterfaceEnd (226)" 590 | case RecTypeSXVS: 591 | return "SXVS (227)" 592 | case RecTypeMergeCells: 593 | return "MergeCells (229)" 594 | case RecTypeBkHim: 595 | return "BkHim (233)" 596 | case RecTypeMsoDrawingGroup: 597 | return "MsoDrawingGroup (235)" 598 | case RecTypeMsoDrawing: 599 | return "MsoDrawing (236)" 600 | case RecTypeMsoDrawingSelection: 601 | return "MsoDrawingSelection (237)" 602 | case RecTypePhoneticInfo: 603 | return "PhoneticInfo (239)" 604 | case RecTypeSxRule: 605 | return "SxRule (240)" 606 | case RecTypeSXEx: 607 | return "SXEx (241)" 608 | case RecTypeSxFilt: 609 | return "SxFilt (242)" 610 | case RecTypeSxDXF: 611 | return "SxDXF (244)" 612 | case RecTypeSxItm: 613 | return "SxItm (245)" 614 | case RecTypeSxName: 615 | return "SxName (246)" 616 | case RecTypeSxSelect: 617 | return "SxSelect (247)" 618 | case RecTypeSXPair: 619 | return "SXPair (248)" 620 | case RecTypeSxFmla: 621 | return "SxFmla (249)" 622 | case RecTypeSxFormat: 623 | return "SxFormat (251)" 624 | case RecTypeSST: 625 | return "SST (252)" 
626 | case RecTypeLabelSst: 627 | return "LabelSst (253)" 628 | case RecTypeExtSST: 629 | return "ExtSST (255)" 630 | case RecTypeSXVDEx: 631 | return "SXVDEx (256)" 632 | case RecTypeSXFormula: 633 | return "SXFormula (259)" 634 | case RecTypeSXDBEx: 635 | return "SXDBEx (290)" 636 | case RecTypeRRDInsDel: 637 | return "RRDInsDel (311)" 638 | case RecTypeRRDHead: 639 | return "RRDHead (312)" 640 | case RecTypeRRDChgCell: 641 | return "RRDChgCell (315)" 642 | case RecTypeRRTabID: 643 | return "RRTabID (317)" 644 | case RecTypeRRDRenSheet: 645 | return "RRDRenSheet (318)" 646 | case RecTypeRRSort: 647 | return "RRSort (319)" 648 | case RecTypeRRDMove: 649 | return "RRDMove (320)" 650 | case RecTypeRRFormat: 651 | return "RRFormat (330)" 652 | case RecTypeRRAutoFmt: 653 | return "RRAutoFmt (331)" 654 | case RecTypeRRInsertSh: 655 | return "RRInsertSh (333)" 656 | case RecTypeRRDMoveBegin: 657 | return "RRDMoveBegin (334)" 658 | case RecTypeRRDMoveEnd: 659 | return "RRDMoveEnd (335)" 660 | case RecTypeRRDInsDelBegin: 661 | return "RRDInsDelBegin (336)" 662 | case RecTypeRRDInsDelEnd: 663 | return "RRDInsDelEnd (337)" 664 | case RecTypeRRDConflict: 665 | return "RRDConflict (338)" 666 | case RecTypeRRDDefName: 667 | return "RRDDefName (339)" 668 | case RecTypeRRDRstEtxp: 669 | return "RRDRstEtxp (340)" 670 | case RecTypeLRng: 671 | return "LRng (351)" 672 | case RecTypeUsesELFs: 673 | return "UsesELFs (352)" 674 | case RecTypeDSF: 675 | return "DSF (353)" 676 | case RecTypeCUsr: 677 | return "CUsr (401)" 678 | case RecTypeCbUsr: 679 | return "CbUsr (402)" 680 | case RecTypeUsrInfo: 681 | return "UsrInfo (403)" 682 | case RecTypeUsrExcl: 683 | return "UsrExcl (404)" 684 | case RecTypeFileLock: 685 | return "FileLock (405)" 686 | case RecTypeRRDInfo: 687 | return "RRDInfo (406)" 688 | case RecTypeBCUsrs: 689 | return "BCUsrs (407)" 690 | case RecTypeUsrChk: 691 | return "UsrChk (408)" 692 | case RecTypeUserBView: 693 | return "UserBView (425)" 694 | case 
RecTypeUserSViewBegin: 695 | return "UserSViewBegin[Chart] (426)" 696 | case RecTypeUserSViewEnd: 697 | return "UserSViewEnd (427)" 698 | case RecTypeRRDUserView: 699 | return "RRDUserView (428)" 700 | case RecTypeQsi: 701 | return "Qsi (429)" 702 | case RecTypeSupBook: 703 | return "SupBook (430)" 704 | case RecTypeProt4Rev: 705 | return "Prot4Rev (431)" 706 | case RecTypeCondFmt: 707 | return "CondFmt (432)" 708 | case RecTypeCF: 709 | return "CF (433)" 710 | case RecTypeDVal: 711 | return "DVal (434)" 712 | case RecTypeDConBin: 713 | return "DConBin (437)" 714 | case RecTypeTxO: 715 | return "TxO (438)" 716 | case RecTypeRefreshAll: 717 | return "RefreshAll (439)" 718 | case RecTypeHLink: 719 | return "HLink (440)" 720 | case RecTypeLel: 721 | return "Lel (441)" 722 | case RecTypeCodeName: 723 | return "CodeName (442)" 724 | case RecTypeSXFDBType: 725 | return "SXFDBType (443)" 726 | case RecTypeProt4RevPass: 727 | return "Prot4RevPass (444)" 728 | case RecTypeObNoMacros: 729 | return "ObNoMacros (445)" 730 | case RecTypeDv: 731 | return "Dv (446)" 732 | case RecTypeExcel9File: 733 | return "Excel9File (448)" 734 | case RecTypeRecalcID: 735 | return "RecalcID (449)" 736 | case RecTypeEntExU2: 737 | return "EntExU2 (450)" 738 | case RecTypeDimensions: 739 | return "Dimensions (512)" 740 | case RecTypeBlank: 741 | return "Blank (513)" 742 | case RecTypeNumber: 743 | return "Number (515)" 744 | case RecTypeLabel: 745 | return "Label (516)" 746 | case RecTypeBoolErr: 747 | return "BoolErr (517)" 748 | case RecTypeString: 749 | return "String (519)" 750 | case RecTypeRow: 751 | return "Row (520)" 752 | case RecTypeIndex: 753 | return "Index (523)" 754 | case RecTypeArray: 755 | return "Array (545)" 756 | case RecTypeDefaultRowHeight: 757 | return "DefaultRowHeight (549)" 758 | case RecTypeTable: 759 | return "Table (566)" 760 | case RecTypeWindow2: 761 | return "Window2 (574)" 762 | case RecTypeRK: 763 | return "RK (638)" 764 | case RecTypeStyle: 765 | return "Style 
(659)" 766 | case RecTypeBigName: 767 | return "BigName (1048)" 768 | case RecTypeFormat: 769 | return "Format (1054)" 770 | case RecTypeContinueBigName: 771 | return "ContinueBigName (1084)" 772 | case RecTypeShrFmla: 773 | return "ShrFmla (1212)" 774 | case RecTypeHLinkTooltip: 775 | return "HLinkTooltip (2048)" 776 | case RecTypeWebPub: 777 | return "WebPub (2049)" 778 | case RecTypeQsiSXTag: 779 | return "QsiSXTag (2050)" 780 | case RecTypeDBQueryExt: 781 | return "DBQueryExt (2051)" 782 | case RecTypeExtString: 783 | return "ExtString (2052)" 784 | case RecTypeTxtQry: 785 | return "TxtQry (2053)" 786 | case RecTypeQsir: 787 | return "Qsir (2054)" 788 | case RecTypeQsif: 789 | return "Qsif (2055)" 790 | case RecTypeRRDTQSIF: 791 | return "RRDTQSIF (2056)" 792 | case RecTypeBOF: 793 | return "BOF (2057)" 794 | case RecTypeOleDbConn: 795 | return "OleDbConn (2058)" 796 | case RecTypeWOpt: 797 | return "WOpt (2059)" 798 | case RecTypeSXViewEx: 799 | return "SXViewEx (2060)" 800 | case RecTypeSXTH: 801 | return "SXTH (2061)" 802 | case RecTypeSXPIEx: 803 | return "SXPIEx (2062)" 804 | case RecTypeSXVDTEx: 805 | return "SXVDTEx (2063)" 806 | case RecTypeSXViewEx9: 807 | return "SXViewEx9 (2064)" 808 | case RecTypeContinueFrt: 809 | return "ContinueFrt (2066)" 810 | case RecTypeRealTimeData: 811 | return "RealTimeData (2067)" 812 | case RecTypeChartFrtInfo: 813 | return "ChartFrtInfo (2128)" 814 | case RecTypeFrtWrapper: 815 | return "FrtWrapper (2129)" 816 | case RecTypeStartBlock: 817 | return "StartBlock (2130)" 818 | case RecTypeEndBlock: 819 | return "EndBlock (2131)" 820 | case RecTypeStartObject: 821 | return "StartObject (2132)" 822 | case RecTypeEndObject: 823 | return "EndObject (2133)" 824 | case RecTypeCatLab: 825 | return "CatLab (2134)" 826 | case RecTypeYMult: 827 | return "YMult (2135)" 828 | case RecTypeSXViewLink: 829 | return "SXViewLink (2136)" 830 | case RecTypePivotChartBits: 831 | return "PivotChartBits (2137)" 832 | case RecTypeFrtFontList: 
833 | return "FrtFontList (2138)" 834 | case RecTypeSheetExt: 835 | return "SheetExt (2146)" 836 | case RecTypeBookExt: 837 | return "BookExt (2147)" 838 | case RecTypeSXAddl: 839 | return "SXAddl (2148)" 840 | case RecTypeCrErr: 841 | return "CrErr (2149)" 842 | case RecTypeHFPicture: 843 | return "HFPicture (2150)" 844 | case RecTypeFeatHdr: 845 | return "FeatHdr (2151)" 846 | case RecTypeFeat: 847 | return "Feat (2152)" 848 | case RecTypeDataLabExt: 849 | return "DataLabExt (2154)" 850 | case RecTypeDataLabExtContents: 851 | return "DataLabExtContents (2155)" 852 | case RecTypeCellWatch: 853 | return "CellWatch (2156)" 854 | case RecTypeFeatHdr11: 855 | return "FeatHdr11 (2161)" 856 | case RecTypeFeature11: 857 | return "Feature11 (2162)" 858 | case RecTypeDropDownObjIds: 859 | return "DropDownObjIds (2164)" 860 | case RecTypeContinueFrt11: 861 | return "ContinueFrt11 (2165)" 862 | case RecTypeDConn: 863 | return "DConn (2166)" 864 | case RecTypeList12: 865 | return "List12 (2167)" 866 | case RecTypeFeature12: 867 | return "Feature12 (2168)" 868 | case RecTypeCondFmt12: 869 | return "CondFmt12 (2169)" 870 | case RecTypeCF12: 871 | return "CF12 (2170)" 872 | case RecTypeCFEx: 873 | return "CFEx (2171)" 874 | case RecTypeXFCRC: 875 | return "XFCRC (2172)" 876 | case RecTypeXFExt: 877 | return "XFExt (2173)" 878 | case RecTypeAutoFilter12: 879 | return "AutoFilter12 (2174)" 880 | case RecTypeContinueFrt12: 881 | return "ContinueFrt12 (2175)" 882 | case RecTypeMDTInfo: 883 | return "MDTInfo (2180)" 884 | case RecTypeMDXStr: 885 | return "MDXStr (2181)" 886 | case RecTypeMDXTuple: 887 | return "MDXTuple (2182)" 888 | case RecTypeMDXSet: 889 | return "MDXSet (2183)" 890 | case RecTypeMDXProp: 891 | return "MDXProp (2184)" 892 | case RecTypeMDXKPI: 893 | return "MDXKPI (2185)" 894 | case RecTypeMDB: 895 | return "MDB (2186)" 896 | case RecTypePLV: 897 | return "PLV (2187)" 898 | case RecTypeCompat12: 899 | return "Compat12 (2188)" 900 | case RecTypeDXF: 901 | return 
"DXF (2189)" 902 | case RecTypeTableStyles: 903 | return "TableStyles (2190)" 904 | case RecTypeTableStyle: 905 | return "TableStyle (2191)" 906 | case RecTypeTableStyleElement: 907 | return "TableStyleElement (2192)" 908 | case RecTypeStyleExt: 909 | return "StyleExt (2194)" 910 | case RecTypeNamePublish: 911 | return "NamePublish (2195)" 912 | case RecTypeNameCmt: 913 | return "NameCmt (2196)" 914 | case RecTypeSortData: 915 | return "SortData (2197)" 916 | case RecTypeTheme: 917 | return "Theme (2198)" 918 | case RecTypeGUIDTypeLib: 919 | return "GUIDTypeLib (2199)" 920 | case RecTypeFnGrp12: 921 | return "FnGrp12 (2200)" 922 | case RecTypeNameFnGrp12: 923 | return "NameFnGrp12 (2201)" 924 | case RecTypeMTRSettings: 925 | return "MTRSettings (2202)" 926 | case RecTypeCompressPictures: 927 | return "CompressPictures (2203)" 928 | case RecTypeHeaderFooter: 929 | return "HeaderFooter (2204)" 930 | case RecTypeCrtLayout12: 931 | return "CrtLayout12 (2205)" 932 | case RecTypeCrtMlFrt: 933 | return "CrtMlFrt (2206)" 934 | case RecTypeCrtMlFrtContinue: 935 | return "CrtMlFrtContinue (2207)" 936 | case RecTypeForceFullCalculation: 937 | return "ForceFullCalculation (2211)" 938 | case RecTypeShapePropsStream: 939 | return "ShapePropsStream (2212)" 940 | case RecTypeTextPropsStream: 941 | return "TextPropsStream (2213)" 942 | case RecTypeRichTextStream: 943 | return "RichTextStream (2214)" 944 | case RecTypeCrtLayout12A: 945 | return "CrtLayout12A (2215)" 946 | case RecTypeUnits: 947 | return "Units (4097)" 948 | case RecTypeChart: 949 | return "Chart (4098)" 950 | case RecTypeSeries: 951 | return "Series (4099)" 952 | case RecTypeDataFormat: 953 | return "DataFormat (4102)" 954 | case RecTypeLineFormat: 955 | return "LineFormat (4103)" 956 | case RecTypeMarkerFormat: 957 | return "MarkerFormat (4105)" 958 | case RecTypeAreaFormat: 959 | return "AreaFormat (4106)" 960 | case RecTypePieFormat: 961 | return "PieFormat (4107)" 962 | case RecTypeAttachedLabel: 963 | return 
"AttachedLabel (4108)" 964 | case RecTypeSeriesText: 965 | return "SeriesText (4109)" 966 | case RecTypeChartFormat: 967 | return "ChartFormat (4116)" 968 | case RecTypeLegend: 969 | return "Legend (4117)" 970 | case RecTypeSeriesList: 971 | return "SeriesList (4118)" 972 | case RecTypeBar: 973 | return "Bar (4119)" 974 | case RecTypeLine: 975 | return "Line (4120)" 976 | case RecTypePie: 977 | return "Pie (4121)" 978 | case RecTypeArea: 979 | return "Area (4122)" 980 | case RecTypeScatter: 981 | return "Scatter (4123)" 982 | case RecTypeCrtLine: 983 | return "CrtLine (4124)" 984 | case RecTypeAxis: 985 | return "Axis (4125)" 986 | case RecTypeTick: 987 | return "Tick (4126)" 988 | case RecTypeValueRange: 989 | return "ValueRange (4127)" 990 | case RecTypeCatSerRange: 991 | return "CatSerRange (4128)" 992 | case RecTypeAxisLine: 993 | return "AxisLine (4129)" 994 | case RecTypeCrtLink: 995 | return "CrtLink (4130)" 996 | case RecTypeDefaultText: 997 | return "DefaultText (4132)" 998 | case RecTypeText: 999 | return "Text (4133)" 1000 | case RecTypeFontX: 1001 | return "FontX (4134)" 1002 | case RecTypeObjectLink: 1003 | return "ObjectLink (4135)" 1004 | case RecTypeFrame: 1005 | return "Frame (4146)" 1006 | case RecTypeBegin: 1007 | return "Begin (4147)" 1008 | case RecTypeEnd: 1009 | return "End (4148)" 1010 | case RecTypePlotArea: 1011 | return "PlotArea (4149)" 1012 | case RecTypeChart3d: 1013 | return "Chart3d (4154)" 1014 | case RecTypePicF: 1015 | return "PicF (4156)" 1016 | case RecTypeDropBar: 1017 | return "DropBar (4157)" 1018 | case RecTypeRadar: 1019 | return "Radar (4158)" 1020 | case RecTypeSurf: 1021 | return "Surf (4159)" 1022 | case RecTypeRadarArea: 1023 | return "RadarArea (4160)" 1024 | case RecTypeAxisParent: 1025 | return "AxisParent (4161)" 1026 | case RecTypeLegendException: 1027 | return "LegendException (4163)" 1028 | case RecTypeShtProps: 1029 | return "ShtProps (4164)" 1030 | case RecTypeSerToCrt: 1031 | return "SerToCrt (4165)" 1032 | 
case RecTypeAxesUsed: 1033 | return "AxesUsed (4166)" 1034 | case RecTypeSBaseRef: 1035 | return "SBaseRef (4168)" 1036 | case RecTypeSerParent: 1037 | return "SerParent (4170)" 1038 | case RecTypeSerAuxTrend: 1039 | return "SerAuxTrend (4171)" 1040 | case RecTypeIFmtRecord: 1041 | return "IFmtRecord (4174)" 1042 | case RecTypePos: 1043 | return "Pos (4175)" 1044 | case RecTypeAlRuns: 1045 | return "AlRuns (4176)" 1046 | case RecTypeBRAI: 1047 | return "BRAI (4177)" 1048 | case RecTypeSerAuxErrBar: 1049 | return "SerAuxErrBar (4187)" 1050 | case RecTypeClrtClient: 1051 | return "ClrtClient (4188)" 1052 | case RecTypeSerFmt: 1053 | return "SerFmt (4189)" 1054 | case RecTypeChart3DBarShape: 1055 | return "Chart3DBarShape (4191)" 1056 | case RecTypeFbi: 1057 | return "Fbi (4192)" 1058 | case RecTypeBopPop: 1059 | return "BopPop (4193)" 1060 | case RecTypeAxcExt: 1061 | return "AxcExt (4194)" 1062 | case RecTypeDat: 1063 | return "Dat (4195)" 1064 | case RecTypePlotGrowth: 1065 | return "PlotGrowth (4196)" 1066 | case RecTypeSIIndex: 1067 | return "SIIndex (4197)" 1068 | case RecTypeGelFrame: 1069 | return "GelFrame (4198)" 1070 | case RecTypeBopPopCustom: 1071 | return "BopPopCustom (4199)" 1072 | case RecTypeFbi2: 1073 | return "Fbi2 (4200)" 1074 | } 1075 | return fmt.Sprintf("unknown (%d 0x%x)", uint16(r), uint16(r)) 1076 | } 1077 | --------------------------------------------------------------------------------