├── .credo.exs ├── .formatter.exs ├── .github └── workflows │ └── elixir.yml ├── .gitignore ├── CHANGELOG.md ├── README.md ├── assets ├── logo.png └── logo_hires.png ├── benchmark ├── files │ └── .gitkeep ├── init.exs └── run.exs ├── lib ├── xlsx_reader.ex └── xlsx_reader │ ├── array.ex │ ├── cell.ex │ ├── cell_reference.ex │ ├── conversion.ex │ ├── number.ex │ ├── package.ex │ ├── package_loader.ex │ ├── parsers │ ├── relationships_parser.ex │ ├── shared_strings_parser.ex │ ├── styles_parser.ex │ ├── utils.ex │ ├── workbook_parser.ex │ └── worksheet_parser.ex │ ├── sheet.ex │ ├── styles.ex │ ├── workbook.ex │ └── zip_archive.ex ├── mix.exs ├── mix.lock └── test ├── compatibility_test.exs ├── fixtures ├── cells_missing_attributes.xlsx ├── custom_dates.xlsx ├── google_spreadsheet.xlsx ├── has_formulas.xlsx ├── hidden_sheets.xlsx ├── merged.xlsx ├── not_a_zip.zip ├── omitted_row.xlsx ├── package │ ├── [Content_Types].xml │ ├── _rels │ │ └── .rels │ ├── docProps │ │ ├── app.xml │ │ └── core.xml │ └── xl │ │ ├── _rels │ │ └── workbook.xml.rels │ │ ├── sharedStrings.xml │ │ ├── styles.xml │ │ ├── theme │ │ └── theme1.xml │ │ ├── workbook.xml │ │ └── worksheets │ │ ├── _rels │ │ └── sheet3.xml.rels │ │ ├── sheet1.xml │ │ ├── sheet2.xml │ │ ├── sheet3.xml │ │ ├── sheet4.xml │ │ └── sheet5.xml ├── test.xlsx ├── test.zip └── xml │ ├── sharedStringsWithRichText.xml │ ├── sharedStringsWithXmlSpacePreserve.xml │ ├── worksheetWithInlineStr.xml │ └── worksheetWithSharedFormulas.xml ├── test_helper.exs ├── xlsx_reader ├── cell_reference_test.exs ├── conversion_test.exs ├── custom_dates_test.exs ├── package_test.exs ├── parsers │ ├── relationships_parser_test.exs │ ├── shared_strings_parser_test.exs │ ├── styles_parser_test.exs │ ├── utils_test.exs │ ├── workbook_parser_test.exs │ └── worksheet_parser_test.exs ├── styles_test.exs └── zip_archive_test.exs └── xlsx_reader_test.exs /.credo.exs: -------------------------------------------------------------------------------- 1 | # 
This file contains the configuration for Credo and you are probably reading 2 | # this after creating it with `mix credo.gen.config`. 3 | # 4 | # If you find anything wrong or unclear in this file, please report an 5 | # issue on GitHub: https://github.com/rrrene/credo/issues 6 | # 7 | %{ 8 | # 9 | # You can have as many configs as you like in the `configs:` field. 10 | configs: [ 11 | %{ 12 | # 13 | # Run any exec using `mix credo -C <name>`. If no exec name is given 14 | # "default" is used. 15 | # 16 | name: "default", 17 | # 18 | # These are the files included in the analysis: 19 | files: %{ 20 | # 21 | # You can give explicit globs or simply directories. 22 | # In the latter case `**/*.{ex,exs}` will be used. 23 | # 24 | included: ["lib/", "src/", "test/", "web/", "apps/"], 25 | excluded: [~r"/_build/", ~r"/deps/", ~r"/node_modules/"] 26 | }, 27 | # 28 | # Load and configure plugins here: 29 | # 30 | plugins: [], 31 | # 32 | # If you create your own checks, you must specify the source files for 33 | # them here, so they can be loaded by Credo before running the analysis. 34 | # 35 | requires: [], 36 | # 37 | # If you want to enforce a style guide and need a more traditional linting 38 | # experience, you can change `strict` to `true` below: 39 | # 40 | strict: false, 41 | # 42 | # If you want to use uncolored output by default, you can change `color` 43 | # to `false` below: 44 | # 45 | color: true, 46 | # 47 | # You can customize the parameters of any check by adding a second element 48 | # to the tuple. 
49 | # 50 | # To disable a check put `false` as second element: 51 | # 52 | # {Credo.Check.Design.DuplicatedCode, false} 53 | # 54 | checks: [ 55 | # 56 | ## Consistency Checks 57 | # 58 | {Credo.Check.Consistency.ExceptionNames, []}, 59 | {Credo.Check.Consistency.LineEndings, []}, 60 | {Credo.Check.Consistency.ParameterPatternMatching, []}, 61 | {Credo.Check.Consistency.SpaceAroundOperators, []}, 62 | {Credo.Check.Consistency.SpaceInParentheses, []}, 63 | {Credo.Check.Consistency.TabsOrSpaces, []}, 64 | 65 | # 66 | ## Design Checks 67 | # 68 | # You can customize the priority of any check 69 | # Priority values are: `low, normal, high, higher` 70 | # 71 | {Credo.Check.Design.AliasUsage, 72 | [priority: :low, if_nested_deeper_than: 2, if_called_more_often_than: 0]}, 73 | # You can also customize the exit_status of each check. 74 | # If you don't want TODO comments to cause `mix credo` to fail, just 75 | # set this value to 0 (zero). 76 | # 77 | {Credo.Check.Design.TagTODO, [exit_status: 2]}, 78 | {Credo.Check.Design.TagFIXME, []}, 79 | 80 | # 81 | ## Readability Checks 82 | # 83 | {Credo.Check.Readability.AliasOrder, []}, 84 | {Credo.Check.Readability.FunctionNames, []}, 85 | {Credo.Check.Readability.LargeNumbers, []}, 86 | {Credo.Check.Readability.MaxLineLength, [priority: :low, max_length: 120]}, 87 | {Credo.Check.Readability.ModuleAttributeNames, []}, 88 | {Credo.Check.Readability.ModuleDoc, []}, 89 | {Credo.Check.Readability.ModuleNames, []}, 90 | {Credo.Check.Readability.ParenthesesInCondition, []}, 91 | {Credo.Check.Readability.ParenthesesOnZeroArityDefs, []}, 92 | {Credo.Check.Readability.PredicateFunctionNames, []}, 93 | {Credo.Check.Readability.PreferImplicitTry, []}, 94 | {Credo.Check.Readability.RedundantBlankLines, []}, 95 | {Credo.Check.Readability.Semicolons, []}, 96 | {Credo.Check.Readability.SpaceAfterCommas, []}, 97 | {Credo.Check.Readability.StringSigils, []}, 98 | {Credo.Check.Readability.TrailingBlankLine, []}, 99 | 
{Credo.Check.Readability.TrailingWhiteSpace, []}, 100 | # TODO: enable by default in Credo 1.1 101 | {Credo.Check.Readability.UnnecessaryAliasExpansion, false}, 102 | {Credo.Check.Readability.VariableNames, []}, 103 | 104 | # 105 | ## Refactoring Opportunities 106 | # 107 | {Credo.Check.Refactor.CondStatements, []}, 108 | {Credo.Check.Refactor.CyclomaticComplexity, []}, 109 | {Credo.Check.Refactor.FunctionArity, []}, 110 | {Credo.Check.Refactor.LongQuoteBlocks, []}, 111 | {Credo.Check.Refactor.MapInto, false}, 112 | {Credo.Check.Refactor.MatchInCondition, []}, 113 | {Credo.Check.Refactor.NegatedConditionsInUnless, []}, 114 | {Credo.Check.Refactor.NegatedConditionsWithElse, []}, 115 | {Credo.Check.Refactor.Nesting, []}, 116 | {Credo.Check.Refactor.UnlessWithElse, []}, 117 | {Credo.Check.Refactor.WithClauses, []}, 118 | 119 | # 120 | ## Warnings 121 | # 122 | {Credo.Check.Warning.BoolOperationOnSameValues, []}, 123 | {Credo.Check.Warning.ExpensiveEmptyEnumCheck, []}, 124 | {Credo.Check.Warning.IExPry, []}, 125 | {Credo.Check.Warning.IoInspect, []}, 126 | {Credo.Check.Warning.LazyLogging, false}, 127 | {Credo.Check.Warning.OperationOnSameValues, []}, 128 | {Credo.Check.Warning.OperationWithConstantResult, []}, 129 | {Credo.Check.Warning.RaiseInsideRescue, []}, 130 | {Credo.Check.Warning.UnusedEnumOperation, []}, 131 | {Credo.Check.Warning.UnusedFileOperation, []}, 132 | {Credo.Check.Warning.UnusedKeywordOperation, []}, 133 | {Credo.Check.Warning.UnusedListOperation, []}, 134 | {Credo.Check.Warning.UnusedPathOperation, []}, 135 | {Credo.Check.Warning.UnusedRegexOperation, []}, 136 | {Credo.Check.Warning.UnusedStringOperation, []}, 137 | {Credo.Check.Warning.UnusedTupleOperation, []}, 138 | 139 | # 140 | # Controversial and experimental checks (opt-in, just replace `false` with `[]`) 141 | # 142 | {Credo.Check.Consistency.MultiAliasImportRequireUse, false}, 143 | {Credo.Check.Consistency.UnusedVariableNames, false}, 144 | {Credo.Check.Design.DuplicatedCode, false}, 145 
| {Credo.Check.Readability.AliasAs, false}, 146 | {Credo.Check.Readability.MultiAlias, false}, 147 | {Credo.Check.Readability.Specs, false}, 148 | {Credo.Check.Readability.SinglePipe, false}, 149 | {Credo.Check.Refactor.ABCSize, false}, 150 | {Credo.Check.Refactor.AppendSingleItem, false}, 151 | {Credo.Check.Refactor.DoubleBooleanNegation, false}, 152 | {Credo.Check.Refactor.ModuleDependencies, false}, 153 | {Credo.Check.Refactor.PipeChainStart, false}, 154 | {Credo.Check.Refactor.VariableRebinding, false}, 155 | {Credo.Check.Warning.MapGetUnsafePass, false}, 156 | {Credo.Check.Warning.UnsafeToAtom, false} 157 | 158 | # 159 | # Custom checks can be created using `mix credo.gen.check`. 160 | # 161 | ] 162 | } 163 | ] 164 | } 165 | -------------------------------------------------------------------------------- /.formatter.exs: -------------------------------------------------------------------------------- 1 | # Used by "mix format" 2 | [ 3 | inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] 4 | ] 5 | -------------------------------------------------------------------------------- /.github/workflows/elixir.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: push 4 | 5 | jobs: 6 | unit-test: 7 | runs-on: ubuntu-latest 8 | name: OTP ${{matrix.otp}} / Elixir ${{matrix.elixir}} 9 | env: 10 | MIX_ENV: test 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | include: 15 | - elixir: 1.15.6 16 | otp: 25.3.2.6 17 | - elixir: 1.15.6 18 | otp: 24.3.4.13 19 | steps: 20 | - name: Checkout 21 | uses: actions/checkout@v4 22 | - name: Install Elixir and Erlang 23 | uses: erlef/setup-beam@v1 24 | with: 25 | elixir-version: ${{ matrix.elixir }} 26 | otp-version: ${{ matrix.otp }} 27 | - name: Restore deps and _build cache 28 | uses: actions/cache@v3 29 | with: 30 | path: | 31 | deps 32 | _build 33 | key: ${{ runner.os }}-${{ matrix.elixir }}-${{ matrix.otp }}-${{ hashFiles('**/mix.lock') }} 34 | 
restore-keys: | 35 | ${{ runner.os }}-${{ matrix.elixir }}-${{ matrix.otp }}- 36 | - name: Install dependencies 37 | run: mix deps.get 38 | - name: Compile deps 39 | run: mix deps.compile 40 | - name: Run tests 41 | run: mix test 42 | - name: Check format 43 | run: mix format --check-formatted 44 | - name: Check quality 45 | run: mix credo --all --strict || true 46 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build/ 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover/ 6 | 7 | # The directory Mix downloads your dependencies sources to. 8 | /deps/ 9 | 10 | # Where third-party dependencies like ExDoc output generated docs. 11 | /doc/ 12 | 13 | # Ignore .fetch files in case you like to edit your project deps locally. 14 | /.fetch 15 | 16 | # If the VM crashes, it generates a dump, let's ignore it too. 17 | erl_crash.dump 18 | 19 | # Also ignore archive artifacts (built via "mix archive.build"). 20 | *.ez 21 | 22 | # Ignore package tarball (built via "mix hex.build"). 
23 | xlsx_reader-*.tar 24 | 25 | .DS_Store 26 | 27 | # Ignore generated benchmark files 28 | /benchmark/files/*.xlsx 29 | /benchmark/output/*.benchee 30 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [0.8.8] - 2025-03-01 4 | 5 | - Relax `:saxy` dependency 6 | 7 | ## [0.8.7] - 2024-07-14 8 | 9 | - Fix issue with some empty cell elements being returned as `:expect_formula`, they are now returned as empty strings 10 | 11 | ## [0.8.6] - 2024-06-28 12 | 13 | - Fix handling of cell type/style when some cell elements were missing attributes 14 | - Handle date and datetime values encoded as numeric cell types 15 | 16 | ## [0.8.5] - 2024-06-02 17 | 18 | - Handle files without shared strings or styles relationships. 19 | 20 | ## [0.8.4] - 2024-04-28 21 | 22 | - Upgrade ex_doc 23 | - Fix issue with some empty cell elements being returned as `:expect_chars`, they are now returned as empty strings. 
24 | 25 | ## [0.8.3] - 2024-04-19 26 | 27 | - Improve handling of UTF-8/16/32 encoding 28 | 29 | ## [0.8.2] - 2024-04-06 30 | 31 | - Add `exclude_hidden_sheets?` option 32 | - Return `#ERROR` value instead of crashing in case of cell conversion 33 | error 34 | 35 | ## [0.8.1] - 2024-01-11 36 | 37 | - Add support for shared formulas 38 | 39 | ## [0.8.0] - 2023-12-11 40 | 41 | - Add `cell_data_format` option to return data as `Cell` structs instead of values 42 | 43 | ## [0.7.0] - 2023-10-15 44 | 45 | - Improve ZIP file error handling 46 | - Update Saxy XML parser 47 | - Improve UTF-16 support 48 | 49 | ## [0.6.0] - 2022-10-30 50 | 51 | - Update Saxy XML parser 52 | 53 | ## [0.5.0] - 2022-06-12 54 | 55 | - Require Elixir 1.10 to fix publishing of documentation 56 | 57 | ## [0.4.3] - 2021-02-08 58 | 59 | - Improve compatibility with XLSX writers (Excel for Mac, …) which completely omit empty rows in worksheets 60 | 61 | ## [0.4.2] - 2021-02-09 62 | 63 | - Add `skip_row?` callback 64 | 65 | ## [0.4.1] - 2020-10-15 66 | 67 | - Add support for `decimal ~> 2.0` 68 | 69 | ## [0.4.0] - 2020-06-23 70 | 71 | - Add `:supported_custom_formats` option to `XlsxReader.open/2` 72 | - Support ISO 8601 and US date/time custom format by default 73 | 74 | ## [0.3.0] - 2020-05-07 75 | 76 | - Add `:only` and `:except` options to `XlsxReader.sheets/2` and `XlsxReader.async_sheets/3` 77 | 78 | ## [0.2.0] - 2020-04-27 79 | 80 | - Add `XlsxReader.async_sheets/3` 81 | 82 | ## [0.1.4] - 2020-04-24 83 | 84 | - Speed-up shared string and styles lookups 85 | 86 | ## [0.1.3] - 2020-02-26 87 | 88 | - Improve compatibility with XLSX writers (Excel, Elixslx, …) which completely omit empty cells in worksheets 89 | 90 | ## [0.1.2] - 2019-12-30 91 | 92 | - Add `String` number type to disable numeric conversions 93 | 94 | ## [0.1.1] - 2019-12-20 95 | 96 | - Improve handling of whitespace in shared strings 97 | 98 | ## [0.1.0] - 2019-12-16 99 | 100 | - Initial release 101 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![XlsxReader logo](https://raw.githubusercontent.com/xavier/xlsx_reader/master/assets/logo.png) 2 | 3 | # XlsxReader 4 | 5 | ![Build status](https://github.com/xavier/xlsx_reader/workflows/CI/badge.svg) 6 | 7 | An XLSX reader in Elixir. 8 | 9 | Features: 10 | 11 | - Accepts XLSX data located on the file system or in memory 12 | - Automatic type conversions (numbers, date & times, booleans) 13 | - Optional support for arbitrary precision [decimal](https://github.com/ericmj/decimal) numbers 14 | - Straightforward architecture: no ETS tables, no race-conditions, no manual resource management 15 | 16 | The docs can be found at [https://hexdocs.pm/xlsx_reader](https://hexdocs.pm/xlsx_reader). 17 | 18 | ## Installation 19 | 20 | Add `xlsx_reader` as a dependency in your `mix.exs`: 21 | 22 | ```elixir 23 | def deps do 24 | [ 25 | {:xlsx_reader, "~> 0.8.0"} 26 | ] 27 | end 28 | ``` 29 | 30 | Run `mix deps.get` in your shell to fetch and compile XlsxReader. 31 | 32 | ## Examples 33 | 34 | ### Loading from the file system 35 | 36 | ```elixir 37 | 38 | {:ok, package} = XlsxReader.open("test.xlsx") 39 | 40 | XlsxReader.sheet_names(package) 41 | # ["Sheet 1", "Sheet 2", "Sheet 3"] 42 | 43 | {:ok, rows} = XlsxReader.sheet(package, "Sheet 1") 44 | # [ 45 | # ["Date", "Temperature"], 46 | # [~D[2019-11-01], 8.4], 47 | # [~D[2019-11-02], 7.5], 48 | # ... 49 | # ] 50 | ``` 51 | 52 | ### Loading from memory 53 | 54 | ```elixir 55 | blob = File.read!("test.xlsx") 56 | 57 | {:ok, package} = XlsxReader.open(blob, source: :binary) 58 | ``` 59 | 60 | ### Loading all sheets at once 61 | 62 | ```elixir 63 | {:ok, sheets} = XlsxReader.sheets(package) 64 | # [ 65 | # {"Sheet 1", [["Date", "Temperature"], ...]}, 66 | # {"Sheet 2", [...]}, 67 | # ... 
68 | # ] 69 | ``` 70 | 71 | ### Loading sheets selectively 72 | 73 | ```elixir 74 | {:ok, sheets} = XlsxReader.sheets(package, only: ["Parameters", ~r/Sheet \d+/], except: ["Sheet 2"]) 75 | # [ 76 | # {"Parameters", [...]}, 77 | # {"Sheet 1", [...]}, 78 | # {"Sheet 3", [...]}, 79 | # {"Sheet 4", [...]}, 80 | # ... 81 | # ] 82 | ``` 83 | 84 | ### Loading all sheets at once concurrently 85 | 86 | ```elixir 87 | {:ok, sheets} = XlsxReader.async_sheets(package) 88 | # [ 89 | # {"Sheet 1", [["Date", "Temperature"], ...]}, 90 | # {"Sheet 2", [...]}, 91 | # ... 92 | # ] 93 | ``` 94 | 95 | ### Using arbitrary precision numbers 96 | 97 | ```elixir 98 | {:ok, rows} = XlsxReader.sheet(package, "Sheet 1", number_type: Decimal) 99 | # [ 100 | # ["Date", "Temperature"], 101 | # [~D[2019-11-01], %Decimal{coef: 84, exp: -1, sign: 1}], 102 | # [~D[2019-11-02], %Decimal{coef: 75, exp: -1, sign: 1}], 103 | # ... 104 | # ] 105 | ``` 106 | 107 | ### Access cell formulas 108 | 109 | ```elixir 110 | {:ok, rows} = XlsxReader.sheet(package, "Sheet 1", cell_data_format: :cell) 111 | # [ 112 | # [%Cell{value: 1234.0, formula: "SUM(B1, B10)", ref: "A1"}, ...], 113 | # ... 114 | # ] 115 | ``` 116 | 117 | ## Development 118 | 119 | ### Benchmarking 120 | 121 | 1. `mix run benchmark/init.exs` to create the benchmarking dataset 122 | 2. 
`mix run benchmark/run.exs` to run the [Benchee](https://github.com/bencheeorg/benchee) suite 123 | 124 | ## Contributors 125 | 126 | In order of appearance: 127 | 128 | - Xavier Defrang ([xavier](https://github.com/xavier)) 129 | - Darragh Enright ([darraghenright](https://github.com/darraghenright)) 130 | - Patryk Woziński ([patrykwozinski](https://github.com/patrykwozinski)) 131 | - Evaldo Bratti ([evaldobratti](https://github.com/evaldobratti)) 132 | - Zach Liss ([ZachLiss](https://github.com/ZachLiss)) 133 | - [Paranojik](https://github.com/paranojik) 134 | - Juan Barrios ([03juan](https://github.com/03juan)) 135 | - Dylan Harness ([dharness](https://github.com/dharness)) 136 | - Victor Rodrigues ([rodrigues](https://github.com/rodrigues)) 137 | - Jose Valim ([josevalim](https://github.com/josevalim)) 138 | - Vinicius Moraes ([ding-an-sich](https://github.com/ding-an-sich)) 139 | - [Gladear](https://github.com/gladear) 140 | 141 | ## License 142 | 143 | Copyright 2020 Xavier Defrang 144 | 145 | Licensed under the Apache License, Version 2.0 (the "License"); 146 | you may not use this file except in compliance with the License. 147 | You may obtain a copy of the License at 148 | 149 | http://www.apache.org/licenses/LICENSE-2.0 150 | 151 | Unless required by applicable law or agreed to in writing, software 152 | distributed under the License is distributed on an "AS IS" BASIS, 153 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 154 | See the License for the specific language governing permissions and 155 | limitations under the License. 
defmodule Generator do
  @moduledoc """
  Builds `Elixlsx` workbooks filled with random strings.

  Used by the benchmark setup script to create the XLSX files the
  benchmark suite reads.
  """

  # Builds a workbook made of `sheets` sheets, each holding `rows` rows of
  # `cols` random strings that are `length` bytes long.
  def generate_workbook(sheets, rows, cols, length) do
    generated_sheets = for index <- 1..sheets, do: generate_sheet(index, rows, cols, length)

    %Elixlsx.Workbook{sheets: generated_sheets}
  end

  # Builds the sheet named "Sheet <index>" with `rows` x `cols` random cells.
  def generate_sheet(index, rows, cols, length) do
    random_row = fn -> for _col <- 1..cols, do: generate_string(length) end

    %Elixlsx.Sheet{
      name: "Sheet #{index}",
      rows: Enum.map(1..rows, fn _row -> random_row.() end)
    }
  end

  # Returns a random Base32 string truncated to `length` bytes. Base32
  # output is always at least as long as its input, so the cut is safe.
  def generate_string(length) do
    encoded = Base.encode32(:crypto.strong_rand_bytes(length))

    binary_part(encoded, 0, length)
  end
end
defmodule Benchmark do
  @moduledoc """
  Benchee suite comparing `XlsxReader.sheets/2` with the ordered and
  unordered variants of `XlsxReader.async_sheets/3`.
  """

  # Lists the XLSX files of the benchmarking dataset.
  def files do
    Path.wildcard("benchmark/files/*.xlsx")
  end

  # Runs every scenario against every file. Each file is opened once per
  # scenario (in `before_scenario`), so only sheet loading is measured.
  def run(files) do
    inputs = Enum.map(files, fn file -> {Path.basename(file), file} end)

    Benchee.run(
      scenarios(60_000),
      inputs: inputs,
      before_scenario: &open_package/1,
      time: 10,
      memory_time: 2,
      print: [configuration: false],
      save: [path: "benchmark/output/save.benchee", tag: "previous"],
      load: "benchmark/output/save.benchee"
    )
  end

  # Benchmarked functions, keyed by the label shown in the report.
  defp scenarios(timeout) do
    %{
      "XlsxReader.sheets/2" => fn package ->
        XlsxReader.sheets(package)
      end,
      "XlsxReader.async_sheets/3 - ordered" => fn package ->
        XlsxReader.async_sheets(package, [], timeout: timeout)
      end,
      "XlsxReader.async_sheets/3 - unordered" => fn package ->
        XlsxReader.async_sheets(package, [], ordered: false, timeout: timeout)
      end
    }
  end

  # Opens the workbook handed to the scenarios; crashes if it cannot be opened.
  defp open_package(file) do
    IO.puts("Opening #{file}...")
    {:ok, package} = XlsxReader.open(file)
    package
  end
end
defmodule XlsxReader do
  @moduledoc """

  Opens XLSX workbooks and reads their worksheets.

  ## Example

  ```elixir
  {:ok, package} = XlsxReader.open("test.xlsx")

  XlsxReader.sheet_names(package)
  # ["Sheet 1", "Sheet 2", "Sheet 3"]

  {:ok, rows} = XlsxReader.sheet(package, "Sheet 1")
  # [
  #   ["Date", "Temperature"],
  #   [~D[2019-11-01], 8.4],
  #   [~D[2019-11-02], 7.5],
  #   ...
  # ]
  ```

  ## Sheet contents

  Sheets are loaded on-demand by `sheet/3` and `sheets/2`.

  The contents of a sheet are returned as a list of lists:

  ```elixir
  [
    ["A1", "B1", "C1" | _],
    ["A2", "B2", "C2" | _],
    ["A3", "B3", "C3" | _],
    | _
  ]
  ```

  The behavior of the sheet parser can be customized for each
  individual sheet, see `sheet/3`.

  ## Cell types

  This library takes a best effort approach for determining cell types.
  In order of priority, the actual type of an XLSX cell value is determined using:

  1. basic cell properties (e.g. boolean)
  2. predefined known styles (e.g. default money/date formats)
  3. introspection of the [custom format string](https://support.microsoft.com/en-us/office/number-format-codes-5026bbd6-04bc-48cd-bf33-80f18b4eae68) associated with the cell

  ### Custom formats supported by default

  * percentages
  * ISO 8601 date/time (y-m-d)
  * US date/time (m/d/y)
  * European date/time (d/m/y)

  ### Additional custom formats support

  If the spreadsheet you need to process contains some unusual cell formatting, you
  may provide hints to map format strings to a known cell type.

  The hints are given as a list of `{matcher, type}` tuples. The matcher is either a
  string or regex to match against the custom format string. The supported types are:

  * `:string`
  * `:number`
  * `:percentage`
  * `:date`
  * `:time`
  * `:date_time`
  * `:unsupported` (used for explicitly unsupported styles and formats)

  #### Example

  ```elixir
  [
    {"mmm yy", :date},
    {~r/mmm? yy hh:mm/, :date_time},
    {"[$CHF]0.00", :number}
  ]
  ```

  To find out what custom formats are in use in the workbook, you can inspect `package.workbook.custom_formats`:

  ```elixir
  # num_fmt_id => format string
  %{
    "0" => "General",
    "59" => "dd/mm/yyyy",
    "60" => "dd/mm/yyyy hh:mm",
    "61" => "hh:mm",
    "62" => "0.0%",
    "63" => "[$CHF]0.00"
  }
  ```

  ### Conversion errors

  Cell data which could not be converted using the detected format is returned as the `"#ERROR"` placeholder.

  """

  alias XlsxReader.{PackageLoader, ZipArchive}

  @typedoc """
  Source for the XLSX file: file system (`:path`) or in-memory (`:binary`)
  """
  @type source :: :path | :binary

  @typedoc """
  Options accepted by `open/2`
  """
  @type open_option ::
          {:exclude_hidden_sheets?, boolean()}
          | {:source, source()}
          | {:supported_custom_formats, XlsxReader.Styles.supported_custom_formats()}

  @typedoc """
  List of cell values
  """
  @type row :: list(any())

  @typedoc """
  List of rows
  """
  @type rows :: list(row())

  @typedoc """
  Sheet name
  """
  @type sheet_name :: String.t()

  @typedoc """
  Error tuple with message describing the cause of the error
  """
  @type error :: {:error, String.t()}

  @doc """

  Opens an XLSX file located on the file system (default) or from memory.

  ## Examples

  ### Opening XLSX file on the file system

  ```elixir
  {:ok, package} = XlsxReader.open("test.xlsx")
  ```

  ### Opening XLSX file from memory

  ```elixir
  blob = File.read!("test.xlsx")

  {:ok, package} = XlsxReader.open(blob, source: :binary)
  ```

  ## Options

    * `source`: `:path` (on the file system, default) or `:binary` (in memory)
    * `supported_custom_formats`: a list of `{regex | string, type}` tuples (see "Additional custom formats support")
    * `exclude_hidden_sheets?`: Whether to exclude hidden sheets in the workbook

  """
  @spec open(String.t() | binary(), [open_option()]) ::
          {:ok, XlsxReader.Package.t()} | error()
  def open(file, options \\ []) do
    file
    |> ZipArchive.handle(Keyword.get(options, :source, :path))
    |> PackageLoader.open(
      Keyword.take(options, [:supported_custom_formats, :exclude_hidden_sheets?])
    )
  end

  @doc """

  Lists the names of the sheets in the package's workbook

  """
  @spec sheet_names(XlsxReader.Package.t()) :: [sheet_name()]
  def sheet_names(package) do
    for %{name: name} <- package.workbook.sheets, do: name
  end

  @doc """

  Loads the sheet with the given name (see `sheet_names/1`)

  ## Options

    * `type_conversion` - boolean (default: `true`)
    * `blank_value` - placeholder value for empty cells (default: `""`)
    * `empty_rows` - include empty rows (default: `true`)
    * `number_type` - type used for numeric conversion: `Integer`, `Decimal`, `Float` or `String` (default: `Float`)
    * `skip_row?`: function callback that determines if a row should be skipped.
      Takes precedence over `blank_value` and `empty_rows`.
      Defaults to `nil` (keeping the behaviour of `blank_value` and `empty_rows`).
    * `cell_data_format`: Controls the format of the cell data. Can be `:value` (default, returns the cell value only) or `:cell` (returns instances of `XlsxReader.Cell`).

  The `Decimal` type requires the [decimal](https://github.com/ericmj/decimal) library.

  ## Examples

  ### Skipping rows

  When using the `skip_row?` callback, rows are ignored in the parser which is more memory efficient.

  ```elixir
  # Skip all rows for which all the values are either blank or "-"
  XlsxReader.sheet(package, "Sheet1", skip_row?: fn row ->
    Enum.all?(row, & String.trim(&1) in ["", "-"])
  end)

  # Skip all rows for which the first column contains the text "disabled"
  XlsxReader.sheet(package, "Sheet1", skip_row?: fn [column | _] ->
    column == "disabled"
  end)
  ```

  """
  @spec sheet(XlsxReader.Package.t(), sheet_name(), Keyword.t()) :: {:ok, rows()} | error()
  def sheet(package, sheet_name, options \\ []) do
    PackageLoader.load_sheet_by_name(package, sheet_name, options)
  end

  @doc """

  Loads all the sheets in the workbook.

  On success, returns `{:ok, [{sheet_name, rows}, ...]}`.

  Loading stops at the first sheet that fails and that error is returned as is.

  ## Filtering options

    * `only` - include the sheets whose name matches the filter
    * `except` - exclude the sheets whose name matches the filter

  Sheets can be filtered by name using:

    * a string (e.g. `"Exact Match"`)
    * a regex (e.g. `~r/Sheet \\d+/`)
    * a list of string and/or regexes (e.g. `["Parameters", ~r/Sheet [12]/]`)

  ## Sheet options

  See `sheet/3`.

  """
  @spec sheets(XlsxReader.Package.t(), Keyword.t()) ::
          {:ok, list({sheet_name(), rows()})} | error()
  def sheets(package, options \\ []) do
    package.workbook.sheets
    |> filter_sheets_by_name(
      sheet_filter_option(options, :only),
      sheet_filter_option(options, :except)
    )
    |> Enum.reduce_while([], fn sheet, acc ->
      case PackageLoader.load_sheet_by_rid(package, sheet.rid, options) do
        {:ok, rows} ->
          {:cont, [{sheet.name, rows} | acc]}

        error ->
          {:halt, error}
      end
    end)
    |> case do
      # Entries were prepended while reducing, restore the workbook order.
      sheets when is_list(sheets) ->
        {:ok, Enum.reverse(sheets)}

      error ->
        error
    end
  end

  @doc """

  Loads all the sheets in the workbook concurrently.

  On success, returns `{:ok, [{sheet_name, rows}, ...]}`.

  If a sheet fails to load, the error returned by the loader is returned as is
  (same contract as `sheets/2`). If a task exceeds the timeout,
  `{:error, "timeout exceeded"}` is returned.

  When processing files with multiple sheets, `async_sheets/3` is ~3x faster than `sheets/2`
  but it comes with a caveat. `async_sheets/3` uses `Task.async_stream/3` under the hood and thus
  runs each concurrent task with a timeout. If you expect your dataset to be of a significant size,
  you may want to increase it from the default 10000ms (see "Concurrency options" below).

  If the order in which the sheets are returned is not relevant for your application, you can
  pass `ordered: false` (see "Concurrency options" below) for a modest speed gain.

  ## Filtering options

  See `sheets/2`.

  ## Sheet options

  See `sheet/3`.

  ## Concurrency options

    * `max_concurrency` - maximum number of tasks to run at the same time (default: `System.schedulers_online/0`)
    * `ordered` - maintain order consistent with `sheet_names/1` (default: `true`)
    * `timeout` - maximum duration in milliseconds to process a sheet (default: `10_000`)

  """
  @spec async_sheets(XlsxReader.Package.t(), Keyword.t(), Keyword.t()) ::
          {:ok, list({sheet_name(), rows()})} | {:error, term()}
  def async_sheets(package, sheet_options \\ [], task_options \\ []) do
    max_concurrency = Keyword.get(task_options, :max_concurrency, System.schedulers_online())
    ordered = Keyword.get(task_options, :ordered, true)
    timeout = Keyword.get(task_options, :timeout, 10_000)

    package.workbook.sheets
    |> filter_sheets_by_name(
      sheet_filter_option(sheet_options, :only),
      sheet_filter_option(sheet_options, :except)
    )
    |> Task.async_stream(
      fn sheet ->
        case PackageLoader.load_sheet_by_rid(package, sheet.rid, sheet_options) do
          {:ok, rows} ->
            {:ok, {sheet.name, rows}}

          error ->
            error
        end
      end,
      max_concurrency: max_concurrency,
      ordered: ordered,
      timeout: timeout,
      on_timeout: :kill_task
    )
    |> Enum.reduce_while({:ok, []}, fn
      {:ok, {:ok, entry}}, {:ok, acc} ->
        {:cont, {:ok, [entry | acc]}}

      # The task completed but the loader failed: hand its error back
      # untouched instead of wrapping it in a second `{:error, _}` tuple.
      {:ok, error}, _acc ->
        {:halt, error}

      {:exit, :timeout}, _acc ->
        {:halt, {:error, "timeout exceeded"}}

      {:exit, reason}, _acc ->
        {:halt, {:error, reason}}
    end)
    |> case do
      {:ok, list} ->
        # Entries were prepended while reducing; only restore the stream
        # order when the caller asked for ordered results.
        if ordered,
          do: {:ok, Enum.reverse(list)},
          else: {:ok, list}

      error ->
        error
    end
  end

  ## Sheet filter

  # Reads the `:only`/`:except` filter from the options, always as a list.
  @doc false
  def sheet_filter_option(options, key),
    do: options |> Keyword.get(key, []) |> List.wrap()

  defp filter_sheets_by_name(sheets, [], []), do: sheets

  defp filter_sheets_by_name(sheets, only, except) do
    Enum.filter(sheets, fn %{name: name} ->
      filter_only?(name, only) && !filter_except?(name, except)
    end)
  end

  # An empty `only` filter keeps every sheet.
  defp filter_only?(_name, []), do: true
  defp filter_only?(name, filters), do: Enum.any?(filters, &filter_match?(name, &1))

  # An empty `except` filter rejects no sheet.
  defp filter_except?(_name, []), do: false
  defp filter_except?(name, filters), do: Enum.any?(filters, &filter_match?(name, &1))

  defp filter_match?(name, %Regex{} = regex), do: String.match?(name, regex)
  defp filter_match?(exact_match, exact_match) when is_binary(exact_match), do: true
  defp filter_match?(_, _), do: false
end
-> 361 | filter_only?(name, only) && !filter_except?(name, except) 362 | end) 363 | end 364 | 365 | defp filter_only?(_name, []), do: true 366 | defp filter_only?(name, filters), do: Enum.any?(filters, &filter_match?(name, &1)) 367 | 368 | defp filter_except?(_name, []), do: false 369 | defp filter_except?(name, filters), do: Enum.any?(filters, &filter_match?(name, &1)) 370 | 371 | defp filter_match?(name, %Regex{} = regex), do: String.match?(name, regex) 372 | defp filter_match?(exact_match, exact_match) when is_binary(exact_match), do: true 373 | defp filter_match?(_, _), do: false 374 | end 375 | -------------------------------------------------------------------------------- /lib/xlsx_reader/array.ex: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.Array do 2 | @moduledoc false 3 | 4 | @type t :: :array.array() 5 | @type t(type) :: :array.array(type) 6 | 7 | def from_list(list) do 8 | :array.from_list(list, nil) 9 | end 10 | 11 | def insert(array, index, value) do 12 | :array.set(index, value, array) 13 | end 14 | 15 | def lookup(array, index, default \\ nil) do 16 | case :array.get(index, array) do 17 | :undefined -> 18 | default 19 | 20 | value -> 21 | value 22 | end 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /lib/xlsx_reader/cell.ex: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.Cell do 2 | @moduledoc """ 3 | Cell structure. 4 | 5 | This structure contains the information of a cell in a sheet. 6 | 7 | - `value` - The value of the cell 8 | - `formula` - The formula used in the cell, if any 9 | - `ref` - The cell reference, like 'A1', 'B2', etc. 10 | 11 | This structure is used when the `cell_data_format` option is set to `:cell`. 
12 | """ 13 | 14 | defstruct [:value, :formula, :ref] 15 | 16 | @typedoc """ 17 | XLSX cell data 18 | """ 19 | @type t :: %__MODULE__{ 20 | value: term(), 21 | formula: String.t() | nil, 22 | ref: String.t() 23 | } 24 | end 25 | -------------------------------------------------------------------------------- /lib/xlsx_reader/cell_reference.ex: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.CellReference do 2 | @moduledoc false 3 | 4 | @spec parse(String.t()) :: {pos_integer(), pos_integer()} | :error 5 | def parse(reference) when not is_nil(reference) do 6 | case Regex.run(~r/\A([A-Z]+)(\d+)\z/, reference, capture: :all_but_first) do 7 | [letters, digits] -> 8 | {column_number(letters), String.to_integer(digits)} 9 | 10 | _ -> 11 | :error 12 | end 13 | end 14 | 15 | defp column_number(letters) do 16 | letters 17 | |> String.to_charlist() 18 | |> Enum.reduce(0, fn character_code, column_number -> 19 | column_number * 26 + (character_code - ?A) + 1 20 | end) 21 | end 22 | end 23 | -------------------------------------------------------------------------------- /lib/xlsx_reader/conversion.ex: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.Conversion do 2 | @moduledoc """ 3 | 4 | Conversion of cell values to Elixir types. 5 | 6 | """ 7 | 8 | @typedoc """ 9 | Date system identified by its reference year 10 | """ 11 | @type date_system :: 1900 | 1904 12 | 13 | @typedoc """ 14 | Supported number types identified by module name 15 | """ 16 | @type number_type :: Integer | Float | Decimal | String 17 | 18 | @typedoc """ 19 | Supported number value types 20 | """ 21 | @type number_value :: integer() | float() | Decimal.t() | String.t() 22 | 23 | @doc """ 24 | 25 | Converts the string representation of a truth value into a boolean. 26 | 27 | Cells with type attribute `"b"` store boolean values as a single digit: `"1"` or `"0"`. 
28 | 29 | ## Examples 30 | 31 | iex> XlsxReader.Conversion.to_boolean("1") 32 | {:ok, true} 33 | 34 | iex> XlsxReader.Conversion.to_boolean("0") 35 | {:ok, false} 36 | 37 | iex> XlsxReader.Conversion.to_boolean("true") 38 | :error 39 | 40 | """ 41 | @spec to_boolean(String.t()) :: {:ok, boolean()} | :error 42 | def to_boolean("1"), do: {:ok, true} 43 | def to_boolean("0"), do: {:ok, false} 44 | def to_boolean(_), do: :error 45 | 46 | @doc """ 47 | 48 | Converts a string into the given number type. 49 | 50 | Supported number types are: `Integer`, `Float`, `String` or `Decimal` (requires the [decimal](https://github.com/ericmj/decimal) library) 51 | 52 | ## Examples 53 | 54 | iex> XlsxReader.Conversion.to_number("123", Integer) 55 | {:ok, 123} 56 | 57 | iex> XlsxReader.Conversion.to_number("-123.45", Float) 58 | {:ok, -123.45} 59 | 60 | iex> XlsxReader.Conversion.to_number("0.12345e3", Float) 61 | {:ok, 123.45} 62 | 63 | iex> XlsxReader.Conversion.to_number("-123.45", Decimal) 64 | {:ok, %Decimal{coef: 12345, exp: -2, sign: -1}} 65 | 66 | iex> XlsxReader.Conversion.to_number("0.12345E3", Decimal) 67 | {:ok, %Decimal{coef: 12345, exp: -2, sign: 1}} 68 | 69 | iex> XlsxReader.Conversion.to_number("-123.45", String) 70 | {:ok, "-123.45"} 71 | 72 | iex> XlsxReader.Conversion.to_number("0.12345e3", String) 73 | {:ok, "0.12345e3"} 74 | 75 | iex> XlsxReader.Conversion.to_number("123.0", Integer) 76 | :error 77 | 78 | """ 79 | @spec to_number(String.t(), number_type()) :: {:ok, number_value()} | :error 80 | 81 | def to_number(string, Integer) do 82 | to_integer(string) 83 | end 84 | 85 | def to_number(string, Float) do 86 | to_float(string) 87 | end 88 | 89 | def to_number(string, Decimal) do 90 | to_decimal(string) 91 | end 92 | 93 | def to_number(string, String) do 94 | {:ok, string} 95 | end 96 | 97 | @doc """ 98 | 99 | Converts a string into a float. 
100 | 101 | ## Examples 102 | 103 | iex> XlsxReader.Conversion.to_float("123") 104 | {:ok, 123.0} 105 | 106 | iex> XlsxReader.Conversion.to_float("-123.45") 107 | {:ok, -123.45} 108 | 109 | iex> XlsxReader.Conversion.to_float("0.12345e3") 110 | {:ok, 123.45} 111 | 112 | iex> XlsxReader.Conversion.to_float("0.12345E3") 113 | {:ok, 123.45} 114 | 115 | iex> XlsxReader.Conversion.to_float("bogus") 116 | :error 117 | 118 | """ 119 | @spec to_float(String.t()) :: {:ok, float()} | :error 120 | def to_float(string) do 121 | case Float.parse(string) do 122 | {number, ""} -> 123 | {:ok, number} 124 | 125 | _ -> 126 | :error 127 | end 128 | end 129 | 130 | @doc """ 131 | 132 | Converts a string into an arbitrary precision [decimal](https://github.com/ericmj/decimal). 133 | 134 | ## Examples 135 | 136 | iex> XlsxReader.Conversion.to_decimal("123") 137 | {:ok, %Decimal{coef: 123, exp: 0, sign: 1}} 138 | 139 | iex> XlsxReader.Conversion.to_decimal("-123.45") 140 | {:ok, %Decimal{coef: 12345, exp: -2, sign: -1}} 141 | 142 | iex> XlsxReader.Conversion.to_decimal("0.12345e3") 143 | {:ok, %Decimal{coef: 12345, exp: -2, sign: 1}} 144 | 145 | iex> XlsxReader.Conversion.to_decimal("0.12345E3") 146 | {:ok, %Decimal{coef: 12345, exp: -2, sign: 1}} 147 | 148 | iex> XlsxReader.Conversion.to_decimal("bogus") 149 | :error 150 | 151 | """ 152 | @spec to_decimal(String.t()) :: {:ok, Decimal.t()} | :error 153 | def to_decimal(string) do 154 | case Decimal.parse(string) do 155 | {:ok, decimal} -> 156 | {:ok, decimal} 157 | 158 | {decimal, ""} -> 159 | {:ok, decimal} 160 | 161 | _ -> 162 | :error 163 | end 164 | end 165 | 166 | @doc """ 167 | 168 | Converts a string into an integer. 
169 | 170 | ## Examples 171 | 172 | iex> XlsxReader.Conversion.to_integer("123") 173 | {:ok, 123} 174 | 175 | iex> XlsxReader.Conversion.to_integer("-123") 176 | {:ok, -123} 177 | 178 | iex> XlsxReader.Conversion.to_integer("123.45") 179 | :error 180 | 181 | iex> XlsxReader.Conversion.to_integer("bogus") 182 | :error 183 | 184 | """ 185 | @spec to_integer(String.t()) :: {:ok, integer()} | :error 186 | def to_integer(string) do 187 | case Integer.parse(string) do 188 | {number, ""} -> 189 | {:ok, number} 190 | 191 | _ -> 192 | :error 193 | end 194 | end 195 | 196 | # This is why we can't have nice things: http://www.cpearson.com/excel/datetime.htm 197 | @base_date_system_1900 ~D[1899-12-30] 198 | @base_date_system_1904 ~D[1904-01-01] 199 | 200 | @doc """ 201 | Returns the base date for the given date system. 202 | 203 | ## Examples 204 | 205 | iex> XlsxReader.Conversion.base_date(1900) 206 | ~D[1899-12-30] 207 | 208 | iex> XlsxReader.Conversion.base_date(1904) 209 | ~D[1904-01-01] 210 | 211 | iex> XlsxReader.Conversion.base_date(2019) 212 | :error 213 | 214 | """ 215 | @spec base_date(date_system()) :: Date.t() | :error 216 | def base_date(1900), do: @base_date_system_1900 217 | def base_date(1904), do: @base_date_system_1904 218 | def base_date(_date_system), do: :error 219 | 220 | @doc """ 221 | 222 | Converts a serial date into a `Date`. 
223 | 224 | ## Examples 225 | 226 | iex> XlsxReader.Conversion.to_date("40396") 227 | {:ok, ~D[2010-08-06]} 228 | 229 | iex> XlsxReader.Conversion.to_date("43783") 230 | {:ok, ~D[2019-11-14]} 231 | 232 | iex> XlsxReader.Conversion.to_date("1", ~D[1999-12-31]) 233 | {:ok, ~D[2000-01-01]} 234 | 235 | iex> XlsxReader.Conversion.to_date("-1", ~D[1999-12-31]) 236 | :error 237 | 238 | """ 239 | @spec to_date(String.t(), Date.t()) :: {:ok, Date.t()} | :error 240 | def to_date(string, base_date \\ @base_date_system_1900) do 241 | case split_serial_date(string) do 242 | {:ok, days, _fraction_of_24} when days > 0.0 -> 243 | {:ok, Date.add(base_date, days)} 244 | 245 | {:ok, _days, _fraction_of_24} -> 246 | :error 247 | 248 | error -> 249 | error 250 | end 251 | end 252 | 253 | @doc """ 254 | 255 | Converts a serial date to a `NaiveDateTime`. 256 | 257 | ## Examples 258 | 259 | iex> XlsxReader.Conversion.to_date_time("43783.0") 260 | {:ok, ~N[2019-11-14 00:00:00]} 261 | 262 | iex> XlsxReader.Conversion.to_date_time("43783.760243055556") 263 | {:ok, ~N[2019-11-14 18:14:45]} 264 | 265 | iex> XlsxReader.Conversion.to_date_time("0.4895833333333333") 266 | {:ok, ~N[1899-12-30 11:45:00]} 267 | 268 | iex> XlsxReader.Conversion.to_date_time("1.760243055556", ~D[1999-12-31]) 269 | {:ok, ~N[2000-01-01 18:14:45]} 270 | 271 | iex> XlsxReader.Conversion.to_date_time("-30.760243055556", ~D[1999-12-31]) 272 | :error 273 | 274 | """ 275 | @spec to_date_time(String.t(), Date.t()) :: {:ok, NaiveDateTime.t()} | :error 276 | def to_date_time(string, base_date \\ @base_date_system_1900) do 277 | with {:ok, days, fraction_of_24} when days >= 0.0 <- split_serial_date(string), 278 | date <- Date.add(base_date, days), 279 | {:ok, time} <- fraction_of_24_to_time(fraction_of_24) do 280 | NaiveDateTime.new(date, time) 281 | else 282 | {:ok, _, _} -> 283 | :error 284 | 285 | {:error, _} -> 286 | :error 287 | 288 | error -> 289 | error 290 | end 291 | end 292 | 293 | ## Private 294 | 295 | # Splits a 
serial date into `{:ok, days_since_base_date, time_as_fraction_of_24}` 296 | @spec split_serial_date(String.t()) :: {:ok, integer(), float()} | :error 297 | defp split_serial_date(string) do 298 | with {:ok, value} <- to_float(string) do 299 | days = Float.floor(value) 300 | {:ok, trunc(days), value - days} 301 | end 302 | end 303 | 304 | @seconds_per_day 60 * 60 * 24 305 | 306 | @spec fraction_of_24_to_time(float()) :: {:ok, Time.t()} | {:error, atom()} 307 | defp fraction_of_24_to_time(fraction_of_24) do 308 | seconds = round(fraction_of_24 * @seconds_per_day) 309 | 310 | Time.new( 311 | seconds |> div(3600), 312 | seconds |> div(60) |> rem(60), 313 | seconds |> rem(60) 314 | ) 315 | end 316 | end 317 | -------------------------------------------------------------------------------- /lib/xlsx_reader/number.ex: -------------------------------------------------------------------------------- 1 | defprotocol XlsxReader.Number do 2 | @moduledoc false 3 | def multiply(number, factor) 4 | end 5 | 6 | defimpl XlsxReader.Number, for: Integer do 7 | def multiply(number, factor), do: number * factor 8 | end 9 | 10 | defimpl XlsxReader.Number, for: Float do 11 | def multiply(number, factor), do: number * factor 12 | end 13 | 14 | defimpl XlsxReader.Number, for: Decimal do 15 | def multiply(number, factor), do: Decimal.mult(number, factor) 16 | end 17 | 18 | defimpl XlsxReader.Number, for: String do 19 | def multiply(number, _factor), do: number 20 | end 21 | -------------------------------------------------------------------------------- /lib/xlsx_reader/package.ex: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.Package do 2 | @moduledoc """ 3 | 4 | XLSX package structure. 5 | 6 | This structure is initialized by `XlsxReader.open/2` and is used to access 7 | the contents of the file. 8 | 9 | It should not be manipulated directly. 
10 | 11 | """ 12 | 13 | @enforce_keys [:zip_handle, :workbook] 14 | defstruct zip_handle: nil, workbook: nil 15 | 16 | @typedoc """ 17 | XLSX package 18 | """ 19 | @type t :: %__MODULE__{ 20 | zip_handle: XlsxReader.ZipArchive.zip_handle(), 21 | workbook: XlsxReader.Workbook.t() 22 | } 23 | end 24 | -------------------------------------------------------------------------------- /lib/xlsx_reader/package_loader.ex: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.PackageLoader do 2 | @moduledoc false 3 | 4 | # Loads the content of an XLSX file. 5 | # 6 | # An XLSX file is ZIP archive containing XML files linked to each other 7 | # using relationships defined in `_rels/*.xml.rels` files. 8 | 9 | alias XlsxReader.Parsers.{ 10 | RelationshipsParser, 11 | SharedStringsParser, 12 | StylesParser, 13 | Utils, 14 | WorkbookParser, 15 | WorksheetParser 16 | } 17 | 18 | alias XlsxReader.ZipArchive 19 | 20 | @typedoc """ 21 | Source for the XLSX file: file system (`:path`) or in-memory (`:binary`) 22 | """ 23 | @type source :: :path | :binary 24 | 25 | @typedoc """ 26 | Option to specify the XLSX file source 27 | """ 28 | @type open_option :: 29 | {:exclude_hidden_sheets?, boolean()} 30 | | {:source, source()} 31 | | {:supported_custom_formats, [{String.t() | Regex.t(), atom()}]} 32 | 33 | @doc """ 34 | 35 | Opens an XLSX package. 36 | 37 | It verifies the contents of the archive and preloads the workbook sheet list 38 | and relationships as well as the shared strings and style information required 39 | to load the sheet data. 40 | 41 | To load the actual sheet data, see `load_sheet_by_rid/3` and `load_sheet_by_name/3`. 
42 | 43 | """ 44 | @spec open(XlsxReader.ZipArchive.zip_handle()) :: 45 | {:ok, XlsxReader.Package.t()} | XlsxReader.error() 46 | def open(zip_handle, options \\ []) do 47 | with :ok <- check_contents(zip_handle), 48 | {:ok, workbook} <- load_workbook_xml(zip_handle, options), 49 | {:ok, workbook_rels} <- load_workbook_xml_rels(zip_handle) do 50 | package = 51 | %XlsxReader.Package{ 52 | zip_handle: zip_handle, 53 | workbook: %{workbook | rels: workbook_rels} 54 | } 55 | |> load_shared_strings 56 | |> load_styles(Keyword.get(options, :supported_custom_formats, [])) 57 | 58 | {:ok, package} 59 | end 60 | end 61 | 62 | @doc """ 63 | Loads a single sheet identified by relationship id (`rId`) 64 | 65 | ## Options 66 | 67 | See `XlsxReader.sheet/2`. 68 | 69 | """ 70 | @spec load_sheet_by_rid(XlsxReader.Package.t(), String.t(), Keyword.t()) :: 71 | {:ok, XlsxReader.row()} | XlsxReader.error() 72 | def load_sheet_by_rid(package, rid, options \\ []) do 73 | case fetch_rel_target(package.workbook.rels, :sheets, rid) do 74 | {:ok, target} -> 75 | load_worksheet_xml(package, xl_path(target), options) 76 | 77 | :error -> 78 | {:error, "sheet relationship not found"} 79 | end 80 | end 81 | 82 | @doc """ 83 | Loads a single sheet identified by name 84 | 85 | ## Options 86 | 87 | See `XlsxReader.sheet/2`. 
88 | 89 | """ 90 | @spec load_sheet_by_name(XlsxReader.Package.t(), String.t(), Keyword.t()) :: 91 | {:ok, XlsxReader.row()} | XlsxReader.error() 92 | def load_sheet_by_name(package, name, options \\ []) do 93 | case find_sheet_by_name(package, name) do 94 | %{rid: rid} -> 95 | load_sheet_by_rid(package, rid, options) 96 | 97 | nil -> 98 | {:error, "sheet #{inspect(name)} not found"} 99 | end 100 | end 101 | 102 | ## 103 | 104 | @workbook_xml "xl/workbook.xml" 105 | @workbook_xml_rels "xl/_rels/workbook.xml.rels" 106 | 107 | @required_files [ 108 | "[Content_Types].xml", 109 | @workbook_xml, 110 | @workbook_xml_rels 111 | ] 112 | 113 | defp check_contents(zip_handle) do 114 | with {:ok, files} <- ZipArchive.list(zip_handle) do 115 | if Enum.all?(@required_files, &Enum.member?(files, &1)), 116 | do: :ok, 117 | else: {:error, "invalid xlsx file"} 118 | end 119 | end 120 | 121 | defp load_workbook_xml(zip_handle, options) do 122 | options = Keyword.take(options, [:exclude_hidden_sheets?]) 123 | 124 | with {:ok, xml} <- extract_xml(zip_handle, @workbook_xml) do 125 | WorkbookParser.parse(xml, options) 126 | end 127 | end 128 | 129 | defp load_workbook_xml_rels(zip_handle) do 130 | with {:ok, xml} <- extract_xml(zip_handle, @workbook_xml_rels) do 131 | RelationshipsParser.parse(xml) 132 | end 133 | end 134 | 135 | defp load_shared_strings(package) do 136 | with {:ok, file} <- single_rel_target(package.workbook.rels.shared_strings), 137 | {:ok, xml} <- extract_xml(package.zip_handle, file), 138 | {:ok, shared_strings} <- SharedStringsParser.parse(xml) do 139 | %{package | workbook: %{package.workbook | shared_strings: shared_strings}} 140 | else 141 | {:error, :no_rel_target} -> 142 | package 143 | end 144 | end 145 | 146 | defp load_styles(package, supported_custom_formats) do 147 | with {:ok, file} <- single_rel_target(package.workbook.rels.styles), 148 | {:ok, xml} <- extract_xml(package.zip_handle, file), 149 | {:ok, style_types, custom_formats} <- 
StylesParser.parse(xml, supported_custom_formats) do 150 | %{ 151 | package 152 | | workbook: %{package.workbook | style_types: style_types, custom_formats: custom_formats} 153 | } 154 | else 155 | {:error, :no_rel_target} -> 156 | package 157 | end 158 | end 159 | 160 | defp single_rel_target(rels) do 161 | case Map.values(rels) do 162 | [target] -> 163 | {:ok, xl_path(target)} 164 | 165 | [] -> 166 | {:error, :no_rel_target} 167 | 168 | targets -> 169 | {:error, "expected a single rel target, got #{inspect(targets)}"} 170 | end 171 | end 172 | 173 | defp load_worksheet_xml(package, file, options) do 174 | with {:ok, xml} <- extract_xml(package.zip_handle, file) do 175 | WorksheetParser.parse(xml, package.workbook, options) 176 | end 177 | end 178 | 179 | defp extract_xml(zip_handle, file) do 180 | with {:ok, xml} <- ZipArchive.extract(zip_handle, file) do 181 | Utils.ensure_utf8(xml) 182 | end 183 | end 184 | 185 | defp xl_path(relative_path), do: Path.join("xl", relative_path) 186 | 187 | defp find_sheet_by_name(package, name) do 188 | Enum.find(package.workbook.sheets, fn %{name: n} -> name == n end) 189 | end 190 | 191 | defp fetch_rel_target(rels, type, rid) do 192 | with {:ok, paths} <- Map.fetch(rels, type) do 193 | Map.fetch(paths, rid) 194 | end 195 | end 196 | end 197 | -------------------------------------------------------------------------------- /lib/xlsx_reader/parsers/relationships_parser.ex: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.Parsers.RelationshipsParser do 2 | @moduledoc false 3 | 4 | # Parses SpreadsheetML workbook relationships. 5 | # 6 | # The relationships determine the exact location of the shared strings, styles, 7 | # themes and worksheet files within the archive. 
8 | 9 | alias XlsxReader.Parsers.Utils 10 | 11 | @behaviour Saxy.Handler 12 | 13 | def parse(xml) do 14 | Saxy.parse_string(xml, __MODULE__, %{ 15 | shared_strings: %{}, 16 | styles: %{}, 17 | themes: %{}, 18 | sheets: %{} 19 | }) 20 | end 21 | 22 | @namespace "http://schemas.openxmlformats.org/officeDocument/2006/relationships" 23 | 24 | @types %{ 25 | "#{@namespace}/sharedStrings" => :shared_strings, 26 | "#{@namespace}/styles" => :styles, 27 | "#{@namespace}/theme" => :themes, 28 | "#{@namespace}/worksheet" => :sheets 29 | } 30 | 31 | @impl Saxy.Handler 32 | def handle_event(:start_document, _prolog, state) do 33 | {:ok, state} 34 | end 35 | 36 | @impl Saxy.Handler 37 | def handle_event(:end_document, _data, state) do 38 | {:ok, state} 39 | end 40 | 41 | @impl Saxy.Handler 42 | def handle_event(:start_element, {"Relationship", attributes}, state) do 43 | with %{id: id, target: target, type: type} <- extract_relationship_attributes(attributes), 44 | {:ok, key} <- Map.fetch(@types, type) do 45 | {:ok, Map.update!(state, key, fn rels -> Map.put_new(rels, id, target) end)} 46 | else 47 | _ -> 48 | {:ok, state} 49 | end 50 | end 51 | 52 | @impl Saxy.Handler 53 | def handle_event(:start_element, _element, state) do 54 | {:ok, state} 55 | end 56 | 57 | @impl Saxy.Handler 58 | def handle_event(:end_element, _name, state) do 59 | {:ok, state} 60 | end 61 | 62 | @impl Saxy.Handler 63 | def handle_event(:characters, _chars, state) do 64 | {:ok, state} 65 | end 66 | 67 | ## 68 | 69 | @relationship_attributes_mapping %{ 70 | "Id" => :id, 71 | "Target" => :target, 72 | "Type" => :type 73 | } 74 | 75 | defp extract_relationship_attributes(attributes) do 76 | Utils.map_attributes(attributes, @relationship_attributes_mapping) 77 | end 78 | end 79 | -------------------------------------------------------------------------------- /lib/xlsx_reader/parsers/shared_strings_parser.ex: -------------------------------------------------------------------------------- 1 | defmodule 
XlsxReader.Parsers.SharedStringsParser do 2 | @moduledoc false 3 | 4 | # Parses SpreadsheetML shared strings definitions. 5 | # 6 | # The parser builds a list of shared strings. 7 | # 8 | # Worksheets only contain numbers: the value of cells containing text is 9 | # numeric index to the array of shared strings. 10 | 11 | @behaviour Saxy.Handler 12 | 13 | alias XlsxReader.Array 14 | alias XlsxReader.Parsers.Utils 15 | 16 | defmodule State do 17 | @moduledoc false 18 | 19 | defstruct current_string: nil, 20 | strings: [], 21 | expect_chars: false, 22 | preserve_space: false 23 | end 24 | 25 | def parse(xml) do 26 | Saxy.parse_string(xml, __MODULE__, %State{}) 27 | end 28 | 29 | @impl Saxy.Handler 30 | def handle_event(:start_document, _prolog, state) do 31 | {:ok, state} 32 | end 33 | 34 | @impl Saxy.Handler 35 | def handle_event(:end_document, _data, state) do 36 | {:ok, Array.from_list(Enum.reverse(state.strings))} 37 | end 38 | 39 | @impl Saxy.Handler 40 | def handle_event(:start_element, {"si", _attributes}, state) do 41 | {:ok, %{state | current_string: ""}} 42 | end 43 | 44 | @impl Saxy.Handler 45 | def handle_event(:start_element, {"t", attributes}, state) do 46 | {:ok, 47 | %{ 48 | state 49 | | expect_chars: true, 50 | preserve_space: Utils.get_attribute(attributes, "xml:space") == "preserve" 51 | }} 52 | end 53 | 54 | @impl Saxy.Handler 55 | def handle_event(:start_element, _element, state) do 56 | {:ok, state} 57 | end 58 | 59 | @impl Saxy.Handler 60 | def handle_event(:end_element, "t", state) do 61 | {:ok, %{state | expect_chars: false, preserve_space: false}} 62 | end 63 | 64 | @impl Saxy.Handler 65 | def handle_event(:end_element, "si", state) do 66 | {:ok, %{state | current_string: nil, strings: [state.current_string | state.strings]}} 67 | end 68 | 69 | @impl Saxy.Handler 70 | def handle_event(:end_element, _name, state) do 71 | {:ok, state} 72 | end 73 | 74 | @impl Saxy.Handler 75 | def handle_event(:characters, chars, %{expect_chars: true} = state) 
do 76 | {:ok, %{state | current_string: state.current_string <> preserve_space(state, chars)}} 77 | end 78 | 79 | @impl Saxy.Handler 80 | def handle_event(:characters, _chars, %{expect_chars: false} = state) do 81 | {:ok, state} 82 | end 83 | 84 | ## 85 | defp preserve_space(%{preserve_space: true}, string), do: string 86 | defp preserve_space(%{preserve_space: false}, string), do: String.trim(string) 87 | end 88 | -------------------------------------------------------------------------------- /lib/xlsx_reader/parsers/styles_parser.ex: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.Parsers.StylesParser do 2 | @moduledoc false 3 | 4 | # Parses SpreadsheetML style definitions. 5 | # 6 | # It extracts the relevant subset of style definitions in order to build 7 | # a `style_types` array which is used to look up the cell value format 8 | # for type conversions. 9 | 10 | @behaviour Saxy.Handler 11 | 12 | alias XlsxReader.{Array, Styles} 13 | alias XlsxReader.Parsers.Utils 14 | 15 | defmodule State do 16 | @moduledoc false 17 | defstruct collect_xf: false, 18 | style_types: [], 19 | custom_formats: %{}, 20 | supported_custom_formats: [] 21 | end 22 | 23 | def parse(xml, supported_custom_formats \\ []) do 24 | with {:ok, state} <- 25 | Saxy.parse_string(xml, __MODULE__, %State{ 26 | supported_custom_formats: supported_custom_formats 27 | }) do 28 | {:ok, state.style_types, state.custom_formats} 29 | end 30 | end 31 | 32 | @impl Saxy.Handler 33 | def handle_event(:start_document, _prolog, state) do 34 | {:ok, state} 35 | end 36 | 37 | @impl Saxy.Handler 38 | def handle_event(:end_document, _data, state) do 39 | {:ok, state} 40 | end 41 | 42 | @impl Saxy.Handler 43 | def handle_event(:start_element, {"numFmt", attributes}, state) do 44 | num_fmt_id = Utils.get_attribute(attributes, "numFmtId") 45 | format_code = Utils.get_attribute(attributes, "formatCode") 46 | {:ok, %{state | custom_formats: 
Map.put(state.custom_formats, num_fmt_id, format_code)}} 47 | end 48 | 49 | @impl Saxy.Handler 50 | def handle_event(:start_element, {"cellXfs", _attributes}, state) do 51 | {:ok, %{state | collect_xf: true}} 52 | end 53 | 54 | @impl Saxy.Handler 55 | def handle_event(:start_element, {"xf", attributes}, %{collect_xf: true} = state) do 56 | num_fmt_id = Utils.get_attribute(attributes, "numFmtId") 57 | 58 | {:ok, 59 | %{ 60 | state 61 | | style_types: [ 62 | Styles.get_style_type( 63 | num_fmt_id, 64 | state.custom_formats, 65 | state.supported_custom_formats 66 | ) 67 | | state.style_types 68 | ] 69 | }} 70 | end 71 | 72 | @impl Saxy.Handler 73 | def handle_event(:start_element, _element, state) do 74 | {:ok, state} 75 | end 76 | 77 | @impl Saxy.Handler 78 | def handle_event(:end_element, "cellXfs", state) do 79 | {:ok, 80 | %{state | collect_xf: false, style_types: Array.from_list(Enum.reverse(state.style_types))}} 81 | end 82 | 83 | @impl Saxy.Handler 84 | def handle_event(:end_element, _name, state) do 85 | {:ok, state} 86 | end 87 | 88 | @impl Saxy.Handler 89 | def handle_event(:characters, _chars, state) do 90 | {:ok, state} 91 | end 92 | end 93 | -------------------------------------------------------------------------------- /lib/xlsx_reader/parsers/utils.ex: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.Parsers.Utils do 2 | @moduledoc false 3 | 4 | # Utility functions used by the XML parser modules 5 | 6 | @type xml_attribute :: {String.t(), String.t()} 7 | @type xml_attributes :: [xml_attribute] 8 | 9 | @doc """ 10 | 11 | Get value of attribute by name 12 | 13 | ## Examples 14 | 15 | iex> XlsxReader.Parsers.Utils.get_attribute([{"a", "x"}, {"b", "y"}, {"c", "y"}], "a") 16 | "x" 17 | 18 | iex> XlsxReader.Parsers.Utils.get_attribute([{"a", "x"}, {"b", "y"}, {"c", "y"}], "b") 19 | "y" 20 | 21 | iex> XlsxReader.Parsers.Utils.get_attribute([{"a", "x"}, {"b", "y"}, {"c", "z"}], "c") 22 | "z" 23 | 24 | 
iex> XlsxReader.Parsers.Utils.get_attribute([{"a", "x"}, {"b", "y"}, {"c", "z"}], "d") 25 | nil 26 | 27 | iex> XlsxReader.Parsers.Utils.get_attribute([{"a", "x"}, {"b", "y"}, {"c", "z"}], "d", "default") 28 | "default" 29 | 30 | """ 31 | @spec get_attribute(xml_attributes(), String.t(), nil | String.t()) :: 32 | nil | String.t() 33 | def get_attribute(attributes, name, default \\ nil) 34 | def get_attribute([], _name, default), do: default 35 | def get_attribute([{name, value} | _], name, _default), do: value 36 | def get_attribute([_ | rest], name, default), do: get_attribute(rest, name, default) 37 | 38 | @doc """ 39 | 40 | Extracts XML attributes into a map based on the given mapping 41 | 42 | ## Examples 43 | 44 | iex> XlsxReader.Parsers.Utils.map_attributes([{"a", "x"}, {"b", "y"}], %{"a" => :foo, "b" => :bar, "c" => :baz}) 45 | %{foo: "x", bar: "y"} 46 | 47 | """ 48 | 49 | @spec map_attributes(xml_attributes(), map(), map()) :: map() 50 | def map_attributes(attributes, mapping, initial \\ %{}) do 51 | Enum.reduce(attributes, initial, fn {name, value}, acc -> 52 | case Map.fetch(mapping, name) do 53 | {:ok, key} -> 54 | Map.put(acc, key, value) 55 | 56 | :error -> 57 | acc 58 | end 59 | end) 60 | end 61 | 62 | @doc """ 63 | 64 | Returns a UTF-8 binary, UTF-8 being the only character encoding supported by the XML parser. 65 | 66 | Converts to UTF-8 from UTF-16BE/LE if a BOM is detected. 
67 | 68 | ## Examples 69 | 70 | iex> XlsxReader.Parsers.Utils.ensure_utf8("UTF-8") 71 | {:ok, "UTF-8"} 72 | 73 | iex> XlsxReader.Parsers.Utils.ensure_utf8("\uFEFFUTF-8 with BOM") 74 | {:ok, "UTF-8 with BOM"} 75 | 76 | iex> XlsxReader.Parsers.Utils.ensure_utf8(<<0xff, 0xfe, 0x55, 0x00, 0x54, 0x00, 0x46, 0x00, 0x2d, 0x00, 0x31, 0x00, 0x36, 0x00, 0x4c, 0x00, 0x45, 0x00>>) 77 | {:ok, "UTF-16LE"} 78 | 79 | iex> XlsxReader.Parsers.Utils.ensure_utf8(<<0xfe, 0xff, 0x00, 0x55, 0x00, 0x54, 0x00, 0x46, 0x00, 0x2d, 0x00, 0x31, 0x00, 0x36, 0x00, 0x42, 0x00, 0x45>>) 80 | {:ok, "UTF-16BE"} 81 | 82 | iex> XlsxReader.Parsers.Utils.ensure_utf8(<<0xff, 0xfe, 0x00>>) 83 | {:error, "incomplete UTF-16LE binary"} 84 | 85 | """ 86 | @spec ensure_utf8(binary()) :: {:ok, String.t()} | {:error, String.t()} 87 | def ensure_utf8(string) do 88 | case :unicode.bom_to_encoding(string) do 89 | {:latin1, 0} -> 90 | # No BOM found, assumes UTF-8 91 | {:ok, string} 92 | 93 | {:utf8, bom_length} -> 94 | # BOM found with UTF-8 encoding 95 | {:ok, strip_bom(string, bom_length)} 96 | 97 | {encoding, bom_length} -> 98 | # BOM found with UTF-16/32 encoding given as an {encoding, endianess} tuple 99 | string |> strip_bom(bom_length) |> convert_to_utf8(encoding) 100 | end 101 | end 102 | 103 | defp strip_bom(string, bom_length) do 104 | :binary.part(string, {bom_length, byte_size(string) - bom_length}) 105 | end 106 | 107 | defp convert_to_utf8(string, encoding) do 108 | case :unicode.characters_to_binary(string, encoding) do 109 | utf8 when is_binary(utf8) -> 110 | {:ok, utf8} 111 | 112 | {:error, _, _} -> 113 | {:error, "error converting #{format_encoding(encoding)} binary to UTF-8"} 114 | 115 | {:incomplete, _, _} -> 116 | {:error, "incomplete #{format_encoding(encoding)} binary"} 117 | end 118 | end 119 | 120 | defp format_encoding({:utf16, endianess}), do: "UTF-16#{format_endianess(endianess)}" 121 | defp format_encoding({:utf32, endianess}), do: "UTF-32#{format_endianess(endianess)}" 122 | 123 | def 
format_endianess(:big), do: "BE" 124 | def format_endianess(:little), do: "LE" 125 | end 126 | -------------------------------------------------------------------------------- /lib/xlsx_reader/parsers/workbook_parser.ex: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.Parsers.WorkbookParser do 2 | @moduledoc false 3 | 4 | # Parses SpreadsheetML workbooks. 5 | # 6 | # The `workbook.xml` contains the worksheet list and their relationship identifier (`rId`). 7 | # The workbook may also contain a hint regarding the [date system](https://docs.microsoft.com/en-us/office/troubleshoot/excel/1900-and-1904-date-system) in use. 8 | # 9 | 10 | @behaviour Saxy.Handler 11 | 12 | alias XlsxReader.Conversion 13 | alias XlsxReader.Parsers.Utils 14 | 15 | @doc """ 16 | Parses a workbook XML document. 17 | 18 | ## Options 19 | * `:exclude_hidden_sheets?` - Whether to exclude hidden sheets in the workbook 20 | """ 21 | def parse(xml, options \\ []) do 22 | exclude_hidden_sheets? = Keyword.get(options, :exclude_hidden_sheets?, false) 23 | 24 | Saxy.parse_string(xml, __MODULE__, %XlsxReader.Workbook{ 25 | options: %{exclude_hidden_sheets?: exclude_hidden_sheets?} 26 | }) 27 | end 28 | 29 | @impl Saxy.Handler 30 | def handle_event(:start_document, _prolog, workbook) do 31 | {:ok, workbook} 32 | end 33 | 34 | @impl Saxy.Handler 35 | def handle_event(:end_document, _data, workbook) do 36 | {:ok, %{workbook | base_date: workbook.base_date || Conversion.base_date(1900)}} 37 | end 38 | 39 | @impl Saxy.Handler 40 | def handle_event(:start_element, {"workbookPr", attributes}, workbook) do 41 | {:ok, %{workbook | base_date: attributes |> date_system() |> Conversion.base_date()}} 42 | end 43 | 44 | @impl Saxy.Handler 45 | def handle_event(:start_element, {"sheet", attributes}, workbook) do 46 | is_hidden? = attributes |> Utils.get_attribute("state") === "hidden" 47 | skip_sheet? = workbook.options.exclude_hidden_sheets? && is_hidden? 
48 | 49 | case skip_sheet? do 50 | true -> {:ok, workbook} 51 | false -> {:ok, %{workbook | sheets: [build_sheet(attributes) | workbook.sheets]}} 52 | end 53 | end 54 | 55 | @impl Saxy.Handler 56 | def handle_event(:start_element, _element, workbook) do 57 | {:ok, workbook} 58 | end 59 | 60 | @impl Saxy.Handler 61 | def handle_event(:end_element, "sheets", workbook) do 62 | {:ok, %{workbook | sheets: Enum.reverse(workbook.sheets)}} 63 | end 64 | 65 | @impl Saxy.Handler 66 | def handle_event(:end_element, _name, workbook) do 67 | {:ok, workbook} 68 | end 69 | 70 | @impl Saxy.Handler 71 | def handle_event(:characters, _chars, workbook) do 72 | {:ok, workbook} 73 | end 74 | 75 | ## 76 | 77 | @sheet_attributes %{ 78 | "name" => :name, 79 | "r:id" => :rid, 80 | "sheetId" => :sheet_id 81 | } 82 | 83 | defp build_sheet(attributes) do 84 | Utils.map_attributes(attributes, @sheet_attributes, %XlsxReader.Sheet{}) 85 | end 86 | 87 | defp date_system(attributes) do 88 | if Utils.get_attribute(attributes, "date1904", "0") == "1", 89 | do: 1904, 90 | else: 1900 91 | end 92 | end 93 | -------------------------------------------------------------------------------- /lib/xlsx_reader/parsers/worksheet_parser.ex: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.Parsers.WorksheetParser do 2 | @moduledoc false 3 | 4 | # Parses SpreadsheetML worksheets. 
5 | 6 | @behaviour Saxy.Handler 7 | 8 | alias XlsxReader.{Cell, CellReference, Conversion, Number} 9 | alias XlsxReader.Parsers.Utils 10 | 11 | defmodule State do 12 | @moduledoc false 13 | @enforce_keys [:workbook, :type_conversion, :blank_value] 14 | defstruct workbook: nil, 15 | rows: [], 16 | row: nil, 17 | current_row: nil, 18 | expected_row: 1, 19 | expected_column: nil, 20 | cell_ref: nil, 21 | cell_type: nil, 22 | cell_style: nil, 23 | value: nil, 24 | formula: nil, 25 | shared_formula_index: nil, 26 | shared_formulas: :array.new(), 27 | type_conversion: nil, 28 | blank_value: nil, 29 | empty_rows: nil, 30 | number_type: nil, 31 | skip_row?: nil, 32 | cell_data_format: :value 33 | end 34 | 35 | @doc """ 36 | Parse the given worksheet XML in the context of the given workbook. 37 | 38 | ## Options 39 | 40 | * `type_conversion`: boolean (default: `true`) 41 | * `blank_value`: placeholder value for empty cells (default: `""`) 42 | * `empty_rows`: include empty rows (default: `true`) 43 | * `number_type`: type used for numeric conversion: `String` (no conversion), `Integer`, `Decimal` or `Float` (default: `Float`) 44 | * `skip_row?`: function callback that determines if a row should be skipped or not. 45 | Overrides `blank_value` and `empty_rows` on the matter of skipping rows. 46 | Defaults to `nil` (keeping the behaviour of `blank_value` and `empty_rows`). 47 | * `cell_data_format`: Controls the format of the cell data. Can be `:value` (default, returns the cell value only) or `:cell` (returns instances of `XlsxReader.Cell`).
48 | 49 | """ 50 | def parse(xml, workbook, options \\ []) do 51 | Saxy.parse_string(xml, __MODULE__, %State{ 52 | workbook: workbook, 53 | type_conversion: Keyword.get(options, :type_conversion, true), 54 | blank_value: Keyword.get(options, :blank_value, ""), 55 | empty_rows: Keyword.get(options, :empty_rows, true), 56 | number_type: Keyword.get(options, :number_type, Float), 57 | skip_row?: Keyword.get(options, :skip_row?), 58 | cell_data_format: Keyword.get(options, :cell_data_format, :value) 59 | }) 60 | end 61 | 62 | @impl Saxy.Handler 63 | def handle_event(:start_document, _prolog, state) do 64 | {:ok, state} 65 | end 66 | 67 | @impl Saxy.Handler 68 | def handle_event(:end_document, _data, state) do 69 | {:ok, state.rows} 70 | end 71 | 72 | @impl Saxy.Handler 73 | def handle_event(:start_element, {"row", attributes}, state) do 74 | # Some XLSX writers (Excel, Elixlsx, …) completely omit `<row>` or `<c>` elements when empty. 75 | # As we build the sheet, we'll keep track of the expected row and column number and 76 | # fill the blanks as needed using the coordinates indicated in the row or cell reference.
77 | 78 | current_row = 79 | case Utils.get_attribute(attributes, "r") do 80 | nil -> 81 | state.expected_row 82 | 83 | value -> 84 | String.to_integer(value) 85 | end 86 | 87 | {:ok, 88 | %{ 89 | state 90 | | row: [], 91 | current_row: current_row, 92 | expected_column: 1 93 | }} 94 | end 95 | 96 | def handle_event(:start_element, {"c", attributes}, state) do 97 | {:ok, new_cell(state, extract_cell_attributes(attributes))} 98 | end 99 | 100 | def handle_event(:start_element, {"v", _attributes}, state) do 101 | {:ok, expect_value(state)} 102 | end 103 | 104 | def handle_event(:start_element, {"t", _attributes}, state) do 105 | {:ok, expect_value(state)} 106 | end 107 | 108 | def handle_event(:start_element, {"f", attributes}, state) do 109 | type = Utils.get_attribute(attributes, "t") 110 | ref = Utils.get_attribute(attributes, "ref") 111 | 112 | case {type, ref} do 113 | {"shared", ref} when is_binary(ref) -> 114 | string_index = Utils.get_attribute(attributes, "si") 115 | {:ok, expect_shared_formula(state, string_index)} 116 | 117 | _ -> 118 | {:ok, expect_formula(state)} 119 | end 120 | end 121 | 122 | @impl Saxy.Handler 123 | def handle_event(:start_element, _element, state) do 124 | {:ok, state} 125 | end 126 | 127 | @impl Saxy.Handler 128 | def handle_event(:end_element, "c", state) do 129 | {:ok, add_cell_to_row(state)} 130 | end 131 | 132 | @impl Saxy.Handler 133 | def handle_event(:end_element, "f", %{formula: nil} = state) do 134 | formula = lookup_shared_formula(state, state.shared_formula_index) 135 | {:ok, store_formula(state, formula)} 136 | end 137 | 138 | @impl Saxy.Handler 139 | def handle_event(:end_element, "row", state) do 140 | if skip_row?(state) do 141 | {:ok, skip_row(state)} 142 | else 143 | {:ok, emit_row(state)} 144 | end 145 | end 146 | 147 | @impl Saxy.Handler 148 | def handle_event(:end_element, "sheetData", state) do 149 | {:ok, restore_rows_order(state)} 150 | end 151 | 152 | @impl Saxy.Handler 153 | def handle_event(:end_element, 
_name, state) do 154 | {:ok, state} 155 | end 156 | 157 | @impl Saxy.Handler 158 | def handle_event(:characters, chars, %{value: :expect_chars} = state) do 159 | {:ok, store_value(state, chars)} 160 | end 161 | 162 | @impl Saxy.Handler 163 | def handle_event(:characters, chars, %{value: :expect_formula} = state) do 164 | {:ok, store_formula(state, chars)} 165 | end 166 | 167 | @impl Saxy.Handler 168 | def handle_event(:characters, chars, %{value: :expect_shared_formula} = state) do 169 | state = store_shared_formula(state, state.shared_formula_index, chars) 170 | {:ok, store_formula(state, chars)} 171 | end 172 | 173 | @impl Saxy.Handler 174 | def handle_event(:characters, _chars, state) do 175 | {:ok, state} 176 | end 177 | 178 | ## 179 | 180 | ## State machine 181 | 182 | defp new_cell(state, cell_attributes) do 183 | state 184 | |> Map.merge(cell_attributes) 185 | |> handle_omitted_cells() 186 | end 187 | 188 | defp expect_value(state) do 189 | %{state | value: :expect_chars} 190 | end 191 | 192 | defp expect_formula(state) do 193 | %{state | value: :expect_formula} 194 | end 195 | 196 | defp expect_shared_formula(state, string_index) do 197 | {:ok, index} = Conversion.to_integer(string_index) 198 | shared_formulas = state.shared_formulas |> XlsxReader.Array.insert(index, nil) 199 | 200 | %{ 201 | state 202 | | value: :expect_shared_formula, 203 | shared_formulas: shared_formulas, 204 | shared_formula_index: index 205 | } 206 | end 207 | 208 | defp store_value(state, value) do 209 | %{state | value: value} 210 | end 211 | 212 | defp store_formula(state, formula) do 213 | %{state | formula: formula} 214 | end 215 | 216 | defp store_shared_formula(state, index, formula) do 217 | shared_formulas = state.shared_formulas |> XlsxReader.Array.insert(index, formula) 218 | %{state | shared_formulas: shared_formulas} 219 | end 220 | 221 | defp add_cell_to_row(state) do 222 | %{ 223 | state 224 | | row: [format_cell_data(state) | state.row], 225 | cell_ref: nil, 226 | 
cell_type: nil, 227 | value: nil, 228 | formula: nil 229 | } 230 | end 231 | 232 | defp skip_row?(%{skip_row?: skip_row?, row: row}) when is_function(skip_row?) do 233 | skip_row?.(row) 234 | end 235 | 236 | defp skip_row?(%{empty_rows: false} = state), do: empty_row?(state) 237 | 238 | defp skip_row?(_state), do: false 239 | 240 | defp empty_row?(state) do 241 | Enum.all?(state.row, fn value -> value == state.blank_value end) 242 | end 243 | 244 | defp skip_row(state) do 245 | %{state | row: nil, expected_row: state.current_row + 1} 246 | end 247 | 248 | defp emit_row(state) do 249 | state = handle_omitted_rows(state) 250 | row = state.row |> sanitize_row() |> Enum.reverse() 251 | 252 | %{ 253 | state 254 | | row: nil, 255 | rows: [row | state.rows], 256 | expected_row: state.current_row + 1 257 | } 258 | end 259 | 260 | defp sanitize_row(row) do 261 | Enum.map(row, fn 262 | # If the element has no text child node, we didn't receive any :characters event 263 | # and the current value still contains the placeholder used by the parser 264 | :expect_chars -> "" 265 | :expect_formula -> "" 266 | # Otherwise assume that the row contains an actual value 267 | value -> value 268 | end) 269 | end 270 | 271 | defp restore_rows_order(state) do 272 | %{state | rows: Enum.reverse(state.rows)} 273 | end 274 | 275 | ## Omitted rows / cells 276 | 277 | defp handle_omitted_rows(%{empty_rows: true} = state) do 278 | omitted_rows = state.current_row - state.expected_row 279 | 280 | if omitted_rows > 0 do 281 | blank_row = List.duplicate(state.blank_value, length(state.row)) 282 | %{state | rows: prepend_n_times(state.rows, blank_row, omitted_rows)} 283 | else 284 | state 285 | end 286 | end 287 | 288 | defp handle_omitted_rows(state), do: state 289 | 290 | defp handle_omitted_cells(state) do 291 | # Using the current cell reference and the expected column: 292 | # 1. fill any missing cell 293 | # 2. 
determine the next expected column 294 | with %{cell_ref: cell_ref} when not is_nil(cell_ref) <- state, 295 | {column, _row} <- CellReference.parse(cell_ref) do 296 | omitted_cells = column - state.expected_column 297 | 298 | state 299 | |> add_omitted_cells_to_row(omitted_cells) 300 | |> Map.put(:expected_column, column + 1) 301 | else 302 | _ -> 303 | state 304 | end 305 | end 306 | 307 | defp add_omitted_cells_to_row(state, n) do 308 | %{state | row: prepend_n_times(state.row, state.blank_value, n)} 309 | end 310 | 311 | defp prepend_n_times(list, _value, 0), do: list 312 | 313 | defp prepend_n_times(list, value, n) when n > 0, 314 | do: prepend_n_times([value | list], value, n - 1) 315 | 316 | ## Cell format handling 317 | 318 | defp extract_cell_attributes(attributes) do 319 | # Initialize current cell attributes 320 | Utils.map_attributes( 321 | attributes, 322 | %{ 323 | "r" => :cell_ref, 324 | "s" => :cell_style, 325 | "t" => :cell_type 326 | }, 327 | # Make sure we start from a blank slate to prevent reusing the previous cell data 328 | # if one of the attributes is missing 329 | %{cell_ref: nil, cell_style: nil, cell_type: nil} 330 | ) 331 | end 332 | 333 | defp format_cell_data(state) do 334 | value = convert_current_cell_value(state) 335 | 336 | case state.cell_data_format do 337 | :cell -> %Cell{value: value, formula: state.formula, ref: state.cell_ref} 338 | :value -> value 339 | _ -> value 340 | end 341 | end 342 | 343 | defp convert_current_cell_value(%State{type_conversion: false} = state) do 344 | case {state.cell_type, state.value} do 345 | {_, nil} -> 346 | state.blank_value 347 | 348 | {"s", value} -> 349 | lookup_shared_string(state, value) 350 | 351 | {_, value} -> 352 | value 353 | end 354 | end 355 | 356 | # credo:disable-for-lines:54 Credo.Check.Refactor.CyclomaticComplexity 357 | defp convert_current_cell_value(%State{type_conversion: true} = state) do 358 | style_type = lookup_current_cell_style_type(state) 359 | 360 | case {state.cell_type,
style_type, state.value} do 361 | # Blank 362 | 363 | {_, _, value} when is_nil(value) or value == "" -> 364 | state.blank_value 365 | 366 | # Strings 367 | 368 | {"s", _, value} -> 369 | lookup_shared_string(state, value) 370 | 371 | {"inlineStr", _, value} -> 372 | value 373 | 374 | {nil, :string, value} -> 375 | value 376 | 377 | {"b", _, value} -> 378 | value |> Conversion.to_boolean() |> handle_conversion_error 379 | 380 | # Numbers 381 | 382 | {"n", :date, value} -> 383 | value |> Conversion.to_date(state.workbook.base_date) |> handle_conversion_error() 384 | 385 | {"n", type, value} when type in [:time, :date_time] -> 386 | value |> Conversion.to_date_time(state.workbook.base_date) |> handle_conversion_error() 387 | 388 | {"n", _, value} -> 389 | value |> Conversion.to_number(state.number_type) |> handle_conversion_error() 390 | 391 | {nil, :number, value} -> 392 | value |> Conversion.to_number(state.number_type) |> handle_conversion_error() 393 | 394 | {_, :percentage, value} -> 395 | case Conversion.to_number(value, state.number_type) do 396 | {:ok, number} -> 397 | Number.multiply(number, 100) 398 | 399 | _ -> 400 | "#ERROR" 401 | end 402 | 403 | # Dates/times 404 | 405 | {nil, :date, value} -> 406 | value |> Conversion.to_date(state.workbook.base_date) |> handle_conversion_error() 407 | 408 | {nil, type, value} when type in [:time, :date_time] -> 409 | value |> Conversion.to_date_time(state.workbook.base_date) |> handle_conversion_error() 410 | 411 | # Fall back 412 | 413 | {_, _, value} -> 414 | value 415 | end 416 | end 417 | 418 | defp handle_conversion_error({:ok, value}), do: value 419 | defp handle_conversion_error(_error), do: "#ERROR" 420 | 421 | defp lookup_current_cell_style_type(state) do 422 | if state.cell_style, 423 | do: lookup_index(state.workbook.style_types, state.cell_style), 424 | else: nil 425 | end 426 | 427 | defp lookup_shared_string(state, value) do 428 | lookup_index(state.workbook.shared_strings, value) 429 | end 430 | 431 | 
defp lookup_shared_formula(state, index) do 432 | state.shared_formulas |> XlsxReader.Array.lookup(index) 433 | end 434 | 435 | defp lookup_index(nil, _string_index), do: nil 436 | 437 | defp lookup_index(table, string_index) do 438 | {:ok, index} = Conversion.to_integer(string_index) 439 | XlsxReader.Array.lookup(table, index) 440 | end 441 | end 442 | -------------------------------------------------------------------------------- /lib/xlsx_reader/sheet.ex: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.Sheet do 2 | @moduledoc """ 3 | 4 | Worksheet structure. 5 | 6 | This structure only contains the information useful to identify and retrieve the sheet's actual data. 7 | 8 | - `name` - name of the sheet 9 | - `rid` - relationship ID used to retrieve the corresponding sheet in the archive 10 | - `sheet_id` - unique identifier of the sheet within the workbook (unused by XlsxReader) 11 | 12 | To access the sheet cells, see `XlsxReader.sheet/3` and `XlsxReader.sheets/2`.
13 | 14 | """ 15 | 16 | defstruct [:name, :rid, :sheet_id] 17 | 18 | @typedoc """ 19 | XLSX worksheet metadata 20 | """ 21 | @type t :: %__MODULE__{ 22 | name: String.t(), 23 | rid: String.t(), 24 | sheet_id: String.t() 25 | } 26 | end 27 | -------------------------------------------------------------------------------- /lib/xlsx_reader/styles.ex: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.Styles do 2 | @moduledoc """ 3 | Utility functions and types to deal with cell styles 4 | """ 5 | 6 | @typedoc """ 7 | Known cell styles for which type conversion is supported 8 | """ 9 | @type known_style_type :: 10 | :string | :number | :percentage | :date | :time | :date_time | :unsupported 11 | @typedoc """ 12 | Cell styles for which type conversion is supported 13 | """ 14 | @type style_type :: known_style_type() | String.t() 15 | @type style_types :: XlsxReader.Array.t(style_type()) 16 | @type custom_formats :: %{optional(String.t()) => String.t()} 17 | @typedoc """ 18 | Matches the text representation of a cell value 19 | """ 20 | @type custom_format_matcher :: String.t() | Regex.t() 21 | @type supported_custom_formats :: [{custom_format_matcher(), known_style_type()}] 22 | 23 | @known_styles %{ 24 | # General 25 | "0" => :number, 26 | # 0 27 | "1" => :number, 28 | # 0.00 29 | "2" => :number, 30 | # #,##0 31 | "3" => :number, 32 | # #,##0.00 33 | "4" => :number, 34 | # $#,##0_);($#,##0) 35 | "5" => :unsupported, 36 | # $#,##0_);[Red]($#,##0) 37 | "6" => :unsupported, 38 | # $#,##0.00_);($#,##0.00) 39 | "7" => :unsupported, 40 | # $#,##0.00_);[Red]($#,##0.00) 41 | "8" => :unsupported, 42 | # 0% 43 | "9" => :percentage, 44 | # 0.00% 45 | "10" => :percentage, 46 | # 0.00E+00 47 | "11" => :number, 48 | # # ?/? 49 | "12" => :unsupported, 50 | # # ??/??
51 | "13" => :unsupported, 52 | # mm-dd-yy 53 | "14" => :date, 54 | # d-mmm-yy 55 | "15" => :date, 56 | # d-mmm 57 | "16" => :date, 58 | # mmm-yy 59 | "17" => :date, 60 | # h:mm AM/PM 61 | "18" => :time, 62 | # h:mm:ss AM/PM 63 | "19" => :time, 64 | # h:mm 65 | "20" => :time, 66 | # h:mm:ss 67 | "21" => :time, 68 | # m/d/yy h:mm 69 | "22" => :date_time, 70 | # #,##0 ;(#,##0) 71 | "37" => :unsupported, 72 | # #,##0 ;[Red](#,##0) 73 | "38" => :unsupported, 74 | # #,##0.00;(#,##0.00) 75 | "39" => :unsupported, 76 | # #,##0.00;[Red](#,##0.00) 77 | "40" => :unsupported, 78 | # mm:ss 79 | "45" => :time, 80 | # [h]:mm:ss 81 | "46" => :time, 82 | # mmss.0 83 | "47" => :time, 84 | # ##0.0E+0 85 | "48" => :number, 86 | # @ 87 | "49" => :unsupported 88 | } 89 | 90 | @default_supported_custom_formats [ 91 | {"0.0%", :percentage}, 92 | {~r/\Add?\/mm?\/yy(?:yy)\z/, :date}, 93 | {~r/\Add?\/mm?\/yy(?:yy) hh?:mm?\z/, :date_time}, 94 | {"yyyy-mm-dd", :date}, 95 | {~r/\Ayyyy-mm-dd[T\s]hh?:mm:ssZ?\z/, :date_time}, 96 | {"m/d/yyyy", :date}, 97 | {"m/d/yyyy h:mm", :date_time}, 98 | {"hh:mm", :time} 99 | ] 100 | 101 | @doc """ 102 | Guesses the type of a cell based on its style. 103 | 104 | The type is: 105 | 106 | 1. looked-up from a list of "standard" styles, or 107 | 2. guessed from a list of default supported custom formats, or 108 | 3. guessed from a list of user-provided supported custom formats. 109 | 110 | If no type could be guessed, returns `nil`. 
111 | 112 | """ 113 | @spec get_style_type(String.t(), custom_formats(), supported_custom_formats()) :: 114 | style_type() | nil 115 | def get_style_type(num_fmt_id, custom_formats \\ %{}, supported_custom_formats \\ []) do 116 | get_known_style(num_fmt_id) || 117 | get_custom_style(num_fmt_id, custom_formats, supported_custom_formats) 118 | end 119 | 120 | defp get_known_style(num_fmt_id), 121 | do: Map.get(@known_styles, num_fmt_id) 122 | 123 | defp get_custom_style(num_fmt_id, custom_formats, supported_custom_formats) do 124 | get_style_type_from_custom_format( 125 | num_fmt_id, 126 | custom_formats, 127 | @default_supported_custom_formats 128 | ) || 129 | get_style_type_from_custom_format( 130 | num_fmt_id, 131 | custom_formats, 132 | supported_custom_formats 133 | ) 134 | end 135 | 136 | defp get_style_type_from_custom_format(num_fmt_id, custom_formats, supported_custom_format) do 137 | custom_formats 138 | |> Map.get(num_fmt_id) 139 | |> custom_format_to_style_type(supported_custom_format) 140 | end 141 | 142 | defp custom_format_to_style_type(nil, _), do: nil 143 | defp custom_format_to_style_type(_custom_format, []), do: nil 144 | 145 | defp custom_format_to_style_type(custom_format, [{%Regex{} = regex, style_type} | others]) do 146 | if Regex.match?(regex, custom_format), 147 | do: style_type, 148 | else: custom_format_to_style_type(custom_format, others) 149 | end 150 | 151 | defp custom_format_to_style_type(custom_format, [{custom_format, style_type} | _others]), 152 | do: style_type 153 | 154 | defp custom_format_to_style_type(custom_format, [_ | others]), 155 | do: custom_format_to_style_type(custom_format, others) 156 | end 157 | -------------------------------------------------------------------------------- /lib/xlsx_reader/workbook.ex: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.Workbook do 2 | @moduledoc """ 3 | 4 | Workbook structure. 
5 | 6 | - `sheets` - list of sheet metadata 7 | - `rels` - workbook relationships 8 | - `shared_strings` - list of shared strings 9 | - `style_types` - List of types indexed by style 10 | - `custom_formats` - Map of custom formats declared for this document 11 | - `base_date` - base date for all serial dates in the workbook 12 | - `options` - Map of options for the workbook. Currently includes: 13 | - `exclude_hidden_sheets?`: Whether to exclude hidden sheets in the workbook 14 | 15 | """ 16 | 17 | defstruct sheets: [], 18 | rels: nil, 19 | shared_strings: nil, 20 | style_types: nil, 21 | custom_formats: nil, 22 | base_date: nil, 23 | options: %{ 24 | exclude_hidden_sheets?: false 25 | } 26 | 27 | @typedoc """ 28 | XLSX workbook 29 | """ 30 | @type t :: %__MODULE__{ 31 | sheets: [XlsxReader.Sheet.t()], 32 | rels: nil | map(), 33 | shared_strings: nil | XlsxReader.Array.t(String.t()), 34 | style_types: nil | XlsxReader.Styles.style_types(), 35 | custom_formats: map(), 36 | base_date: nil | Date.t(), 37 | options: %{exclude_hidden_sheets?: boolean()} 38 | } 39 | end 40 | -------------------------------------------------------------------------------- /lib/xlsx_reader/zip_archive.ex: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.ZipArchive do 2 | @moduledoc false 3 | 4 | # Zip archive utility functions. 5 | # 6 | # To allow for transparent handling of archives located on disk or in memory, 7 | # you must first obtain a handle with `handle/2` which can then be used to 8 | # access the contents of the archive. 
9 | 10 | @type source :: String.t() | binary() 11 | @type source_type :: :path | :binary 12 | @type zip_handle :: {:path, String.t()} | {:binary, binary()} 13 | 14 | @doc """ 15 | 16 | Returns a `zip_handle` to be used by `list/1` and `extract/2` 17 | 18 | """ 19 | @spec handle(source(), source_type()) :: zip_handle() 20 | def handle(source, type) when type in [:path, :binary], 21 | do: {type, source} 22 | 23 | @doc """ 24 | 25 | Lists the content of the archive. 26 | 27 | """ 28 | @spec list(zip_handle()) :: {:ok, [String.t()]} | XlsxReader.error() 29 | def list(zip_handle) do 30 | with {:ok, zip} <- source(zip_handle), 31 | {:ok, entries} <- :zip.list_dir(zip) do 32 | {:ok, collect_files(entries)} 33 | else 34 | error -> 35 | translate_zip_error(error) 36 | end 37 | end 38 | 39 | @doc """ 40 | 41 | Extracts a file from the archive 42 | 43 | """ 44 | @spec extract(zip_handle(), String.t()) :: {:ok, binary()} | XlsxReader.error() 45 | def extract(zip_handle, file) do 46 | with {:ok, zip} <- source(zip_handle), 47 | {:ok, [{_, contents}]} <- :zip.extract(zip, extract_options(file)) do 48 | {:ok, contents} 49 | else 50 | {:ok, []} -> 51 | {:error, "file #{inspect(file)} not found in archive"} 52 | 53 | error -> 54 | translate_zip_error(error) 55 | end 56 | end 57 | 58 | ## 59 | 60 | defp source({:path, path}) do 61 | {:ok, String.to_charlist(path)} 62 | end 63 | 64 | defp source({:binary, binary}) do 65 | {:ok, binary} 66 | end 67 | 68 | defp collect_files(entries) do 69 | entries 70 | |> Enum.reduce([], fn entry, acc -> 71 | case entry do 72 | {:zip_file, path, _, _, _, _} -> 73 | [to_string(path) | acc] 74 | 75 | _ -> 76 | acc 77 | end 78 | end) 79 | |> Enum.sort() 80 | end 81 | 82 | def extract_options(file) do 83 | [{:file_list, [String.to_charlist(file)]}, :memory] 84 | end 85 | 86 | defp translate_zip_error({:error, :enoent}) do 87 | {:error, "file not found"} 88 | end 89 | 90 | defp translate_zip_error({:error, code}) 91 | when code in [:einval, :bad_eocd, 
:bad_central_directory, :eisdir] do 92 | {:error, "invalid zip file"} 93 | end 94 | end 95 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.MixProject do 2 | use Mix.Project 3 | 4 | def project do 5 | [ 6 | app: :xlsx_reader, 7 | version: "0.8.8", 8 | elixir: "~> 1.10", 9 | start_permanent: Mix.env() == :prod, 10 | deps: deps(), 11 | description: description(), 12 | package: package(), 13 | dialyzer: [ 14 | plt_ignore_apps: [:saxy] 15 | ], 16 | # Docs 17 | homepage_url: "https://github.com/xavier/xlsx_reader", 18 | source_url: "https://github.com/xavier/xlsx_reader", 19 | docs: [ 20 | main: "XlsxReader", 21 | logo: "assets/logo.png", 22 | extras: ["README.md"] 23 | ] 24 | ] 25 | end 26 | 27 | # Run "mix help compile.app" to learn about applications. 28 | def application do 29 | [ 30 | extra_applications: [:logger] 31 | ] 32 | end 33 | 34 | # Run "mix help deps" to learn about dependencies. 
35 | defp deps do 36 | [ 37 | {:saxy, "~> 1.5"}, 38 | {:credo, "~> 1.4.0", only: [:dev, :test], runtime: false}, 39 | {:decimal, "~> 1.0 or ~> 2.0", optional: true}, 40 | {:dialyxir, "~> 1.0.0", only: :dev, runtime: false}, 41 | {:ex_doc, "~> 0.22", only: :dev, runtime: false}, 42 | {:elixlsx, "~> 0.4.0", only: [:dev, :test], runtime: false}, 43 | {:benchee, "~> 1.0", only: :dev} 44 | ] 45 | end 46 | 47 | defp description do 48 | "XLSX file reader for Elixir" 49 | end 50 | 51 | defp package do 52 | %{ 53 | licenses: ["Apache-2.0"], 54 | links: %{"GitHub" => "https://github.com/xavier/xlsx_reader"} 55 | } 56 | end 57 | end 58 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "benchee": {:hex, :benchee, "1.0.1", "66b211f9bfd84bd97e6d1beaddf8fc2312aaabe192f776e8931cb0c16f53a521", [:mix], [{:deep_merge, "~> 1.0", [hex: :deep_merge, repo: "hexpm", optional: false]}], "hexpm", "3ad58ae787e9c7c94dd7ceda3b587ec2c64604563e049b2a0e8baafae832addb"}, 3 | "bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm", "7af5c7e09fe1d40f76c8e4f9dd2be7cebd83909f31fee7cd0e9eadc567da8353"}, 4 | "credo": {:hex, :credo, "1.4.0", "92339d4cbadd1e88b5ee43d427b639b68a11071b6f73854e33638e30a0ea11f5", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "1fd3b70dce216574ce3c18bdf510b57e7c4c85c2ec9cad4bff854abaf7e58658"}, 5 | "decimal": {:hex, :decimal, "1.8.1", "a4ef3f5f3428bdbc0d35374029ffcf4ede8533536fa79896dd450168d9acdf3c", [:mix], [], "hexpm", "3cb154b00225ac687f6cbd4acc4b7960027c757a5152b369923ead9ddbca7aec"}, 6 | "deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm", 
"ce708e5f094b9cd4e8f2be4f00d2f4250c4095be93f8cd6d018c753894885430"}, 7 | "dialyxir": {:hex, :dialyxir, "1.0.0", "6a1fa629f7881a9f5aaf3a78f094b2a51a0357c843871b8bc98824e7342d00a5", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "aeb06588145fac14ca08d8061a142d52753dbc2cf7f0d00fc1013f53f8654654"}, 8 | "earmark": {:hex, :earmark, "1.4.5", "62ffd3bd7722fb7a7b1ecd2419ea0b458c356e7168c1f5d65caf09b4fbdd13c8", [:mix], [], "hexpm"}, 9 | "earmark_parser": {:hex, :earmark_parser, "1.4.39", "424642f8335b05bb9eb611aa1564c148a8ee35c9c8a8bba6e129d51a3e3c6769", [:mix], [], "hexpm", "06553a88d1f1846da9ef066b87b57c6f605552cfbe40d20bd8d59cc6bde41944"}, 10 | "elixlsx": {:hex, :elixlsx, "0.4.2", "37dd3ada960eb1fd32ddb8736a984787a9a22734ebb305cde0c437805938dee3", [:mix], [], "hexpm", "6e42e53f193275adeafd1252f8c07cb0dabf50da356b4d75b12a8ed8e5322733"}, 11 | "erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"}, 12 | "ex_doc": {:hex, :ex_doc, "0.32.1", "21e40f939515373bcdc9cffe65f3b3543f05015ac6c3d01d991874129d173420", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.1", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "5142c9db521f106d61ff33250f779807ed2a88620e472ac95dc7d59c380113da"}, 13 | "jason": {:hex, :jason, "1.2.0", "10043418c42d2493d0ee212d3fddd25d7ffe484380afad769a0a38795938e448", [:mix], [{:decimal, "~> 1.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "116747dbe057794c3a3e4e143b7c8390b29f634e16c78a7f59ba75bfa6852e7f"}, 14 | "makeup": {:hex, :makeup, "1.1.1", "fa0bc768698053b2b3869fa8a62616501ff9d11a562f3ce39580d60860c3a55e", [:mix], 
[{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "5dc62fbdd0de44de194898b6710692490be74baa02d9d108bc29f007783b0b48"}, 15 | "makeup_elixir": {:hex, :makeup_elixir, "0.16.2", "627e84b8e8bf22e60a2579dad15067c755531fea049ae26ef1020cad58fe9578", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "41193978704763f6bbe6cc2758b84909e62984c7752b3784bd3c218bb341706b"}, 16 | "makeup_erlang": {:hex, :makeup_erlang, "0.1.1", "3fcb7f09eb9d98dc4d208f49cc955a34218fc41ff6b84df7c75b3e6e533cc65f", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "174d0809e98a4ef0b3309256cbf97101c6ec01c4ab0b23e926a9e17df2077cbb"}, 17 | "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"}, 18 | "saxy": {:hex, :saxy, "1.5.0", "0141127f2d042856f135fb2d94e0beecda7a2306f47546dbc6411fc5b07e28bf", [:mix], [], "hexpm", "ea7bb6328fbd1f2aceffa3ec6090bfb18c85aadf0f8e5030905e84235861cf89"}, 19 | } 20 | -------------------------------------------------------------------------------- /test/compatibility_test.exs: -------------------------------------------------------------------------------- 1 | # credo:disable-for-this-file Credo.Check.Readability.LargeNumbers 2 | defmodule CompatibilityTest do 3 | use ExUnit.Case 4 | 5 | describe "elixlsx" do 6 | test "file generated by elixlsx" do 7 | test_row = [ 8 | "string1", 9 | "", 10 | nil, 11 | :empty, 12 | "string1", 13 | "string2", 14 | 123, 15 | true, 16 | false 17 | ] 18 | 19 | workbook = %Elixlsx.Workbook{ 20 | sheets: [ 21 | %Elixlsx.Sheet{ 22 | name: "sheet1", 23 | rows: [ 24 | test_row 25 | ] 26 | }, 27 | %Elixlsx.Sheet{name: "sheet2", rows: []} 28 | ] 29 | } 30 | 31 | assert {:ok, 
{_filename, zip_binary}} = Elixlsx.write_to_memory(workbook, "test.xlsx") 32 | 33 | assert {:ok, package} = XlsxReader.open(zip_binary, source: :binary) 34 | 35 | assert ["sheet1", "sheet2"] = XlsxReader.sheet_names(package) 36 | 37 | assert {:ok, 38 | [ 39 | [ 40 | "string1", 41 | "", 42 | nil, 43 | nil, 44 | "string1", 45 | "string2", 46 | 123.0, 47 | true, 48 | false 49 | ] 50 | ]} = XlsxReader.sheet(package, "sheet1", blank_value: nil) 51 | 52 | assert {:ok, []} = XlsxReader.sheet(package, "sheet2") 53 | end 54 | 55 | test "sheets with dates and datetimes" do 56 | s1 = 57 | Elixlsx.Sheet.with_name("1") 58 | |> Elixlsx.Sheet.set_cell("A1", {{2015, 11, 30}, {21, 20, 38}}, datetime: true) 59 | 60 | s2 = 61 | Elixlsx.Sheet.with_name("2") 62 | |> Elixlsx.Sheet.set_cell("A1", {{2015, 11, 30}, {21, 20, 38}}, yyyymmdd: true) 63 | 64 | wk = %Elixlsx.Workbook{sheets: [s1, s2]} 65 | 66 | {:ok, {_filename, bin}} = Elixlsx.write_to_memory(wk, "test.xlsx") 67 | {:ok, package} = XlsxReader.open(bin, source: :binary) 68 | {:ok, sheets} = XlsxReader.sheets(package) 69 | 70 | assert [{"1", [[~N[2015-11-30 21:20:38]]]}, {"2", [[~D[2015-11-30]]]}] = sheets 71 | end 72 | end 73 | 74 | test "google_spreadsheet.xlsx" do 75 | assert {:ok, package} = 76 | XlsxReader.open( 77 | TestFixtures.path("google_spreadsheet.xlsx"), 78 | supported_custom_formats: [{"mmmm d", :date}] 79 | ) 80 | 81 | assert ["Sheet1"] = XlsxReader.sheet_names(package) 82 | 83 | assert {:ok, 84 | [ 85 | ["integer", 123.0], 86 | ["float", 123.456], 87 | ["percentage", 12.5], 88 | ["date", ~D[2019-11-15]], 89 | ["time", ~N[1899-12-30 11:45:00]], 90 | ["ticked\n", true], 91 | ["unticked", false], 92 | ["image", ""] 93 | ]} = XlsxReader.sheet(package, "Sheet1") 94 | end 95 | 96 | test "merged.xlsx" do 97 | assert {:ok, package} = XlsxReader.open(TestFixtures.path("merged.xlsx")) 98 | 99 | assert ["merged"] = XlsxReader.sheet_names(package) 100 | 101 | assert {:ok, 102 | [ 103 | ["horizontal", "", "vertical"], 104 | 
["horizontal + vertical", "", ""], 105 | ["", "", "none"] 106 | ]} = XlsxReader.sheet(package, "merged") 107 | end 108 | 109 | test "file with omitted row elements" do 110 | assert {:ok, package} = XlsxReader.open(TestFixtures.path("omitted_row.xlsx")) 111 | 112 | assert {:ok, [["", ""], ["", "b2"]]} = XlsxReader.sheet(package, "Sheet1", empty_rows: true) 113 | assert {:ok, [["", "b2"]]} = XlsxReader.sheet(package, "Sheet1", empty_rows: false) 114 | end 115 | 116 | test "cells with missing attributes" do 117 | # The worksheet has a mix of "s", "t" and no attributes 118 | assert {:ok, package} = XlsxReader.open(TestFixtures.path("cells_missing_attributes.xlsx")) 119 | 120 | assert { 121 | :ok, 122 | [ 123 | [ 124 | "Number", 125 | "Name", 126 | "Effective Date", 127 | "ID Number", 128 | "Operator", 129 | "County", 130 | "State", 131 | "Product", 132 | "Description", 133 | "Acres" 134 | ], 135 | [ 136 | "Number", 137 | "Name", 138 | # Date with s="5" is converted 139 | ~D[2024-07-01], 140 | # This cell has no attributes at all, it's treated as a string 141 | # and not converted to a date because all cell attributes are 142 | # properly reset 143 | "5575789630", 144 | "Operator", 145 | "County", 146 | "State", 147 | "Product", 148 | "Description", 149 | "Acres" 150 | ] 151 | ] 152 | } = XlsxReader.sheet(package, "Sheet 1") 153 | end 154 | end 155 | -------------------------------------------------------------------------------- /test/fixtures/cells_missing_attributes.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xavier/xlsx_reader/411a48e8234286d89e5a64fef18918e0b10e437c/test/fixtures/cells_missing_attributes.xlsx -------------------------------------------------------------------------------- /test/fixtures/custom_dates.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xavier/xlsx_reader/411a48e8234286d89e5a64fef18918e0b10e437c/test/fixtures/custom_dates.xlsx -------------------------------------------------------------------------------- /test/fixtures/google_spreadsheet.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xavier/xlsx_reader/411a48e8234286d89e5a64fef18918e0b10e437c/test/fixtures/google_spreadsheet.xlsx -------------------------------------------------------------------------------- /test/fixtures/has_formulas.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xavier/xlsx_reader/411a48e8234286d89e5a64fef18918e0b10e437c/test/fixtures/has_formulas.xlsx -------------------------------------------------------------------------------- /test/fixtures/hidden_sheets.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xavier/xlsx_reader/411a48e8234286d89e5a64fef18918e0b10e437c/test/fixtures/hidden_sheets.xlsx -------------------------------------------------------------------------------- /test/fixtures/merged.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xavier/xlsx_reader/411a48e8234286d89e5a64fef18918e0b10e437c/test/fixtures/merged.xlsx -------------------------------------------------------------------------------- /test/fixtures/not_a_zip.zip: -------------------------------------------------------------------------------- 1 | PKBOGUS 2 | -------------------------------------------------------------------------------- /test/fixtures/omitted_row.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xavier/xlsx_reader/411a48e8234286d89e5a64fef18918e0b10e437c/test/fixtures/omitted_row.xlsx 
-------------------------------------------------------------------------------- /test/fixtures/package/[Content_Types].xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /test/fixtures/package/_rels/.rels: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /test/fixtures/package/docProps/app.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /test/fixtures/package/docProps/core.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /test/fixtures/package/xl/_rels/workbook.xml.rels: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /test/fixtures/package/xl/sharedStrings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | A 5 | 6 | 7 | B 8 | 9 | 10 | C 11 | 12 | 13 | D 14 | 15 | 16 | E 17 | 18 | 19 | F 20 | 21 | 22 | G 23 | 24 | 25 | some 26 | 27 | 28 | test 29 | 30 | 31 | date 32 | 33 | 34 | datetime 35 | 36 | 37 | time 38 | 39 | 40 | percentage 41 | 42 | 43 | money chf 44 | 45 | 46 | money usd 47 | 48 | 49 | 9999,99 USD 50 | 51 | 52 | ticked 53 | 54 | 55 | not ticked 56 | 57 | 58 | hyperlink 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | https://elixir-lang.org/ 69 | 70 | 71 | 72 | 73 | 74 | 75 | - 76 | 77 | 78 | 
-------------------------------------------------------------------------------- /test/fixtures/package/xl/styles.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | -------------------------------------------------------------------------------- /test/fixtures/package/xl/theme/theme1.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 
| 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 | 517 | 518 | 519 | 520 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 | 553 | 554 | 555 | 556 | 557 | 558 | 559 | 560 | 561 | 562 | 563 | 564 | 565 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | 573 | 574 | 575 | 576 | 577 | 578 | 579 | 580 | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | 590 | 591 | 592 | 593 | 594 | 595 | 596 | 597 | 598 | 599 | 600 | 601 | 602 | 603 | 604 | 605 | 606 | 607 | 608 | 609 | 610 | 611 | 612 | 613 | 614 | 615 | 616 | 617 | 618 | 619 | 620 | 621 | 622 | 623 | 624 | 625 | 626 | 627 | 628 | 629 | 630 | 631 | 632 | 633 | 634 | 635 | 636 | 637 | 638 | 639 | 640 | 641 | 642 | 643 | 644 | 645 | 646 | 647 | 648 | 649 | 650 | 651 | 652 | 653 | 654 | 655 | 656 | 657 | 658 | 659 | 660 | 661 | 662 | 663 | 664 | 665 | 666 | 667 | 668 | 669 | 670 | 671 | 
672 | 673 | 674 | 675 | 676 | 677 | 678 | 679 | 680 | 681 | 682 | 683 | 684 | 685 | 686 | 687 | 688 | 689 | 690 | 691 | 692 | 693 | 694 | 695 | 696 | 697 | 698 | 699 | 700 | 701 | 702 | 703 | 704 | 705 | 706 | 707 | 708 | 709 | 710 | 711 | 712 | 713 | 714 | 715 | 716 | 717 | 718 | 719 | 720 | 721 | 722 | 723 | 724 | 725 | 726 | 727 | 728 | 729 | 730 | 731 | 732 | 733 | 734 | 735 | 736 | 737 | 738 | 739 | 740 | 741 | 742 | 743 | 744 | 745 | 746 | 747 | 748 | 749 | 750 | 751 | 752 | 753 | 754 | 755 | 756 | 757 | 758 | 759 | 760 | 761 | 762 | 763 | 764 | 765 | 766 | 767 | 768 | 769 | 770 | 771 | 772 | 773 | 774 | 775 | 776 | 777 | 778 | 779 | 780 | 781 | 782 | 783 | 784 | 785 | 786 | 787 | 788 | 789 | 790 | 791 | 792 | 793 | 794 | 795 | 796 | 797 | 798 | 799 | 800 | 801 | 802 | 803 | 804 | 805 | 806 | 807 | 808 | 809 | 810 | 811 | 812 | 813 | 814 | 815 | 816 | 817 | 818 | 819 | 820 | 821 | 822 | 823 | 824 | 825 | 826 | 827 | 828 | 829 | 830 | 831 | 832 | 833 | 834 | 835 | 836 | 837 | 838 | 839 | 840 | 841 | 842 | 843 | 844 | 845 | 846 | 847 | 848 | 849 | 850 | 851 | 852 | 853 | 854 | 855 | 856 | 857 | 858 | 859 | 860 | 861 | 862 | 863 | 864 | 865 | 866 | 867 | 868 | 869 | 870 | 871 | 872 | 873 | 874 | 875 | 876 | 877 | 878 | 879 | 880 | 881 | 882 | 883 | 884 | 885 | 886 | 887 | 888 | 889 | 890 | 891 | 892 | 893 | 894 | 895 | 896 | 897 | 898 | 899 | 900 | 901 | 902 | 903 | 904 | 905 | 906 | 907 | 908 | 909 | 910 | 911 | 912 | 913 | 914 | 915 | 916 | 917 | 918 | 919 | 920 | 921 | 922 | 923 | 924 | 925 | 926 | 927 | 928 | 929 | 930 | 931 | 932 | 933 | 934 | 935 | 936 | 937 | 938 | 939 | 940 | 941 | 942 | 943 | 944 | 945 | 946 | 947 | 948 | 949 | 950 | 951 | 952 | 953 | 954 | 955 | 956 | 957 | 958 | 959 | 960 | 961 | 962 | 963 | 964 | 965 | 966 | 967 | 968 | 969 | 970 | 971 | 972 | 973 | 974 | 975 | 976 | 977 | 978 | 979 | 980 | 981 | 982 | 983 | 984 | 985 | 986 | 987 | 988 | 989 | 990 | 991 | 992 | 993 | 994 | 995 | 996 | 997 | 998 | 999 | 1000 | 1001 | 1002 | 1003 | 
1004 | 1005 | 1006 | 1007 | 1008 | 1009 | 1010 | 1011 | 1012 | 1013 | 1014 | 1015 | 1016 | 1017 | 1018 | 1019 | 1020 | 1021 | 1022 | 1023 | 1024 | 1025 | 1026 | 1027 | 1028 | 1029 | 1030 | 1031 | 1032 | 1033 | 1034 | 1035 | 1036 | 1037 | 1038 | 1039 | 1040 | 1041 | 1042 | 1043 | 1044 | 1045 | 1046 | 1047 | 1048 | 1049 | 1050 | 1051 | 1052 | 1053 | -------------------------------------------------------------------------------- /test/fixtures/package/xl/workbook.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /test/fixtures/package/xl/worksheets/_rels/sheet3.xml.rels: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /test/fixtures/package/xl/worksheets/sheet1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 0 16 | 17 | 18 | 1 19 | 20 | 21 | 2 22 | 23 | 24 | 3 25 | 26 | 27 | 4 28 | 29 | 30 | 5 31 | 32 | 33 | 6 34 | 35 | 36 | 37 | 38 | 1 39 | 40 | 41 | 2 42 | 43 | 44 | 3 45 | 46 | 47 | 4 48 | 49 | 50 | 5 51 | 52 | 53 | 6 54 | 55 | 56 | 7 57 | 58 | 59 | 60 | 61 | 2 62 | 63 | 64 | $A3*B2 65 | 4 66 | 67 | 68 | $A3*C2 69 | 6 70 | 71 | 72 | $A3*D2 73 | 8 74 | 75 | 76 | $A3*E2 77 | 10 78 | 79 | 80 | $A3*F2 81 | 12 82 | 83 | 84 | $A3*G2 85 | 14 86 | 87 | 88 | 89 | 90 | 3 91 | 92 | 93 | $A4*B3 94 | 12 95 | 96 | 97 | $A4*C3 98 | 18 99 | 100 | 101 | $A4*D3 102 | 24 103 | 104 | 105 | $A4*E3 106 | 30 107 | 108 | 109 | $A4*F3 110 | 36 111 | 112 | 113 | $A4*G3 114 | 42 115 | 116 | 117 | 118 | 119 | 4 120 | 121 | 122 | $A5*B4 123 | 48 124 | 125 | 126 | $A5*C4 127 | 72 128 | 129 | 130 | $A5*D4 131 | 96 132 | 133 | 134 | $A5*E4 135 | 120 136 | 137 | 138 | $A5*F4 139 | 144 140 | 141 | 
142 | $A5*G4 143 | 168 144 | 145 | 146 | 147 | 148 | 5 149 | 150 | 151 | $A6*B5 152 | 240 153 | 154 | 155 | $A6*C5 156 | 360 157 | 158 | 159 | $A6*D5 160 | 480 161 | 162 | 163 | $A6*E5 164 | 600 165 | 166 | 167 | $A6*F5 168 | 720 169 | 170 | 171 | $A6*G5 172 | 840 173 | 174 | 175 | 176 | 177 | 6 178 | 179 | 180 | $A7*B6 181 | 1440 182 | 183 | 184 | $A7*C6 185 | 2160 186 | 187 | 188 | $A7*D6 189 | 2880 190 | 191 | 192 | $A7*E6 193 | 3600 194 | 195 | 196 | $A7*F6 197 | 4320 198 | 199 | 200 | $A7*G6 201 | 5040 202 | 203 | 204 | 205 | 206 | 7 207 | 208 | 209 | $A8*B7 210 | 10080 211 | 212 | 213 | $A8*C7 214 | 15120 215 | 216 | 217 | $A8*D7 218 | 20160 219 | 220 | 221 | $A8*E7 222 | 25200 223 | 224 | 225 | $A8*F7 226 | 30240 227 | 228 | 229 | $A8*G7 230 | 35280 231 | 232 | 233 | 234 | 235 | 8 236 | 237 | 238 | $A9*B8 239 | 80640 240 | 241 | 242 | $A9*C8 243 | 120960 244 | 245 | 246 | $A9*D8 247 | 161280 248 | 249 | 250 | $A9*E8 251 | 201600 252 | 253 | 254 | $A9*F8 255 | 241920 256 | 257 | 258 | $A9*G8 259 | 282240 260 | 261 | 262 | 263 | 264 | 9 265 | 266 | 267 | $A10*B9 268 | 725760 269 | 270 | 271 | $A10*C9 272 | 1088640 273 | 274 | 275 | $A10*D9 276 | 1451520 277 | 278 | 279 | $A10*E9 280 | 1814400 281 | 282 | 283 | $A10*F9 284 | 2177280 285 | 286 | 287 | $A10*G9 288 | 2540160 289 | 290 | 291 | 292 | 293 | 10 294 | 295 | 296 | $A11*B10 297 | 7257600 298 | 299 | 300 | $A11*C10 301 | 10886400 302 | 303 | 304 | $A11*D10 305 | 14515200 306 | 307 | 308 | $A11*E10 309 | 18144000 310 | 311 | 312 | $A11*F10 313 | 21772800 314 | 315 | 316 | $A11*G10 317 | 25401600 318 | 319 | 320 | 321 | 322 | 11 323 | 324 | 325 | $A12*B11 326 | 79833600 327 | 328 | 329 | $A12*C11 330 | 119750400 331 | 332 | 333 | $A12*D11 334 | 159667200 335 | 336 | 337 | $A12*E11 338 | 199584000 339 | 340 | 341 | $A12*F11 342 | 239500800 343 | 344 | 345 | $A12*G11 346 | 279417600 347 | 348 | 349 | 350 | 351 | 12 352 | 353 | 354 | $A13*B12 355 | 958003200 356 | 357 | 358 | $A13*C12 359 | 1437004800 360 | 
361 | 362 | $A13*D12 363 | 1916006400 364 | 365 | 366 | $A13*E12 367 | 2395008000 368 | 369 | 370 | $A13*F12 371 | 2874009600 372 | 373 | 374 | $A13*G12 375 | 3353011200 376 | 377 | 378 | 379 | 380 | 13 381 | 382 | 383 | $A14*B13 384 | 12454041600 385 | 386 | 387 | $A14*C13 388 | 18681062400 389 | 390 | 391 | $A14*D13 392 | 24908083200 393 | 394 | 395 | $A14*E13 396 | 31135104000 397 | 398 | 399 | $A14*F13 400 | 37362124800 401 | 402 | 403 | $A14*G13 404 | 43589145600 405 | 406 | 407 | 408 | 409 | 14 410 | 411 | 412 | $A15*B14 413 | 174356582400 414 | 415 | 416 | $A15*C14 417 | 261534873600 418 | 419 | 420 | $A15*D14 421 | 348713164800 422 | 423 | 424 | $A15*E14 425 | 435891456000 426 | 427 | 428 | $A15*F14 429 | 523069747200 430 | 431 | 432 | $A15*G14 433 | 610248038400 434 | 435 | 436 | 437 | 438 | 15 439 | 440 | 441 | $A16*B15 442 | 2615348736000 443 | 444 | 445 | $A16*C15 446 | 3923023104000 447 | 448 | 449 | $A16*D15 450 | 5230697472000 451 | 452 | 453 | $A16*E15 454 | 6538371840000 455 | 456 | 457 | $A16*F15 458 | 7846046208000 459 | 460 | 461 | $A16*G15 462 | 9153720576000 463 | 464 | 465 | 466 | 467 | 16 468 | 469 | 470 | $A17*B16 471 | 41845579776000 472 | 473 | 474 | $A17*C16 475 | 62768369664000 476 | 477 | 478 | $A17*D16 479 | 83691159552000 480 | 481 | 482 | $A17*E16 483 | 104613949440000 484 | 485 | 486 | $A17*F16 487 | 125536739328000 488 | 489 | 490 | $A17*G16 491 | 146459529216000 492 | 493 | 494 | 495 | 496 | 17 497 | 498 | 499 | $A18*B17 500 | 711374856192000 501 | 502 | 503 | $A18*C17 504 | 1067062284288000 505 | 506 | 507 | $A18*D17 508 | 1422749712384000 509 | 510 | 511 | $A18*E17 512 | 1778437140480000 513 | 514 | 515 | $A18*F17 516 | 2134124568576000 517 | 518 | 519 | $A18*G17 520 | 2489811996672000 521 | 522 | 523 | 524 | 525 | 18 526 | 527 | 528 | $A19*B18 529 | 1.2804747411456e+16 530 | 531 | 532 | $A19*C18 533 | 1.9207121117184e+16 534 | 535 | 536 | $A19*D18 537 | 2.5609494822912e+16 538 | 539 | 540 | $A19*E18 541 | 3.201186852864e+16 
542 | 543 | 544 | $A19*F18 545 | 3.8414242234368e+16 546 | 547 | 548 | $A19*G18 549 | 4.4816615940096e+16 550 | 551 | 552 | 553 | 554 | 19 555 | 556 | 557 | $A20*B19 558 | 2.43290200817664e+17 559 | 560 | 561 | $A20*C19 562 | 3.64935301226496e+17 563 | 564 | 565 | $A20*D19 566 | 4.86580401635328e+17 567 | 568 | 569 | $A20*E19 570 | 6.0822550204416e+17 571 | 572 | 573 | $A20*F19 574 | 7.29870602452992e+17 575 | 576 | 577 | $A20*G19 578 | 8.51515702861824e+17 579 | 580 | 581 | 582 | 583 | 20 584 | 585 | 586 | $A21*B20 587 | 4.86580401635328e+18 588 | 589 | 590 | $A21*C20 591 | 7.29870602452992e+18 592 | 593 | 594 | $A21*D20 595 | 9.73160803270656e+18 596 | 597 | 598 | $A21*E20 599 | 1.21645100408832e+19 600 | 601 | 602 | $A21*F20 603 | 1.45974120490598e+19 604 | 605 | 606 | $A21*G20 607 | 1.70303140572365e+19 608 | 609 | 610 | 611 | 612 | 21 613 | 614 | 615 | $A22*B21 616 | 1.02181884343419e+20 617 | 618 | 619 | $A22*C21 620 | 1.53272826515128e+20 621 | 622 | 623 | $A22*D21 624 | 2.04363768686838e+20 625 | 626 | 627 | $A22*E21 628 | 2.55454710858547e+20 629 | 630 | 631 | $A22*F21 632 | 3.06545653030256e+20 633 | 634 | 635 | $A22*G21 636 | 3.57636595201967e+20 637 | 638 | 639 | 640 | 641 | 642 | 643 | &C&"Helvetica Neue,Regular"&12&K000000&P 644 | 645 | 646 | -------------------------------------------------------------------------------- /test/fixtures/package/xl/worksheets/sheet2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 7 26 | 27 | 28 | 8 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | &C&"Helvetica Neue,Regular"&12&K000000&P 95 | 
96 | 97 | -------------------------------------------------------------------------------- /test/fixtures/package/xl/worksheets/sheet3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 9 24 | 25 | 26 | DATE(2019,11,15) 27 | 43784 28 | 29 | 30 | 31 | 32 | 10 33 | 34 | 35 | NOW() 36 | 43793.462650462963 37 | 38 | 39 | 40 | 41 | 11 42 | 43 | 44 | TIME(18,45,12) 45 | 1462.781388888889 46 | 47 | 48 | 49 | 50 | 12 51 | 52 | 53 | 0.125 54 | 55 | 56 | 57 | 58 | 13 59 | 60 | 61 | 100 62 | 63 | 64 | 65 | 66 | 14 67 | 68 | 69 | 15 70 | 71 | 72 | 73 | 74 | 16 75 | 76 | 77 | 1 78 | 79 | 80 | 81 | 82 | 17 83 | 84 | 85 | 0 86 | 87 | 88 | 89 | 90 | 18 91 | 92 | 93 | 19 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | &C&"Helvetica Neue,Regular"&12&K000000&P 104 | 105 | 106 | -------------------------------------------------------------------------------- /test/fixtures/package/xl/worksheets/sheet4.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 28 | 42 | 43 | 44 | 45 | 46 | 47 | &C&"Times New Roman,Regular"&12&A 48 | &C&"Times New Roman,Regular"&12Página &P 49 | 50 | -------------------------------------------------------------------------------- /test/fixtures/package/xl/worksheets/sheet5.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | Hello 21 | 22 | 0.0 23 | 24 | 25 | 26 | 27 | 28 | &C&"Helvetica Neue,Regular"&12&K000000&P 29 | 30 | 31 | -------------------------------------------------------------------------------- /test/fixtures/test.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xavier/xlsx_reader/411a48e8234286d89e5a64fef18918e0b10e437c/test/fixtures/test.xlsx -------------------------------------------------------------------------------- /test/fixtures/test.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xavier/xlsx_reader/411a48e8234286d89e5a64fef18918e0b10e437c/test/fixtures/test.zip -------------------------------------------------------------------------------- /test/fixtures/xml/sharedStringsWithRichText.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Cell A1 6 | 7 | 8 | Cell B1 9 | 10 | 11 | My Cell 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | Cell 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | A2 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | Cell 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | B2 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /test/fixtures/xml/sharedStringsWithXmlSpacePreserve.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | with spaces 5 | 6 | 7 | without spaces 8 | 9 | 10 | without spaces 11 | 12 | 13 | -------------------------------------------------------------------------------- /test/fixtures/xml/worksheetWithInlineStr.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | inline string 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /test/fixtures/xml/worksheetWithSharedFormulas.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 1 8 | 9 | 10 | 11 | 12 | SUM(A1:A3) 13 | 14 | 15 | 6 16 | 17 | 18 | 19 | 20 | 21 | 22 | 2 23 | 
24 | 25 | 26 | 27 | 28 | 6 29 | 30 | 31 | 32 | 33 | 34 | 35 | 3 36 | 37 | 38 | 39 | 40 | 41 | 6 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | ExUnit.start() 2 | 3 | defmodule TestFixtures do 4 | def read!(relative_path) do 5 | relative_path 6 | |> path 7 | |> File.read!() 8 | end 9 | 10 | def path(relative_path) do 11 | Path.join([__DIR__, "fixtures", relative_path]) 12 | end 13 | end 14 | -------------------------------------------------------------------------------- /test/xlsx_reader/cell_reference_test.exs: -------------------------------------------------------------------------------- 1 | # credo:disable-for-this-file Credo.Check.Warning.OperationWithConstantResult 2 | defmodule XlsxReader.CellReferenceTest do 3 | use ExUnit.Case 4 | 5 | alias XlsxReader.CellReference 6 | 7 | describe ".parse/1" do 8 | test "returns a {col, row} tuple" do 9 | assert {1, 1} == CellReference.parse("A1") 10 | assert {1, 42} == CellReference.parse("A42") 11 | assert {2, 1} == CellReference.parse("B1") 12 | assert {26, 3} == CellReference.parse("Z3") 13 | assert {26 * 1 + 1, 123} == CellReference.parse("AA123") 14 | assert {26 * 1 + 26, 123} == CellReference.parse("AZ123") 15 | assert {26 * 2 + 1, 123} == CellReference.parse("BA123") 16 | assert {26 * 26 + 1, 123} == CellReference.parse("ZA123") 17 | assert {26 * 26 + 26, 123} == CellReference.parse("ZZ123") 18 | assert {26 * 26 * 1 + 26 * 1 + 1, 123_456} == CellReference.parse("AAA123456") 19 | assert {26 * 26 * 1 + 26 * 1 + 26, 123_456} == CellReference.parse("AAZ123456") 20 | assert {26 * 26 * 1 + 26 * 2 + 1, 123_456} == CellReference.parse("ABA123456") 21 | assert {26 * 26 * 2 + 26 * 26 + 1, 123_456} == CellReference.parse("BZA123456") 22 | end 23 | 24 | test "returns error if the reference is invalid" do 25 | assert :error == CellReference.parse("1A") 26 | 
assert :error == CellReference.parse("$A1") 27 | assert :error == CellReference.parse("A$1") 28 | assert :error == CellReference.parse("$A$1") 29 | assert :error == CellReference.parse("bogus") 30 | end 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /test/xlsx_reader/conversion_test.exs: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.ConversionTest do 2 | use ExUnit.Case 3 | doctest XlsxReader.Conversion 4 | end 5 | -------------------------------------------------------------------------------- /test/xlsx_reader/custom_dates_test.exs: -------------------------------------------------------------------------------- 1 | defmodule CustomDatesTest do 2 | @moduledoc """ 3 | Ensure that various custom date and 4 | datetime formats are parsed into 5 | Date or NaiveDateTime as appropriate. 6 | """ 7 | 8 | use ExUnit.Case 9 | 10 | test "custom_dates.xlsx" do 11 | assert {:ok, package} = XlsxReader.open(TestFixtures.path("custom_dates.xlsx")) 12 | 13 | assert ["Sheet1"] = XlsxReader.sheet_names(package) 14 | 15 | assert {:ok, 16 | [ 17 | ["ISO8601 Date", ~D[2020-05-01]], 18 | ["ISO8601 Datetime", ~N[2020-05-01 12:45:59]], 19 | ["US Date", ~D[2020-05-01]], 20 | ["US Date", ~D[2020-12-31]], 21 | ["US Datetime", ~N[2020-05-01 01:23:00]], 22 | ["US Datetime", ~N[2020-05-01 12:23:00]] 23 | ]} = XlsxReader.sheet(package, "Sheet1") 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /test/xlsx_reader/package_test.exs: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.PackageLoaderTest do 2 | use ExUnit.Case 3 | 4 | alias XlsxReader.{PackageLoader, ZipArchive} 5 | 6 | describe "open/1" do 7 | test "opens a xlsx file" do 8 | zip_handle = ZipArchive.handle(TestFixtures.path("test.xlsx"), :path) 9 | 10 | assert {:ok, package} = PackageLoader.open(zip_handle) 11 | 
12 | assert package.zip_handle == zip_handle 13 | end 14 | 15 | test "rejects non-xlsx file" do 16 | zip_handle = ZipArchive.handle(TestFixtures.path("test.zip"), :path) 17 | 18 | assert {:error, "invalid xlsx file"} = PackageLoader.open(zip_handle) 19 | end 20 | 21 | test "rejects non-zip file" do 22 | zip_handle = ZipArchive.handle(TestFixtures.path("not_a_zip.zip"), :path) 23 | 24 | assert {:error, "invalid zip file"} = PackageLoader.open(zip_handle) 25 | end 26 | end 27 | 28 | describe "load_sheet_by_name/2" do 29 | setup do 30 | zip_handle = ZipArchive.handle(TestFixtures.path("test.xlsx"), :path) 31 | {:ok, package} = PackageLoader.open(zip_handle) 32 | 33 | {:ok, %{package: package}} 34 | end 35 | 36 | test "loads a sheet by name", %{package: package} do 37 | assert {:ok, 38 | [ 39 | ["A", "B", "C" | _], 40 | [1.0, 2.0, 3.0 | _], 41 | [2.0, 4.0, 6.0 | _] 42 | | _ 43 | ]} = PackageLoader.load_sheet_by_name(package, "Sheet 1") 44 | 45 | assert {:ok, 46 | [ 47 | ["", "" | _], 48 | ["some ", "test" | _] 49 | | _ 50 | ]} = PackageLoader.load_sheet_by_name(package, "Sheet 2") 51 | end 52 | end 53 | end 54 | -------------------------------------------------------------------------------- /test/xlsx_reader/parsers/relationships_parser_test.exs: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.Parsers.RelationshipsParserTest do 2 | use ExUnit.Case 3 | 4 | alias XlsxReader.Parsers.RelationshipsParser 5 | 6 | test "parses workbook.xml.rels" do 7 | workbook_xml_rels = TestFixtures.read!("package/xl/_rels/workbook.xml.rels") 8 | 9 | expected = %{ 10 | shared_strings: %{ 11 | "rId1" => "sharedStrings.xml" 12 | }, 13 | styles: %{ 14 | "rId2" => "styles.xml" 15 | }, 16 | themes: %{ 17 | "rId3" => "theme/theme1.xml" 18 | }, 19 | sheets: %{ 20 | "rId4" => "worksheets/sheet1.xml", 21 | "rId5" => "worksheets/sheet2.xml", 22 | "rId6" => "worksheets/sheet3.xml" 23 | } 24 | } 25 | 26 | assert {:ok, expected} == 
RelationshipsParser.parse(workbook_xml_rels) 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /test/xlsx_reader/parsers/shared_strings_parser_test.exs: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.Parsers.SharedStringsParserTest do 2 | use ExUnit.Case 3 | 4 | alias XlsxReader.Array 5 | alias XlsxReader.Parsers.SharedStringsParser 6 | 7 | test "parses sharedStrings.xml" do 8 | shared_strings_xml = TestFixtures.read!("package/xl/sharedStrings.xml") 9 | 10 | expected = 11 | Array.from_list([ 12 | "A", 13 | "B", 14 | "C", 15 | "D", 16 | "E", 17 | "F", 18 | "G", 19 | "some ", 20 | "test", 21 | "date", 22 | "datetime", 23 | "time", 24 | "percentage", 25 | "money chf", 26 | "money usd", 27 | "9999,99 USD", 28 | "ticked", 29 | "not ticked", 30 | "hyperlink", 31 | "https://elixir-lang.org/", 32 | " ", 33 | "-" 34 | ]) 35 | 36 | assert {:ok, expected} == SharedStringsParser.parse(shared_strings_xml) 37 | end 38 | 39 | test "parses strings with rich text" do 40 | shared_strings_xml = TestFixtures.read!("xml/sharedStringsWithRichText.xml") 41 | 42 | expected = 43 | Array.from_list([ 44 | "Cell A1", 45 | "Cell B1", 46 | "My Cell", 47 | "Cell A2", 48 | "Cell B2" 49 | ]) 50 | 51 | assert {:ok, expected} == SharedStringsParser.parse(shared_strings_xml) 52 | end 53 | 54 | test "takes xml:space instruction into account" do 55 | shared_strings_xml = TestFixtures.read!("xml/sharedStringsWithXmlSpacePreserve.xml") 56 | 57 | expected = 58 | Array.from_list([ 59 | " with spaces ", 60 | "without spaces", 61 | "without spaces" 62 | ]) 63 | 64 | assert {:ok, expected} == SharedStringsParser.parse(shared_strings_xml) 65 | end 66 | end 67 | -------------------------------------------------------------------------------- /test/xlsx_reader/parsers/styles_parser_test.exs: -------------------------------------------------------------------------------- 1 | defmodule 
XlsxReader.Parsers.StylesParserTest do 2 | use ExUnit.Case 3 | 4 | alias XlsxReader.Array 5 | alias XlsxReader.Parsers.StylesParser 6 | 7 | setup do 8 | {:ok, %{styles_xml: TestFixtures.read!("package/xl/styles.xml")}} 9 | end 10 | 11 | test "parses styles.xml into an array of style types", %{styles_xml: styles_xml} do 12 | expected_style_types = 13 | Array.from_list([ 14 | :number, 15 | :number, 16 | :unsupported, 17 | :number, 18 | :number, 19 | :number, 20 | :number, 21 | :number, 22 | :number, 23 | :number, 24 | :number, 25 | :unsupported, 26 | :unsupported, 27 | :number, 28 | :number, 29 | :number, 30 | :number, 31 | :number, 32 | :date, 33 | :unsupported, 34 | :date_time, 35 | :time, 36 | :percentage, 37 | nil, 38 | :unsupported 39 | ]) 40 | 41 | assert {:ok, ^expected_style_types, _custom_formats} = StylesParser.parse(styles_xml) 42 | end 43 | 44 | test "supports user-provided custom formats", %{styles_xml: styles_xml} do 45 | supported_custom_formats = [ 46 | {"[$CHF]0.00", :number} 47 | ] 48 | 49 | expected_style_types = 50 | Array.from_list([ 51 | :number, 52 | :number, 53 | :unsupported, 54 | :number, 55 | :number, 56 | :number, 57 | :number, 58 | :number, 59 | :number, 60 | :number, 61 | :number, 62 | :unsupported, 63 | :unsupported, 64 | :number, 65 | :number, 66 | :number, 67 | :number, 68 | :number, 69 | :date, 70 | :unsupported, 71 | :date_time, 72 | :time, 73 | :percentage, 74 | :number, 75 | :unsupported 76 | ]) 77 | 78 | assert {:ok, ^expected_style_types, _custom_formats} = 79 | StylesParser.parse(styles_xml, supported_custom_formats) 80 | end 81 | end 82 | -------------------------------------------------------------------------------- /test/xlsx_reader/parsers/utils_test.exs: -------------------------------------------------------------------------------- 1 | defmodule XlsxReader.Parsers.UtilsTest do 2 | use ExUnit.Case 3 | doctest XlsxReader.Parsers.Utils 4 | end 5 | 
# ------------------------------------------------------------------------------
# /test/xlsx_reader/parsers/workbook_parser_test.exs
# ------------------------------------------------------------------------------

# Checks that WorkbookParser turns the workbook.xml fixture into a Workbook struct.
defmodule XlsxReader.Parsers.WorkbookParserTest do
  use ExUnit.Case

  alias XlsxReader.Parsers.WorkbookParser

  test "parses workbook.xml" do
    workbook_xml = TestFixtures.read!("package/xl/workbook.xml")

    # The parser only fills in sheets and base_date; the remaining fields
    # are expected to stay nil at this stage.
    expected = %XlsxReader.Workbook{
      sheets: [
        %XlsxReader.Sheet{name: "Sheet 1", rid: "rId4", sheet_id: "1"},
        %XlsxReader.Sheet{name: "Sheet 2", rid: "rId5", sheet_id: "2"},
        %XlsxReader.Sheet{name: "Sheet 3", rid: "rId6", sheet_id: "3"}
      ],
      rels: nil,
      shared_strings: nil,
      style_types: nil,
      base_date: ~D[1899-12-30]
    }

    assert {:ok, expected} == WorkbookParser.parse(workbook_xml)
  end
end

# ------------------------------------------------------------------------------
# /test/xlsx_reader/parsers/worksheet_parser_test.exs
# ------------------------------------------------------------------------------

# Exercises WorksheetParser against the worksheet fixtures, using a workbook
# assembled from the shared strings and styles fixtures.
defmodule XlsxReader.Parsers.WorksheetParserTest do
  use ExUnit.Case

  alias XlsxReader.{Cell, Conversion, Workbook}
  alias XlsxReader.Parsers.{SharedStringsParser, StylesParser, WorksheetParser}

  # Builds the workbook handed to every test that asks for `%{workbook: workbook}`.
  setup do
    {:ok, shared_strings} =
      "package/xl/sharedStrings.xml"
      |> TestFixtures.read!()
      |> SharedStringsParser.parse()

    {:ok, style_types, custom_formats} =
      "package/xl/styles.xml"
      |> TestFixtures.read!()
      |> StylesParser.parse()

    workbook = %Workbook{
      shared_strings: shared_strings,
      style_types: style_types,
      custom_formats: custom_formats,
      base_date: Conversion.base_date(1900)
    }

    {:ok, workbook: workbook}
  end

  test "parses sheet.xml", %{workbook: workbook} do
    sheet_xml = TestFixtures.read!("package/xl/worksheets/sheet1.xml")

    assert {:ok, _result} = WorksheetParser.parse(sheet_xml, workbook)
  end

  test "looks up shared strings", %{workbook: workbook} do
    sheet_xml = TestFixtures.read!("package/xl/worksheets/sheet1.xml")

    assert {:ok, rows} = WorksheetParser.parse(sheet_xml, workbook)

    assert [["A", "B", "C", "D", "E", "F", "G"] | _] = rows
  end

  test "performs cell type conversions by default", %{workbook: workbook} do
    sheet_xml = TestFixtures.read!("package/xl/worksheets/sheet3.xml")

    expected = [
      ["", ""],
      ["date", ~D[2019-11-15]],
      ["datetime", ~N[2019-11-24 11:06:13]],
      ["time", ~N[1904-01-01 18:45:12]],
      ["percentage", 12.5],
      ["money chf", "100"],
      ["money usd", "9999,99 USD"],
      ["ticked", true],
      ["not ticked", false],
      ["hyperlink", "https://elixir-lang.org/"]
    ]

    assert {:ok, rows} = WorksheetParser.parse(sheet_xml, workbook)

    assert expected == rows
  end

  test "returns raw values (except shared strings) when type conversion is disabled", %{
    workbook: workbook
  } do
    sheet_xml = TestFixtures.read!("package/xl/worksheets/sheet3.xml")

    # Same sheet as the conversion test above, but every non-string cell is
    # expected as the untouched string stored in the XML.
    expected = [
      ["", ""],
      ["date", "43784"],
      ["datetime", "43793.462650462963"],
      ["time", "1462.781388888889"],
      ["percentage", "0.125"],
      ["money chf", "100"],
      ["money usd", "9999,99 USD"],
      ["ticked", "1"],
      ["not ticked", "0"],
      ["hyperlink", "https://elixir-lang.org/"]
    ]

    assert {:ok, rows} = WorksheetParser.parse(sheet_xml, workbook, type_conversion: false)

    assert expected == rows
  end

  test "handles correctly empty values", %{workbook: workbook} do
    sheet_xml = TestFixtures.read!("package/xl/worksheets/sheet5.xml")

    assert {:ok, [["", "", "Hello", "", "0.0"]]} =
             WorksheetParser.parse(sheet_xml, workbook, type_conversion: false)
  end

  test "handles inline strings", %{workbook: workbook} do
    sheet_xml = TestFixtures.read!("xml/worksheetWithInlineStr.xml")

    expected = [["inline string"]]

    assert {:ok, rows} = WorksheetParser.parse(sheet_xml, workbook)

    assert expected == rows
  end

  test "should ignore rows based on skip_row?", %{
    workbook: workbook
  } do
    sheet_xml = TestFixtures.read!("package/xl/worksheets/sheet4.xml")

    # First predicate: skip rows whose cells are all blank once trimmed.
    ignore_trimmed = fn row -> Enum.all?(row, &(String.trim(&1) == "")) end

    assert {:ok, rows} = WorksheetParser.parse(sheet_xml, workbook, skip_row?: ignore_trimmed)
    assert [["-", "-", "-", "-"]] == rows

    # Second predicate: additionally skip rows made only of "-" cells.
    ignore_trimmed_or_dashes = fn row -> ignore_trimmed.(row) or Enum.all?(row, &(&1 == "-")) end

    assert {:ok, rows} =
             WorksheetParser.parse(sheet_xml, workbook, skip_row?: ignore_trimmed_or_dashes)

    assert [] == rows
  end

  test "should return cell structs instead of values when cell_data_format is :cell" do
    {:ok, package} = XlsxReader.open(TestFixtures.path("has_formulas.xlsx"))
    {:ok, sheets} = XlsxReader.sheets(package, cell_data_format: :cell)

    expected = [
      {"sheet_1",
       [
         [
           %Cell{value: "abc", formula: nil, ref: "A1"},
           %Cell{value: 123.0, formula: nil, ref: "B1"}
         ]
       ]},
      {"sheet_2",
       [
         [
           %Cell{value: "def", formula: nil, ref: "A1"},
           %Cell{value: 456.0, formula: nil, ref: "B1"},
           "",
           %Cell{value: 466.0, formula: "SUM(B1, 10)", ref: "D1"}
         ]
       ]}
    ]

    assert expected == sheets
  end

  test "should return shared formulas as part of Cell struct", %{workbook: workbook} do
    # Newlines and tabs are stripped from the fixture before it is parsed.
    sheet_xml =
      "xml/worksheetWithSharedFormulas.xml"
      |> TestFixtures.read!()
      |> String.replace("\n", "")
      |> String.replace("\t", "")

    expected = [
      [
        %XlsxReader.Cell{value: "1", formula: nil, ref: "A1"},
        %XlsxReader.Cell{value: "6", formula: "SUM(A1:A3)", ref: "B1"}
      ],
      [
        %XlsxReader.Cell{value: "2", formula: nil, ref: "A2"},
        %XlsxReader.Cell{value: "6", formula: "SUM(A1:A3)", ref: "B2"}
      ],
      [
        %XlsxReader.Cell{value: "3", formula: nil, ref: "A3"},
        %XlsxReader.Cell{value: "6", formula: "SUM(A1:A3)", ref: "B3"}
      ]
    ]

    assert {:ok, expected} == WorksheetParser.parse(sheet_xml, workbook, cell_data_format: :cell)
  end

  test "should include or exclude hidden sheets based on an option" do
    filepath = TestFixtures.path("hidden_sheets.xlsx")

    {:ok, package} = XlsxReader.open(filepath, exclude_hidden_sheets?: false)
    all_sheet_names = XlsxReader.sheet_names(package)
    assert all_sheet_names == ["Sheet 1", "Sheet 2", "Sheet 3"]

    {:ok, package} = XlsxReader.open(filepath, exclude_hidden_sheets?: true)
    visible_sheet_names = XlsxReader.sheet_names(package)
    assert visible_sheet_names == ["Sheet 1"]
  end
end

# ------------------------------------------------------------------------------
# /test/xlsx_reader/styles_test.exs
# ------------------------------------------------------------------------------

# Checks how Styles.get_style_type maps a number format id (plus optional
# custom format tables) to a style type atom.
defmodule XlsxReader.StylesTest do
  use ExUnit.Case

  alias XlsxReader.Styles

  describe "get_style_type/2" do
    test "some known styles" do
      assert :number == Styles.get_style_type("0", %{})
      assert :number == Styles.get_style_type("1", %{})
      assert :number == Styles.get_style_type("2", %{})
      assert :percentage == Styles.get_style_type("9", %{})
      assert :date == Styles.get_style_type("14", %{})
      assert :time == Styles.get_style_type("18", %{})
      assert :unsupported == Styles.get_style_type("49", %{})
    end

    test "supported custom formats" do
      assert :percentage = Styles.get_style_type("123", %{"123" => "0.0%"})

      # ISO8601 date/time
      assert :date = Styles.get_style_type("123", %{"123" => "yyyy-mm-dd"})
      assert :date_time = Styles.get_style_type("123", %{"123" => "yyyy-mm-dd hh:mm:ss"})
      assert :date_time = Styles.get_style_type("123", %{"123" => "yyyy-mm-ddThh:mm:ssZ"})

      # US date/time
      assert :date = Styles.get_style_type("123", %{"123" => "m/d/yyyy"})
      assert :date_time = Styles.get_style_type("123", %{"123" => "m/d/yyyy h:mm"})

      # Plain time
      assert :time = Styles.get_style_type("123", %{"123" => "hh:mm"})
    end

    test "user-provided supported custom formats" do
      # The user-provided table accepts both exact strings and regexes as matchers.
      assert :date == Styles.get_style_type("123", %{"123" => "mmm yy"}, [{"mmm yy", :date}])
      assert :date == Styles.get_style_type("123", %{"123" => "mmm yy"}, [{~r/mmm? yy/, :date}])
    end

    test "unknown format" do
      assert nil == Styles.get_style_type("123", %{"456" => "0.0%"})
      assert nil == Styles.get_style_type("123", %{"123" => "bogus"})
      assert nil == Styles.get_style_type("123", %{"123" => "bogus"}, [{"mmm yy", :date}])
    end
  end
end

# ------------------------------------------------------------------------------
# /test/xlsx_reader/zip_archive_test.exs
# ------------------------------------------------------------------------------

# Covers ZipArchive.list/1 and ZipArchive.extract/2 for both handle kinds used
# in this file: `:path` (a file on disk) and `:binary` (archive bytes in memory).
defmodule XlsxReader.ZipArchiveTest do
  use ExUnit.Case

  alias XlsxReader.ZipArchive

  describe "list/1" do
    test "lists the contents of a zip file" do
      zip_handle = ZipArchive.handle(TestFixtures.path("test.zip"), :path)

      assert {:ok, ["dir/subdir/file3.bin", "file1.txt", "file2.dat"]} =
               ZipArchive.list(zip_handle)
    end

    test "lists the contents of a zip buffer" do
      zip_handle = ZipArchive.handle(TestFixtures.read!("test.zip"), :binary)

      assert {:ok, ["dir/subdir/file3.bin", "file1.txt", "file2.dat"]} =
               ZipArchive.list(zip_handle)
    end

    test "invalid zip file" do
      zip_handle = ZipArchive.handle(TestFixtures.path("not_a_zip.zip"), :path)

      assert {:error, "invalid zip file"} = ZipArchive.list(zip_handle)
    end

    test "zip file not found" do
      zip_handle = ZipArchive.handle("__does_not_exist__", :path)

      assert {:error, "file not found"} = ZipArchive.list(zip_handle)
    end
  end

  describe "extract/2" do
    test "extracts a file from a zip file" do
      zip_handle = ZipArchive.handle(TestFixtures.path("test.zip"), :path)

      assert {:ok, "Contents of file1\n"} = ZipArchive.extract(zip_handle, "file1.txt")
      assert {:ok, "Contents of file2\n"} = ZipArchive.extract(zip_handle, "file2.dat")
      assert {:ok, "Contents of file3\n"} = ZipArchive.extract(zip_handle, "dir/subdir/file3.bin")

      assert {:error, "file \"bogus.bin\" not found in archive"} =
               ZipArchive.extract(zip_handle, "bogus.bin")
    end

    test "extracts a file from zip buffer" do
      # Load the archive into memory and use a :binary handle, mirroring the
      # buffer test in list/1. The previous version built a :path handle here,
      # which only repeated the file-based test above and left extraction from
      # an in-memory archive untested.
      zip_handle = ZipArchive.handle(TestFixtures.read!("test.zip"), :binary)

      assert {:ok, "Contents of file1\n"} = ZipArchive.extract(zip_handle, "file1.txt")
      assert {:ok, "Contents of file2\n"} = ZipArchive.extract(zip_handle, "file2.dat")
      assert {:ok, "Contents of file3\n"} = ZipArchive.extract(zip_handle, "dir/subdir/file3.bin")

      assert {:error, "file \"bogus.bin\" not found in archive"} =
               ZipArchive.extract(zip_handle, "bogus.bin")
    end

    test "invalid zip file" do
      zip_handle = ZipArchive.handle(TestFixtures.path("not_a_zip.zip"), :path)

      assert {:error, "invalid zip file"} = ZipArchive.extract(zip_handle, "file1.txt")
    end

    test "zip file not found" do
      zip_handle = ZipArchive.handle("__does_not_exist__", :path)

      assert {:error, "file not found"} = ZipArchive.extract(zip_handle, "file1.txt")
    end
  end
end

# ------------------------------------------------------------------------------
# /test/xlsx_reader_test.exs
# ------------------------------------------------------------------------------

# End-to-end tests of the public XlsxReader API against the xlsx fixtures.
defmodule XlsxReaderTest do
  use ExUnit.Case

  test "README install version check" do
    app = :xlsx_reader

    app_version = "#{Application.spec(app, :vsn)}"
    readme = File.read!("README.md")

    # Captures the version requirement from the `{:xlsx_reader, "..."}` tuple
    # found in the README.
    [_, readme_versions] = Regex.run(~r/{:#{app}, "(.+)"}/, readme)

    assert Version.match?(
             app_version,
             readme_versions
           ),
           """
           Install version constraint in README.md does not match to current app version.
           Current App Version: #{app_version}
           Readme Install Versions: #{readme_versions}
           """
  end

  describe "open/2" do
    test "opens a xlsx file from the file system" do
      xlsx = TestFixtures.path("test.xlsx")

      assert {:ok, %XlsxReader.Package{}} = XlsxReader.open(xlsx)
      assert {:ok, %XlsxReader.Package{}} = XlsxReader.open(xlsx, source: :path)
    end

    test "open a xlsx file from memory" do
      xlsx = TestFixtures.read!("test.xlsx")

      assert {:ok, %XlsxReader.Package{}} = XlsxReader.open(xlsx, source: :binary)
    end

    test "rejects non-xlsx file" do
      xlsx = TestFixtures.path("test.zip")

      assert {:error, "invalid xlsx file"} = XlsxReader.open(xlsx)
    end

    test "rejects non-zip file" do
      xlsx = TestFixtures.path("not_a_zip.zip")

      assert {:error, "invalid zip file"} = XlsxReader.open(xlsx)
    end

    test "rejects relative and absolute path to directory" do
      relative_path = "test"
      absolute_path = Path.absname(relative_path)

      assert {:error, "invalid zip file"} = XlsxReader.open(relative_path)
      assert {:error, "invalid zip file"} = XlsxReader.open(absolute_path)
    end

    test "supported custom formats" do
      xlsx = TestFixtures.path("test.xlsx")

      assert {:ok, package} =
               XlsxReader.open(xlsx,
                 supported_custom_formats: [
                   {"[$CHF]0.00", :string}
                 ]
               )

      {:ok, sheet} = XlsxReader.sheet(package, "Sheet 3")

      # Only the "money chf" row is pinned to a value; the other rows are
      # matched on their label alone.
      assert [
               ["", _],
               ["date", _],
               ["datetime", _],
               ["time", _],
               ["percentage", _],
               ["money chf", "100"],
               ["money usd", _],
               ["ticked", _],
               ["not ticked", _],
               ["hyperlink", _]
             ] = sheet
    end
  end

  describe "sheet_names/1" do
    setup do
      {:ok, package} = XlsxReader.open(TestFixtures.path("test.xlsx"))

      {:ok, %{package: package}}
    end

    test "lists the sheets in workbook", %{package: package} do
      assert ["Sheet 1", "Sheet 2", "Sheet 3"] == XlsxReader.sheet_names(package)
    end
  end

  describe "sheet/3" do
    setup do
      {:ok, package} = XlsxReader.open(TestFixtures.path("test.xlsx"))

      {:ok, %{package: package}}
    end

    test "returns the contents of the sheet by name", %{package: package} do
      assert {:ok,
              [
                ["A", "B", "C" | _],
                [1.0, 2.0, 3.0 | _]
                | _
              ]} = XlsxReader.sheet(package, "Sheet 1")
    end

    test "type conversion off", %{package: package} do
      assert {:ok,
              [
                _,
                ["date", "43784"]
                | _
              ]} = XlsxReader.sheet(package, "Sheet 3", type_conversion: false)
    end

    test "number type", %{package: package} do
      assert {:ok,
              [
                ["A", "B", "C" | _],
                [
                  %Decimal{coef: 1, exp: 0, sign: 1},
                  %Decimal{coef: 2, exp: 0, sign: 1},
                  %Decimal{coef: 3, exp: 0, sign: 1} | _
                ]
                | _
              ]} = XlsxReader.sheet(package, "Sheet 1", number_type: Decimal)
    end

    test "custom blank value", %{package: package} do
      assert {:ok,
              [
                ["n/a", "n/a"]
                | _
              ]} = XlsxReader.sheet(package, "Sheet 3", blank_value: "n/a")
    end

    test "skip empty rows", %{package: package} do
      assert {:ok,
              [
                ["date" | _]
                | _
              ]} = XlsxReader.sheet(package, "Sheet 3", empty_rows: false)
    end
  end

  describe "sheets/2" do
    setup do
      {:ok, package} = XlsxReader.open(TestFixtures.path("test.xlsx"))

      {:ok, %{package: package}}
    end

    test "load all sheets", %{package: package} do
      assert {:ok,
              [
                {"Sheet 1", [["A", "B", "C" | _] | _]},
                {"Sheet 2", [["", "", "", "", ""] | _]},
                {"Sheet 3", [["", ""] | _]}
              ]} = XlsxReader.sheets(package)
    end

    test "filters sheets", %{package: package} do
      # `only:` / `except:` are exercised with a list of names, a list of
      # regexes, a single name, and a single regex combined with a list.
      assert {:ok,
              [
                {"Sheet 1", _},
                {"Sheet 3", _}
              ]} = XlsxReader.sheets(package, only: ["Sheet 1", "Sheet 3"])

      assert {:ok,
              [
                {"Sheet 1", _},
                {"Sheet 3", _}
              ]} = XlsxReader.sheets(package, only: [~r/Sheet [13]/])

      assert {:ok,
              [
                {"Sheet 1", _},
                {"Sheet 3", _}
              ]} = XlsxReader.sheets(package, except: "Sheet 2")

      assert {:ok,
              [
                {"Sheet 1", _},
                {"Sheet 3", _}
              ]} = XlsxReader.sheets(package, only: ~r/Sheet \d+/, except: ["Sheet 2"])
    end
  end

  describe "async_sheets/3" do
    setup do
      {:ok, package} = XlsxReader.open(TestFixtures.path("test.xlsx"))

      {:ok, %{package: package}}
    end

    test "load all sheets", %{package: package} do
      assert {:ok,
              [
                {"Sheet 1", [["A", "B", "C" | _] | _]},
                {"Sheet 2", [["", "", "", "", ""] | _]},
                {"Sheet 3", [["", ""] | _]}
              ]} = XlsxReader.async_sheets(package)
    end

    test "filters sheets", %{package: package} do
      # Same filter combinations as the sheets/2 tests, through the async API.
      assert {:ok,
              [
                {"Sheet 1", _},
                {"Sheet 3", _}
              ]} = XlsxReader.async_sheets(package, only: ["Sheet 1", "Sheet 3"])

      assert {:ok,
              [
                {"Sheet 1", _},
                {"Sheet 3", _}
              ]} = XlsxReader.async_sheets(package, only: [~r/Sheet [13]/])

      assert {:ok,
              [
                {"Sheet 1", _},
                {"Sheet 3", _}
              ]} = XlsxReader.async_sheets(package, except: "Sheet 2")

      assert {:ok,
              [
                {"Sheet 1", _},
                {"Sheet 3", _}
              ]} = XlsxReader.async_sheets(package, only: ~r/Sheet \d+/, except: ["Sheet 2"])
    end
  end
end