├── .coveragerc
├── .github
│   ├── CONTRIBUTING.md
│   ├── release.yml
│   └── workflows
│       ├── docs-lint.yml
│       ├── release.yml
│       └── test.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── AUTHORS
├── CODE_OF_CONDUCT.md
├── HISTORY.md
├── LICENSE
├── README.md
├── RELEASING.md
├── docs
│   ├── Makefile
│   ├── __init__.py
│   ├── _templates
│   │   ├── sidebarintro.html
│   │   └── sidebarlogo.html
│   ├── api.rst
│   ├── conf.py
│   ├── development.rst
│   ├── formats.rst
│   ├── index.rst
│   ├── install.rst
│   ├── intro.rst
│   ├── krstyle.sty
│   ├── requirements.txt
│   └── tutorial.rst
├── pyproject.toml
├── pytest.ini
├── src
│   └── tablib
│       ├── __init__.py
│       ├── _vendor
│       │   ├── __init__.py
│       │   └── dbfpy
│       │       ├── __init__.py
│       │       ├── dbf.py
│       │       ├── dbfnew.py
│       │       ├── fields.py
│       │       ├── header.py
│       │       ├── record.py
│       │       └── utils.py
│       ├── core.py
│       ├── exceptions.py
│       ├── formats
│       │   ├── __init__.py
│       │   ├── _cli.py
│       │   ├── _csv.py
│       │   ├── _dbf.py
│       │   ├── _df.py
│       │   ├── _html.py
│       │   ├── _jira.py
│       │   ├── _json.py
│       │   ├── _latex.py
│       │   ├── _ods.py
│       │   ├── _rst.py
│       │   ├── _sql.py
│       │   ├── _tsv.py
│       │   ├── _xls.py
│       │   ├── _xlsx.py
│       │   └── _yaml.py
│       └── utils.py
├── tests
│   ├── files
│   │   ├── bad_dimensions.xlsx
│   │   ├── book.ods
│   │   ├── dates.xls
│   │   ├── errors.xls
│   │   ├── founders.xlsx
│   │   ├── issue_524.yaml
│   │   ├── ragged.ods
│   │   ├── ragged.xlsx
│   │   ├── unknown_value_type.ods
│   │   └── xlsx_cell_values.xlsx
│   ├── requirements.txt
│   ├── test_tablib.py
│   ├── test_tablib_dbfpy_packages_fields.py
│   └── test_tablib_dbfpy_packages_utils.py
└── tox.ini
-------------------------------------------------------------------------------- /.coveragerc: --------------------------------------------------------------------------------
1 | # .coveragerc to control coverage.py
2 | 
3 | [report]
4 | # Regexes for lines to exclude from consideration
5 | exclude_lines =
6 |     # Have to re-enable the standard pragma:
7 |     pragma: no cover
8 | 
9 |     # Don't complain if non-runnable code isn't run:
10 |     if __name__ == .__main__.:
-------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: --------------------------------------------------------------------------------
1 | [![Jazzband](https://jazzband.co/static/img/jazzband.svg)](https://jazzband.co/)
2 | 
3 | This is a [Jazzband](https://jazzband.co/) project. By contributing you agree to abide
4 | by the [Contributor Code of Conduct](https://jazzband.co/about/conduct) and follow the
5 | [guidelines](https://jazzband.co/about/guidelines).
6 | 
7 | If you'd like to contribute, simply fork
8 | [the repository](https://github.com/jazzband/tablib), commit your changes to a feature
9 | branch, and send a pull request to `master`. Make sure you add yourself to
10 | [AUTHORS](https://github.com/jazzband/tablib/blob/master/AUTHORS).
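A typical flow, as a sketch (`<your-username>` is a placeholder for your GitHub user; the branch name is illustrative):

```bash
# fork jazzband/tablib on GitHub first, then:
git clone https://github.com/<your-username>/tablib.git
cd tablib
git checkout -b my-feature
# ...make and commit your changes...
git push origin my-feature
# finally, open a pull request against jazzband/tablib's master branch
```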
11 | -------------------------------------------------------------------------------- /.github/release.yml: -------------------------------------------------------------------------------- 1 | changelog: 2 | exclude: 3 | authors: 4 | - dependabot 5 | - pre-commit-ci 6 | -------------------------------------------------------------------------------- /.github/workflows/docs-lint.yml: -------------------------------------------------------------------------------- 1 | name: Docs and lint 2 | 3 | on: [push, pull_request, workflow_dispatch] 4 | 5 | env: 6 | FORCE_COLOR: 1 7 | PIP_DISABLE_PIP_VERSION_CHECK: 1 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | env: 18 | - TOXENV: docs 19 | - TOXENV: lint 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | 24 | - name: Set up Python 25 | uses: actions/setup-python@v5 26 | with: 27 | python-version: "3.x" 28 | cache: pip 29 | cache-dependency-path: "pyproject.toml" 30 | 31 | - name: Install dependencies 32 | run: | 33 | python -m pip install --upgrade pip 34 | python -m pip install --upgrade tox 35 | 36 | - name: Tox 37 | run: tox 38 | env: ${{ matrix.env }} 39 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | release: 8 | types: 9 | - published 10 | workflow_dispatch: 11 | 12 | jobs: 13 | build: 14 | if: github.repository_owner == 'jazzband' 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | with: 20 | fetch-depth: 0 21 | 22 | - name: Set up Python 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: "3.x" 26 | cache: pip 27 | cache-dependency-path: "pyproject.toml" 28 | 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install -U pip 32 | python -m pip install build twine 33 | 34 | - name: Build package 35 | run: | 36 | python -m build 37 | twine check dist/* 38 | 39 | - name: Upload packages to Jazzband 40 | if: github.event.action == 'published' 41 | uses: pypa/gh-action-pypi-publish@release/v1 42 | with: 43 | user: jazzband 44 | password: ${{ secrets.JAZZBAND_RELEASE_KEY }} 45 | repository-url: https://jazzband.co/projects/tablib/upload 46 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push, pull_request, workflow_dispatch] 4 | 5 | permissions: 6 | contents: read 7 | 8 | env: 9 | FORCE_COLOR: 1 10 | PIP_DISABLE_PIP_VERSION_CHECK: 1 11 | 12 | jobs: 13 | test: 14 | runs-on: ${{ matrix.os }} 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] 19 | os: [ubuntu-latest, macOS-latest, windows-latest] 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | 24 | - name: Set up Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v5 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | allow-prereleases: true 29 | cache: pip 30 | cache-dependency-path: "pyproject.toml" 31 | 32 | - name: Install dependencies 33 | run: | 34 | python -m pip install --upgrade pip 35 | python -m pip install --upgrade tox 36 | python -m pip install -e . 
37 | 38 | - name: Tox tests 39 | shell: bash 40 | run: | 41 | tox -e py 42 | 43 | - name: Upload coverage 44 | uses: codecov/codecov-action@v3 45 | with: 46 | name: ${{ matrix.os }} Python ${{ matrix.python-version }} 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # application builds 2 | build/* 3 | dist/* 4 | MANIFEST 5 | 6 | # python skin 7 | *.pyc 8 | *.pyo 9 | 10 | # osx noise 11 | .DS_Store 12 | profile 13 | 14 | # pycharm noise 15 | .idea 16 | .idea/* 17 | 18 | # vi noise 19 | *.swp 20 | docs/_build/* 21 | coverage.xml 22 | nosetests.xml 23 | junit-py25.xml 24 | junit-py26.xml 25 | junit-py27.xml 26 | 27 | # tox noise 28 | .tox 29 | 30 | # pyenv noise 31 | .python-version 32 | tablib.egg-info/* 33 | 34 | # Coverage 35 | .coverage 36 | htmlcov 37 | 38 | # setuptools noise 39 | .eggs 40 | *.egg-info 41 | 42 | # generated by setuptools-scm 43 | /src/tablib/_version.py 44 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | rev: v0.11.6 4 | hooks: 5 | - id: ruff 6 | args: [--exit-non-zero-on-fix] 7 | 8 | - repo: https://github.com/isidentical/teyit 9 | rev: 0.4.3 10 | hooks: 11 | - id: teyit 12 | 13 | - repo: https://github.com/pre-commit/pygrep-hooks 14 | rev: v1.10.0 15 | hooks: 16 | - id: rst-backticks 17 | 18 | - repo: https://github.com/pre-commit/pre-commit-hooks 19 | rev: v5.0.0 20 | hooks: 21 | - id: check-added-large-files 22 | - id: check-case-conflict 23 | - id: check-merge-conflict 24 | - id: check-toml 25 | - id: check-yaml 26 | - id: debug-statements 27 | - id: end-of-file-fixer 28 | - id: forbid-submodules 29 | - id: requirements-txt-fixer 30 | - id: trailing-whitespace 31 | 32 | - repo: meta 33 | hooks: 34 | - id: check-hooks-apply 35 | - id: check-useless-excludes 36 | 37 | ci: 38 | autoupdate_schedule: quarterly 39 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-20.04 5 | tools: 6 | python: "3.10" 7 | 8 | sphinx: 9 | configuration: docs/conf.py 10 | 11 | python: 12 | install: 13 | - requirements: docs/requirements.txt 14 | - method: pip 15 | path: . 16 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Tablib was originally written by Kenneth Reitz and is now maintained 2 | by the Jazzband GitHub team. 
3 | 4 | Here is a list of past and present much-appreciated contributors: 5 | 6 | Alex Gaynor 7 | Andrew Graham-Yooll 8 | Andrii Soldatenko 9 | Benjamin Wohlwend 10 | Bruno Soares 11 | Claude Paroz 12 | Daniel Santos 13 | Egor Osokin 14 | Erik Youngren 15 | Hugo van Kemenade 16 | Ian Stride 17 | Iuri de Silvio 18 | Jakub Janoszek 19 | James Douglass 20 | Joel Friedly 21 | Josh Ourisman 22 | Kenneth Reitz 23 | Luca Beltrame 24 | Luke Lee 25 | Marc Abramowitz 26 | Marco Dallagiacoma 27 | Maris Nartiss 28 | Mark Rogers 29 | Mark Walling 30 | Mathias Loesch 31 | Matthew Hegarty 32 | Matthias Dellweg 33 | Mike Waldner 34 | Peyman Salehi 35 | Rabin Nankhwa 36 | Tak Hogan 37 | Tommy Anthony 38 | Tsuyoshi Hombashi 39 | Tushar Makkar 40 | Yunis Yilmaz 41 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | As contributors and maintainers of the Jazzband projects, and in the interest of 4 | fostering an open and welcoming community, we pledge to respect all people who 5 | contribute through reporting issues, posting feature requests, updating documentation, 6 | submitting pull requests or patches, and other activities. 7 | 8 | We are committed to making participation in the Jazzband a harassment-free experience 9 | for everyone, regardless of the level of experience, gender, gender identity and 10 | expression, sexual orientation, disability, personal appearance, body size, race, 11 | ethnicity, age, religion, or nationality. 12 | 13 | Examples of unacceptable behavior by participants include: 14 | 15 | - The use of sexualized language or imagery 16 | - Personal attacks 17 | - Trolling or insulting/derogatory comments 18 | - Public or private harassment 19 | - Publishing other's private information, such as physical or electronic addresses, 20 | without explicit permission 21 | - Other unethical or unprofessional conduct 22 | 23 | The Jazzband roadies have the right and responsibility to remove, edit, or reject 24 | comments, commits, code, wiki edits, issues, and other contributions that are not 25 | aligned to this Code of Conduct, or to ban temporarily or permanently any contributor 26 | for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 27 | 28 | By adopting this Code of Conduct, the roadies commit themselves to fairly and 29 | consistently applying these principles to every aspect of managing the jazzband 30 | projects. Roadies who do not follow or enforce the Code of Conduct may be permanently 31 | removed from the Jazzband roadies. 32 | 33 | This code of conduct applies both within project spaces and in public spaces when an 34 | individual is representing the project or its community. 35 | 36 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 37 | contacting the roadies at `roadies@jazzband.co`. All complaints will be reviewed and 38 | investigated and will result in a response that is deemed necessary and appropriate to 39 | the circumstances. Roadies are obligated to maintain confidentiality with regard to the 40 | reporter of an incident. 
41 | 42 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 43 | 1.3.0, available at [https://contributor-covenant.org/version/1/3/0/][version] 44 | 45 | [homepage]: https://contributor-covenant.org 46 | [version]: https://contributor-covenant.org/version/1/3/0/ 47 | -------------------------------------------------------------------------------- /HISTORY.md: -------------------------------------------------------------------------------- 1 | # History 2 | 3 | ## 3.8.0 (2025-01-22) 4 | 5 | ### Improvements 6 | 7 | - Add support for exporting XLSX with column width (#516) 8 | - Remove redundant check from `Dataset.load()` (#604) 9 | 10 | ## 3.7.0 (2024-10-08) 11 | 12 | ### Improvements 13 | 14 | - Add support for Python 3.13 (#592) 15 | - Drop support for EOL Python 3.8 (#598) 16 | - Add styling to datetime, date and time values for ODS (#594) 17 | - Add styling for date/time types for XLS (#596) 18 | 19 | ### Bugfixes 20 | 21 | - Fix time and datetime export in ODS format (#595) 22 | - Avoid normalizing input twice in `import_set`/`book` (#591) 23 | 24 | ## 3.6.1 (2024-04-04) 25 | 26 | ### Bugfixes 27 | 28 | - Fix broken installs with pip failing to resolve the request for `tablib[html]` in some cases (#588). 29 | 30 | ## 3.6.0 (2024-03-23) 31 | 32 | ### Improvements 33 | 34 | - It's now possible to access a dataset row using its index without slicing (#24). 35 | - The dataset `transpose()` method can be called on datasets without headers. 36 | - The html format now supports importing from HTML content (#243) 37 | - The ODS format now supports importing from .ods files (#567). The support is 38 | still a bit experimental. 39 | - When adding rows to a dataset with dynamic columns, it's now possible to 40 | provide only static values, and dynamic column values will be automatically 41 | calculated and added to the row (#572). 42 | 43 | ### Changes 44 | 45 | - The html export format does not depend on MarkupPy any longer, therefore the 46 | tablib[html] install target was removed also. 47 | 48 | ### Bugfixes 49 | 50 | - Fix crash when loading a databook from an XLS file (#522). 51 | - `None` Python values are now converted to the empty string by the ODS formatter. 52 | - When applying formatters, the internal data is no longer mutated (#578). 53 | - Columns can be inserted even when a dataset has headers but no values (#583). 54 | 55 | ## 3.5.0 (2023-06-11) 56 | 57 | ### Improvements 58 | 59 | - Add support for Python 3.12 (#550) 60 | - Drop support for EOL Python 3.7 (#551) 61 | - Allow importing 'ragged' .xlsx files through dataset (#547) 62 | - Release: replace deprecated `repository_url` with `repository-url` (#545) 63 | 64 | ## 3.4.0 (2023-03-24) 65 | 66 | ### Improvements 67 | 68 | - Move setup to `pyproject.toml` (#542) 69 | - xlsx export: remove redundant code (#541) 70 | - xlsx export: support escape of formulae (#540) 71 | - Add <tbody> tags to HTML output (#539) 72 | - Check for type list and improve error msg (#524) 73 | 74 | ### Bugfixes 75 | 76 | - Fix bug when yaml file is empty (#535) 77 | - Fix linting issues raised by Flake8 (#536) 78 | 79 | ## 3.3.0 (2022-12-10) 80 | 81 | ### Improvements 82 | 83 | - Add support for Python 3.11 (#525). 84 | - ODS export: integers/floats/decimals are exported as numbers (#527). 85 | 86 | ## 3.2.1 (2022-04-09) 87 | 88 | ### Bugfixes 89 | 90 | - Support solo CR in text input imports (#518). 91 | 92 | ## 3.2.0 (2022-01-27) 93 | 94 | ### Changes 95 | 96 | - Dropped Python 3.6 support (#513). 
97 | 
98 | ### Bugfixes
99 | 
100 | - Corrected order of arguments to a regex call in `safe_xlsx_sheet_title` (#510).
101 | 
102 | ## 3.1.0 (2021-10-26)
103 | 
104 | ### Improvements
105 | 
106 | - Add support for Python 3.10 (#504).
107 | - The csv, xls, and xlsx formats gained support for the `skip_lines` keyword
108 | argument for their `import_set()` method to be able to skip the first n
109 | lines of a read file (#497).
110 | 
111 | ### Bugfixes
112 | 
113 | - Avoided mutable parameter defaults (#494).
114 | - Specify build backend for editable installs (#501).
115 | - Doubled sample size passed to `csv.Sniffer()` in `_csv.detect()` (#503).
116 | 
117 | ## 3.0.0 (2020-12-05)
118 | 
119 | ### Breaking changes
120 | 
121 | - Dropped Python 3.5 support.
122 | - JSON-exported data is no longer forced to ASCII characters.
123 | - YAML-exported data is no longer forced to ASCII characters.
124 | 
125 | ### Improvements
126 | 
127 | - Added Python 3.9 support.
128 | - Added read_only option to xlsx file reader (#482).
129 | 
130 | ### Bugfixes
131 | 
132 | - Prevented crash in rst export with only-space strings (#469).
133 | 
134 | ## 2.0.0 (2020-05-16)
135 | 
136 | ### Breaking changes
137 | 
138 | - The `Row.lpush/rpush` logic was reversed. `lpush` was appending while `rpush`
139 | and `append` were prepending. This was fixed (reversed behavior). If you
140 | counted on the broken behavior, please update your code (#453).
141 | 
142 | ### Bugfixes
143 | 
144 | - Fixed minimal openpyxl dependency version to 2.6.0 (#457).
145 | - Dates from xls files are now read as Python datetime objects (#373).
146 | - Allow import of "ragged" xlsx files (#465).
147 | 
148 | ### Improvements
149 | 
150 | - When importing an xlsx file, Tablib will now read cell values instead of formulas (#462).
151 | 
152 | ## 1.1.0 (2020-02-13)
153 | 
154 | ### Deprecations
155 | 
156 | - Upcoming breaking change in Tablib 2.0.0: the `Row.lpush/rpush` logic is reversed.
157 | `lpush` is appending while `rpush` and `append` are prepending. The broken behavior
158 | will remain in Tablib 1.x and will be fixed (reversed) in Tablib 2.0.0 (#453). If you
159 | count on the broken behavior, please update your code when you upgrade to Tablib 2.x.
160 | 
161 | ### Improvements
162 | 
163 | - Tablib is now able to import CSV content where not all rows have the same
164 | length. Missing columns on any line receive the empty string (#226).
165 | 
166 | ## 1.0.0 (2020-01-13)
167 | 
168 | ### Breaking changes
169 | 
170 | - Dropped Python 2 support
171 | - Dependencies are now all optional. To install `tablib` as before with all
172 | possible supported formats, run `pip install tablib[all]`
173 | 
174 | ### Improvements
175 | 
176 | - Formats can now be dynamically registered through the
177 | `tablib.formats.registry.register` API (#256).
178 | - Tablib methods expecting data input (`detect_format`, `import_set`,
179 | `Dataset.load`, `Databook.load`) now accept file-like objects in addition
180 | to raw strings and bytestrings (#440).
181 | 
182 | ### Bugfixes
183 | 
184 | - Fixed a crash when exporting an empty string with the ReST format (#368)
185 | - Error cells from imported .xls files now contain the error string (#202)
186 | 
187 | ## 0.14.0 (2019-10-19)
188 | 
189 | ### Deprecations
190 | 
191 | - The 0.14.x series will be the last to support Python 2
192 | 
193 | ### Breaking changes
194 | 
195 | - Dropped Python 3.4 support
196 | 
197 | ### Improvements
198 | 
199 | - Added Python 3.7 and 3.8 support
200 | - The project is now maintained by the Jazzband team, https://jazzband.co
201 | - Improved format autodetection and added autodetection for the odf format.
202 | - Added search to all documentation pages
203 | - Open xlsx workbooks in read-only mode (#316)
204 | - Unpin requirements
205 | - Only install backports.csv on Python 2
206 | 
207 | ### Bugfixes
208 | 
209 | - Fixed `DataBook().load` parameter ordering (first stream, then format).
210 | - Fixed a regression for xlsx exports where non-string values were forced to
211 | strings (#314)
212 | - Fixed xlsx format detection (which was often detected as `xls` format)
213 | 
214 | ## 0.13.0 (2019-03-08)
215 | 
216 | - Added reStructuredText output capability (#336)
217 | - Added Jira output capability
218 | - Stopped calling openpyxl deprecated methods (accessing cells, removing sheets)
219 | (openpyxl minimal version is now 2.4.0)
220 | - Fixed a circular dependency issue in JSON output (#332)
221 | - Fixed Unicode error for the CSV export on Python 2 (#215)
222 | - Removed usage of optional `ujson` (#311)
223 | - Dropped Python 3.3 support
224 | 
225 | ## 0.12.1 (2017-09-01)
226 | 
227 | - Favor `Dataset.export()` over `Dataset.<format>` syntax in docs
228 | - Make pandas dependency optional
229 | 
230 | ## 0.12.0 (2017-08-27)
231 | 
232 | - Add initial pandas DataFrame support
233 | - Dropped Python 2.6 support
234 | 
235 | ## 0.11.5 (2017-06-13)
236 | 
237 | - Use `yaml.safe_load` for importing yaml.
238 | 
239 | ## 0.11.4 (2017-01-23)
240 | 
241 | - Use built-in `json` package if available
242 | - Support Python 3.5+ in classifiers
243 | 
244 | ### Bugfixes
245 | 
246 | - Fixed textual representation for Dataset with no headers
247 | - Handle decimal types
248 | 
249 | ## 0.11.3 (2016-02-16)
250 | 
251 | - Release fix.
252 | 
253 | ## 0.11.2 (2016-02-16)
254 | 
255 | ### Bugfixes
256 | 
257 | - Fix export only formats.
258 | - Fix for xlsx output.
259 | 
260 | ## 0.11.1 (2016-02-07)
261 | 
262 | ### Bugfixes
263 | 
264 | - Fixed packaging error on Python 3.
265 | 
266 | 
267 | ## 0.11.0 (2016-02-07)
268 | 
269 | ### New Formats!
270 | 
271 | - Added LaTeX table export format (`Dataset.latex`).
272 | - Support for dBase (DBF) files (`Dataset.dbf`).
273 | 
274 | ### Improvements
275 | 
276 | - New import/export interface (`Dataset.export()`, `Dataset.load()`).
277 | - CSV custom delimiter support (`Dataset.export('csv', delimiter='$')`).
278 | - Added the ability to remove duplicate rows from a dataset (`Dataset.remove_duplicates()`).
279 | - Added a mechanism to avoid `datetime.datetime` issues when serializing data.
280 | - New `detect_format()` function (mostly for internal use).
281 | - Update the vendored unicodecsv to fix `None` handling.
282 | - Only freeze the headers row, not the headers columns (xls).
283 | 
284 | ### Breaking Changes
285 | 
286 | - `detect()` function removed.
287 | 
288 | ### Bugfixes
289 | 
290 | - Fix XLSX import.
291 | - Bugfix for `Dataset.transpose().transpose()`.
292 | 
293 | 
294 | ## 0.10.0 (2014-05-27)
295 | 
296 | * Unicode Column Headers
297 | * ALL the bugfixes!
298 | 299 | ## 0.9.11 (2011-06-30) 300 | 301 | * Bugfixes 302 | 303 | ## 0.9.10 (2011-06-22) 304 | 305 | * Bugfixes 306 | 307 | ## 0.9.9 (2011-06-21) 308 | 309 | * Dataset API Changes 310 | * `stack_rows` => `stack`, `stack_columns` => `stack_cols` 311 | * column operations have their own methods now (`append_col`, `insert_col`) 312 | * List-style `pop()` 313 | * Redis-style `rpush`, `lpush`, `rpop`, `lpop`, `rpush_col`, and `lpush_col` 314 | 315 | ## 0.9.8 (2011-05-22) 316 | 317 | * OpenDocument Spreadsheet support (.ods) 318 | * Full Unicode TSV support 319 | 320 | 321 | ## 0.9.7 (2011-05-12) 322 | 323 | * Full XLSX Support! 324 | * Pickling Bugfix 325 | * Compat Module 326 | 327 | 328 | ## 0.9.6 (2011-05-12) 329 | 330 | * `seperators` renamed to `separators` 331 | * Full unicode CSV support 332 | 333 | 334 | ## 0.9.5 (2011-03-24) 335 | 336 | * Python 3.1, Python 3.2 Support (same code base!) 337 | * Formatter callback support 338 | * Various bug fixes 339 | 340 | 341 | 342 | ## 0.9.4 (2011-02-18) 343 | 344 | * Python 2.5 Support! 345 | * Tox Testing for 2.5, 2.6, 2.7 346 | * AnyJSON Integrated 347 | * OrderedDict support 348 | * Caved to community pressure (spaces) 349 | 350 | 351 | ## 0.9.3 (2011-01-31) 352 | 353 | * Databook duplication leak fix. 354 | * HTML Table output. 355 | * Added column sorting. 356 | 357 | 358 | ## 0.9.2 (2010-11-17) 359 | 360 | * Transpose method added to Datasets. 361 | * New frozen top row in Excel output. 362 | * Pickling support for Datasets and Rows. 363 | * Support for row/column stacking. 364 | 365 | 366 | ## 0.9.1 (2010-11-04) 367 | 368 | * Minor reference shadowing bugfix. 369 | 370 | 371 | ## 0.9.0 (2010-11-04) 372 | 373 | * Massive documentation update! 374 | * Tablib.org! 375 | * Row tagging and Dataset filtering! 376 | * Column insert/delete support 377 | * Column append API change (header required) 378 | * Internal Changes (Row object and use thereof) 379 | 380 | 381 | ## 0.8.5 (2010-10-06) 382 | 383 | * New import system. All dependencies attempt to load from site-packages, 384 | then fallback on tenderized modules. 385 | 386 | 387 | ## 0.8.4 (2010-10-04) 388 | 389 | * Updated XLS output: Only wrap if '\\n' in cell. 390 | 391 | 392 | ## 0.8.3 (2010-10-04) 393 | 394 | * Ability to append new column passing a callable 395 | as the value that will be applied to every row. 396 | 397 | 398 | ## 0.8.2 (2010-10-04) 399 | 400 | * Added alignment wrapping to written cells. 401 | * Added separator support to XLS. 402 | 403 | 404 | ## 0.8.1 (2010-09-28) 405 | 406 | * Packaging Fix 407 | 408 | 409 | ## 0.8.0 (2010-09-25) 410 | 411 | * New format plugin system! 412 | * Imports! ELEGANT Imports! 413 | * Tests. Lots of tests. 414 | 415 | 416 | ## 0.7.1 (2010-09-20) 417 | 418 | * Reverting methods back to properties. 419 | * Windows bug compensated in documentation. 420 | 421 | 422 | ## 0.7.0 (2010-09-20) 423 | 424 | * Renamed DataBook Databook for consistency. 425 | * Export properties changed to methods (XLS filename / StringIO bug). 426 | * Optional Dataset.xls(path='filename') support (for writing on windows). 427 | * Added utf-8 on the worksheet level. 428 | 429 | 430 | ## 0.6.4 (2010-09-19) 431 | 432 | * Updated unicode export for XLS. 433 | * More exhaustive unit tests. 434 | 435 | 436 | ## 0.6.3 (2010-09-14) 437 | 438 | * Added Dataset.append() support for columns. 439 | 440 | 441 | ## 0.6.2 (2010-09-13) 442 | 443 | * Fixed Dataset.append() error on empty dataset. 444 | * Updated Dataset.headers property w/ validation. 445 | * Added Testing Fixtures. 
446 | 447 | ## 0.6.1 (2010-09-12) 448 | 449 | * Packaging hotfixes. 450 | 451 | 452 | ## 0.6.0 (2010-09-11) 453 | 454 | * Public Release. 455 | * Export Support for XLS, JSON, YAML, and CSV. 456 | * DataBook Export for XLS, JSON, and YAML. 457 | * Python Dict Property Support. 458 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2016 Kenneth Reitz 2 | Copyright 2019 Jazzband 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tablib: format-agnostic tabular dataset library 2 | 3 | [![Jazzband](https://jazzband.co/static/img/badge.svg)](https://jazzband.co/) 4 | [![PyPI version](https://img.shields.io/pypi/v/tablib.svg)](https://pypi.org/project/tablib/) 5 | [![Supported Python versions](https://img.shields.io/pypi/pyversions/tablib.svg)](https://pypi.org/project/tablib/) 6 | [![PyPI downloads](https://img.shields.io/pypi/dm/tablib.svg)](https://pypistats.org/packages/tablib) 7 | [![GitHub Actions status](https://github.com/jazzband/tablib/workflows/Test/badge.svg)](https://github.com/jazzband/tablib/actions) 8 | [![codecov](https://codecov.io/gh/jazzband/tablib/branch/master/graph/badge.svg)](https://codecov.io/gh/jazzband/tablib) 9 | [![GitHub](https://img.shields.io/github/license/jazzband/tablib.svg)](LICENSE) 10 | 11 | _____ ______ ___________ ______ 12 | __ /_______ ____ /_ ___ /___(_)___ /_ 13 | _ __/_ __ `/__ __ \__ / __ / __ __ \ 14 | / /_ / /_/ / _ /_/ /_ / _ / _ /_/ / 15 | \__/ \__,_/ /_.___/ /_/ /_/ /_.___/ 16 | 17 | 18 | Tablib is a format-agnostic tabular dataset library, written in Python. 19 | 20 | Output formats supported: 21 | 22 | - Excel (Sets + Books) 23 | - JSON (Sets + Books) 24 | - YAML (Sets + Books) 25 | - Pandas DataFrames (Sets) 26 | - HTML (Sets) 27 | - Jira (Sets) 28 | - LaTeX (Sets) 29 | - TSV (Sets) 30 | - ODS (Sets) 31 | - CSV (Sets) 32 | - DBF (Sets) 33 | - SQL (Sets) 34 | 35 | Note that tablib *purposefully* excludes XML support. It always will. (Note: This is a 36 | joke. Pull requests are welcome.) 37 | 38 | Tablib documentation is graciously hosted on https://tablib.readthedocs.io 39 | 40 | It is also available in the ``docs`` directory of the source distribution. 
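A quick taste, as a minimal sketch of the `Dataset` API (the sample names are illustrative; `csv` and `json` ship with the core install):

```python
import tablib

# build a small dataset with headers
data = tablib.Dataset(headers=['first_name', 'last_name'])
data.append(['Kenneth', 'Reitz'])
data.append(['Bessie', 'Monke'])

# export to any registered format
print(data.export('csv'))
print(data.export('json'))
```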
41 | 
42 | Make sure to check out [Tablib on PyPI](https://pypi.org/project/tablib/)!
43 | 
44 | ## Contribute
45 | 
46 | Please see the [contributing guide](https://github.com/jazzband/tablib/blob/master/.github/CONTRIBUTING.md).
-------------------------------------------------------------------------------- /RELEASING.md: --------------------------------------------------------------------------------
1 | # Release checklist
2 | 
3 | Jazzband guidelines: https://jazzband.co/about/releases
4 | 
5 | * [ ] Get master to the appropriate code release state.
6 |   [GitHub Actions](https://github.com/jazzband/tablib/actions)
7 |   should pass on master.
8 |   [![GitHub Actions status](https://github.com/jazzband/tablib/workflows/Test/badge.svg)](https://github.com/jazzband/tablib/actions)
9 | 
10 | * [ ] Check [HISTORY.md](https://github.com/jazzband/tablib/blob/master/HISTORY.md),
11 |   update version number and release date
12 | 
13 | * [ ] Create new GitHub release: https://github.com/jazzband/tablib/releases/new
14 |   * Tag:
15 |     * Click "Choose a tag"
16 |     * Enter new tag: "v3.4.0"
17 |     * Click "**Create new tag: v3.4.0** on publish"
18 |   * Title: Leave blank, will be same as tag
19 |   * Click "Generate release notes" and edit as required
20 |   * Click "Publish release"
21 | 
22 | * [ ] Once GitHub Actions has built and uploaded distributions, check files at
23 |   [Jazzband](https://jazzband.co/projects/tablib) and release to
24 |   [PyPI](https://pypi.org/pypi/tablib)
25 | 
26 | * [ ] Check installation:
27 |   ```bash
28 |   pip uninstall -y tablib && pip install -U tablib
29 |   ```
-------------------------------------------------------------------------------- /docs/Makefile: --------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line.
5 | SPHINXOPTS    =
6 | SPHINXBUILD   = sphinx-build
7 | PAPER         =
8 | BUILDDIR      = _build
9 | 
10 | # Internal variables.
11 | PAPEROPT_a4     = -D latex_paper_size=a4
12 | PAPEROPT_letter = -D latex_paper_size=letter
13 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
14 | 
15 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest
16 | 
17 | help:
18 | 	@echo "Please use \`make <target>' where <target> is one of"
19 | 	@echo "  html       to make standalone HTML files"
20 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
21 | 	@echo "  singlehtml to make a single large HTML file"
22 | 	@echo "  pickle     to make pickle files"
23 | 	@echo "  json       to make JSON files"
24 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
25 | 	@echo "  qthelp     to make HTML files and a qthelp project"
26 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
27 | 	@echo "  epub       to make an epub"
28 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
29 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
30 | 	@echo "  text       to make text files"
31 | 	@echo "  man        to make manual pages"
32 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
33 | 	@echo "  linkcheck  to check all external links for integrity"
34 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
35 | 
36 | clean:
37 | 	-rm -rf $(BUILDDIR)/*
38 | 
39 | html:
40 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
41 | 	@echo
42 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
43 | 44 | dirhtml: 45 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 48 | 49 | singlehtml: 50 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 51 | @echo 52 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 53 | 54 | pickle: 55 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 56 | @echo 57 | @echo "Build finished; now you can process the pickle files." 58 | 59 | json: 60 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 61 | @echo 62 | @echo "Build finished; now you can process the JSON files." 63 | 64 | htmlhelp: 65 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 66 | @echo 67 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 68 | ".hhp project file in $(BUILDDIR)/htmlhelp." 69 | 70 | qthelp: 71 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 72 | @echo 73 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 74 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 75 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Tablib.qhcp" 76 | @echo "To view the help file:" 77 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Tablib.qhc" 78 | 79 | devhelp: 80 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 81 | @echo 82 | @echo "Build finished." 83 | @echo "To view the help file:" 84 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Tablib" 85 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Tablib" 86 | @echo "# devhelp" 87 | 88 | epub: 89 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 90 | @echo 91 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 92 | 93 | latex: 94 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 95 | @echo 96 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 97 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 98 | "(use \`make latexpdf' here to do that automatically)." 99 | 100 | latexpdf: 101 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 102 | @echo "Running LaTeX files through pdflatex..." 103 | make -C $(BUILDDIR)/latex all-pdf 104 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 105 | 106 | text: 107 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 108 | @echo 109 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 110 | 111 | man: 112 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 113 | @echo 114 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 115 | 116 | changes: 117 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 118 | @echo 119 | @echo "The overview file is in $(BUILDDIR)/changes." 120 | 121 | linkcheck: 122 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 123 | @echo 124 | @echo "Link check complete; look for any errors in the above output " \ 125 | "or in $(BUILDDIR)/linkcheck/output.txt." 126 | 127 | doctest: 128 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 129 | @echo "Testing of doctests in the sources finished, look at the " \ 130 | "results in $(BUILDDIR)/doctest/output.txt." 
131 | -------------------------------------------------------------------------------- /docs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzband/tablib/8dcb87f69d2f6d501a511210387df233099b2957/docs/__init__.py -------------------------------------------------------------------------------- /docs/_templates/sidebarintro.html: -------------------------------------------------------------------------------- 1 |

<h3>About Tablib</h3>
2 | <p>
3 | Tablib is an MIT Licensed format-agnostic tabular dataset library, written in Python. It allows you to import, export, and manipulate tabular data sets. Advanced features include segregation, dynamic columns, tags & filtering, and seamless format import & export.
4 | </p>
5 | <h3>Useful Links</h3>
6 | <ul>
13 | </ul>
-------------------------------------------------------------------------------- /docs/_templates/sidebarlogo.html: --------------------------------------------------------------------------------
1 | <h3>About Tablib</h3>
2 | <p>
3 | Tablib is an MIT Licensed format-agnostic tabular dataset library, written in Python. It allows you to import, export, and manipulate tabular data sets. Advanced features include segregation, dynamic columns, tags & filtering, and seamless format import & export.
4 | </p>
5 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | .. _api: 2 | 3 | === 4 | API 5 | === 6 | 7 | 8 | .. module:: tablib 9 | 10 | This part of the documentation covers all the interfaces of Tablib. For 11 | parts where Tablib depends on external libraries, we document the most 12 | important right here and provide links to the canonical documentation. 13 | 14 | 15 | -------------- 16 | Dataset Object 17 | -------------- 18 | 19 | 20 | .. autoclass:: Dataset 21 | :inherited-members: 22 | 23 | 24 | --------------- 25 | Databook Object 26 | --------------- 27 | 28 | 29 | .. autoclass:: Databook 30 | :inherited-members: 31 | 32 | 33 | --------- 34 | Functions 35 | --------- 36 | 37 | 38 | .. autofunction:: detect_format 39 | 40 | .. autofunction:: import_set 41 | 42 | 43 | ---------- 44 | Exceptions 45 | ---------- 46 | 47 | 48 | .. automodule:: tablib.exceptions 49 | :members: 50 | 51 | 52 | Now, go start some :ref:`Tablib Development `. 53 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # 2 | # Tablib documentation build configuration file, created by 3 | # sphinx-quickstart on Tue Oct 5 15:25:21 2010. 4 | # 5 | # This file is execfile()d with the current directory set to its containing dir. 6 | # 7 | # Note that not all possible configuration values are present in this 8 | # autogenerated file. 9 | # 10 | # All configuration values have a default; values that are commented out 11 | # serve to show the default. 12 | import tablib 13 | 14 | # If extensions (or modules to document with autodoc) are in another directory, 15 | # add these directories to sys.path here. If the directory is relative to the 16 | # documentation root, use os.path.abspath to make it absolute, like shown here. 17 | # sys.path.insert(0, os.path.abspath('..')) 18 | # -- General configuration ----------------------------------------------------- 19 | 20 | # If your documentation needs a minimal Sphinx version, state it here. 21 | # needs_sphinx = '1.0' 22 | 23 | # Add any Sphinx extension module names here, as strings. They can be extensions 24 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 25 | extensions = [ 26 | 'sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.coverage', 27 | 'sphinx.ext.viewcode', 'sphinx.ext.intersphinx' 28 | ] 29 | intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} 30 | 31 | # Add any paths that contain templates here, relative to this directory. 32 | templates_path = ['_templates'] 33 | 34 | # The suffix of source filenames. 35 | source_suffix = '.rst' 36 | 37 | # The encoding of source files. 38 | # source_encoding = 'utf-8-sig' 39 | 40 | # The master toctree document. 41 | master_doc = 'index' 42 | 43 | # General information about the project. 44 | project = 'Tablib' 45 | copyright = '2019 Jazzband' 46 | 47 | # The version info for the project you're documenting, acts as replacement for 48 | # |version| and |release|, also used in various other places throughout the 49 | # built documents. 50 | # 51 | # The full version, including alpha/beta/rc tags. 52 | release = tablib.__version__ 53 | # The short X.Y version. 54 | version = '.'.join(tablib.__version__.split('.')[:2]) 55 | # for example take major/minor 56 | 57 | # The language for content autogenerated by Sphinx. 
Refer to documentation 58 | # for a list of supported languages. 59 | # language = None 60 | 61 | # There are two options for replacing |today|: either, you set today to some 62 | # non-false value, then it is used: 63 | # today = '' 64 | # Else, today_fmt is used as the format for a strftime call. 65 | # today_fmt = '%B %d, %Y' 66 | 67 | # List of patterns, relative to source directory, that match files and 68 | # directories to ignore when looking for source files. 69 | exclude_patterns = ['_build'] 70 | 71 | # The reST default role (used for this markup: `text`) to use for all documents. 72 | # default_role = None 73 | 74 | # If true, '()' will be appended to :func: etc. cross-reference text. 75 | add_function_parentheses = True 76 | 77 | # If true, the current module name will be prepended to all description 78 | # unit titles (such as .. function::). 79 | # add_module_names = True 80 | 81 | # If true, sectionauthor and moduleauthor directives will be shown in the 82 | # output. They are ignored by default. 83 | # show_authors = False 84 | 85 | # The name of the Pygments (syntax highlighting) style to use. 86 | # pygments_style = '' 87 | 88 | # A list of ignored prefixes for module index sorting. 89 | # modindex_common_prefix = [] 90 | 91 | 92 | # -- Options for HTML output --------------------------------------------------- 93 | 94 | # The theme to use for HTML and HTML Help pages. See the documentation for 95 | # a list of builtin themes. 96 | html_theme = 'alabaster' 97 | 98 | # Theme options are theme-specific and customize the look and feel of a theme 99 | # further. For a list of options available for each theme, see the 100 | # documentation. 101 | # html_theme_options = {} 102 | 103 | # Add any paths that contain custom themes here, relative to this directory. 104 | # html_theme_path = [] 105 | 106 | # The name for this set of Sphinx documents. If None, it defaults to 107 | # " v documentation". 108 | # html_title = None 109 | 110 | # A shorter title for the navigation bar. Default is the same as html_title. 111 | # html_short_title = None 112 | 113 | # The name of an image file (relative to this directory) to place at the top 114 | # of the sidebar. 115 | # html_logo = None 116 | 117 | # The name of an image file (within the static path) to use as favicon of the 118 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 119 | # pixels large. 120 | # html_favicon = None 121 | 122 | # Add any paths that contain custom static files (such as style sheets) here, 123 | # relative to this directory. They are copied after the builtin static files, 124 | # so a file named "default.css" will overwrite the builtin "default.css". 125 | # html_static_path = ['static'] 126 | 127 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 128 | # using the given strftime format. 129 | # html_last_updated_fmt = '%b %d, %Y' 130 | 131 | # If true, SmartyPants will be used to convert quotes and dashes to 132 | # typographically correct entities. 133 | html_use_smartypants = True 134 | 135 | # Custom sidebar templates, maps document names to template names. 136 | html_sidebars = { 137 | 'index': ['sidebarintro.html', 'sourcelink.html', 'searchbox.html'], 138 | '**': ['sidebarlogo.html', 'localtoc.html', 'relations.html', 139 | 'sourcelink.html', 'searchbox.html'] 140 | } 141 | 142 | # Additional templates that should be rendered to pages, maps page names to 143 | # template names. 
144 | # html_additional_pages = {}
145 | 
146 | # If false, no module index is generated.
147 | # html_domain_indices = True
148 | 
149 | # If false, no index is generated.
150 | # html_use_index = True
151 | 
152 | # If true, the index is split into individual pages for each letter.
153 | # html_split_index = False
154 | 
155 | # If true, links to the reST sources are added to the pages.
156 | html_show_sourcelink = True
157 | 
158 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
159 | html_show_sphinx = False
160 | 
161 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
162 | # html_show_copyright = True
163 | 
164 | # If true, an OpenSearch description file will be output, and all pages will
165 | # contain a <link> tag referring to it. The value of this option must be the
166 | # base URL from which the finished HTML is served.
167 | # html_use_opensearch = ''
168 | 
169 | # This is the file name suffix for HTML files (e.g. ".xhtml").
170 | # html_file_suffix = None
171 | 
172 | # Output file base name for HTML help builder.
173 | htmlhelp_basename = 'Tablibdoc'
174 | 
175 | 
176 | # -- Options for LaTeX output --------------------------------------------------
177 | 
178 | # The paper size ('letter' or 'a4').
179 | # latex_paper_size = 'letter'
180 | 
181 | # The font size ('10pt', '11pt' or '12pt').
182 | # latex_font_size = '10pt'
183 | 
184 | # Grouping the document tree into LaTeX files. List of tuples
185 | # (source start file, target name, title, author, documentclass [howto/manual]).
186 | latex_documents = [
187 |     ('index', 'Tablib.tex', 'Tablib Documentation',
188 |      'Jazzband', 'manual'),
189 | ]
190 | 
191 | latex_use_modindex = False
192 | 
193 | latex_elements = {
194 |     'papersize': 'a4paper',
195 |     'pointsize': '12pt',
196 | }
197 | latex_use_parts = True
198 | 
199 | # The name of an image file (relative to this directory) to place at the top of
200 | # the title page.
201 | # latex_logo = None
202 | 
203 | # For "manual" documents, if this is true, then toplevel headings are parts,
204 | # not chapters.
205 | # latex_use_parts = False
206 | 
207 | # If true, show page references after internal links.
208 | # latex_show_pagerefs = False
209 | 
210 | # If true, show URL addresses after external links.
211 | # latex_show_urls = False
212 | 
213 | # Additional stuff for the LaTeX preamble.
214 | # latex_preamble = ''
215 | 
216 | # Documents to append as an appendix to all manuals.
217 | # latex_appendices = []
218 | 
219 | # If false, no module index is generated.
220 | # latex_domain_indices = True
221 | 
222 | 
223 | # -- Options for manual page output --------------------------------------------
224 | 
225 | # One entry per manual page. List of tuples
226 | # (source start file, name, description, authors, manual section).
227 | man_pages = [
228 |     ('index', 'tablib', 'Tablib Documentation',
229 |      ['Jazzband'], 1)
230 | ]
-------------------------------------------------------------------------------- /docs/development.rst: --------------------------------------------------------------------------------
1 | .. _development:
2 | 
3 | Development
4 | ===========
5 | 
6 | Tablib is under active development, and contributors are welcome.
7 | 
8 | If you have a feature request, suggestion, or bug report, please open a new
9 | issue on GitHub_. To submit patches, please send a pull request on GitHub_.
10 | 
11 | .. _GitHub: https://github.com/jazzband/tablib/
12 | 
13 | 
14 | 
15 | ..
_design: 16 | 17 | --------------------- 18 | Design Considerations 19 | --------------------- 20 | 21 | Tablib was developed with a few :pep:`20` idioms in mind. 22 | 23 | #. Beautiful is better than ugly. 24 | #. Explicit is better than implicit. 25 | #. Simple is better than complex. 26 | #. Complex is better than complicated. 27 | #. Readability counts. 28 | 29 | A few other things to keep in mind: 30 | 31 | #. Keep your code DRY. 32 | #. Strive to be as simple (to use) as possible. 33 | 34 | .. _scm: 35 | 36 | -------------- 37 | Source Control 38 | -------------- 39 | 40 | 41 | Tablib source is controlled with Git_, the lean, mean, distributed source 42 | control machine. 43 | 44 | The repository is publicly accessible. 45 | 46 | .. code-block:: console 47 | 48 | git clone git://github.com/jazzband/tablib.git 49 | 50 | The project is hosted on **GitHub**. 51 | 52 | GitHub: 53 | https://github.com/jazzband/tablib 54 | 55 | 56 | Git Branch Structure 57 | ++++++++++++++++++++ 58 | 59 | Feature / Hotfix / Release branches follow a `Successful Git Branching Model`_ . 60 | Git-flow_ is a great tool for managing the repository. I highly recommend it. 61 | 62 | ``master`` 63 | Current production release (|version|) on PyPi. 64 | 65 | Each release is tagged. 66 | 67 | When submitting patches, please place your feature/change in its own branch prior to opening a pull request on GitHub_. 68 | 69 | 70 | .. _Git: https://git-scm.org 71 | .. _`Successful Git Branching Model`: https://nvie.com/posts/a-successful-git-branching-model/ 72 | .. _git-flow: https://github.com/nvie/gitflow 73 | 74 | 75 | .. _newformats: 76 | 77 | ------------------ 78 | Adding New Formats 79 | ------------------ 80 | 81 | Tablib welcomes new format additions! Format suggestions include: 82 | 83 | * MySQL Dump 84 | 85 | 86 | Coding by Convention 87 | ++++++++++++++++++++ 88 | 89 | Tablib features a micro-framework for adding format support. 90 | The easiest way to understand it is to use it. 91 | So, let's define our own format, named *xxx*. 92 | 93 | From version 1.0, Tablib formats are class-based and can be dynamically 94 | registered. 95 | 96 | 1. Write your custom format class:: 97 | 98 | class MyXXXFormatClass: 99 | title = 'xxx' 100 | 101 | @classmethod 102 | def export_set(cls, dset): 103 | .... 104 | # returns string representation of given dataset 105 | 106 | @classmethod 107 | def export_book(cls, dbook): 108 | .... 109 | # returns string representation of given databook 110 | 111 | @classmethod 112 | def import_set(cls, dset, in_stream): 113 | ... 114 | # populates given Dataset with given datastream 115 | 116 | @classmethod 117 | def import_book(cls, dbook, in_stream): 118 | ... 119 | # returns Databook instance 120 | 121 | @classmethod 122 | def detect(cls, stream): 123 | ... 124 | # returns True if given stream is parsable as xxx 125 | 126 | .. admonition:: Excluding Support 127 | 128 | If the format excludes support for an import/export mechanism (*e.g.* 129 | :class:`csv ` excludes 130 | :class:`Databook ` support), 131 | simply don't define the respective class methods. 132 | Appropriate errors will be raised. 133 | 134 | 2. Register your class:: 135 | 136 | from tablib.formats import registry 137 | 138 | registry.register('xxx', MyXXXFormatClass()) 139 | 140 | 3. From then on, you should be able to use your new custom format as if it were 141 | a built-in Tablib format, e.g. using ``dataset.export('xxx')`` will use the 142 | ``MyXXXFormatClass.export_set`` method. 143 | 144 | .. 
_testing: 145 | 146 | -------------- 147 | Testing Tablib 148 | -------------- 149 | 150 | Testing is crucial to Tablib's stability. 151 | This stable project is used in production by many companies and developers, 152 | so it is important to be certain that every version released is fully operational. 153 | When developing a new feature for Tablib, be sure to write proper tests for it as well. 154 | 155 | When developing a feature for Tablib, 156 | the easiest way to test your changes for potential issues is to simply run the test suite directly. 157 | 158 | .. code-block:: console 159 | 160 | $ tox 161 | 162 | ---------------------- 163 | Continuous Integration 164 | ---------------------- 165 | 166 | Every pull request is automatically tested and inspected upon receipt with `GitHub Actions`_. 167 | If you broke the build, you will receive an email accordingly. 168 | 169 | Anyone may view the build status and history at any time. 170 | 171 | https://github.com/jazzband/tablib/actions 172 | 173 | Additional reports will also be included here in the future, including :pep:`8` checks and stress reports for extremely large datasets. 174 | 175 | .. _`GitHub Actions`: https://github.com/jazzband/tablib/actions 176 | 177 | 178 | .. _docs: 179 | 180 | ----------------- 181 | Building the Docs 182 | ----------------- 183 | 184 | Documentation is written in the powerful, flexible, 185 | and standard Python documentation format, `reStructured Text`_. 186 | Documentation builds are powered by the powerful Pocoo project, Sphinx_. 187 | The :ref:`API Documentation ` is mostly documented inline throughout the module. 188 | 189 | The Docs live in ``tablib/docs``. 190 | In order to build them, you will first need to install Sphinx. 191 | 192 | .. code-block:: console 193 | 194 | $ pip install sphinx 195 | 196 | 197 | Then, to build an HTML version of the docs, simply run the following from the ``docs`` directory: 198 | 199 | .. code-block:: console 200 | 201 | $ make html 202 | 203 | Your ``docs/_build/html`` directory will then contain an HTML representation of the documentation, 204 | ready for publication on most web servers. 205 | 206 | You can also generate the documentation in **epub**, **latex**, **json**, *&c* similarly. 207 | 208 | .. _`reStructured Text`: http://docutils.sourceforge.net/rst.html 209 | .. _Sphinx: http://sphinx.pocoo.org 210 | .. _`GitHub Pages`: https://pages.github.com 211 | 212 | ---------- 213 | 214 | Make sure to check out the :ref:`API Documentation `. 215 | -------------------------------------------------------------------------------- /docs/formats.rst: -------------------------------------------------------------------------------- 1 | .. _formats: 2 | 3 | ======= 4 | Formats 5 | ======= 6 | 7 | Tablib supports a wide variety of different tabular formats, both for input and 8 | output. Moreover, you can :ref:`register your own formats `. 9 | 10 | cli 11 | === 12 | 13 | The ``cli`` format is currently export-only. The exports produce a representation 14 | table suited to a terminal. 
15 | 16 | When exporting to a CLI you can pass the table format with the ``tablefmt`` 17 | parameter, the supported formats are:: 18 | 19 | >>> import tabulate 20 | >>> list(tabulate._table_formats) 21 | ['simple', 'plain', 'grid', 'fancy_grid', 'github', 'pipe', 'orgtbl', 22 | 'jira', 'presto', 'psql', 'rst', 'mediawiki', 'moinmoin', 'youtrack', 23 | 'html', 'latex', 'latex_raw', 'latex_booktabs', 'tsv', 'textile'] 24 | 25 | For example:: 26 | 27 | dataset.export("cli", tablefmt="github") 28 | dataset.export("cli", tablefmt="grid") 29 | 30 | This format is optional, install Tablib with ``pip install "tablib[cli]"`` to 31 | make the format available. 32 | 33 | csv 34 | === 35 | 36 | When you import CSV data, you can specify if the first line of your data source 37 | is headers with the ``headers`` boolean parameter (defaults to ``True``):: 38 | 39 | import tablib 40 | 41 | tablib.import_set(your_data_stream, format='csv', headers=False) 42 | 43 | It is also possible to provide the ``skip_lines`` parameter for the number of 44 | lines that should be skipped before starting to read data. 45 | 46 | .. versionchanged:: 3.1.0 47 | 48 | The ``skip_lines`` parameter was added. 49 | 50 | When exporting with the ``csv`` format, the top row will contain headers, if 51 | they have been set. Otherwise, the top row will contain the first row of the 52 | dataset. 53 | 54 | When importing a CSV data source or exporting a dataset as CSV, you can pass any 55 | parameter supported by the :py:func:`csv.reader` and :py:func:`csv.writer` 56 | functions. For example:: 57 | 58 | tablib.import_set(your_data_stream, format='csv', dialect='unix') 59 | 60 | dataset.export('csv', delimiter=' ', quotechar='|') 61 | 62 | .. admonition:: Line endings 63 | 64 | Exporting uses \\r\\n line endings by default so, make sure to include 65 | ``newline=''`` otherwise you will get a blank line between each row 66 | when you open the file in Excel:: 67 | 68 | with open('output.csv', 'w', newline='') as f: 69 | f.write(dataset.export('csv')) 70 | 71 | If you do not do this, and you export the file on Windows, your 72 | CSV file will open in Excel with a blank line between each row. 73 | 74 | dbf 75 | === 76 | 77 | Import/export using the dBASE_ format. 78 | 79 | .. admonition:: Binary Warning 80 | 81 | The ``dbf`` format contains binary data, so make sure to write in binary 82 | mode:: 83 | 84 | with open('output.dbf', 'wb') as f: 85 | f.write(dataset.export('dbf') 86 | 87 | .. _dBASE: https://en.wikipedia.org/wiki/DBase 88 | 89 | df (DataFrame) 90 | ============== 91 | 92 | Import/export using the pandas_ DataFrame format. This format is optional, 93 | install Tablib with ``pip install "tablib[pandas]"`` to make the format available. 94 | 95 | .. _pandas: https://pandas.pydata.org/ 96 | 97 | html 98 | ==== 99 | 100 | The exports produce an HTML page with the data in a ````. If headers have 101 | been set, they will be used as table headers (``thead``). 102 | 103 | When you import HTML, you can specify a specific table to import by providing 104 | the ``table_id`` argument:: 105 | 106 | import tablib 107 | 108 | tablib.import_set(your_html, format='html', table_id='some_table_id') 109 | 110 | Otherwise, the first table found will be imported. 111 | 112 | .. versionchanged:: 3.6.0 113 | 114 | The ability to import HTML was added. The dependency on MarkupPy was dropped. 115 | 116 | jira 117 | ==== 118 | 119 | The ``jira`` format is currently export-only. 
Exports format the dataset
120 | according to the Jira table syntax::
121 |
122 |     ||heading 1||heading 2||heading 3||
123 |     |col A1|col A2|col A3|
124 |     |col B1|col B2|col B3|
125 |
126 | json
127 | ====
128 |
129 | Import/export using the JSON_ format. If headers have been set, a JSON list of
130 | objects will be returned. If no headers have been set, a JSON list of lists
131 | (rows) will be returned instead.
132 |
133 | Import assumes (for now) that headers exist.
134 |
135 | .. _JSON: http://json.org/
136 |
137 | latex
138 | =====
139 |
140 | Export using the LaTeX_ format; this format is export-only.
141 | If a title has been set, it will be exported as the table caption.
142 |
143 | .. _LaTeX: https://www.latex-project.org/
144 |
145 | ods
146 | ===
147 |
148 | Import/export data in OpenDocument Spreadsheet format.
149 |
150 | .. versionadded:: 3.6.0
151 |
152 |     Import functionality was added.
153 |
154 | This format is optional; install Tablib with ``pip install "tablib[ods]"`` to
155 | make the format available.
156 |
157 | The ``import_set()`` method also supports a ``skip_lines`` parameter that you
158 | can set to a number of lines that should be skipped before starting to read
159 | data.
160 |
161 | .. admonition:: Binary Warning
162 |
163 |     :class:`Dataset.ods` contains binary data, so make sure to write in binary mode::
164 |
165 |         with open('output.ods', 'wb') as f:
166 |             f.write(data.ods)
167 |
168 | rst
169 | ===
170 |
171 | Export data as a reStructuredText_ table representation of a dataset. The
172 | ``rst`` format is export-only.
173 |
174 | Exporting returns a simple table if the text in the first column is never
175 | wrapped, otherwise returns a grid table::
176 |
177 |     >>> from tablib import Dataset
178 |     >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1))
179 |     >>> data = Dataset()
180 |     >>> data.headers = ['A', 'B', 'A and B']
181 |     >>> for a, b in bits:
182 |     ...     data.append([bool(a), bool(b), bool(a * b)])
183 |     >>> table = data.export('rst')
184 |     >>> table.split('\\n') == [
185 |     ...     '===== ===== =====',
186 |     ...     '  A     B   A and',
187 |     ...     '              B  ',
188 |     ...     '===== ===== =====',
189 |     ...     'False False False',
190 |     ...     'True  False False',
191 |     ...     'False True  False',
192 |     ...     'True  True  True ',
193 |     ...     '===== ===== =====',
194 |     ... ]
195 |     True
196 |
197 | .. _reStructuredText: http://docutils.sourceforge.net/rst.html
198 |
199 | tsv
200 | ===
201 |
202 | A variant of the csv_ format that uses tab characters as field separators.
203 |
204 | xls
205 | ===
206 |
207 | Import/export data in the legacy Excel Spreadsheet representation.
208 |
209 | This format is optional; install Tablib with ``pip install "tablib[xls]"`` to
210 | make the format available.
211 |
212 | Its ``import_set()`` method also supports a ``skip_lines`` parameter that you
213 | can set to a number of lines that should be skipped before starting to read
214 | data.
215 |
216 | .. versionchanged:: 3.1.0
217 |
218 |     The ``skip_lines`` parameter for ``import_set()`` was added.
219 |
220 | .. note::
221 |
222 |     XLS files are limited to a maximum of 65,536 rows. Use xlsx_ to avoid this
223 |     limitation.
224 |
225 | .. admonition:: Binary Warning
226 |
227 |     The ``xls`` file format is binary, so make sure to write in binary mode::
228 |
229 |         with open('output.xls', 'wb') as f:
230 |             f.write(data.export('xls'))
231 |
232 | xlsx
233 | ====
234 |
235 | Import/export data in Excel 07+ Spreadsheet representation.
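For example, loading a workbook into a dataset might look like this (a minimal sketch; ``report.xlsx`` is a hypothetical file)::

    import tablib

    with open('report.xlsx', 'rb') as fh:
        data = tablib.Dataset().load(fh, format='xlsx')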
236 |
237 | This format is optional; install Tablib with ``pip install "tablib[xlsx]"`` to
238 | make the format available.
239 |
240 | The ``import_set()`` and ``import_book()`` methods accept keyword
241 | argument ``read_only``. If its value is ``True`` (the default), the
242 | XLSX data source is read lazily. Lazy reading generally reduces time
243 | and memory consumption, especially for large spreadsheets. However,
244 | it relies on the XLSX data source declaring correct dimensions. Some
245 | programs generate XLSX files with incorrect dimensions. Such files
246 | may need to be loaded with this optimization turned off by passing
247 | ``read_only=False``.
248 |
249 | The ``import_set()`` method also supports a ``skip_lines`` parameter that you
250 | can set to a number of lines that should be skipped before starting to read
251 | data.
252 |
253 | The ``export_set()`` method supports a ``column_width`` parameter that controls
254 | how column widths are set. It can be either ``None``, an integer, or the
255 | default ``"adaptive"``. If ``"adaptive"`` is passed, each column's width is
256 | calculated individually, based on the length of the values it contains.
257 | For example::
258 |
259 |     data = tablib.Dataset()
260 |     data.export('xlsx', column_width='adaptive')
261 |
262 | .. versionchanged:: 3.8.0
263 |     The ``column_width`` parameter for ``export_set()`` was added.
264 |
265 | .. versionchanged:: 3.1.0
266 |
267 |     The ``skip_lines`` parameter for ``import_set()`` was added.
268 |
269 | .. note::
270 |
271 |     When reading an ``xlsx`` file containing formulas in its cells, Tablib will
272 |     read the cell values, not the cell formulas.
273 |
274 | .. versionchanged:: 2.0.0
275 |
276 |     Reads cell values instead of formulas.
277 |
278 | You can export data to xlsx format by calling :meth:`export('xlsx') <.export>`.
279 | There are optional parameters to control the export.
280 | For available parameters, see :meth:`tablib.formats._xlsx.XLSXFormat.export_set`.
281 |
282 | .. admonition:: Binary Warning
283 |
284 |     The ``xlsx`` file format is binary, so make sure to write in binary mode::
285 |
286 |         with open('output.xlsx', 'wb') as f:
287 |             f.write(data.export('xlsx'))
288 |
289 | yaml
290 | ====
291 |
292 | Import/export data in the YAML_ format.
293 | When exporting, if headers have been set, a YAML list of objects will be
294 | returned. If no headers have been set, a YAML list of lists (rows) will be
295 | returned instead.
296 |
297 | Import assumes (for now) that headers exist.
298 |
299 | This format is optional; install Tablib with ``pip install "tablib[yaml]"`` to
300 | make the format available.
301 |
302 | .. _YAML: https://yaml.org
303 |
304 | sql
305 | ===
306 |
307 | .. versionadded:: 3.9.0
308 |
309 | The ``sql`` format is export-only. It produces SQL INSERT statements (one per row),
310 | assuming the target table already exists with the same columns.
311 | The table name can be passed as an argument or will be taken from the dataset's title (or defaults to ``export_table``).
312 | Columns can be passed as an argument or will be taken from the dataset's headers.
313 | Additionally, the ``commit`` argument can be passed to add a ``COMMIT;`` statement at the end.
314 | Values are rendered as ANSI SQL literals:
315 | 316 | - ``NULL`` for null values 317 | - ``TRUE``/``FALSE`` for booleans 318 | - ``DATE 'YYYY-MM-DD'`` for date values 319 | - ``TIMESTAMP 'YYYY-MM-DD HH:MM:SS'`` for timestamp values 320 | - Numeric literals for ints/floats/decimals 321 | - Single-quoted strings with embedded quotes escaped 322 | 323 | Example:: 324 | 325 | import datetime 326 | from tablib import Dataset 327 | 328 | data = Dataset(title='users') 329 | data.headers = ['id', 'name', 'joined'] 330 | data.append([1, 'Alice', datetime.date(2021,1,1)]) 331 | 332 | print(data.export('sql')) 333 | print(data.export('sql', table='\"User_Updates\"', columns=['id', 'username', 'update_date'], commit=True)) 334 | 335 | Output:: 336 | 337 | INSERT INTO users (id,name,joined) VALUES (1, 'Alice', DATE '2021-01-01'); 338 | 339 | INSERT INTO "User_Updates" (id,username,update_date) VALUES (1, 'Alice', DATE '2021-01-01'); 340 | COMMIT; 341 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Tablib documentation master file, created by 2 | sphinx-quickstart on Tue Oct 5 15:25:21 2010. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root ``toctree`` directive. 5 | 6 | Tablib: Pythonic Tabular Datasets 7 | ================================= 8 | 9 | Release v\ |version|. (:ref:`Installation `) 10 | 11 | .. Contents: 12 | .. 13 | .. .. toctree:: 14 | .. :maxdepth: 2 15 | .. 16 | 17 | .. Indices and tables 18 | .. ================== 19 | .. 20 | .. * :ref:`genindex` 21 | .. * :ref:`modindex` 22 | .. * :ref:`search` 23 | 24 | 25 | Tablib is an `MIT Licensed `_ format-agnostic tabular dataset library, written in Python. 26 | It allows you to import, export, and manipulate tabular data sets. 27 | Advanced features include segregation, dynamic columns, tags & filtering, 28 | and seamless format import & export. 29 | 30 | :: 31 | 32 | >>> data = tablib.Dataset(headers=['First Name', 'Last Name', 'Age']) 33 | >>> for i in [('Kenneth', 'Reitz', 22), ('Bessie', 'Monke', 21)]: 34 | ... data.append(i) 35 | 36 | 37 | >>> print(data.export('json')) 38 | [{"Last Name": "Reitz", "First Name": "Kenneth", "Age": 22}, {"Last Name": "Monke", "First Name": "Bessie", "Age": 21}] 39 | 40 | >>> print(data.export('yaml')) 41 | - {Age: 22, First Name: Kenneth, Last Name: Reitz} 42 | - {Age: 21, First Name: Bessie, Last Name: Monke} 43 | 44 | >>> data.export('xlsx') 45 | 46 | 47 | >>> data.export('df') 48 | First Name Last Name Age 49 | 0 Kenneth Reitz 22 50 | 1 Bessie Monke 21 51 | 52 | 53 | Testimonials 54 | ------------ 55 | 56 | `National Geographic `_, 57 | `Digg, Inc `_, 58 | `Northrop Grumman `_, 59 | `Discovery Channel `_, 60 | and `The Sunlight Foundation `_ use Tablib internally. 61 | 62 | 63 | 64 | **Greg Thorton** 65 | Tablib by @kennethreitz saved my life. 66 | I had to consolidate like 5 huge poorly maintained lists of domains and data. 67 | It was a breeze! 68 | 69 | **Dave Coutts** 70 | It's turning into one of my most used modules of 2010. 71 | You really hit a sweet spot for managing tabular data with a minimal amount of code and effort. 72 | 73 | **Joshua Ourisman** 74 | Tablib has made it so much easier to deal with the inevitable 'I want an Excel file!' requests from clients... 75 | 76 | **Brad Montgomery** 77 | I think you nailed the "Python Zen" with tablib. 78 | Thanks again for an awesome lib! 
79 |
80 |
81 | User's Guide
82 | ------------
83 |
84 | This part of the documentation, which is mostly prose, begins with some background information about Tablib, then focuses on step-by-step instructions for getting the most out of your datasets.
85 |
86 | .. toctree::
87 |    :maxdepth: 2
88 |
89 |    intro
90 |
91 | .. toctree::
92 |    :maxdepth: 2
93 |
94 |    install
95 |
96 | .. toctree::
97 |    :maxdepth: 2
98 |
99 |    tutorial
100 |
101 | .. toctree::
102 |    :maxdepth: 2
103 |
104 |    formats
105 |
106 | .. toctree::
107 |    :maxdepth: 2
108 |
109 |    development
110 |
111 |
112 | API Reference
113 | -------------
114 |
115 | If you are looking for information on a specific function, class or
116 | method, this part of the documentation is for you.
117 |
118 | .. toctree::
119 |    :maxdepth: 2
120 |
121 |    api
122 | --------------------------------------------------------------------------------
/docs/install.rst:
--------------------------------------------------------------------------------
1 | .. _install:
2 |
3 | Installation
4 | ============
5 |
6 | This part of the documentation covers the installation of Tablib. The first step to using any software package is getting it properly installed.
7 |
8 |
9 | .. _installing:
10 |
11 | -----------------
12 | Installing Tablib
13 | -----------------
14 |
15 | Pip
16 | ---
17 |
18 | Of course, the recommended way to install Tablib is with pip:
19 |
20 | .. code-block:: console
21 |
22 |     $ pip install tablib
23 |
24 | You can also choose to install more dependencies to have more import/export
25 | formats available:
26 |
27 | .. code-block:: console
28 |
29 |     $ pip install "tablib[xlsx]"
30 |
31 | Or all possible formats:
32 |
33 | .. code-block:: console
34 |
35 |     $ pip install "tablib[all]"
36 |
37 | which is equivalent to:
38 |
39 | .. code-block:: console
40 |
41 |     $ pip install "tablib[html, pandas, ods, xls, xlsx, yaml]"
42 |
43 | -------------------
44 | Download the Source
45 | -------------------
46 |
47 | You can also install Tablib from source.
48 | The latest release (|version|) is available from GitHub.
49 |
50 | * tarball_
51 | * zipball_
52 |
53 |
54 |
55 | Once you have a copy of the source,
56 | you can embed it in your Python package,
57 | or install it into your site-packages easily.
58 |
59 | .. code-block:: console
60 |
61 |     $ pip install .
62 |
63 |
64 | To download the full source history from Git, see :ref:`Source Control `.
65 |
66 | .. _tarball: https://github.com/jazzband/tablib/tarball/master
67 | .. _zipball: https://github.com/jazzband/tablib/zipball/master
68 |
69 |
70 | .. _updates:
71 |
72 | Staying Updated
73 | ---------------
74 |
75 | The latest version of Tablib will always be available here:
76 |
77 | * PyPI: https://pypi.org/project/tablib/
78 | * GitHub: https://github.com/jazzband/tablib/
79 |
80 | When a new version is available, upgrading is simple::
81 |
82 |     $ pip install tablib --upgrade
83 |
84 |
85 | Now, go get a :ref:`Quick Start <quickstart>`.
--------------------------------------------------------------------------------
/docs/intro.rst:
--------------------------------------------------------------------------------
1 | .. _intro:
2 |
3 | Introduction
4 | ============
5 |
6 | This part of the documentation covers all the interfaces of Tablib.
7 | Tablib is a format-agnostic tabular dataset library, written in Python.
8 | It allows you to Pythonically import, export, and manipulate tabular data sets.
9 | Advanced features include segregation, dynamic columns, tags/filtering, and 10 | seamless format import/export. 11 | 12 | 13 | Philosophy 14 | ---------- 15 | 16 | Tablib was developed with a few :pep:`20` idioms in mind. 17 | 18 | #. Beautiful is better than ugly. 19 | #. Explicit is better than implicit. 20 | #. Simple is better than complex. 21 | #. Complex is better than complicated. 22 | #. Readability counts. 23 | 24 | All contributions to Tablib should keep these important rules in mind. 25 | 26 | .. _license: 27 | 28 | Tablib License 29 | -------------- 30 | 31 | Tablib is released under terms of `The MIT License`_. 32 | 33 | Copyright 2017 Kenneth Reitz 34 | 35 | Permission is hereby granted, free of charge, to any person obtaining a copy 36 | of this software and associated documentation files (the "Software"), to deal 37 | in the Software without restriction, including without limitation the rights 38 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 39 | copies of the Software, and to permit persons to whom the Software is 40 | furnished to do so, subject to the following conditions: 41 | 42 | The above copyright notice and this permission notice shall be included in 43 | all copies or substantial portions of the Software. 44 | 45 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 46 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 47 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 48 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 49 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 50 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 51 | THE SOFTWARE. 52 | 53 | .. _`The MIT License`: https://opensource.org/licenses/mit-license.php 54 | 55 | Now, go :ref:`install Tablib `. 56 | -------------------------------------------------------------------------------- /docs/krstyle.sty: -------------------------------------------------------------------------------- 1 | \definecolor{TitleColor}{rgb}{0,0,0} 2 | \definecolor{InnerLinkColor}{rgb}{0,0,0} 3 | 4 | \renewcommand{\maketitle}{% 5 | \begin{titlepage}% 6 | \let\footnotesize\small 7 | \let\footnoterule\relax 8 | \ifsphinxpdfoutput 9 | \begingroup 10 | % This \def is required to deal with multi-line authors; it 11 | % changes \\ to ', ' (comma-space), making it pass muster for 12 | % generating document info in the PDF file. 
13 | \def\\{, } 14 | \pdfinfo{ 15 | /Author (\@author) 16 | /Title (\@title) 17 | } 18 | \endgroup 19 | \fi 20 | \begin{flushright}% 21 | %\sphinxlogo% 22 | {\center 23 | \vspace*{3cm} 24 | \includegraphics{logo.pdf} 25 | \vspace{3cm} 26 | \par 27 | {\rm\Huge \@title \par}% 28 | {\em\LARGE \py@release\releaseinfo \par} 29 | {\large 30 | \@date \par 31 | \py@authoraddress \par 32 | }}% 33 | \end{flushright}%\par 34 | \@thanks 35 | \end{titlepage}% 36 | \cleardoublepage% 37 | \setcounter{footnote}{0}% 38 | \let\thanks\relax\let\maketitle\relax 39 | %\gdef\@thanks{}\gdef\@author{}\gdef\@title{} 40 | } 41 | 42 | \fancypagestyle{normal}{ 43 | \fancyhf{} 44 | \fancyfoot[LE,RO]{{\thepage}} 45 | \fancyfoot[LO]{{\nouppercase{\rightmark}}} 46 | \fancyfoot[RE]{{\nouppercase{\leftmark}}} 47 | \fancyhead[LE,RO]{{ \@title, \py@release}} 48 | \renewcommand{\headrulewidth}{0.4pt} 49 | \renewcommand{\footrulewidth}{0.4pt} 50 | } 51 | 52 | \fancypagestyle{plain}{ 53 | \fancyhf{} 54 | \fancyfoot[LE,RO]{{\thepage}} 55 | \renewcommand{\headrulewidth}{0pt} 56 | \renewcommand{\footrulewidth}{0.4pt} 57 | } 58 | 59 | \titleformat{\section}{\Large}% 60 | {\py@TitleColor\thesection}{0.5em}{\py@TitleColor}{\py@NormalColor} 61 | \titleformat{\subsection}{\large}% 62 | {\py@TitleColor\thesubsection}{0.5em}{\py@TitleColor}{\py@NormalColor} 63 | \titleformat{\subsubsection}{}% 64 | {\py@TitleColor\thesubsubsection}{0.5em}{\py@TitleColor}{\py@NormalColor} 65 | \titleformat{\paragraph}{\large}% 66 | {\py@TitleColor}{0em}{\py@TitleColor}{\py@NormalColor} 67 | 68 | \ChNameVar{\raggedleft\normalsize} 69 | \ChNumVar{\raggedleft \bfseries\Large} 70 | \ChTitleVar{\raggedleft \rm\Huge} 71 | 72 | \renewcommand\thepart{\@Roman\c@part} 73 | \renewcommand\part{% 74 | \pagestyle{empty} 75 | \if@noskipsec \leavevmode \fi 76 | \cleardoublepage 77 | \vspace*{6cm}% 78 | \@afterindentfalse 79 | \secdef\@part\@spart} 80 | 81 | \def\@part[#1]#2{% 82 | \ifnum \c@secnumdepth >\m@ne 83 | \refstepcounter{part}% 84 | \addcontentsline{toc}{part}{\thepart\hspace{1em}#1}% 85 | \else 86 | \addcontentsline{toc}{part}{#1}% 87 | \fi 88 | {\parindent \z@ %\center 89 | \interlinepenalty \@M 90 | \normalfont 91 | \ifnum \c@secnumdepth >\m@ne 92 | \rm\Large \partname~\thepart 93 | \par\nobreak 94 | \fi 95 | \MakeUppercase{\rm\Huge #2}% 96 | \markboth{}{}\par}% 97 | \nobreak 98 | \vskip 8ex 99 | \@afterheading} 100 | \def\@spart#1{% 101 | {\parindent \z@ %\center 102 | \interlinepenalty \@M 103 | \normalfont 104 | \huge \bfseries #1\par}% 105 | \nobreak 106 | \vskip 3ex 107 | \@afterheading} 108 | 109 | % use inconsolata font 110 | \usepackage{inconsolata} 111 | 112 | % fix single quotes, for inconsolata. (does not work) 113 | %%\usepackage{textcomp} 114 | %%\begingroup 115 | %% \catcode`'=\active 116 | %% \g@addto@macro\@noligs{\let'\textsinglequote} 117 | %% \endgroup 118 | %%\endinput 119 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==6.1.3 2 | -------------------------------------------------------------------------------- /docs/tutorial.rst: -------------------------------------------------------------------------------- 1 | .. _quickstart: 2 | 3 | ========== 4 | Quickstart 5 | ========== 6 | 7 | 8 | Eager to get started? 9 | This page gives a good introduction in how to get started with Tablib. 10 | This assumes you already have Tablib installed. 
11 | If you do not, head over to the :ref:`Installation ` section. 12 | 13 | First, make sure that: 14 | 15 | * Tablib is :ref:`installed ` 16 | * Tablib is :ref:`up-to-date ` 17 | 18 | 19 | Let's get started with some simple use cases and examples. 20 | 21 | 22 | 23 | ------------------ 24 | Creating a Dataset 25 | ------------------ 26 | 27 | 28 | A :class:`Dataset ` is nothing more than what its name implies—a set of data. 29 | 30 | Creating your own instance of the :class:`tablib.Dataset` object is simple. :: 31 | 32 | data = tablib.Dataset() 33 | 34 | You can now start filling this :class:`Dataset ` object with data. 35 | 36 | .. admonition:: Example Context 37 | 38 | From here on out, if you see ``data``, assume that it's a fresh 39 | :class:`Dataset ` object. 40 | 41 | 42 | 43 | ----------- 44 | Adding Rows 45 | ----------- 46 | 47 | 48 | Let's say you want to collect a simple list of names. :: 49 | 50 | # collection of names 51 | names = ['Kenneth Reitz', 'Bessie Monke'] 52 | 53 | for name in names: 54 | # split name appropriately 55 | fname, lname = name.split() 56 | 57 | # add names to Dataset 58 | data.append([fname, lname]) 59 | 60 | You can get a nice, Pythonic view of the dataset at any time with :class:`Dataset.dict`:: 61 | 62 | >>> data.dict 63 | [('Kenneth', 'Reitz'), ('Bessie', 'Monke')] 64 | 65 | 66 | 67 | -------------- 68 | Adding Headers 69 | -------------- 70 | 71 | 72 | It's time to enhance our :class:`Dataset` by giving our columns some titles. 73 | To do so, set :class:`Dataset.headers`. :: 74 | 75 | data.headers = ['First Name', 'Last Name'] 76 | 77 | Now our data looks a little different. :: 78 | 79 | >>> data.dict 80 | [{'Last Name': 'Reitz', 'First Name': 'Kenneth'}, 81 | {'Last Name': 'Monke', 'First Name': 'Bessie'}] 82 | 83 | 84 | 85 | 86 | -------------- 87 | Adding Columns 88 | -------------- 89 | 90 | 91 | Now that we have a basic :class:`Dataset` in place, let's add a column of **ages** to it. :: 92 | 93 | data.append_col([22, 20], header='Age') 94 | 95 | Let's view the data now. :: 96 | 97 | >>> data.dict 98 | [{'Last Name': 'Reitz', 'First Name': 'Kenneth', 'Age': 22}, 99 | {'Last Name': 'Monke', 'First Name': 'Bessie', 'Age': 20}] 100 | 101 | It's that easy. 102 | 103 | 104 | -------------- 105 | Importing Data 106 | -------------- 107 | Creating a :class:`tablib.Dataset` object by importing a pre-existing file is simple. :: 108 | 109 | with open('data.csv', 'r') as fh: 110 | imported_data = Dataset().load(fh) 111 | 112 | This detects what sort of data is being passed in, and uses an appropriate formatter to do the import. So you can import from a variety of different file types. 113 | 114 | .. admonition:: Source without headers 115 | 116 | When the format is :class:`csv `, :class:`tsv `, :class:`dbf `, :class:`xls ` or :class:`xlsx `, and the data source does not have headers, the import should be done as follows :: 117 | 118 | with open('data.csv', 'r') as fh: 119 | imported_data = Dataset().load(fh, headers=False) 120 | 121 | -------------- 122 | Exporting Data 123 | -------------- 124 | 125 | Tablib's killer feature is the ability to export your :class:`Dataset` objects into a number of formats. 
126 |
127 | **Comma-Separated Values** ::
128 |
129 |     >>> data.export('csv')
130 |     First Name,Last Name,Age
131 |     Kenneth,Reitz,22
132 |     Bessie,Monke,20
133 |
134 | **JavaScript Object Notation** ::
135 |
136 |     >>> data.export('json')
137 |     [{"Last Name": "Reitz", "First Name": "Kenneth", "Age": 22}, {"Last Name": "Monke", "First Name": "Bessie", "Age": 20}]
138 |
139 |
140 | **YAML Ain't Markup Language** ::
141 |
142 |     >>> data.export('yaml')
143 |     - {Age: 22, First Name: Kenneth, Last Name: Reitz}
144 |     - {Age: 20, First Name: Bessie, Last Name: Monke}
145 |
146 |
147 | **Microsoft Excel** ::
148 |
149 |     >>> data.export('xls')
150 |
151 |
152 |
153 | **Pandas DataFrame** ::
154 |
155 |     >>> data.export('df')
156 |       First Name Last Name  Age
157 |     0    Kenneth     Reitz   22
158 |     1     Bessie     Monke   20
159 |
160 |
161 | ------------------------
162 | Selecting Rows & Columns
163 | ------------------------
164 |
165 |
166 | You can slice and dice your data, just like a standard Python list. ::
167 |
168 |     >>> data[0]
169 |     ('Kenneth', 'Reitz', 22)
170 |     >>> data[0:2]
171 |     [('Kenneth', 'Reitz', 22), ('Bessie', 'Monke', 20)]
172 |
173 | You can also access a row using its index without slicing. ::
174 |
175 |     >>> data.get(0)
176 |     ('Kenneth', 'Reitz', 22)
177 |
178 | If we had a set of data consisting of thousands of rows,
179 | it could be useful to get a list of values in a column.
180 | To do so, we access the :class:`Dataset` as if it were a standard Python dictionary. ::
181 |
182 |     >>> data['First Name']
183 |     ['Kenneth', 'Bessie']
184 |
185 | You can also access the column using its index. ::
186 |
187 |     >>> data.headers
188 |     ['First Name', 'Last Name', 'Age']
189 |     >>> data.get_col(1)
190 |     ['Reitz', 'Monke']
191 |
192 | Let's find the average age. ::
193 |
194 |     >>> ages = data['Age']
195 |     >>> float(sum(ages)) / len(ages)
196 |     21.0
197 |
198 |
199 |
200 | -----------------------
201 | Removing Rows & Columns
202 | -----------------------
203 |
204 | It's easier than you could imagine. Delete a column::
205 |
206 |     >>> del data['Col Name']
207 |
208 | Delete a range of rows::
209 |
210 |     >>> del data[0:12]
211 |
212 |
213 | ==============
214 | Advanced Usage
215 | ==============
216 |
217 | This part of the documentation serves to give you ideas and patterns that are otherwise hard to extract from the :ref:`API Documentation `.
218 |
219 | And now for something completely different.
220 |
221 |
222 | .. _dyncols:
223 |
224 | ---------------
225 | Dynamic Columns
226 | ---------------
227 |
228 | Thanks to Josh Ourisman, Tablib now supports adding dynamic columns.
229 | A dynamic column is a single callable object (*e.g.* a function).
230 |
231 | Let's add a dynamic column to our :class:`Dataset` object.
232 | In this example, we have a function that generates a random grade for our students. ::
233 |
234 |     import random
235 |
236 |     def random_grade(row):
237 |         """Returns a random grade for the row."""
238 |         return random.randint(60, 100) / 100.0
239 |
240 |     data.append_col(random_grade, header='Grade')
241 |
242 | Let's have a look at our data. ::
243 |
244 |     >>> data.export('yaml')
245 |     - {Age: 22, First Name: Kenneth, Grade: 0.6, Last Name: Reitz}
246 |     - {Age: 20, First Name: Bessie, Grade: 0.75, Last Name: Monke}
247 |
248 |
249 | Let's remove that column. ::
250 |
251 |     >>> del data['Grade']
252 |
253 |
254 | When you add a dynamic column, the first argument that is passed in to the given callable is the current data row.
255 | You can use this to perform calculations against your data row.
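As a simple illustration, a dynamic column can derive a value from other cells in its row (a minimal sketch, reusing the ``Age`` column added earlier; the year 2010 is an assumed reference point)::

    def birth_year(row):
        # row is (First Name, Last Name, Age); row[2] is the age.
        # 2010 is an assumed reference year for this sketch.
        return 2010 - row[2]

    data.append_col(birth_year, header='Birth Year')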
256 |
257 | For example, we can use the data available in the row to guess the gender of a student. ::
258 |
259 |     def guess_gender(row):
260 |         """Guesses the gender of the given student row."""
261 |         m_names = ('Kenneth', 'Mike', 'Yuri')
262 |         f_names = ('Bessie', 'Samantha', 'Heather')
263 |
264 |         name = row[0]
265 |
266 |         if name in m_names:
267 |             return 'Male'
268 |         elif name in f_names:
269 |             return 'Female'
270 |         else:
271 |             return 'Unknown'
272 |
273 | Adding this function to our dataset as a dynamic column would result in::
274 |
275 |     >>> data.export('yaml')
276 |     - {Age: 22, First Name: Kenneth, Gender: Male, Last Name: Reitz}
277 |     - {Age: 20, First Name: Bessie, Gender: Female, Last Name: Monke}
278 |
279 | When you add new rows to a dataset that contains dynamic columns, you should
280 | either provide all values in the row, or only the non-dynamic values; in the
281 | latter case, the dynamic values will be generated automatically using the
282 | function initially provided for the column.
283 |
284 | .. versionchanged:: 3.6.0
285 |
286 |     In older versions, you could only add fully-populated rows, including
287 |     values for the dynamic columns.
288 |
289 | .. _tags:
290 |
291 | ----------------------------
292 | Filtering Datasets with Tags
293 | ----------------------------
294 |
295 | When constructing a :class:`Dataset` object,
296 | you can add tags to rows by specifying the ``tags`` parameter.
297 | This allows you to filter your :class:`Dataset` later.
298 | This can be useful to separate rows of data based on arbitrary criteria
299 | (*e.g.* origin) that you don't want to include in your :class:`Dataset`.
300 |
301 | Let's tag some students. ::
302 |
303 |     students = tablib.Dataset()
304 |
305 |     students.headers = ['first', 'last']
306 |
307 |     students.rpush(['Kenneth', 'Reitz'], tags=['male', 'technical'])
308 |     students.rpush(['Daniel', 'Dupont'], tags=['male', 'creative'])
309 |     students.rpush(['Bessie', 'Monke'], tags=['female', 'creative'])
310 |
311 | Now that we have extra meta-data on our rows, we can easily filter our :class:`Dataset`. Let's see just the female students. ::
312 |
313 |     >>> students.filter(['female']).yaml
314 |     - {first: Bessie, last: Monke}
315 |
316 | By default, when you pass a list of tags, rows matching *any* of the tags are returned (an *or* filter). ::
317 |
318 |     >>> students.filter(['female', 'creative']).yaml
319 |     - {first: Daniel, last: Dupont}
320 |     - {first: Bessie, last: Monke}
321 |
322 | By chaining ``filter()`` calls, you get rows matching *all* of the tags (an *and* filter). ::
323 |
324 |     >>> students.filter(['female']).filter(['creative']).yaml
325 |     - {first: Bessie, last: Monke}
326 |
327 | It's that simple. The original :class:`Dataset` is untouched.
328 |
329 | Open an Excel workbook and read the first sheet
330 | -----------------------------------------------
331 |
332 | Open an Excel 2007+ workbook with a single sheet (or a multi-sheet workbook from which you just want the first sheet). ::
333 |
334 |     data = tablib.Dataset()
335 |     with open('my_excel_file.xlsx', 'rb') as fh:
336 |         data.load(fh, 'xlsx')
337 |     print(data)
338 |
339 | Excel Workbook With Multiple Sheets
340 | ------------------------------------
341 |
342 | When dealing with a large number of :class:`Datasets ` in spreadsheet format,
343 | it's quite common to group multiple spreadsheets into a single Excel file, known as a workbook.
344 | Tablib makes it extremely easy to build workbooks with the handy :class:`Databook` class.
345 |
346 | Let's say we have 3 different :class:`Datasets `.
347 |
All we have to do is add them to a :class:`Databook` object... ::
348 |
349 |     book = tablib.Databook((data1, data2, data3))
350 |
351 | ... and export to Excel just like :class:`Datasets `. ::
352 |
353 |     with open('students.xls', 'wb') as f:
354 |         f.write(book.export('xls'))
355 |
356 | The resulting ``students.xls`` file will contain a separate sheet for each :class:`Dataset` object in the :class:`Databook`.
357 |
358 | .. admonition:: Binary Warning
359 |
360 |     Make sure to open the output file in binary mode.
361 |
362 |
363 | .. _separators:
364 |
365 | ----------
366 | Separators
367 | ----------
368 |
369 | When constructing a spreadsheet,
370 | it's often useful to create a blank row containing information on the upcoming data. For example,
371 |
372 | ::
373 |
374 |     daniel_tests = [
375 |         ('11/24/09', 'Math 101 Mid-term Exam', 56.),
376 |         ('05/24/10', 'Math 101 Final Exam', 62.)
377 |     ]
378 |
379 |     suzie_tests = [
380 |         ('11/24/09', 'Math 101 Mid-term Exam', 56.),
381 |         ('05/24/10', 'Math 101 Final Exam', 62.)
382 |     ]
383 |
384 |     # Create new dataset
385 |     tests = tablib.Dataset()
386 |     tests.headers = ['Date', 'Test Name', 'Grade']
387 |
388 |     # Daniel's Tests
389 |     tests.append_separator('Daniel\'s Scores')
390 |
391 |     for test_row in daniel_tests:
392 |         tests.append(test_row)
393 |
394 |     # Suzie's Tests
395 |     tests.append_separator('Suzie\'s Scores')
396 |
397 |     for test_row in suzie_tests:
398 |         tests.append(test_row)
399 |
400 |     # Write spreadsheet to disk
401 |     with open('grades.xls', 'wb') as f:
402 |         f.write(tests.export('xls'))
403 |
404 | The resulting **grades.xls** will have the following layout:
405 |
406 |
407 | Daniel's Scores:
408 |  * '11/24/09', 'Math 101 Mid-term Exam', 56.
409 |  * '05/24/10', 'Math 101 Final Exam', 62.
410 |
411 | Suzie's Scores:
412 |  * '11/24/09', 'Math 101 Mid-term Exam', 56.
413 |  * '05/24/10', 'Math 101 Final Exam', 62.
414 |
415 |
416 |
417 | .. admonition:: Format Support
418 |
419 |     At this time, only :class:`Excel ` output supports separators.
420 |
421 | ----
422 |
423 | Now, go check out the :ref:`API Documentation ` or begin :ref:`Tablib Development `.
424 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=58", "setuptools_scm[toml]>=6.2"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "tablib" 7 | description = "Format agnostic tabular data library (XLS, JSON, YAML, CSV, etc.)" 8 | readme = "README.md" 9 | license = {text = "MIT License"} 10 | authors = [ 11 | {name = "Kenneth Reitz", email = "me@kennethreitz.org"} 12 | ] 13 | maintainers = [ 14 | {name = "Jazzband Team", email = "roadies@jazzband.co"}, 15 | {name = "Hugo van Kemenade"}, 16 | {name = "Claude Paroz", email = "claude@2xlibre.net"}, 17 | ] 18 | requires-python = ">=3.9" 19 | classifiers = [ 20 | "Development Status :: 5 - Production/Stable", 21 | "Intended Audience :: Developers", 22 | "Natural Language :: English", 23 | "License :: OSI Approved :: MIT License", 24 | "Programming Language :: Python", 25 | "Programming Language :: Python :: 3 :: Only", 26 | "Programming Language :: Python :: 3", 27 | "Programming Language :: Python :: 3.9", 28 | "Programming Language :: Python :: 3.10", 29 | "Programming Language :: Python :: 3.11", 30 | "Programming Language :: Python :: 3.12", 31 | "Programming Language :: Python :: 3.13", 32 | "Programming Language :: Python :: 3.14", 33 | ] 34 | dynamic = ["version"] 35 | 36 | [project.optional-dependencies] 37 | all = [ 38 | "odfpy", 39 | "openpyxl>=2.6.0", 40 | "pandas", 41 | "pyyaml", 42 | "tabulate", 43 | "xlrd", 44 | "xlwt", 45 | ] 46 | cli = ["tabulate"] 47 | html = [] 48 | ods = ["odfpy"] 49 | pandas = ["pandas"] 50 | xls = ["xlrd", "xlwt"] 51 | xlsx = ["openpyxl>=2.6.0"] 52 | yaml = ["pyyaml"] 53 | 54 | [project.urls] 55 | homepage = "https://tablib.readthedocs.io" 56 | documentation = "https://tablib.readthedocs.io" 57 | repository = "https://github.com/jazzband/tablib" 58 | changelog = "https://github.com/jazzband/tablib/blob/master/HISTORY.md" 59 | 60 | [tool.setuptools_scm] 61 | write_to = "src/tablib/_version.py" 62 | 63 | [tool.ruff] 64 | fix = true 65 | line-length = 99 66 | 67 | lint.select = [ 68 | "C4", # flake8-comprehensions 69 | "E", # pycodestyle errors 70 | "F", # pyflakes 71 | "I", # isort 72 | "ISC", # flake8-implicit-str-concat 73 | "PGH", # pygrep-hooks 74 | "PIE", # flake8-pie 75 | "RUF022", # unsorted-dunder-all 76 | "RUF100", # unused noqa (yesqa) 77 | "UP", # pyupgrade 78 | "W", # pycodestyle warnings 79 | "YTT", # flake8-2020 80 | ] 81 | lint.ignore = [ 82 | "E203", # Whitespace before ':' 83 | "E221", # Multiple spaces before operator 84 | "E226", # Missing whitespace around arithmetic operator 85 | "E241", # Multiple spaces after ',' 86 | "E741", # Ambiguous variable name 87 | "PIE790", # Unnecessary pass statement 88 | "UP038", # Makes code slower and more verbose 89 | ] 90 | lint.isort.known-first-party = [ "tablib" ] 91 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | norecursedirs = .git .* 3 | addopts = -rsxX --showlocals --tb=native --cov=tablib --cov=tests --cov-report xml --cov-report term --cov-report html 4 | -------------------------------------------------------------------------------- /src/tablib/__init__.py: -------------------------------------------------------------------------------- 1 | """ Tablib. 
""" 2 | try: 3 | # Generated by setuptools-scm. 4 | from ._version import version as __version__ 5 | except ImportError: 6 | # Some broken installation. 7 | __version__ = None 8 | 9 | 10 | from .core import ( # noqa: F401 11 | Databook, 12 | Dataset, 13 | InvalidDatasetType, 14 | InvalidDimensions, 15 | UnsupportedFormat, 16 | detect_format, 17 | import_book, 18 | import_set, 19 | ) 20 | -------------------------------------------------------------------------------- /src/tablib/_vendor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzband/tablib/8dcb87f69d2f6d501a511210387df233099b2957/src/tablib/_vendor/__init__.py -------------------------------------------------------------------------------- /src/tablib/_vendor/dbfpy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzband/tablib/8dcb87f69d2f6d501a511210387df233099b2957/src/tablib/_vendor/dbfpy/__init__.py -------------------------------------------------------------------------------- /src/tablib/_vendor/dbfpy/dbf.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | from . import header, record 4 | from .utils import INVALID_VALUE 5 | 6 | __version__ = "$Revision: 1.7 $"[11:-2] 7 | __date__ = "$Date: 2007/02/11 09:23:13 $"[7:-2] 8 | __author__ = "Jeff Kunce " 9 | __all__ = ["Dbf"] 10 | 11 | """DBF accessing helpers. 12 | 13 | FIXME: more documentation needed 14 | 15 | Examples: 16 | 17 | Create new table, setup structure, add records: 18 | 19 | dbf = Dbf(filename, new=True) 20 | dbf.addField( 21 | ("NAME", "C", 15), 22 | ("SURNAME", "C", 25), 23 | ("INITIALS", "C", 10), 24 | ("BIRTHDATE", "D"), 25 | ) 26 | for (n, s, i, b) in ( 27 | ("John", "Miller", "YC", (1980, 10, 11)), 28 | ("Andy", "Larkin", "", (1980, 4, 11)), 29 | ): 30 | rec = dbf.newRecord() 31 | rec["NAME"] = n 32 | rec["SURNAME"] = s 33 | rec["INITIALS"] = i 34 | rec["BIRTHDATE"] = b 35 | rec.store() 36 | dbf.close() 37 | 38 | Open existed dbf, read some data: 39 | 40 | dbf = Dbf(filename, True) 41 | for rec in dbf: 42 | for fldName in dbf.fieldNames: 43 | print('%s:\t %s (%s)' % (fldName, rec[fldName], 44 | type(rec[fldName]))) 45 | print() 46 | dbf.close() 47 | 48 | """ 49 | """History (most recent first): 50 | 11-feb-2007 [als] export INVALID_VALUE; 51 | Dbf: added .ignoreErrors, .INVALID_VALUE 52 | 04-jul-2006 [als] added export declaration 53 | 20-dec-2005 [yc] removed fromStream and newDbf methods: 54 | use argument of __init__ call must be used instead; 55 | added class fields pointing to the header and 56 | record classes. 57 | 17-dec-2005 [yc] split to several modules; reimplemented 58 | 13-dec-2005 [yc] adapted to the changes of the `strutil` module. 59 | 13-sep-2002 [als] support FoxPro Timestamp datatype 60 | 15-nov-1999 [jjk] documentation updates, add demo 61 | 24-aug-1998 [jjk] add some encodeValue methods (not tested), other tweaks 62 | 08-jun-1998 [jjk] fix problems, add more features 63 | 20-feb-1998 [jjk] fix problems, add more features 64 | 19-feb-1998 [jjk] add create/write capabilities 65 | 18-feb-1998 [jjk] from dbfload.py 66 | """ 67 | 68 | 69 | class Dbf: 70 | """DBF accessor. 71 | 72 | FIXME: 73 | docs and examples needed (dont' forget to tell 74 | about problems adding new fields on the fly) 75 | 76 | Implementation notes: 77 | ``_new`` field is used to indicate whether this is 78 | a new data table. 
`addField` could be used only for 79 | the new tables! If at least one record was appended 80 | to the table it's structure couldn't be changed. 81 | 82 | """ 83 | 84 | __slots__ = ("name", "header", "stream", 85 | "_changed", "_new", "_ignore_errors") 86 | 87 | HeaderClass = header.DbfHeader 88 | RecordClass = record.DbfRecord 89 | INVALID_VALUE = INVALID_VALUE 90 | 91 | # initialization and creation helpers 92 | 93 | def __init__(self, f, readOnly=False, new=False, ignoreErrors=False): 94 | """Initialize instance. 95 | 96 | Arguments: 97 | f: 98 | Filename or file-like object. 99 | new: 100 | True if new data table must be created. Assume 101 | data table exists if this argument is False. 102 | readOnly: 103 | if ``f`` argument is a string file will 104 | be opend in read-only mode; in other cases 105 | this argument is ignored. This argument is ignored 106 | even if ``new`` argument is True. 107 | headerObj: 108 | `header.DbfHeader` instance or None. If this argument 109 | is None, new empty header will be used with the 110 | all fields set by default. 111 | ignoreErrors: 112 | if set, failing field value conversion will return 113 | ``INVALID_VALUE`` instead of raising conversion error. 114 | 115 | """ 116 | if isinstance(f, str): 117 | # a filename 118 | self.name = f 119 | if new: 120 | # new table (table file must be 121 | # created or opened and truncated) 122 | self.stream = open(f, "w+b") 123 | else: 124 | # table file must exist 125 | self.stream = open(f, ("r+b", "rb")[bool(readOnly)]) 126 | else: 127 | # a stream 128 | self.name = getattr(f, "name", "") 129 | self.stream = f 130 | if new: 131 | # if this is a new table, header will be empty 132 | self.header = self.HeaderClass() 133 | else: 134 | # or instantiated using stream 135 | self.header = self.HeaderClass.fromStream(self.stream) 136 | self.ignoreErrors = ignoreErrors 137 | self._new = bool(new) 138 | self._changed = False 139 | 140 | # properties 141 | 142 | closed = property(lambda self: self.stream.closed) 143 | recordCount = property(lambda self: self.header.recordCount) 144 | fieldNames = property( 145 | lambda self: [_fld.name for _fld in self.header.fields]) 146 | fieldDefs = property(lambda self: self.header.fields) 147 | changed = property(lambda self: self._changed or self.header.changed) 148 | 149 | def ignoreErrors(self, value): 150 | """Update `ignoreErrors` flag on the header object and self""" 151 | self.header.ignoreErrors = self._ignore_errors = bool(value) 152 | 153 | ignoreErrors = property( 154 | lambda self: self._ignore_errors, 155 | ignoreErrors, 156 | doc="""Error processing mode for DBF field value conversion 157 | 158 | if set, failing field value conversion will return 159 | ``INVALID_VALUE`` instead of raising conversion error. 160 | 161 | """) 162 | 163 | # protected methods 164 | 165 | def _fixIndex(self, index): 166 | """Return fixed index. 167 | 168 | This method fails if index isn't a numeric object 169 | (long or int). Or index isn't in a valid range 170 | (less or equal to the number of records in the db). 171 | 172 | If ``index`` is a negative number, it will be 173 | treated as a negative indexes for list objects. 174 | 175 | Return: 176 | Return value is numeric object maning valid index. 
177 | 178 | """ 179 | if not isinstance(index, int): 180 | raise TypeError("Index must be a numeric object") 181 | if index < 0: 182 | # index from the right side 183 | # fix it to the left-side index 184 | index += len(self) + 1 185 | if index >= len(self): 186 | raise IndexError("Record index out of range") 187 | return index 188 | 189 | # interface methods 190 | 191 | def close(self): 192 | self.flush() 193 | self.stream.close() 194 | 195 | def flush(self): 196 | """Flush data to the associated stream.""" 197 | if self.changed: 198 | self.header.setCurrentDate() 199 | self.header.write(self.stream) 200 | self.stream.flush() 201 | self._changed = False 202 | 203 | def indexOfFieldName(self, name): 204 | """Index of field named ``name``.""" 205 | # FIXME: move this to header class 206 | names = [f.name for f in self.header.fields] 207 | return names.index(name.upper()) 208 | 209 | def newRecord(self): 210 | """Return new record, which belong to this table.""" 211 | return self.RecordClass(self) 212 | 213 | def append(self, record): 214 | """Append ``record`` to the database.""" 215 | record.index = self.header.recordCount 216 | record._write() 217 | self.header.recordCount += 1 218 | self._changed = True 219 | self._new = False 220 | 221 | def addField(self, *defs): 222 | """Add field definitions. 223 | 224 | For more information see `header.DbfHeader.addField`. 225 | 226 | """ 227 | if self._new: 228 | self.header.addField(*defs) 229 | else: 230 | raise TypeError("At least one record was added, " 231 | "structure can't be changed") 232 | 233 | # 'magic' methods (representation and sequence interface) 234 | 235 | def __repr__(self): 236 | return f"Dbf stream '{self.stream}'\n" + repr(self.header) 237 | 238 | def __len__(self): 239 | """Return number of records.""" 240 | return self.recordCount 241 | 242 | def __getitem__(self, index): 243 | """Return `DbfRecord` instance.""" 244 | return self.RecordClass.fromStream(self, self._fixIndex(index)) 245 | 246 | def __setitem__(self, index, record): 247 | """Write `DbfRecord` instance to the stream.""" 248 | record.index = self._fixIndex(index) 249 | record._write() 250 | self._changed = True 251 | self._new = False 252 | 253 | # def __del__(self): 254 | # """Flush stream upon deletion of the object.""" 255 | # self.flush() 256 | 257 | 258 | def demo_read(filename): 259 | _dbf = Dbf(filename, True) 260 | for _rec in _dbf: 261 | print() 262 | print(repr(_rec)) 263 | _dbf.close() 264 | 265 | 266 | def demo_create(filename): 267 | _dbf = Dbf(filename, new=True) 268 | _dbf.addField( 269 | ("NAME", "C", 15), 270 | ("SURNAME", "C", 25), 271 | ("INITIALS", "C", 10), 272 | ("BIRTHDATE", "D"), 273 | ) 274 | for (_n, _s, _i, _b) in ( 275 | ("John", "Miller", "YC", (1981, 1, 2)), 276 | ("Andy", "Larkin", "AL", (1982, 3, 4)), 277 | ("Bill", "Clinth", "", (1983, 5, 6)), 278 | ("Bobb", "McNail", "", (1984, 7, 8)), 279 | ): 280 | _rec = _dbf.newRecord() 281 | _rec["NAME"] = _n 282 | _rec["SURNAME"] = _s 283 | _rec["INITIALS"] = _i 284 | _rec["BIRTHDATE"] = _b 285 | _rec.store() 286 | print(repr(_dbf)) 287 | _dbf.close() 288 | 289 | 290 | if __name__ == '__main__': 291 | import sys 292 | 293 | _name = len(sys.argv) > 1 and sys.argv[1] or "county.dbf" 294 | demo_create(_name) 295 | demo_read(_name) 296 | -------------------------------------------------------------------------------- /src/tablib/_vendor/dbfpy/dbfnew.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | __version__ = "$Revision: 1.4 
$"[11:-2] 4 | __date__ = "$Date: 2006/07/04 08:18:18 $"[7:-2] 5 | 6 | __all__ = ["dbf_new"] 7 | 8 | from .dbf import Dbf 9 | from .fields import ( 10 | DbfCharacterFieldDef, 11 | DbfDateFieldDef, 12 | DbfDateTimeFieldDef, 13 | DbfLogicalFieldDef, 14 | DbfNumericFieldDef, 15 | ) 16 | from .header import DbfHeader 17 | from .record import DbfRecord 18 | 19 | """.DBF creation helpers. 20 | 21 | Note: this is a legacy interface. New code should use Dbf class 22 | for table creation (see examples in dbf.py) 23 | 24 | TODO: 25 | - handle Memo fields. 26 | - check length of the fields according to the 27 | `https://www.clicketyclick.dk/databases/xbase/format/data_types.html` 28 | 29 | """ 30 | """History (most recent first) 31 | 04-jul-2006 [als] added export declaration; 32 | updated for dbfpy 2.0 33 | 15-dec-2005 [yc] define dbf_new.__slots__ 34 | 14-dec-2005 [yc] added vim modeline; retab'd; added doc-strings; 35 | dbf_new now is a new class (inherited from object) 36 | ??-jun-2000 [--] added by Hans Fiby 37 | """ 38 | 39 | 40 | class _FieldDefinition: 41 | """Field definition. 42 | 43 | This is a simple structure, which contains ``name``, ``type``, 44 | ``len``, ``dec`` and ``cls`` fields. 45 | 46 | Objects also implement get/setitem magic functions, so fields 47 | could be accessed via sequence interface, where 'name' has 48 | index 0, 'type' index 1, 'len' index 2, 'dec' index 3 and 49 | 'cls' could be located at index 4. 50 | 51 | """ 52 | 53 | __slots__ = "name", "type", "len", "dec", "cls" 54 | 55 | # WARNING: be attentive - dictionaries are mutable! 56 | FLD_TYPES = { 57 | # type: (cls, len) 58 | "C": (DbfCharacterFieldDef, None), 59 | "N": (DbfNumericFieldDef, None), 60 | "L": (DbfLogicalFieldDef, 1), 61 | # FIXME: support memos 62 | # "M": (DbfMemoFieldDef), 63 | "D": (DbfDateFieldDef, 8), 64 | # FIXME: I'm not sure length should be 14 characters! 65 | # but temporary I use it, cuz date is 8 characters 66 | # and time 6 (hhmmss) 67 | "T": (DbfDateTimeFieldDef, 14), 68 | } 69 | 70 | def __init__(self, name, type, len=None, dec=0): 71 | _cls, _len = self.FLD_TYPES[type] 72 | if _len is None: 73 | if len is None: 74 | raise ValueError("Field length must be defined") 75 | _len = len 76 | self.name = name 77 | self.type = type 78 | self.len = _len 79 | self.dec = dec 80 | self.cls = _cls 81 | 82 | def getDbfField(self): 83 | """Return `DbfFieldDef` instance from the current definition.""" 84 | return self.cls(self.name, self.len, self.dec) 85 | 86 | def appendToHeader(self, dbfh): 87 | """Create a `DbfFieldDef` instance and append it to the dbf header. 88 | 89 | Arguments: 90 | dbfh: `DbfHeader` instance. 91 | 92 | """ 93 | _dbff = self.getDbfField() 94 | dbfh.addField(_dbff) 95 | 96 | 97 | class dbf_new: 98 | """New .DBF creation helper. 99 | 100 | Example Usage: 101 | 102 | dbfn = dbf_new() 103 | dbfn.add_field("name",'C',80) 104 | dbfn.add_field("price",'N',10,2) 105 | dbfn.add_field("date",'D',8) 106 | dbfn.write("tst.dbf") 107 | 108 | Note: 109 | This module cannot handle Memo-fields, 110 | they are special. 111 | 112 | """ 113 | 114 | __slots__ = ("fields",) 115 | 116 | FieldDefinitionClass = _FieldDefinition 117 | 118 | def __init__(self): 119 | self.fields = [] 120 | 121 | def add_field(self, name, typ, len, dec=0): 122 | """Add field definition. 123 | 124 | Arguments: 125 | name: 126 | field name (str object). field name must not 127 | contain ASCII NULs and it's length shouldn't 128 | exceed 10 characters. 129 | typ: 130 | type of the field. 
this must be a single character 131 | from the "CNLMDT" set meaning character, numeric, 132 | logical, memo, date and date/time respectively. 133 | len: 134 | length of the field. this argument is used only for 135 | the character and numeric fields. all other fields 136 | have fixed length. 137 | FIXME: use None as a default for this argument? 138 | dec: 139 | decimal precision. used only for the numric fields. 140 | 141 | """ 142 | self.fields.append(self.FieldDefinitionClass(name, typ, len, dec)) 143 | 144 | def write(self, filename): 145 | """Create empty .DBF file using current structure.""" 146 | _dbfh = DbfHeader() 147 | _dbfh.setCurrentDate() 148 | for _fldDef in self.fields: 149 | _fldDef.appendToHeader(_dbfh) 150 | 151 | _dbfStream = open(filename, "wb") 152 | _dbfh.write(_dbfStream) 153 | _dbfStream.close() 154 | 155 | 156 | if __name__ == '__main__': 157 | # create a new DBF-File 158 | dbfn = dbf_new() 159 | dbfn.add_field("name", 'C', 80) 160 | dbfn.add_field("price", 'N', 10, 2) 161 | dbfn.add_field("date", 'D', 8) 162 | dbfn.write("tst.dbf") 163 | # test new dbf 164 | print("*** created tst.dbf: ***") 165 | dbft = Dbf('tst.dbf', readOnly=0) 166 | print(repr(dbft)) 167 | # add a record 168 | rec = DbfRecord(dbft) 169 | rec['name'] = 'something' 170 | rec['price'] = 10.5 171 | rec['date'] = (2000, 1, 12) 172 | rec.store() 173 | # add another record 174 | rec = DbfRecord(dbft) 175 | rec['name'] = 'foo and bar' 176 | rec['price'] = 12234 177 | rec['date'] = (1992, 7, 15) 178 | rec.store() 179 | 180 | # show the records 181 | print("*** inserted 2 records into tst.dbf: ***") 182 | print(repr(dbft)) 183 | for i1 in range(len(dbft)): 184 | rec = dbft[i1] 185 | for fldName in dbft.fieldNames: 186 | print(f'{fldName}:\t {rec[fldName]}') 187 | print() 188 | dbft.close() 189 | -------------------------------------------------------------------------------- /src/tablib/_vendor/dbfpy/fields.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import struct 3 | from functools import total_ordering 4 | 5 | from . import utils 6 | 7 | __version__ = "$Revision: 1.14 $"[11:-2] 8 | __date__ = "$Date: 2009/05/26 05:16:51 $"[7:-2] 9 | 10 | __all__ = ["lookupFor"] # field classes added at the end of the module 11 | 12 | 13 | """DBF fields definitions. 14 | 15 | TODO: 16 | - make memos work 17 | """ 18 | """History (most recent first): 19 | 26-may-2009 [als] DbfNumericFieldDef.decodeValue: strip zero bytes 20 | 05-feb-2009 [als] DbfDateFieldDef.encodeValue: empty arg produces empty date 21 | 16-sep-2008 [als] DbfNumericFieldDef decoding looks for decimal point 22 | in the value to select float or integer return type 23 | 13-mar-2008 [als] check field name length in constructor 24 | 11-feb-2007 [als] handle value conversion errors 25 | 10-feb-2007 [als] DbfFieldDef: added .rawFromRecord() 26 | 01-dec-2006 [als] Timestamp columns use None for empty values 27 | 31-oct-2006 [als] support field types 'F' (float), 'I' (integer) 28 | and 'Y' (currency); 29 | automate export and registration of field classes 30 | 04-jul-2006 [als] added export declaration 31 | 10-mar-2006 [als] decode empty values for Date and Logical fields; 32 | show field name in errors 33 | 10-mar-2006 [als] fix Numeric value decoding: according to spec, 34 | value always is string representation of the number; 35 | ensure that encoded Numeric value fits into the field 36 | 20-dec-2005 [yc] use field names in upper case 37 | 15-dec-2005 [yc] field definitions moved from `dbf`. 
38 | """ 39 | 40 | # abstract definitions 41 | 42 | 43 | @total_ordering 44 | class DbfFieldDef: 45 | """Abstract field definition. 46 | 47 | Child classes must override ``type`` class attribute to provide datatype 48 | information of the field definition. For more info about types visit 49 | `https://www.clicketyclick.dk/databases/xbase/format/data_types.html` 50 | 51 | Also child classes must override ``defaultValue`` field to provide 52 | default value for the field value. 53 | 54 | If child class has fixed length ``length`` class attribute must be 55 | overridden and set to the valid value. None value means, that field 56 | isn't of fixed length. 57 | 58 | Note: ``name`` field must not be changed after instantiation. 59 | 60 | """ 61 | 62 | __slots__ = ("name", "decimalCount", "start", "end", "ignoreErrors") 63 | 64 | # length of the field, None in case of variable-length field, 65 | # or a number if this field is a fixed-length field 66 | length = None 67 | 68 | # field type. for more information about fields types visit 69 | # `https://www.clicketyclick.dk/databases/xbase/format/data_types.html` 70 | # must be overridden in child classes 71 | typeCode = None 72 | 73 | # default value for the field. this field must be 74 | # overridden in child classes 75 | defaultValue = None 76 | 77 | def __init__(self, name, length=None, decimalCount=None, 78 | start=None, stop=None, ignoreErrors=False): 79 | """Initialize instance.""" 80 | assert self.typeCode is not None, "Type code must be overridden" 81 | assert self.defaultValue is not None, "Default value must be overridden" 82 | # fix arguments 83 | if len(name) > 10: 84 | raise ValueError(f"Field name \"{name}\" is too long") 85 | name = str(name).upper() 86 | if self.__class__.length is None: 87 | if length is None: 88 | raise ValueError(f"[{name}] Length isn't specified") 89 | length = int(length) 90 | if length <= 0: 91 | raise ValueError(f"[{name}] Length must be a positive integer") 92 | else: 93 | length = self.length 94 | if decimalCount is None: 95 | decimalCount = 0 96 | # set fields 97 | self.name = name 98 | # FIXME: validate length according to the specification at 99 | # https://www.clicketyclick.dk/databases/xbase/format/data_types.html 100 | self.length = length 101 | self.decimalCount = decimalCount 102 | self.ignoreErrors = ignoreErrors 103 | self.start = start 104 | self.end = stop 105 | 106 | def __eq__(self, other): 107 | return repr(self) == repr(other) 108 | 109 | def __ne__(self, other): 110 | return repr(self) != repr(other) 111 | 112 | def __lt__(self, other): 113 | return repr(self) < repr(other) 114 | 115 | def __hash__(self): 116 | return hash(self.name) 117 | 118 | def fromString(cls, string, start, ignoreErrors=False): 119 | """Decode dbf field definition from the string data. 120 | 121 | Arguments: 122 | string: 123 | a string, dbf definition is decoded from. length of 124 | the string must be 32 bytes. 125 | start: 126 | position in the database file. 127 | ignoreErrors: 128 | initial error processing mode for the new field (boolean) 129 | 130 | """ 131 | assert len(string) == 32 132 | _length = string[16] 133 | return cls(utils.unzfill(string)[:11].decode('utf-8'), _length, 134 | string[17], start, start + _length, ignoreErrors=ignoreErrors) 135 | fromString = classmethod(fromString) 136 | 137 | def toString(self): 138 | """Return encoded field definition. 139 | 140 | Return: 141 | Return value is a string object containing encoded 142 | definition of this field. 
143 | 144 | """ 145 | _name = self.name.ljust(11, '\0') 146 | return ( 147 | _name + 148 | self.typeCode + 149 | # data address 150 | chr(0) * 4 + 151 | chr(self.length) + 152 | chr(self.decimalCount) + 153 | chr(0) * 14 154 | ) 155 | 156 | def __repr__(self): 157 | return "%-10s %1s %3d %3d" % self.fieldInfo() # noqa: UP031 158 | 159 | def fieldInfo(self): 160 | """Return field information. 161 | 162 | Return: 163 | Return value is a (name, type, length, decimals) tuple. 164 | 165 | """ 166 | return self.name, self.typeCode, self.length, self.decimalCount 167 | 168 | def rawFromRecord(self, record): 169 | """Return a "raw" field value from the record string.""" 170 | return record[self.start:self.end] 171 | 172 | def decodeFromRecord(self, record): 173 | """Return decoded field value from the record string.""" 174 | try: 175 | return self.decodeValue(self.rawFromRecord(record)) 176 | except Exception: 177 | if self.ignoreErrors: 178 | return utils.INVALID_VALUE 179 | else: 180 | raise 181 | 182 | def decodeValue(self, value): 183 | """Return decoded value from string value. 184 | 185 | This method shouldn't be used publicly. It's called from the 186 | `decodeFromRecord` method. 187 | 188 | This is an abstract method and it must be overridden in child classes. 189 | """ 190 | raise NotImplementedError 191 | 192 | def encodeValue(self, value): 193 | """Return str object containing encoded field value. 194 | 195 | This is an abstract method and it must be overridden in child classes. 196 | """ 197 | raise NotImplementedError 198 | 199 | # real classes 200 | 201 | 202 | class DbfCharacterFieldDef(DbfFieldDef): 203 | """Definition of the character field.""" 204 | 205 | typeCode = "C" 206 | defaultValue = b'' 207 | 208 | def decodeValue(self, value): 209 | """Return string object. 210 | 211 | Return value is a ``value`` argument with stripped right spaces. 212 | 213 | """ 214 | return value.rstrip(b' ').decode('utf-8') 215 | 216 | def encodeValue(self, value): 217 | """Return raw data string encoded from a ``value``.""" 218 | return str(value)[:self.length].ljust(self.length) 219 | 220 | 221 | class DbfNumericFieldDef(DbfFieldDef): 222 | """Definition of the numeric field.""" 223 | 224 | typeCode = "N" 225 | # XXX: now I'm not sure it was a good idea to make a class field 226 | # `defaultValue` instead of a generic method as it was implemented 227 | # previously -- it's ok with all types except number, cuz 228 | # if self.decimalCount is 0, we should return 0 and 0.0 otherwise. 229 | defaultValue = 0 230 | 231 | def decodeValue(self, value): 232 | """Return a number decoded from ``value``. 233 | 234 | If decimals is zero, value will be decoded as an integer; 235 | or as a float otherwise. 236 | 237 | Return: 238 | Return value is a int (long) or float instance. 239 | 240 | """ 241 | value = value.strip(b' \0') 242 | if b'.' 
in value: 243 | # a float (has decimal separator) 244 | return float(value) 245 | elif value: 246 | # must be an integer 247 | return int(value) 248 | else: 249 | return 0 250 | 251 | def encodeValue(self, value): 252 | """Return string containing encoded ``value``.""" 253 | _rv = ("%*.*f" % (self.length, self.decimalCount, value)) # noqa: UP031 254 | if len(_rv) > self.length: 255 | _ppos = _rv.find(".") 256 | if 0 <= _ppos <= self.length: 257 | _rv = _rv[:self.length] 258 | else: 259 | raise ValueError( 260 | f"[{self.name}] Numeric overflow: {_rv} (field width: {self.length})" 261 | ) 262 | return _rv 263 | 264 | 265 | class DbfFloatFieldDef(DbfNumericFieldDef): 266 | """Definition of the float field - same as numeric.""" 267 | 268 | typeCode = "F" 269 | 270 | 271 | class DbfIntegerFieldDef(DbfFieldDef): 272 | """Definition of the integer field.""" 273 | 274 | typeCode = "I" 275 | length = 4 276 | defaultValue = 0 277 | 278 | def decodeValue(self, value): 279 | """Return an integer number decoded from ``value``.""" 280 | return struct.unpack("= 1: 412 | _rv = datetime.datetime.fromordinal(_jdn - self.JDN_GDN_DIFF) 413 | _rv += datetime.timedelta(0, _msecs / 1000.0) 414 | else: 415 | # empty date 416 | _rv = None 417 | return _rv 418 | 419 | def encodeValue(self, value): 420 | """Return a string-encoded ``value``.""" 421 | if value: 422 | value = utils.getDateTime(value) 423 | # LE byteorder 424 | _rv = struct.pack("<2I", value.toordinal() + self.JDN_GDN_DIFF, 425 | (value.hour * 3600 + value.minute * 60 + value.second) * 1000) 426 | else: 427 | _rv = "\0" * self.length 428 | assert len(_rv) == self.length 429 | return _rv 430 | 431 | 432 | _fieldsRegistry = {} 433 | 434 | 435 | def registerField(fieldCls): 436 | """Register field definition class. 437 | 438 | ``fieldCls`` should be subclass of the `DbfFieldDef`. 439 | 440 | Use `lookupFor` to retrieve field definition class 441 | by the type code. 442 | 443 | """ 444 | assert fieldCls.typeCode is not None, "Type code isn't defined" 445 | # XXX: use fieldCls.typeCode.upper()? in case of any decign 446 | # don't forget to look to the same comment in ``lookupFor`` method 447 | _fieldsRegistry[fieldCls.typeCode] = fieldCls 448 | 449 | 450 | def lookupFor(typeCode): 451 | """Return field definition class for the given type code. 452 | 453 | ``typeCode`` must be a single character. That type should be 454 | previously registered. 455 | 456 | Use `registerField` to register new field class. 457 | 458 | Return: 459 | Return value is a subclass of the `DbfFieldDef`. 460 | 461 | """ 462 | # XXX: use typeCode.upper()? in case of any decign don't 463 | # forget to look to the same comment in ``registerField`` 464 | return _fieldsRegistry[chr(typeCode)] 465 | 466 | # register generic types 467 | 468 | 469 | for (_name, _val) in list(globals().items()): 470 | if isinstance(_val, type) and issubclass(_val, DbfFieldDef) \ 471 | and (_name != "DbfFieldDef"): 472 | __all__.append(_name) 473 | registerField(_val) 474 | del _name, _val 475 | -------------------------------------------------------------------------------- /src/tablib/_vendor/dbfpy/header.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import io 3 | import struct 4 | import sys 5 | 6 | from . import fields 7 | from .utils import getDate 8 | 9 | __version__ = "$Revision: 1.6 $"[11:-2] 10 | __date__ = "$Date: 2010/09/16 05:06:39 $"[7:-2] 11 | 12 | __all__ = ["DbfHeader"] 13 | 14 | 15 | """DBF header definition. 
16 | 17 | TODO: 18 | - handle encoding of the character fields 19 | (encoding information stored in the DBF header) 20 | 21 | """ 22 | """History (most recent first): 23 | 16-sep-2010 [als] fromStream: fix century of the last update field 24 | 11-feb-2007 [als] added .ignoreErrors 25 | 10-feb-2007 [als] added __getitem__: return field definitions 26 | by field name or field number (zero-based) 27 | 04-jul-2006 [als] added export declaration 28 | 15-dec-2005 [yc] created 29 | """ 30 | 31 | 32 | class DbfHeader: 33 | """Dbf header definition. 34 | 35 | For more information about dbf header format visit 36 | `http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_STRUCT` 37 | 38 | Examples: 39 | Create an empty dbf header and add some field definitions: 40 | dbfh = DbfHeader() 41 | dbfh.addField(("name", "C", 10)) 42 | dbfh.addField(("date", "D")) 43 | dbfh.addField(DbfNumericFieldDef("price", 5, 2)) 44 | Create a dbf header with field definitions: 45 | dbfh = DbfHeader([ 46 | ("name", "C", 10), 47 | ("date", "D"), 48 | DbfNumericFieldDef("price", 5, 2), 49 | ]) 50 | 51 | """ 52 | 53 | __slots__ = ("signature", "fields", "lastUpdate", "recordLength", 54 | "recordCount", "headerLength", "changed", "_ignore_errors") 55 | 56 | # instance construction and initialization methods 57 | 58 | def __init__(self, fields=None, headerLength=0, recordLength=0, 59 | recordCount=0, signature=0x03, lastUpdate=None, ignoreErrors=False): 60 | """Initialize instance. 61 | 62 | Arguments: 63 | fields: 64 | a list of field definitions; 65 | recordLength: 66 | size of the records; 67 | headerLength: 68 | size of the header; 69 | recordCount: 70 | number of records stored in DBF; 71 | signature: 72 | version number (aka signature). using 0x03 as a default meaning 73 | "File without DBT". for more information about this field visit 74 | ``http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_NOTE_1_TARGET`` 75 | lastUpdate: 76 | date of the DBF's update. this could be a string ('yymmdd' or 77 | 'yyyymmdd'), timestamp (int or float), datetime/date value, 78 | a sequence (assuming (yyyy, mm, dd, ...)) or an object having 79 | callable ``ticks`` field. 80 | ignoreErrors: 81 | error processing mode for DBF fields (boolean) 82 | 83 | """ 84 | self.signature = signature 85 | if fields is None: 86 | self.fields = [] 87 | else: 88 | self.fields = list(fields) 89 | self.lastUpdate = getDate(lastUpdate) 90 | self.recordLength = recordLength 91 | self.headerLength = headerLength 92 | self.recordCount = recordCount 93 | self.ignoreErrors = ignoreErrors 94 | # XXX: I'm not sure this is safe to 95 | # initialize `self.changed` in this way 96 | self.changed = bool(self.fields) 97 | 98 | # @classmethod 99 | def fromString(cls, string): 100 | """Return header instance from the string object.""" 101 | return cls.fromStream(io.StringIO(str(string))) 102 | fromString = classmethod(fromString) 103 | 104 | # @classmethod 105 | def fromStream(cls, stream): 106 | """Return header object from the stream.""" 107 | stream.seek(0) 108 | first_32 = stream.read(32) 109 | if not isinstance(first_32, bytes): 110 | _data = bytes(first_32, sys.getfilesystemencoding()) 111 | _data = first_32 112 | (_cnt, _hdrLen, _recLen) = struct.unpack(" DbfRecord._write(); 20 | added delete() method. 21 | 16-dec-2005 [yc] record definition moved from `dbf`. 22 | """ 23 | 24 | 25 | class DbfRecord: 26 | """DBF record. 27 | 28 | Instances of this class shouldn't be created manually, 29 | use `dbf.Dbf.newRecord` instead. 
30 | 31 | Class implements mapping/sequence interface, so 32 | fields could be accessed via their names or indexes 33 | (names is a preferred way to access fields). 34 | 35 | Hint: 36 | Use `store` method to save modified record. 37 | 38 | Examples: 39 | Add new record to the database: 40 | db = Dbf(filename) 41 | rec = db.newRecord() 42 | rec["FIELD1"] = value1 43 | rec["FIELD2"] = value2 44 | rec.store() 45 | Or the same, but modify existed 46 | (second in this case) record: 47 | db = Dbf(filename) 48 | rec = db[2] 49 | rec["FIELD1"] = value1 50 | rec["FIELD2"] = value2 51 | rec.store() 52 | 53 | """ 54 | 55 | __slots__ = "dbf", "index", "deleted", "fieldData" 56 | 57 | # creation and initialization 58 | 59 | def __init__(self, dbf, index=None, deleted=False, data=None): 60 | """Instance initialization. 61 | 62 | Arguments: 63 | dbf: 64 | A `Dbf.Dbf` instance this record belongs to. 65 | index: 66 | An integer record index or None. If this value is 67 | None, record will be appended to the DBF. 68 | deleted: 69 | Boolean flag indicating whether this record 70 | is a deleted record. 71 | data: 72 | A sequence or None. This is a data of the fields. 73 | If this argument is None, default values will be used. 74 | 75 | """ 76 | self.dbf = dbf 77 | # XXX: I'm not sure ``index`` is necessary 78 | self.index = index 79 | self.deleted = deleted 80 | if data is None: 81 | self.fieldData = [_fd.defaultValue for _fd in dbf.header.fields] 82 | else: 83 | self.fieldData = list(data) 84 | 85 | # XXX: validate self.index before calculating position? 86 | position = property(lambda self: self.dbf.header.headerLength + 87 | self.index * self.dbf.header.recordLength) 88 | 89 | def rawFromStream(cls, dbf, index): 90 | """Return raw record contents read from the stream. 91 | 92 | Arguments: 93 | dbf: 94 | A `Dbf.Dbf` instance containing the record. 95 | index: 96 | Index of the record in the records' container. 97 | This argument can't be None in this call. 98 | 99 | Return value is a string containing record data in DBF format. 100 | 101 | """ 102 | # XXX: may be write smth assuming, that current stream 103 | # position is the required one? it could save some 104 | # time required to calculate where to seek in the file 105 | dbf.stream.seek(dbf.header.headerLength + index * dbf.header.recordLength) 106 | return dbf.stream.read(dbf.header.recordLength) 107 | 108 | rawFromStream = classmethod(rawFromStream) 109 | 110 | def fromStream(cls, dbf, index): 111 | """Return a record read from the stream. 112 | 113 | Arguments: 114 | dbf: 115 | A `Dbf.Dbf` instance new record should belong to. 116 | index: 117 | Index of the record in the records' container. 118 | This argument can't be None in this call. 119 | 120 | Return value is an instance of the current class. 121 | 122 | """ 123 | return cls.fromString(dbf, cls.rawFromStream(dbf, index), index) 124 | 125 | fromStream = classmethod(fromStream) 126 | 127 | def fromString(cls, dbf, string, index=None): 128 | """Return record read from the string object. 129 | 130 | Arguments: 131 | dbf: 132 | A `Dbf.Dbf` instance new record should belong to. 133 | string: 134 | A string new record should be created from. 135 | index: 136 | Index of the record in the container. If this 137 | argument is None, record will be appended. 138 | 139 | Return value is an instance of the current class. 
140 | 141 | """ 142 | return cls(dbf, index, string[0] == "*", 143 | [_fd.decodeFromRecord(string) for _fd in dbf.header.fields]) 144 | 145 | fromString = classmethod(fromString) 146 | 147 | # object representation 148 | 149 | def __repr__(self): 150 | _template = "%%%ds: %%s (%%s)" % max(len(_fld) for _fld in self.dbf.fieldNames) # noqa: UP031 151 | _rv = [] 152 | for _fld in self.dbf.fieldNames: 153 | _val = self[_fld] 154 | if _val is utils.INVALID_VALUE: 155 | _rv.append(_template % (_fld, "None", "value cannot be decoded")) 156 | else: 157 | _rv.append(_template % (_fld, _val, type(_val))) 158 | return "\n".join(_rv) 159 | 160 | # protected methods 161 | 162 | def _write(self): 163 | """Write data to the dbf stream. 164 | 165 | Note: 166 | This isn't a public method, it's better to 167 | use 'store' instead publicly. 168 | Be design ``_write`` method should be called 169 | only from the `Dbf` instance. 170 | 171 | """ 172 | self._validateIndex(False) 173 | self.dbf.stream.seek(self.position) 174 | self.dbf.stream.write(bytes(self.toString(), 175 | sys.getfilesystemencoding())) 176 | # FIXME: may be move this write somewhere else? 177 | # why we should check this condition for each record? 178 | if self.index == len(self.dbf): 179 | # this is the last record, 180 | # we should write SUB (ASCII 26) 181 | self.dbf.stream.write(b"\x1A") 182 | 183 | # utility methods 184 | 185 | def _validateIndex(self, allowUndefined=True, checkRange=False): 186 | """Valid ``self.index`` value. 187 | 188 | If ``allowUndefined`` argument is True functions does nothing 189 | in case of ``self.index`` pointing to None object. 190 | 191 | """ 192 | if self.index is None: 193 | if not allowUndefined: 194 | raise ValueError("Index is undefined") 195 | elif self.index < 0: 196 | raise ValueError(f"Index can't be negative ({self.index})") 197 | elif checkRange and self.index <= self.dbf.header.recordCount: 198 | raise ValueError( 199 | f"There are only {self.dbf.header.recordCount} records in the DBF" 200 | ) 201 | 202 | # interface methods 203 | 204 | def store(self): 205 | """Store current record in the DBF. 206 | 207 | If ``self.index`` is None, this record will be appended to the 208 | records of the DBF this records belongs to; or replaced otherwise. 209 | 210 | """ 211 | self._validateIndex() 212 | if self.index is None: 213 | self.index = len(self.dbf) 214 | self.dbf.append(self) 215 | else: 216 | self.dbf[self.index] = self 217 | 218 | def delete(self): 219 | """Mark method as deleted.""" 220 | self.deleted = True 221 | 222 | def toString(self): 223 | """Return string packed record values.""" 224 | return "".join([" *"[self.deleted]] + [ 225 | _def.encodeValue(_dat) for (_def, _dat) in zip(self.dbf.header.fields, self.fieldData) 226 | ]) 227 | 228 | def asList(self): 229 | """Return a flat list of fields. 230 | 231 | Note: 232 | Change of the list's values won't change 233 | real values stored in this object. 234 | 235 | """ 236 | return self.fieldData[:] 237 | 238 | def asDict(self): 239 | """Return a dictionary of fields. 240 | 241 | Note: 242 | Change of the dicts's values won't change 243 | real values stored in this object. 
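        Example (illustrative): for fields NAME and AGE the result
        looks like {"NAME": "Ada", "AGE": 36}.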
244 | 245 | """ 246 | return dict(list(zip(self.dbf.fieldNames, self.fieldData))) 247 | 248 | def __getitem__(self, key): 249 | """Return value by field name or field index.""" 250 | if isinstance(key, int): 251 | # integer index of the field 252 | return self.fieldData[key] 253 | # assuming string field name 254 | return self.fieldData[self.dbf.indexOfFieldName(key)] 255 | 256 | def __setitem__(self, key, value): 257 | """Set field value by integer index of the field or string name.""" 258 | if isinstance(key, int): 259 | # integer index of the field 260 | return self.fieldData[key] 261 | # assuming string field name 262 | self.fieldData[self.dbf.indexOfFieldName(key)] = value 263 | -------------------------------------------------------------------------------- /src/tablib/_vendor/dbfpy/utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import time 3 | 4 | __version__ = "$Revision: 1.4 $"[11:-2] 5 | __date__ = "$Date: 2007/02/11 08:57:17 $"[7:-2] 6 | 7 | 8 | """String utilities. 9 | 10 | TODO: 11 | - allow strings in getDateTime routine; 12 | """ 13 | """History (most recent first): 14 | 11-feb-2007 [als] added INVALID_VALUE 15 | 10-feb-2007 [als] allow date strings padded with spaces instead of zeroes 16 | 20-dec-2005 [yc] handle long objects in getDate/getDateTime 17 | 16-dec-2005 [yc] created from ``strutil`` module. 18 | """ 19 | 20 | 21 | def unzfill(str): 22 | """Return a string without ASCII NULs. 23 | 24 | This function searchers for the first NUL (ASCII 0) occurrence 25 | and truncates string till that position. 26 | 27 | """ 28 | try: 29 | return str[:str.index(b'\0')] 30 | except ValueError: 31 | return str 32 | 33 | 34 | def getDate(date=None): 35 | """Return `datetime.date` instance. 36 | 37 | Type of the ``date`` argument could be one of the following: 38 | None: 39 | use current date value; 40 | datetime.date: 41 | this value will be returned; 42 | datetime.datetime: 43 | the result of the date.date() will be returned; 44 | string: 45 | assuming "%Y%m%d" or "%y%m%dd" format; 46 | number: 47 | assuming it's a timestamp (returned for example 48 | by the time.time() call; 49 | sequence: 50 | assuming (year, month, day, ...) sequence; 51 | 52 | Additionally, if ``date`` has callable ``ticks`` attribute, 53 | it will be used and result of the called would be treated 54 | as a timestamp value. 55 | 56 | """ 57 | if date is None: 58 | # use current value 59 | return datetime.date.today() 60 | if isinstance(date, datetime.date): 61 | return date 62 | if isinstance(date, datetime.datetime): 63 | return date.date() 64 | if isinstance(date, (int, float)): 65 | # date is a timestamp 66 | return datetime.date.fromtimestamp(date) 67 | if isinstance(date, str): 68 | date = date.replace(" ", "0") 69 | if len(date) == 6: 70 | # yymmdd 71 | return datetime.date(*time.strptime(date, "%y%m%d")[:3]) 72 | # yyyymmdd 73 | return datetime.date(*time.strptime(date, "%Y%m%d")[:3]) 74 | if hasattr(date, "__getitem__"): 75 | # a sequence (assuming date/time tuple) 76 | return datetime.date(*date[:3]) 77 | return datetime.date.fromtimestamp(date.ticks()) 78 | 79 | 80 | def getDateTime(value=None): 81 | """Return `datetime.datetime` instance. 
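    Example (illustrative):
        getDateTime((2024, 5, 1, 12, 30))
        # -> datetime.datetime(2024, 5, 1, 12, 30)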
82 | 83 | Type of the ``value`` argument could be one of the following: 84 | None: 85 | use current date value; 86 | datetime.date: 87 | result will be converted to the `datetime.datetime` instance 88 | using midnight; 89 | datetime.datetime: 90 | ``value`` will be returned as is; 91 | string: 92 | *** CURRENTLY NOT SUPPORTED ***; 93 | number: 94 | assuming it's a timestamp (returned for example 95 | by the time.time() call; 96 | sequence: 97 | assuming (year, month, day, ...) sequence; 98 | 99 | Additionally, if ``value`` has callable ``ticks`` attribute, 100 | it will be used and result of the called would be treated 101 | as a timestamp value. 102 | 103 | """ 104 | if value is None: 105 | # use current value 106 | return datetime.datetime.today() 107 | if isinstance(value, datetime.datetime): 108 | return value 109 | if isinstance(value, datetime.date): 110 | return datetime.datetime.fromordinal(value.toordinal()) 111 | if isinstance(value, (int, float)): 112 | # value is a timestamp 113 | return datetime.datetime.fromtimestamp(value) 114 | if isinstance(value, str): 115 | raise NotImplementedError("Strings aren't currently implemented") 116 | if hasattr(value, "__getitem__"): 117 | # a sequence (assuming date/time tuple) 118 | return datetime.datetime(*tuple(value)[:6]) 119 | return datetime.datetime.fromtimestamp(value.ticks()) 120 | 121 | 122 | class classproperty(property): 123 | """Works in the same way as a ``property``, but for the classes.""" 124 | 125 | def __get__(self, obj, cls): 126 | return self.fget(cls) 127 | 128 | 129 | class _InvalidValue: 130 | 131 | """Value returned from DBF records when field validation fails 132 | 133 | The value is not equal to anything except for itself 134 | and equal to all empty values: None, 0, empty string etc. 135 | In other words, invalid value is equal to None and not equal 136 | to None at the same time. 137 | 138 | This value yields zero upon explicit conversion to a number type, 139 | empty string for string types, and False for boolean. 
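    Example (illustrative): ``INVALID_VALUE == None`` and
    ``INVALID_VALUE != None`` both hold, while ``bool(INVALID_VALUE)``
    is False and ``str(INVALID_VALUE)`` is the empty string.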
140 | 141 | """ 142 | 143 | def __eq__(self, other): 144 | return not other 145 | 146 | def __ne__(self, other): 147 | return other is not self 148 | 149 | def __bool__(self): 150 | return False 151 | 152 | def __int__(self): 153 | return 0 154 | __long__ = __int__ 155 | 156 | def __float__(self): 157 | return 0.0 158 | 159 | def __str__(self): 160 | return "" 161 | 162 | def __repr__(self): 163 | return "" 164 | 165 | 166 | # invalid value is a constant singleton 167 | INVALID_VALUE = _InvalidValue() 168 | 169 | # vim: set et sts=4 sw=4 : 170 | -------------------------------------------------------------------------------- /src/tablib/exceptions.py: -------------------------------------------------------------------------------- 1 | class TablibException(Exception): 2 | """Tablib common exception.""" 3 | 4 | 5 | class InvalidDatasetType(TablibException, TypeError): 6 | """Only Datasets can be added to a Databook.""" 7 | 8 | 9 | class InvalidDimensions(TablibException, ValueError): 10 | """The size of the column or row doesn't fit the table dimensions.""" 11 | 12 | 13 | class InvalidDatasetIndex(TablibException, IndexError): 14 | """Outside of Dataset size.""" 15 | 16 | 17 | class HeadersNeeded(TablibException, AttributeError): 18 | """Header parameter must be given when appending a column to this Dataset.""" 19 | 20 | 21 | class UnsupportedFormat(TablibException, NotImplementedError): 22 | """Format not supported.""" 23 | -------------------------------------------------------------------------------- /src/tablib/formats/__init__.py: -------------------------------------------------------------------------------- 1 | """ Tablib - formats 2 | """ 3 | from functools import partialmethod 4 | from importlib import import_module 5 | from importlib.util import find_spec 6 | 7 | from ..exceptions import UnsupportedFormat 8 | from ..utils import normalize_input 9 | from ._csv import CSVFormat 10 | from ._json import JSONFormat 11 | from ._tsv import TSVFormat 12 | 13 | uninstalled_format_messages = { 14 | "cli": {"package_name": "tabulate package", "extras_name": "cli"}, 15 | "df": {"package_name": "pandas package", "extras_name": "pandas"}, 16 | "ods": {"package_name": "odfpy package", "extras_name": "ods"}, 17 | "xls": {"package_name": "xlrd and xlwt packages", "extras_name": "xls"}, 18 | "xlsx": {"package_name": "openpyxl package", "extras_name": "xlsx"}, 19 | "yaml": {"package_name": "pyyaml package", "extras_name": "yaml"}, 20 | } 21 | 22 | 23 | def load_format_class(dotted_path): 24 | try: 25 | module_path, class_name = dotted_path.rsplit('.', 1) 26 | return getattr(import_module(module_path), class_name) 27 | except (ValueError, AttributeError) as err: 28 | raise ImportError(f"Unable to load format class '{dotted_path}' ({err})") 29 | 30 | 31 | class FormatDescriptorBase: 32 | def __init__(self, key, format_or_path): 33 | self.key = key 34 | self._format_path = None 35 | if isinstance(format_or_path, str): 36 | self._format = None 37 | self._format_path = format_or_path 38 | else: 39 | self._format = format_or_path 40 | 41 | def ensure_format_loaded(self): 42 | if self._format is None: 43 | self._format = load_format_class(self._format_path) 44 | 45 | 46 | class ImportExportBookDescriptor(FormatDescriptorBase): 47 | def __get__(self, obj, cls, **kwargs): 48 | self.ensure_format_loaded() 49 | return self._format.export_book(obj, **kwargs) 50 | 51 | def __set__(self, obj, val): 52 | self.ensure_format_loaded() 53 | return self._format.import_book(obj, normalize_input(val)) 54 | 55 | 56 | 
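# --- Illustrative aside (not part of tablib) --------------------------------
# A minimal sketch of the descriptor mechanism used above: reading the
# attribute exports the object, assigning to it imports. All names here
# (ToyFormat, ToyDescriptor, Table) are hypothetical.
class ToyFormat:
    @staticmethod
    def export_set(obj):
        return ','.join(obj.rows)

    @staticmethod
    def import_set(obj, value):
        obj.rows = value.split(',')


class ToyDescriptor:
    def __get__(self, obj, cls):
        return ToyFormat.export_set(obj)

    def __set__(self, obj, value):
        ToyFormat.import_set(obj, value)


class Table:
    toy = ToyDescriptor()

    def __init__(self):
        self.rows = []


t = Table()
t.toy = 'a,b,c'          # assignment routes through import_set
assert t.toy == 'a,b,c'  # attribute read routes through export_set
# -----------------------------------------------------------------------------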
class ImportExportSetDescriptor(FormatDescriptorBase): 57 | def __get__(self, obj, cls, **kwargs): 58 | self.ensure_format_loaded() 59 | return self._format.export_set(obj, **kwargs) 60 | 61 | def __set__(self, obj, val): 62 | self.ensure_format_loaded() 63 | return self._format.import_set(obj, normalize_input(val)) 64 | 65 | 66 | class Registry: 67 | _formats = {} 68 | 69 | def register(self, key, format_or_path): 70 | from ..core import Databook, Dataset 71 | 72 | # Create Databook. read or read/write properties 73 | setattr(Databook, key, ImportExportBookDescriptor(key, format_or_path)) 74 | 75 | # Create Dataset. read or read/write properties, 76 | # and Dataset.get_/set_ methods. 77 | setattr(Dataset, key, ImportExportSetDescriptor(key, format_or_path)) 78 | try: 79 | setattr(Dataset, f'get_{key}', partialmethod(Dataset._get_in_format, key)) 80 | setattr(Dataset, f'set_{key}', partialmethod(Dataset._set_in_format, key)) 81 | except AttributeError: 82 | setattr(Dataset, f'get_{key}', partialmethod(Dataset._get_in_format, key)) 83 | 84 | self._formats[key] = format_or_path 85 | 86 | def register_builtins(self): 87 | # Registration ordering matters for autodetection. 88 | self.register('json', JSONFormat()) 89 | # xlsx before as xls (xlrd) can also read xlsx 90 | if find_spec('openpyxl'): 91 | self.register('xlsx', 'tablib.formats._xlsx.XLSXFormat') 92 | if find_spec('xlrd') and find_spec('xlwt'): 93 | self.register('xls', 'tablib.formats._xls.XLSFormat') 94 | if find_spec('yaml'): 95 | self.register('yaml', 'tablib.formats._yaml.YAMLFormat') 96 | self.register('csv', CSVFormat()) 97 | self.register('tsv', TSVFormat()) 98 | if find_spec('odf'): 99 | self.register('ods', 'tablib.formats._ods.ODSFormat') 100 | self.register('dbf', 'tablib.formats._dbf.DBFFormat') 101 | self.register('html', 'tablib.formats._html.HTMLFormat') 102 | self.register('jira', 'tablib.formats._jira.JIRAFormat') 103 | self.register('latex', 'tablib.formats._latex.LATEXFormat') 104 | if find_spec('pandas'): 105 | self.register('df', 'tablib.formats._df.DataFrameFormat') 106 | self.register('rst', 'tablib.formats._rst.ReSTFormat') 107 | self.register('sql', 'tablib.formats._sql.SQLFormat') 108 | if find_spec('tabulate'): 109 | self.register('cli', 'tablib.formats._cli.CLIFormat') 110 | 111 | def formats(self): 112 | for key, frm in self._formats.items(): 113 | if isinstance(frm, str): 114 | self._formats[key] = load_format_class(frm) 115 | yield self._formats[key] 116 | 117 | def get_format(self, key): 118 | if key not in self._formats: 119 | if key in uninstalled_format_messages: 120 | raise UnsupportedFormat( 121 | "The '{key}' format is not available. You may want to install the " 122 | "{package_name} (or `pip install \"tablib[{extras_name}]\"`).".format( 123 | **uninstalled_format_messages[key], key=key 124 | ) 125 | ) 126 | raise UnsupportedFormat(f"Tablib has no format '{key}' or it is not registered.") 127 | if isinstance(self._formats[key], str): 128 | self._formats[key] = load_format_class(self._formats[key]) 129 | return self._formats[key] 130 | 131 | 132 | registry = Registry() 133 | -------------------------------------------------------------------------------- /src/tablib/formats/_cli.py: -------------------------------------------------------------------------------- 1 | """Tablib - Command-line Interface table export support. 2 | 3 | Generates a representation for CLI from the dataset. 4 | Wrapper for tabulate library. 
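Example (illustrative; assumes the ``tabulate`` extra is installed):
    data.export('cli', tablefmt='grid')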
5 | """ 6 | from tabulate import tabulate as Tabulate 7 | 8 | 9 | class CLIFormat: 10 | """ Class responsible to export to CLI Format """ 11 | title = 'cli' 12 | DEFAULT_FMT = 'plain' 13 | 14 | @classmethod 15 | def export_set(cls, dataset, **kwargs): 16 | """Returns CLI representation of a Dataset.""" 17 | if dataset.headers: 18 | kwargs.setdefault('headers', dataset.headers) 19 | kwargs.setdefault('tablefmt', cls.DEFAULT_FMT) 20 | return Tabulate(dataset, **kwargs) 21 | -------------------------------------------------------------------------------- /src/tablib/formats/_csv.py: -------------------------------------------------------------------------------- 1 | """ Tablib - *SV Support. 2 | """ 3 | 4 | import csv 5 | from io import StringIO 6 | 7 | 8 | class CSVFormat: 9 | title = 'csv' 10 | extensions = ('csv',) 11 | 12 | DEFAULT_DELIMITER = ',' 13 | 14 | @classmethod 15 | def export_stream_set(cls, dataset, **kwargs): 16 | """Returns CSV representation of Dataset as file-like.""" 17 | stream = StringIO() 18 | 19 | kwargs.setdefault('delimiter', cls.DEFAULT_DELIMITER) 20 | 21 | _csv = csv.writer(stream, **kwargs) 22 | 23 | for row in dataset._package(dicts=False): 24 | _csv.writerow(row) 25 | 26 | stream.seek(0) 27 | return stream 28 | 29 | @classmethod 30 | def export_set(cls, dataset, **kwargs): 31 | """Returns CSV representation of Dataset.""" 32 | stream = cls.export_stream_set(dataset, **kwargs) 33 | return stream.getvalue() 34 | 35 | @classmethod 36 | def import_set(cls, dset, in_stream, headers=True, skip_lines=0, **kwargs): 37 | """Returns dataset from CSV stream.""" 38 | 39 | dset.wipe() 40 | 41 | kwargs.setdefault('delimiter', cls.DEFAULT_DELIMITER) 42 | 43 | rows = csv.reader(in_stream, **kwargs) 44 | for i, row in enumerate(rows): 45 | if i < skip_lines: 46 | continue 47 | if i == skip_lines and headers: 48 | dset.headers = row 49 | elif row: 50 | if i > 0 and len(row) < dset.width: 51 | row += [''] * (dset.width - len(row)) 52 | dset.append(row) 53 | 54 | @classmethod 55 | def detect(cls, stream, delimiter=None): 56 | """Returns True if given stream is valid CSV.""" 57 | try: 58 | csv.Sniffer().sniff(stream.read(2048), delimiters=delimiter or cls.DEFAULT_DELIMITER) 59 | return True 60 | except Exception: 61 | return False 62 | -------------------------------------------------------------------------------- /src/tablib/formats/_dbf.py: -------------------------------------------------------------------------------- 1 | """ Tablib - DBF Support. 
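Example (illustrative):
    raw = data.export('dbf')   # bytes of a .dbf file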
2 | """ 3 | import io 4 | import os 5 | import tempfile 6 | 7 | from .._vendor.dbfpy import dbf, dbfnew 8 | from .._vendor.dbfpy import record as dbfrecord 9 | 10 | 11 | class DBFFormat: 12 | title = 'dbf' 13 | extensions = ('csv',) 14 | 15 | DEFAULT_ENCODING = 'utf-8' 16 | 17 | @classmethod 18 | def export_set(cls, dataset): 19 | """Returns DBF representation of a Dataset""" 20 | new_dbf = dbfnew.dbf_new() 21 | temp_file, temp_uri = tempfile.mkstemp() 22 | 23 | # create the appropriate fields based on the contents of the first row 24 | first_row = dataset[0] 25 | for fieldname, field_value in zip(dataset.headers, first_row): 26 | if type(field_value) in [int, float]: 27 | new_dbf.add_field(fieldname, 'N', 10, 8) 28 | else: 29 | new_dbf.add_field(fieldname, 'C', 80) 30 | 31 | new_dbf.write(temp_uri) 32 | 33 | dbf_file = dbf.Dbf(temp_uri, readOnly=0) 34 | for row in dataset: 35 | record = dbfrecord.DbfRecord(dbf_file) 36 | for fieldname, field_value in zip(dataset.headers, row): 37 | record[fieldname] = field_value 38 | record.store() 39 | 40 | dbf_file.close() 41 | dbf_stream = open(temp_uri, 'rb') 42 | stream = io.BytesIO(dbf_stream.read()) 43 | dbf_stream.close() 44 | os.close(temp_file) 45 | os.remove(temp_uri) 46 | return stream.getvalue() 47 | 48 | @classmethod 49 | def import_set(cls, dset, in_stream): 50 | """Returns a dataset from a DBF stream.""" 51 | 52 | dset.wipe() 53 | _dbf = dbf.Dbf(in_stream) 54 | dset.headers = _dbf.fieldNames 55 | for record in range(_dbf.recordCount): 56 | row = [_dbf[record][f] for f in _dbf.fieldNames] 57 | dset.append(row) 58 | 59 | @classmethod 60 | def detect(cls, stream): 61 | """Returns True if the given stream is valid DBF""" 62 | try: 63 | dbf.Dbf(stream, readOnly=True) 64 | return True 65 | except Exception: 66 | return False 67 | -------------------------------------------------------------------------------- /src/tablib/formats/_df.py: -------------------------------------------------------------------------------- 1 | """ Tablib - DataFrame Support. 2 | """ 3 | 4 | try: 5 | from pandas import DataFrame 6 | except ImportError: 7 | DataFrame = None 8 | 9 | 10 | class DataFrameFormat: 11 | title = 'df' 12 | extensions = ('df',) 13 | 14 | @classmethod 15 | def detect(cls, stream): 16 | """Returns True if given stream is a DataFrame.""" 17 | if DataFrame is None: 18 | return False 19 | elif isinstance(stream, DataFrame): 20 | return True 21 | try: 22 | DataFrame(stream.read()) 23 | return True 24 | except ValueError: 25 | return False 26 | 27 | @classmethod 28 | def export_set(cls, dset): 29 | """Returns DataFrame representation of DataBook.""" 30 | if DataFrame is None: 31 | raise NotImplementedError( 32 | 'DataFrame Format requires `pandas` to be installed.' 33 | ' Try `pip install "tablib[pandas]"`.') 34 | dataframe = DataFrame(dset.dict, columns=dset.headers) 35 | return dataframe 36 | 37 | @classmethod 38 | def import_set(cls, dset, in_stream): 39 | """Returns dataset from DataFrame.""" 40 | dset.wipe() 41 | dset.dict = in_stream.to_dict(orient='records') 42 | -------------------------------------------------------------------------------- /src/tablib/formats/_html.py: -------------------------------------------------------------------------------- 1 | """ Tablib - HTML export support. 
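Example (illustrative; ``html_text`` and the table id are placeholders):
    data = tablib.Dataset().load(html_text, format='html', table_id='prices')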
2 | """ 3 | from html.parser import HTMLParser 4 | from xml.etree import ElementTree as ET 5 | 6 | 7 | class HTMLFormat: 8 | BOOK_ENDINGS = 'h3' 9 | 10 | title = 'html' 11 | extensions = ('html', ) 12 | 13 | @classmethod 14 | def export_set(cls, dataset): 15 | """HTML representation of a Dataset.""" 16 | 17 | table = ET.Element('table') 18 | if dataset.headers is not None: 19 | head = ET.Element('thead') 20 | tr = ET.Element('tr') 21 | for header in dataset.headers: 22 | th = ET.Element('th') 23 | th.text = str(header) if header is not None else '' 24 | tr.append(th) 25 | head.append(tr) 26 | table.append(head) 27 | 28 | body = ET.Element('tbody') 29 | for row in dataset: 30 | tr = ET.Element('tr') 31 | for item in row: 32 | td = ET.Element('td') 33 | td.text = str(item) if item is not None else '' 34 | tr.append(td) 35 | body.append(tr) 36 | table.append(body) 37 | 38 | return ET.tostring(table, method='html', encoding='unicode') 39 | 40 | @classmethod 41 | def export_book(cls, databook): 42 | """HTML representation of a Databook.""" 43 | 44 | result = '' 45 | for i, dset in enumerate(databook._datasets): 46 | title = dset.title if dset.title else f'Set {i}' 47 | result += f'<{cls.BOOK_ENDINGS}>{title}\n' 48 | result += dset.html 49 | result += '\n' 50 | 51 | return result 52 | 53 | @classmethod 54 | def import_set(cls, dset, in_stream, table_id=None): 55 | """Returns dataset from HTML content.""" 56 | 57 | dset.wipe() 58 | parser = TablibHTMLParser(dset, table_id=table_id) 59 | parser.feed(in_stream.read()) 60 | if not parser.table_found: 61 | if table_id: 62 | raise ValueError(f'No
<table> found with id="{table_id}" in input HTML') 63 | else: 64 | raise ValueError('No <table>
found in input HTML') 65 | 66 | 67 | class TablibHTMLParser(HTMLParser): 68 | def __init__(self, dataset, *args, table_id=None, **kwargs): 69 | super().__init__(*args, **kwargs) 70 | self.dset = dataset 71 | self.table_id = table_id 72 | self.table_found = False 73 | self.table_open = False 74 | self.thead_open = False 75 | self.cell_open = False 76 | self.headers = [] 77 | self.current_row = [] 78 | self.current_data = '' 79 | 80 | def handle_starttag(self, tag, attrs): 81 | if ( 82 | tag == 'table' and not self.table_found and 83 | (not self.table_id or dict(attrs).get('id') == self.table_id) 84 | ): 85 | self.table_open = True 86 | self.table_found = True 87 | elif self.table_open: 88 | if tag == 'thead': 89 | self.thead_open = True 90 | elif tag in ['td', 'th']: 91 | self.cell_open = True 92 | 93 | def handle_endtag(self, tag): 94 | if not self.table_open: 95 | return 96 | if tag == 'table': 97 | self.table_open = False 98 | elif tag == 'thead': 99 | self.thead_open = False 100 | self.dset.headers = self.headers 101 | elif tag == 'tr' and self.current_row: 102 | self.dset.append(self.current_row) 103 | self.current_row = [] 104 | elif tag in ['td', 'th']: 105 | if self.thead_open: 106 | self.headers.append(self.current_data) 107 | else: 108 | self.current_row.append(self.current_data) 109 | self.cell_open = False 110 | self.current_data = '' 111 | 112 | def handle_data(self, data): 113 | if self.cell_open: 114 | self.current_data += data 115 | -------------------------------------------------------------------------------- /src/tablib/formats/_jira.py: -------------------------------------------------------------------------------- 1 | """Tablib - Jira table export support. 2 | 3 | Generates a Jira table from the dataset. 4 | """ 5 | 6 | 7 | class JIRAFormat: 8 | title = 'jira' 9 | 10 | @classmethod 11 | def export_set(cls, dataset): 12 | """Formats the dataset according to the Jira table syntax: 13 | 14 | ||heading 1||heading 2||heading 3|| 15 | |col A1|col A2|col A3| 16 | |col B1|col B2|col B3| 17 | 18 | :param dataset: dataset to serialize 19 | :type dataset: tablib.core.Dataset 20 | """ 21 | 22 | header = cls._get_header(dataset.headers) if dataset.headers else '' 23 | body = cls._get_body(dataset) 24 | return f'{header}\n{body}' if header else body 25 | 26 | @classmethod 27 | def _get_body(cls, dataset): 28 | return '\n'.join([cls._serialize_row(row) for row in dataset]) 29 | 30 | @classmethod 31 | def _get_header(cls, headers): 32 | return cls._serialize_row(headers, delimiter='||') 33 | 34 | @classmethod 35 | def _serialize_row(cls, row, delimiter='|'): 36 | return '{}{}{}'.format( 37 | delimiter, 38 | delimiter.join([str(item) if item else ' ' for item in row]), 39 | delimiter 40 | ) 41 | -------------------------------------------------------------------------------- /src/tablib/formats/_json.py: -------------------------------------------------------------------------------- 1 | """ Tablib - JSON Support 2 | """ 3 | import decimal 4 | import json 5 | from uuid import UUID 6 | 7 | import tablib 8 | 9 | 10 | def serialize_objects_handler(obj): 11 | if isinstance(obj, (decimal.Decimal, UUID)): 12 | return str(obj) 13 | elif hasattr(obj, 'isoformat'): 14 | return obj.isoformat() 15 | else: 16 | return obj 17 | 18 | 19 | class JSONFormat: 20 | title = 'json' 21 | extensions = ('json', 'jsn') 22 | 23 | @classmethod 24 | def export_set(cls, dataset): 25 | """Returns JSON representation of Dataset.""" 26 | return json.dumps( 27 | dataset.dict, default=serialize_objects_handler, 
ensure_ascii=False 28 | ) 29 | 30 | @classmethod 31 | def export_book(cls, databook): 32 | """Returns JSON representation of Databook.""" 33 | return json.dumps( 34 | databook._package(), default=serialize_objects_handler, ensure_ascii=False 35 | ) 36 | 37 | @classmethod 38 | def import_set(cls, dset, in_stream): 39 | """Returns dataset from JSON stream.""" 40 | 41 | dset.wipe() 42 | dset.dict = json.load(in_stream) 43 | 44 | @classmethod 45 | def import_book(cls, dbook, in_stream): 46 | """Returns databook from JSON stream.""" 47 | 48 | dbook.wipe() 49 | for sheet in json.load(in_stream): 50 | data = tablib.Dataset() 51 | data.title = sheet['title'] 52 | data.dict = sheet['data'] 53 | dbook.add_sheet(data) 54 | 55 | @classmethod 56 | def detect(cls, stream): 57 | """Returns True if given stream is valid JSON.""" 58 | try: 59 | json.load(stream) 60 | return True 61 | except (TypeError, ValueError): 62 | return False 63 | -------------------------------------------------------------------------------- /src/tablib/formats/_latex.py: -------------------------------------------------------------------------------- 1 | """Tablib - LaTeX table export support. 2 | 3 | Generates a LaTeX booktabs-style table from the dataset. 4 | """ 5 | import re 6 | 7 | 8 | class LATEXFormat: 9 | title = 'latex' 10 | extensions = ('tex',) 11 | 12 | TABLE_TEMPLATE = """\ 13 | %% Note: add \\usepackage{booktabs} to your preamble 14 | %% 15 | \\begin{table}[!htbp] 16 | \\centering 17 | %(CAPTION)s 18 | \\begin{tabular}{%(COLSPEC)s} 19 | \\toprule 20 | %(HEADER)s 21 | %(MIDRULE)s 22 | %(BODY)s 23 | \\bottomrule 24 | \\end{tabular} 25 | \\end{table} 26 | """ 27 | 28 | TEX_RESERVED_SYMBOLS_MAP = { 29 | '\\': '\\textbackslash{}', 30 | '{': '\\{', 31 | '}': '\\}', 32 | '$': '\\$', 33 | '&': '\\&', 34 | '#': '\\#', 35 | '^': '\\textasciicircum{}', 36 | '_': '\\_', 37 | '~': '\\textasciitilde{}', 38 | '%': '\\%', 39 | } 40 | 41 | TEX_RESERVED_SYMBOLS_RE = re.compile( 42 | '({})'.format('|'.join(map(re.escape, TEX_RESERVED_SYMBOLS_MAP.keys())))) 43 | 44 | @classmethod 45 | def export_set(cls, dataset): 46 | """Returns LaTeX representation of dataset 47 | 48 | :param dataset: dataset to serialize 49 | :type dataset: tablib.core.Dataset 50 | """ 51 | 52 | caption = f'\\caption{{{dataset.title}}}' if dataset.title else '%' 53 | colspec = cls._colspec(dataset.width) 54 | header = cls._serialize_row(dataset.headers) if dataset.headers else '' 55 | midrule = cls._midrule(dataset.width) 56 | body = '\n'.join([cls._serialize_row(row) for row in dataset]) 57 | return cls.TABLE_TEMPLATE % {'CAPTION': caption, 'COLSPEC': colspec, 58 | 'HEADER': header, 'MIDRULE': midrule, 'BODY': body} 59 | 60 | @classmethod 61 | def _colspec(cls, dataset_width): 62 | """Generates the column specification for the LaTeX `tabular` environment 63 | based on the dataset width. 64 | 65 | The first column is justified to the left, all further columns are aligned 66 | to the right. 67 | 68 | .. note:: This is only a heuristic and most probably has to be fine-tuned 69 | post export. Column alignment should depend on the data type, e.g., textual 70 | content should usually be aligned to the left while numeric content almost 71 | always should be aligned to the right. 
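        Example (illustrative): a dataset of width 3 yields ``lrr``.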
72 | 73 | :param dataset_width: width of the dataset 74 | """ 75 | 76 | spec = 'l' 77 | for _ in range(1, dataset_width): 78 | spec += 'r' 79 | return spec 80 | 81 | @classmethod 82 | def _midrule(cls, dataset_width): 83 | """Generates the table `midrule`, which may be composed of several 84 | `cmidrules`. 85 | 86 | :param dataset_width: width of the dataset to serialize 87 | """ 88 | 89 | if not dataset_width or dataset_width == 1: 90 | return '\\midrule' 91 | return ' '.join([cls._cmidrule(colindex, dataset_width) for colindex in 92 | range(1, dataset_width + 1)]) 93 | 94 | @classmethod 95 | def _cmidrule(cls, colindex, dataset_width): 96 | """Generates the `cmidrule` for a single column with appropriate trimming 97 | based on the column position. 98 | 99 | :param colindex: Column index 100 | :param dataset_width: width of the dataset 101 | """ 102 | 103 | rule = '\\cmidrule(%s){%d-%d}' 104 | if colindex == 1: 105 | # Rule of first column is trimmed on the right 106 | return rule % ('r', colindex, colindex) 107 | if colindex == dataset_width: 108 | # Rule of last column is trimmed on the left 109 | return rule % ('l', colindex, colindex) 110 | # Inner columns are trimmed on the left and right 111 | return rule % ('lr', colindex, colindex) 112 | 113 | @classmethod 114 | def _serialize_row(cls, row): 115 | """Returns string representation of a single row. 116 | 117 | :param row: single dataset row 118 | """ 119 | 120 | new_row = [cls._escape_tex_reserved_symbols(str(item)) if item else '' 121 | for item in row] 122 | return 6 * ' ' + ' & '.join(new_row) + ' \\\\' 123 | 124 | @classmethod 125 | def _escape_tex_reserved_symbols(cls, string): 126 | """Escapes all TeX reserved symbols ('_', '~', etc.) in a string. 127 | 128 | :param string: String to escape 129 | """ 130 | def replace(match): 131 | return cls.TEX_RESERVED_SYMBOLS_MAP[match.group()] 132 | return cls.TEX_RESERVED_SYMBOLS_RE.sub(replace, string) 133 | -------------------------------------------------------------------------------- /src/tablib/formats/_ods.py: -------------------------------------------------------------------------------- 1 | """ Tablib - ODF Support. 
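Example (illustrative):
    data = tablib.Dataset(headers=['when'])
    data.append([datetime.date(2024, 5, 1)])
    raw = data.export('ods')   # the date cell picks up the date style below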
2 | """ 3 | 4 | import datetime as dt 5 | import numbers 6 | from io import BytesIO 7 | 8 | from odf import number, opendocument, style, table, text 9 | 10 | import tablib 11 | 12 | bold = style.Style(name="bold", family="paragraph") 13 | bold.addElement(style.TextProperties( 14 | fontweight="bold", 15 | fontweightasian="bold", 16 | fontweightcomplex="bold", 17 | )) 18 | 19 | 20 | def set_date_style(style): 21 | style.addElement(number.Year(style="long")) 22 | style.addElement(number.Text(text="-")) 23 | style.addElement(number.Month(style="long")) 24 | style.addElement(number.Text(text="-")) 25 | style.addElement(number.Day(style="long")) 26 | 27 | 28 | def set_time_style(style): 29 | style.addElement(number.Hours(style="long")) 30 | style.addElement(number.Text(text=":")) 31 | style.addElement(number.Minutes(style="long")) 32 | style.addElement(number.Text(text=":")) 33 | style.addElement(number.Seconds(style="long", decimalplaces="0")) 34 | 35 | 36 | date_style = number.DateStyle(name="date-style1") 37 | set_date_style(date_style) 38 | ds = style.Style( 39 | name="ds1", 40 | datastylename="date-style1", 41 | parentstylename="Default", 42 | family="table-cell", 43 | ) 44 | 45 | time_style = number.DateStyle(name="time-style1") 46 | set_time_style(time_style) 47 | ts = style.Style( 48 | name="ts1", 49 | datastylename="time-style1", 50 | parentstylename="Default", 51 | family="table-cell", 52 | ) 53 | 54 | datetime_style = number.DateStyle(name="datetime-style1") 55 | set_date_style(datetime_style) 56 | datetime_style.addElement(number.Text(text=" ")) 57 | set_time_style(datetime_style) 58 | dts = style.Style( 59 | name="dts1", 60 | datastylename="datetime-style1", 61 | parentstylename="Default", 62 | family="table-cell", 63 | ) 64 | 65 | 66 | class ODSFormat: 67 | title = 'ods' 68 | extensions = ('ods',) 69 | 70 | @classmethod 71 | def export_set(cls, dataset): 72 | """Returns ODF representation of Dataset.""" 73 | 74 | wb = opendocument.OpenDocumentSpreadsheet() 75 | wb.automaticstyles.addElement(bold) 76 | wb.styles.addElement(date_style) 77 | wb.automaticstyles.addElement(ds) 78 | wb.styles.addElement(time_style) 79 | wb.automaticstyles.addElement(ts) 80 | wb.styles.addElement(datetime_style) 81 | wb.automaticstyles.addElement(dts) 82 | 83 | ws = table.Table(name=dataset.title if dataset.title else 'Tablib Dataset') 84 | wb.spreadsheet.addElement(ws) 85 | cls.dset_sheet(dataset, ws) 86 | 87 | stream = BytesIO() 88 | wb.save(stream) 89 | return stream.getvalue() 90 | 91 | @classmethod 92 | def export_book(cls, databook): 93 | """Returns ODF representation of DataBook.""" 94 | 95 | wb = opendocument.OpenDocumentSpreadsheet() 96 | wb.automaticstyles.addElement(bold) 97 | 98 | for i, dset in enumerate(databook._datasets): 99 | ws = table.Table(name=dset.title if dset.title else f"Sheet{i}") 100 | wb.spreadsheet.addElement(ws) 101 | cls.dset_sheet(dset, ws) 102 | 103 | stream = BytesIO() 104 | wb.save(stream) 105 | return stream.getvalue() 106 | 107 | @classmethod 108 | def import_sheet(cls, dset, sheet, headers=True, skip_lines=0): 109 | """Populate dataset `dset` with sheet data.""" 110 | 111 | dset.title = sheet.getAttribute('name') 112 | 113 | def is_real_cell(cell): 114 | return cell.hasChildNodes() or not cell.getAttribute('numbercolumnsrepeated') 115 | 116 | rows = (row for row in sheet.childNodes if row.tagName == "table:table-row") 117 | 118 | for i, row in enumerate(rows): 119 | if i < skip_lines: 120 | continue 121 | row_vals = [cls.read_cell(cell) for cell in row.childNodes if 
is_real_cell(cell)] 122 | if not row_vals: 123 | continue 124 | if i == skip_lines and headers: 125 | dset.headers = row_vals 126 | else: 127 | if i > skip_lines and len(row_vals) < dset.width: 128 | row_vals += [''] * (dset.width - len(row_vals)) 129 | dset.append(row_vals) 130 | 131 | @classmethod 132 | def read_cell(cls, cell, value_type=None): 133 | def convert_date(val): 134 | if 'T' in val: 135 | return dt.datetime.strptime(val, "%Y-%m-%dT%H:%M:%S") 136 | else: 137 | return dt.datetime.strptime(val, "%Y-%m-%d").date() 138 | 139 | if value_type is None: 140 | value_type = cell.getAttribute('valuetype') 141 | if value_type == 'date': 142 | date_value = cell.getAttribute('datevalue') 143 | if date_value: 144 | return convert_date(date_value) 145 | if value_type == 'time': 146 | time_value = cell.getAttribute('timevalue') 147 | try: 148 | return dt.datetime.strptime(time_value, "PT%HH%MM%SS").time() 149 | except ValueError: 150 | # backwards compatibility for times exported with older tablib versions 151 | return dt.datetime.strptime(time_value, "%H:%M:%S").time() 152 | if value_type == 'boolean': 153 | bool_value = cell.getAttribute('booleanvalue') 154 | return bool_value == 'true' 155 | if not cell.childNodes: 156 | value = getattr(cell, 'data', None) 157 | if value is None: 158 | try: 159 | value = cell.getAttribute('value') 160 | except ValueError: 161 | pass 162 | if value is None: 163 | return '' 164 | if value_type == 'float': 165 | return float(value) 166 | if value_type == 'date': 167 | return convert_date(value) 168 | return value # Any other type default to 'string' 169 | 170 | for subnode in cell.childNodes: 171 | return cls.read_cell(subnode, value_type) 172 | 173 | @classmethod 174 | def import_set(cls, dset, in_stream, headers=True, skip_lines=0): 175 | """Populate dataset `dset` from ODS stream.""" 176 | 177 | dset.wipe() 178 | 179 | ods_book = opendocument.load(in_stream) 180 | for sheet in ods_book.spreadsheet.childNodes: 181 | if sheet.qname[1] == 'table': 182 | cls.import_sheet(dset, sheet, headers, skip_lines) 183 | 184 | @classmethod 185 | def import_book(cls, dbook, in_stream, headers=True): 186 | """Populate databook `dbook` from ODS stream.""" 187 | 188 | dbook.wipe() 189 | 190 | ods_book = opendocument.load(in_stream) 191 | 192 | for sheet in ods_book.spreadsheet.childNodes: 193 | if sheet.qname[1] != 'table': 194 | continue 195 | dset = tablib.Dataset() 196 | cls.import_sheet(dset, sheet, headers) 197 | dbook.add_sheet(dset) 198 | 199 | @classmethod 200 | def dset_sheet(cls, dataset, ws): 201 | """Completes given worksheet from given Dataset.""" 202 | _package = dataset._package(dicts=False) 203 | 204 | for i, sep in enumerate(dataset._separators): 205 | _offset = i 206 | _package.insert((sep[0] + _offset), (sep[1],)) 207 | 208 | for row_number, row in enumerate(_package, start=1): 209 | is_header = row_number == 1 and dataset.headers 210 | style = bold if is_header else None 211 | odf_row = table.TableRow(stylename=style) 212 | ws.addElement(odf_row) 213 | for j, col in enumerate(row): 214 | if isinstance(col, numbers.Number): 215 | cell = table.TableCell(valuetype="float", value=col) 216 | elif isinstance(col, dt.datetime): 217 | cell = table.TableCell( 218 | valuetype="date", 219 | datevalue=col.strftime('%Y-%m-%dT%H:%M:%S'), 220 | stylename=dts, 221 | ) 222 | cell.addElement(text.P(text=col.strftime('%Y-%m-%d %H:%M:%S'))) 223 | elif isinstance(col, dt.date): 224 | date_value = col.strftime('%Y-%m-%d') 225 | cell = table.TableCell(valuetype="date", 
datevalue=date_value, stylename=ds) 226 | cell.addElement(text.P(text=date_value)) 227 | elif isinstance(col, dt.time): 228 | cell = table.TableCell( 229 | valuetype="time", 230 | timevalue=col.strftime('PT%HH%MM%SS'), 231 | stylename=ts, 232 | ) 233 | cell.addElement(text.P(text=col.strftime('%H:%M:%S'))) 234 | elif col is None: 235 | cell = table.TableCell(valuetype="void") 236 | else: 237 | cell = table.TableCell(valuetype="string") 238 | cell.addElement(text.P(text=str(col), stylename=style)) 239 | odf_row.addElement(cell) 240 | 241 | @classmethod 242 | def detect(cls, stream): 243 | if isinstance(stream, bytes): 244 | # load expects a file-like object. 245 | stream = BytesIO(stream) 246 | try: 247 | opendocument.load(stream) 248 | return True 249 | except Exception: 250 | return False 251 | -------------------------------------------------------------------------------- /src/tablib/formats/_rst.py: -------------------------------------------------------------------------------- 1 | """ Tablib - reStructuredText Support 2 | """ 3 | 4 | from itertools import zip_longest 5 | from statistics import median 6 | from textwrap import TextWrapper 7 | 8 | JUSTIFY_LEFT = 'left' 9 | JUSTIFY_CENTER = 'center' 10 | JUSTIFY_RIGHT = 'right' 11 | JUSTIFY_VALUES = (JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT) 12 | 13 | 14 | def to_str(value): 15 | if isinstance(value, bytes): 16 | return value.decode('utf-8') 17 | return str(value) 18 | 19 | 20 | def _max_word_len(text): 21 | """ 22 | Return the length of the longest word in `text`. 23 | 24 | >>> _max_word_len('Python Module for Tabular Datasets') 25 | 8 26 | """ 27 | return max((len(word) for word in text.split()), default=0) if text else 0 28 | 29 | 30 | class ReSTFormat: 31 | title = 'rst' 32 | extensions = ('rst',) 33 | 34 | MAX_TABLE_WIDTH = 80 # Roughly. It may be wider to avoid breaking words. 35 | 36 | @classmethod 37 | def _get_column_string_lengths(cls, dataset): 38 | """ 39 | Returns a list of string lengths of each column, and a list of 40 | maximum word lengths. 
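        Example (illustrative): headers ['name', 'id'] with a single
        row ['Ada Lovelace', 7] give ([[4, 12], [2, 1]], [8, 2]).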
41 | """ 42 | if dataset.headers: 43 | column_lengths = [[len(h)] for h in dataset.headers] 44 | word_lens = [_max_word_len(h) for h in dataset.headers] 45 | else: 46 | column_lengths = [[] for _ in range(dataset.width)] 47 | word_lens = [0 for _ in range(dataset.width)] 48 | for row in dataset.dict: 49 | values = iter(row.values() if hasattr(row, 'values') else row) 50 | for i, val in enumerate(values): 51 | text = to_str(val) 52 | column_lengths[i].append(len(text)) 53 | word_lens[i] = max(word_lens[i], _max_word_len(text)) 54 | return column_lengths, word_lens 55 | 56 | @classmethod 57 | def _row_to_lines(cls, values, widths, wrapper, sep='|', justify=JUSTIFY_LEFT): 58 | """ 59 | Returns a table row of wrapped values as a list of lines 60 | """ 61 | if justify not in JUSTIFY_VALUES: 62 | raise ValueError('Value of "justify" must be one of "{}"'.format( 63 | '", "'.join(JUSTIFY_VALUES) 64 | )) 65 | 66 | def just(text_, width_): 67 | if justify == JUSTIFY_LEFT: 68 | return text_.ljust(width_) 69 | elif justify == JUSTIFY_CENTER: 70 | return text_.center(width_) 71 | else: 72 | return text_.rjust(width_) 73 | lpad = sep + ' ' if sep else '' 74 | rpad = ' ' + sep if sep else '' 75 | pad = ' ' + sep + ' ' 76 | cells = [] 77 | for value, width in zip(values, widths): 78 | wrapper.width = width 79 | text = to_str(value) 80 | cell = wrapper.wrap(text) 81 | cells.append(cell) 82 | lines = zip_longest(*cells, fillvalue='') 83 | lines = ( 84 | (just(cell_line, widths[i]) for i, cell_line in enumerate(line)) 85 | for line in lines 86 | ) 87 | lines = [''.join((lpad, pad.join(line), rpad)) for line in lines] 88 | return lines 89 | 90 | @classmethod 91 | def _get_column_widths(cls, dataset, max_table_width=MAX_TABLE_WIDTH, pad_len=3): 92 | """ 93 | Returns a list of column widths proportional to the median length 94 | of the text in their cells. 95 | """ 96 | str_lens, word_lens = cls._get_column_string_lengths(dataset) 97 | median_lens = [int(median(lens)) for lens in str_lens] 98 | total = sum(median_lens) 99 | if total > max_table_width - (pad_len * len(median_lens)): 100 | column_widths = (max_table_width * lens // total for lens in median_lens) 101 | else: 102 | column_widths = (lens for lens in median_lens) 103 | # Allow for separator and padding: 104 | column_widths = (w - pad_len if w > pad_len else w for w in column_widths) 105 | # Rather widen table than break words: 106 | column_widths = [max(w, l) for w, l in zip(column_widths, word_lens)] 107 | return column_widths 108 | 109 | @classmethod 110 | def export_set_as_simple_table(cls, dataset, column_widths=None): 111 | """ 112 | Returns reStructuredText grid table representation of dataset. 113 | """ 114 | lines = [] 115 | wrapper = TextWrapper() 116 | if column_widths is None: 117 | column_widths = cls._get_column_widths(dataset, pad_len=2) 118 | border = ' '.join(['=' * w for w in column_widths]) 119 | 120 | lines.append(border) 121 | if dataset.headers: 122 | lines.extend(cls._row_to_lines( 123 | dataset.headers, 124 | column_widths, 125 | wrapper, 126 | sep='', 127 | justify=JUSTIFY_CENTER, 128 | )) 129 | lines.append(border) 130 | for row in dataset.dict: 131 | values = iter(row.values() if hasattr(row, 'values') else row) 132 | lines.extend(cls._row_to_lines(values, column_widths, wrapper, '')) 133 | lines.append(border) 134 | return '\n'.join(lines) 135 | 136 | @classmethod 137 | def export_set_as_grid_table(cls, dataset, column_widths=None): 138 | """ 139 | Returns reStructuredText grid table representation of dataset. 
140 | 141 | 142 | >>> from tablib import Dataset 143 | >>> from tablib.formats import registry 144 | >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1)) 145 | >>> data = Dataset() 146 | >>> data.headers = ['A', 'B', 'A and B'] 147 | >>> for a, b in bits: 148 | ... data.append([bool(a), bool(b), bool(a * b)]) 149 | >>> rst = registry.get_format('rst') 150 | >>> print(rst.export_set(data, force_grid=True)) 151 | +-------+-------+-------+ 152 | | A | B | A and | 153 | | | | B | 154 | +=======+=======+=======+ 155 | | False | False | False | 156 | +-------+-------+-------+ 157 | | True | False | False | 158 | +-------+-------+-------+ 159 | | False | True | False | 160 | +-------+-------+-------+ 161 | | True | True | True | 162 | +-------+-------+-------+ 163 | 164 | """ 165 | lines = [] 166 | wrapper = TextWrapper() 167 | if column_widths is None: 168 | column_widths = cls._get_column_widths(dataset) 169 | header_sep = '+=' + '=+='.join(['=' * w for w in column_widths]) + '=+' 170 | row_sep = '+-' + '-+-'.join(['-' * w for w in column_widths]) + '-+' 171 | 172 | lines.append(row_sep) 173 | 174 | if dataset.headers: 175 | lines.extend(cls._row_to_lines( 176 | dataset.headers, 177 | column_widths, 178 | wrapper, 179 | justify=JUSTIFY_CENTER, 180 | )) 181 | lines.append(header_sep) 182 | for row in dataset.dict: 183 | values = iter(row.values() if hasattr(row, 'values') else row) 184 | lines.extend(cls._row_to_lines(values, column_widths, wrapper)) 185 | lines.append(row_sep) 186 | return '\n'.join(lines) 187 | 188 | @classmethod 189 | def _use_simple_table(cls, head0, col0, width0): 190 | """ 191 | Use a simple table if the text in the first column is never wrapped 192 | 193 | 194 | >>> from tablib.formats import registry 195 | >>> rst = registry.get_format('rst') 196 | >>> rst._use_simple_table('menu', ['egg', 'bacon'], 10) 197 | True 198 | >>> rst._use_simple_table(None, ['lobster thermidor', 'spam'], 10) 199 | False 200 | 201 | """ 202 | if head0 is not None: 203 | head0 = to_str(head0) 204 | if len(head0) > width0: 205 | return False 206 | for cell in col0: 207 | cell = to_str(cell) 208 | if len(cell) > width0: 209 | return False 210 | return True 211 | 212 | @classmethod 213 | def export_set(cls, dataset, **kwargs): 214 | """ 215 | Returns reStructuredText table representation of dataset. 216 | 217 | Returns a simple table if the text in the first column is never 218 | wrapped, otherwise returns a grid table. 219 | 220 | 221 | >>> from tablib import Dataset 222 | >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1)) 223 | >>> data = Dataset() 224 | >>> data.headers = ['A', 'B', 'A and B'] 225 | >>> for a, b in bits: 226 | ... data.append([bool(a), bool(b), bool(a * b)]) 227 | >>> table = data.rst 228 | >>> table.split('\\n') == [ 229 | ... '===== ===== =====', 230 | ... ' A B A and', 231 | ... ' B ', 232 | ... '===== ===== =====', 233 | ... 'False False False', 234 | ... 'True False False', 235 | ... 'False True False', 236 | ... 'True True True ', 237 | ... '===== ===== =====', 238 | ... 
] 239 | True 240 | 241 | """ 242 | if not dataset.dict: 243 | return '' 244 | force_grid = kwargs.get('force_grid', False) 245 | max_table_width = kwargs.get('max_table_width', cls.MAX_TABLE_WIDTH) 246 | column_widths = cls._get_column_widths(dataset, max_table_width) 247 | 248 | use_simple_table = cls._use_simple_table( 249 | dataset.headers[0] if dataset.headers else None, 250 | dataset.get_col(0), 251 | column_widths[0], 252 | ) 253 | if use_simple_table and not force_grid: 254 | return cls.export_set_as_simple_table(dataset, column_widths) 255 | else: 256 | return cls.export_set_as_grid_table(dataset, column_widths) 257 | 258 | @classmethod 259 | def export_book(cls, databook): 260 | """ 261 | reStructuredText representation of a Databook. 262 | 263 | Tables are separated by a blank line. All tables use the grid 264 | format. 265 | """ 266 | return '\n\n'.join(cls.export_set(dataset, force_grid=True) 267 | for dataset in databook._datasets) 268 | -------------------------------------------------------------------------------- /src/tablib/formats/_sql.py: -------------------------------------------------------------------------------- 1 | """Tablib - SQL INSERT Export Support.""" 2 | import datetime 3 | import decimal 4 | 5 | from ..exceptions import UnsupportedFormat 6 | 7 | 8 | class SQLFormat: 9 | """Export Dataset rows as SQL INSERT statements.""" 10 | title = 'sql' 11 | extensions = ('sql',) 12 | 13 | @staticmethod 14 | def _render_literal(value): 15 | """Render a Python value as an SQL literal.""" 16 | if value is None: 17 | return 'NULL' 18 | if isinstance(value, bool): 19 | return 'TRUE' if value else 'FALSE' 20 | if isinstance(value, (int, decimal.Decimal)): 21 | return str(value) 22 | if isinstance(value, float): 23 | # Represent finite floats; non-finite as NULL 24 | try: 25 | if value == value and value not in (float('inf'), -float('inf')): 26 | return repr(value) 27 | except Exception: 28 | pass 29 | return 'NULL' 30 | if isinstance(value, datetime.datetime): 31 | # ANSI SQL timestamp literal 32 | return f"TIMESTAMP '{value.isoformat(sep=' ')}'" 33 | if isinstance(value, datetime.date): 34 | # ANSI SQL date literal 35 | return f"DATE '{value.isoformat()}'" 36 | # Fallback for strings and others 37 | text = str(value).replace("'", "''") 38 | return f"'{text}'" 39 | 40 | @classmethod 41 | def export_set(cls, dataset, table=None, columns=None, commit=False): 42 | """ 43 | Return SQL INSERT statements for Dataset rows. 
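An illustrative example (doctest-style; this assumes the module is importable as tablib.formats._sql, and the quoting follows _render_literal above):

>>> from tablib import Dataset
>>> from tablib.formats._sql import SQLFormat
>>> data = Dataset(title='people')
>>> data.headers = ['id', 'name']
>>> data.append([1, "O'Brien"])
>>> print(SQLFormat.export_set(data), end='')
INSERT INTO people (id,name) VALUES (1, 'O''Brien');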
44 | :param table: optional table name; defaults to dataset.title or 'export_table' 45 | """ 46 | tbl = table or getattr(dataset, 'title', None) or 'export_table' 47 | tbl_ident = str(tbl) 48 | columns_headers = (','.join( 49 | columns if columns is not None else 50 | dataset.headers if dataset.headers is not None else [] 51 | ) 52 | ) 53 | columns_headers = f' ({columns_headers})' if columns_headers else '' 54 | statements = [] 55 | for row in dataset._data: 56 | values = ', '.join(cls._render_literal(v) for v in row) 57 | statements.append( 58 | f'INSERT INTO {tbl_ident}{columns_headers} VALUES ({values});' 59 | ) 60 | return '\n'.join(statements) + '\n' + ('COMMIT;\n' if commit else '') 61 | 62 | @classmethod 63 | def import_set(cls, dataset, in_stream, **kwargs): 64 | """Importing SQL is not supported.""" 65 | raise UnsupportedFormat('SQL import is not supported.') 66 | 67 | @classmethod 68 | def detect(cls, stream): 69 | """Always return False: no autodetect for SQL.""" 70 | return False 71 | -------------------------------------------------------------------------------- /src/tablib/formats/_tsv.py: -------------------------------------------------------------------------------- 1 | """ Tablib - TSV (Tab Separated Values) Support. 2 | """ 3 | 4 | from ._csv import CSVFormat 5 | 6 | 7 | class TSVFormat(CSVFormat): 8 | title = 'tsv' 9 | extensions = ('tsv',) 10 | 11 | DEFAULT_DELIMITER = '\t' 12 | -------------------------------------------------------------------------------- /src/tablib/formats/_xls.py: -------------------------------------------------------------------------------- 1 | """ Tablib - XLS Support. 2 | """ 3 | import datetime 4 | from io import BytesIO 5 | 6 | import xlrd 7 | import xlwt 8 | from xlrd.xldate import xldate_as_datetime 9 | 10 | import tablib 11 | 12 | # special styles 13 | wrap = xlwt.easyxf("alignment: wrap on") 14 | bold = xlwt.easyxf("font: bold on") 15 | datetime_style = xlwt.easyxf(num_format_str='M/D/YY h:mm') 16 | date_style = xlwt.easyxf(num_format_str='M/D/YY') 17 | time_style = xlwt.easyxf(num_format_str='h:mm:ss') 18 | 19 | 20 | class XLSFormat: 21 | title = 'xls' 22 | extensions = ('xls',) 23 | 24 | @classmethod 25 | def detect(cls, stream): 26 | """Returns True if given stream is a readable excel file.""" 27 | try: 28 | xlrd.open_workbook(file_contents=stream) 29 | return True 30 | except Exception: 31 | pass 32 | try: 33 | xlrd.open_workbook(file_contents=stream.read()) 34 | return True 35 | except Exception: 36 | pass 37 | try: 38 | xlrd.open_workbook(filename=stream) 39 | return True 40 | except Exception: 41 | return False 42 | 43 | @classmethod 44 | def export_set(cls, dataset): 45 | """Returns XLS representation of Dataset.""" 46 | 47 | wb = xlwt.Workbook(encoding='utf8') 48 | ws = wb.add_sheet(dataset.title if dataset.title else 'Tablib Dataset') 49 | 50 | cls.dset_sheet(dataset, ws) 51 | 52 | stream = BytesIO() 53 | wb.save(stream) 54 | return stream.getvalue() 55 | 56 | @classmethod 57 | def export_book(cls, databook): 58 | """Returns XLS representation of DataBook.""" 59 | 60 | wb = xlwt.Workbook(encoding='utf8') 61 | 62 | for i, dset in enumerate(databook._datasets): 63 | ws = wb.add_sheet(dset.title if dset.title else f"Sheet{i}") 64 | 65 | cls.dset_sheet(dset, ws) 66 | 67 | stream = BytesIO() 68 | wb.save(stream) 69 | return stream.getvalue() 70 | 71 | @classmethod 72 | def import_set(cls, dset, in_stream, headers=True, skip_lines=0): 73 | """Populates the given dataset from an XLS stream.""" 74 | 75 | dset.wipe() 76 | 77 | xls_book =
xlrd.open_workbook(file_contents=in_stream.read()) 78 | sheet = xls_book.sheet_by_index(0) 79 | 80 | dset.title = sheet.name 81 | 82 | def cell_value(value, type_): 83 | if type_ == xlrd.XL_CELL_ERROR: 84 | return xlrd.error_text_from_code[value] 85 | elif type_ == xlrd.XL_CELL_DATE: 86 | return xldate_as_datetime(value, xls_book.datemode) 87 | return value 88 | 89 | for i in range(sheet.nrows): 90 | if i < skip_lines: 91 | continue 92 | if i == skip_lines and headers: 93 | dset.headers = sheet.row_values(i) 94 | else: 95 | dset.append([ 96 | cell_value(val, typ) 97 | for val, typ in zip(sheet.row_values(i), sheet.row_types(i)) 98 | ]) 99 | 100 | @classmethod 101 | def import_book(cls, dbook, in_stream, headers=True): 102 | """Returns databook from XLS stream.""" 103 | 104 | dbook.wipe() 105 | 106 | xls_book = xlrd.open_workbook(file_contents=in_stream.read()) 107 | 108 | for sheet in xls_book.sheets(): 109 | data = tablib.Dataset() 110 | data.title = sheet.name 111 | 112 | for i in range(sheet.nrows): 113 | if i == 0 and headers: 114 | data.headers = sheet.row_values(0) 115 | else: 116 | data.append(sheet.row_values(i)) 117 | 118 | dbook.add_sheet(data) 119 | 120 | @classmethod 121 | def dset_sheet(cls, dataset, ws): 122 | """Completes given worksheet from given Dataset.""" 123 | _package = dataset._package(dicts=False) 124 | 125 | for i, sep in enumerate(dataset._separators): 126 | _offset = i 127 | _package.insert((sep[0] + _offset), (sep[1],)) 128 | 129 | for i, row in enumerate(_package): 130 | for j, col in enumerate(row): 131 | 132 | # bold headers 133 | if (i == 0) and dataset.headers: 134 | ws.write(i, j, col, bold) 135 | 136 | # frozen header row 137 | ws.panes_frozen = True 138 | ws.horz_split_pos = 1 139 | 140 | # bold separators 141 | elif len(row) < dataset.width: 142 | ws.write(i, j, col, bold) 143 | 144 | # format date types 145 | elif isinstance(col, datetime.datetime): 146 | ws.write(i, j, col, datetime_style) 147 | elif isinstance(col, datetime.date): 148 | ws.write(i, j, col, date_style) 149 | elif isinstance(col, datetime.time): 150 | ws.write(i, j, col, time_style) 151 | # wrap the rest 152 | else: 153 | try: 154 | if '\n' in col: 155 | ws.write(i, j, col, wrap) 156 | else: 157 | ws.write(i, j, col) 158 | except TypeError: 159 | ws.write(i, j, col) 160 | -------------------------------------------------------------------------------- /src/tablib/formats/_xlsx.py: -------------------------------------------------------------------------------- 1 | """ Tablib - XLSX Support. 2 | """ 3 | import re 4 | from io import BytesIO 5 | 6 | from openpyxl.reader.excel import ExcelReader, load_workbook 7 | from openpyxl.styles import Alignment, Font 8 | from openpyxl.utils import get_column_letter 9 | from openpyxl.workbook import Workbook 10 | 11 | import tablib 12 | 13 | INVALID_TITLE_REGEX = re.compile(r'[\\*?:/\[\]]') 14 | 15 | 16 | def safe_xlsx_sheet_title(s, replace="-"): 17 | return re.sub(INVALID_TITLE_REGEX, replace, s)[:31] 18 | 19 | 20 | class XLSXFormat: 21 | title = 'xlsx' 22 | extensions = ('xlsx',) 23 | 24 | @classmethod 25 | def detect(cls, stream): 26 | """Returns True if given stream is a readable excel file.""" 27 | try: 28 | # No need to fully load the file, it should be enough to be able to 29 | # read the manifest. 
30 | reader = ExcelReader(stream, read_only=False) 31 | reader.read_manifest() 32 | return True 33 | except Exception: 34 | return False 35 | 36 | @classmethod 37 | def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-", 38 | escape=False, column_width="adaptive"): 39 | """Returns XLSX representation of Dataset. 40 | 41 | If ``freeze_panes`` is True, Export will freeze panes only after first line. 42 | 43 | If ``dataset.title`` contains characters which are 44 | considered invalid for an XLSX file sheet name 45 | (https://web.archive.org/web/20230323081941/https://www.excelcodex.com/2012/06/worksheets-naming-conventions/), 46 | they will be replaced with ``invalid_char_subst``. 47 | 48 | If ``escape`` is True, formulae will have the leading '=' character removed. 49 | This is a security measure to prevent formulae from executing by default 50 | in exported XLSX files. 51 | 52 | If ``column_width`` is set to "adaptive", the column width will be set to the maximum 53 | width of the content in each column. If it is set to an integer, the column width will be 54 | set to that integer value. If it is set to None, the column width will be set as the 55 | default openpyxl.Worksheet width value. 56 | 57 | """ 58 | wb = Workbook() 59 | ws = wb.worksheets[0] 60 | 61 | ws.title = ( 62 | safe_xlsx_sheet_title(dataset.title, invalid_char_subst) 63 | if dataset.title else 'Tablib Dataset' 64 | ) 65 | 66 | cls.dset_sheet(dataset, ws, freeze_panes=freeze_panes, escape=escape) 67 | 68 | cls._adapt_column_width(ws, column_width) 69 | 70 | stream = BytesIO() 71 | wb.save(stream) 72 | return stream.getvalue() 73 | 74 | @classmethod 75 | def export_book(cls, databook, freeze_panes=True, invalid_char_subst="-", escape=False): 76 | """Returns XLSX representation of DataBook. 77 | See export_set(). 
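A minimal usage sketch (illustrative only; it assumes two datasets and a writable working directory)::

    import tablib
    book = tablib.Databook((tablib.Dataset(title='Sheet A'), tablib.Dataset(title='Sheet B')))
    with open('book.xlsx', 'wb') as fh:
        fh.write(book.export('xlsx'))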
78 | """ 79 | 80 | wb = Workbook() 81 | for sheet in wb.worksheets: 82 | wb.remove(sheet) 83 | for i, dset in enumerate(databook._datasets): 84 | ws = wb.create_sheet() 85 | ws.title = ( 86 | safe_xlsx_sheet_title(dset.title, invalid_char_subst) 87 | if dset.title else f"Sheet{i}" 88 | ) 89 | 90 | cls.dset_sheet(dset, ws, freeze_panes=freeze_panes, escape=escape) 91 | 92 | stream = BytesIO() 93 | wb.save(stream) 94 | return stream.getvalue() 95 | 96 | @classmethod 97 | def import_sheet(cls, dset, sheet, headers=True, skip_lines=0): 98 | """Populates dataset with sheet.""" 99 | 100 | dset.title = sheet.title 101 | 102 | for i, row in enumerate(sheet.rows): 103 | if i < skip_lines: 104 | continue 105 | row_vals = [c.value for c in row] 106 | if i == skip_lines and headers: 107 | dset.headers = row_vals 108 | else: 109 | if i > skip_lines and len(row_vals) < dset.width: 110 | row_vals += [''] * (dset.width - len(row_vals)) 111 | dset.append(row_vals) 112 | 113 | @classmethod 114 | def import_set(cls, dset, in_stream, headers=True, read_only=True, skip_lines=0): 115 | """Returns databook from XLS stream.""" 116 | 117 | dset.wipe() 118 | 119 | xls_book = load_workbook(in_stream, read_only=read_only, data_only=True) 120 | sheet = xls_book.active 121 | cls.import_sheet(dset, sheet, headers, skip_lines) 122 | 123 | @classmethod 124 | def import_book(cls, dbook, in_stream, headers=True, read_only=True): 125 | """Returns databook from XLS stream.""" 126 | 127 | dbook.wipe() 128 | 129 | xls_book = load_workbook(in_stream, read_only=read_only, data_only=True) 130 | 131 | for sheet in xls_book.worksheets: 132 | dset = tablib.Dataset() 133 | cls.import_sheet(dset, sheet, headers) 134 | dbook.add_sheet(dset) 135 | 136 | @classmethod 137 | def dset_sheet(cls, dataset, ws, freeze_panes=True, escape=False): 138 | """Completes given worksheet from given Dataset.""" 139 | _package = dataset._package(dicts=False) 140 | 141 | for i, sep in enumerate(dataset._separators): 142 | _offset = i 143 | _package.insert((sep[0] + _offset), (sep[1],)) 144 | 145 | bold = Font(bold=True) 146 | wrap_text = Alignment(wrap_text=True) 147 | 148 | for i, row in enumerate(_package): 149 | row_number = i + 1 150 | for j, col in enumerate(row): 151 | col_idx = get_column_letter(j + 1) 152 | cell = ws[f'{col_idx}{row_number}'] 153 | 154 | # bold headers 155 | if (row_number == 1) and dataset.headers: 156 | cell.font = bold 157 | if freeze_panes: 158 | # Export Freeze only after first Line 159 | ws.freeze_panes = 'A2' 160 | 161 | # bold separators 162 | elif len(row) < dataset.width: 163 | cell.font = bold 164 | 165 | # wrap the rest 166 | else: 167 | if '\n' in str(col): 168 | cell.alignment = wrap_text 169 | 170 | try: 171 | cell.value = col 172 | except ValueError: 173 | cell.value = str(col) 174 | 175 | if escape and cell.data_type == 'f' and cell.value.startswith('='): 176 | cell.value = cell.value.replace("=", "") 177 | 178 | @classmethod 179 | def _adapt_column_width(cls, worksheet, width): 180 | if isinstance(width, str) and width != "adaptive": 181 | msg = ( 182 | f"Invalid value for column_width: {width}. " 183 | "Must be 'adaptive' or an integer." 
184 | ) 185 | raise ValueError(msg) 186 | 187 | if width is None: 188 | return 189 | 190 | column_widths = [] 191 | if width == "adaptive": 192 | for row in worksheet.values: 193 | for i, cell in enumerate(row): 194 | cell_width = len(str(cell)) 195 | if len(column_widths) > i: 196 | if cell_width > column_widths[i]: 197 | column_widths[i] = cell_width 198 | else: 199 | column_widths.append(cell_width) 200 | else: 201 | column_widths = [width] * worksheet.max_column 202 | 203 | for i, column_width in enumerate(column_widths, 1): # start at 1 204 | worksheet.column_dimensions[get_column_letter(i)].width = column_width 205 | -------------------------------------------------------------------------------- /src/tablib/formats/_yaml.py: -------------------------------------------------------------------------------- 1 | """ Tablib - YAML Support. 2 | """ 3 | 4 | import yaml 5 | 6 | import tablib 7 | 8 | 9 | class YAMLFormat: 10 | title = 'yaml' 11 | extensions = ('yaml', 'yml') 12 | 13 | @classmethod 14 | def export_set(cls, dataset): 15 | """Returns YAML representation of Dataset.""" 16 | return yaml.safe_dump( 17 | dataset._package(), default_flow_style=None, allow_unicode=True 18 | ) 19 | 20 | @classmethod 21 | def export_book(cls, databook): 22 | """Returns YAML representation of Databook.""" 23 | return yaml.safe_dump( 24 | databook._package(), default_flow_style=None, allow_unicode=True 25 | ) 26 | 27 | @classmethod 28 | def import_set(cls, dset, in_stream): 29 | """Returns dataset from YAML stream.""" 30 | 31 | dset.wipe() 32 | dset.dict = yaml.safe_load(in_stream) 33 | 34 | @classmethod 35 | def import_book(cls, dbook, in_stream): 36 | """Returns databook from YAML stream.""" 37 | 38 | dbook.wipe() 39 | 40 | for sheet in yaml.safe_load(in_stream): 41 | data = tablib.Dataset() 42 | data.title = sheet['title'] 43 | data.dict = sheet['data'] 44 | dbook.add_sheet(data) 45 | 46 | @classmethod 47 | def detect(cls, stream): 48 | """Returns True if given stream is valid YAML.""" 49 | try: 50 | _yaml = yaml.safe_load(stream) 51 | if isinstance(_yaml, (list, tuple, dict)): 52 | return True 53 | else: 54 | return False 55 | except (yaml.parser.ParserError, yaml.reader.ReaderError, 56 | yaml.scanner.ScannerError): 57 | return False 58 | -------------------------------------------------------------------------------- /src/tablib/utils.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO, StringIO 2 | 3 | 4 | def normalize_input(stream): 5 | """ 6 | Accept either a str/bytes stream or a file-like object and always return a 7 | file-like object. 
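A few illustrative round-trips (doctest-style; they mirror the three branches below):

>>> normalize_input('abc').read()
'abc'
>>> normalize_input(b'abc').read()
b'abc'
>>> from io import StringIO
>>> buf = StringIO('xyz')
>>> normalize_input(buf) is buf
True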
8 | """ 9 | if isinstance(stream, str): 10 | return StringIO(stream, newline='') 11 | elif isinstance(stream, bytes): 12 | return BytesIO(stream) 13 | return stream 14 | -------------------------------------------------------------------------------- /tests/files/bad_dimensions.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzband/tablib/8dcb87f69d2f6d501a511210387df233099b2957/tests/files/bad_dimensions.xlsx -------------------------------------------------------------------------------- /tests/files/book.ods: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzband/tablib/8dcb87f69d2f6d501a511210387df233099b2957/tests/files/book.ods -------------------------------------------------------------------------------- /tests/files/dates.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzband/tablib/8dcb87f69d2f6d501a511210387df233099b2957/tests/files/dates.xls -------------------------------------------------------------------------------- /tests/files/errors.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzband/tablib/8dcb87f69d2f6d501a511210387df233099b2957/tests/files/errors.xls -------------------------------------------------------------------------------- /tests/files/founders.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzband/tablib/8dcb87f69d2f6d501a511210387df233099b2957/tests/files/founders.xlsx -------------------------------------------------------------------------------- /tests/files/issue_524.yaml: -------------------------------------------------------------------------------- 1 | title: Voice of Miki Vanoušek 2 | -------------------------------------------------------------------------------- /tests/files/ragged.ods: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzband/tablib/8dcb87f69d2f6d501a511210387df233099b2957/tests/files/ragged.ods -------------------------------------------------------------------------------- /tests/files/ragged.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzband/tablib/8dcb87f69d2f6d501a511210387df233099b2957/tests/files/ragged.xlsx -------------------------------------------------------------------------------- /tests/files/unknown_value_type.ods: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzband/tablib/8dcb87f69d2f6d501a511210387df233099b2957/tests/files/unknown_value_type.ods -------------------------------------------------------------------------------- /tests/files/xlsx_cell_values.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jazzband/tablib/8dcb87f69d2f6d501a511210387df233099b2957/tests/files/xlsx_cell_values.xlsx -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | odfpy 2 | openpyxl>=2.6.0 3 | pytest 4 | pytest-cov 5 | pyyaml 6 | tabulate 7 | xlrd 8 | xlwt 9 | -------------------------------------------------------------------------------- 
/tests/test_tablib_dbfpy_packages_fields.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Tests for tablib._vendor.dbfpy.""" 3 | 4 | import unittest 5 | 6 | from tablib._vendor.dbfpy import fields 7 | 8 | 9 | class DbfFieldDefTestCompareCase(unittest.TestCase): 10 | """dbfpy.fields.DbfFieldDef comparison test cases, via child classes.""" 11 | 12 | def setUp(self) -> None: 13 | self.length = 10 14 | self.a = fields.DbfCharacterFieldDef("abc", self.length) 15 | self.z = fields.DbfCharacterFieldDef("xyz", self.length) 16 | self.a2 = fields.DbfCharacterFieldDef("abc", self.length) 17 | 18 | def test_compare__eq__(self): 19 | # Act / Assert 20 | self.assertEqual(self.a, self.a2) 21 | 22 | def test_compare__ne__(self): 23 | # Act / Assert 24 | self.assertNotEqual(self.a, self.z) 25 | 26 | def test_compare__lt__(self): 27 | # Act / Assert 28 | self.assertLess(self.a, self.z) 29 | 30 | def test_compare__le__(self): 31 | # Act / Assert 32 | self.assertLessEqual(self.a, self.a2) 33 | self.assertLessEqual(self.a, self.z) 34 | 35 | def test_compare__gt__(self): 36 | # Act / Assert 37 | self.assertGreater(self.z, self.a) 38 | 39 | def test_compare__ge__(self): 40 | # Act / Assert 41 | self.assertGreaterEqual(self.a2, self.a) 42 | self.assertGreaterEqual(self.z, self.a) 43 | -------------------------------------------------------------------------------- /tests/test_tablib_dbfpy_packages_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Tests for tablib._vendor.dbfpy.""" 3 | 4 | import datetime as dt 5 | import unittest 6 | 7 | from tablib._vendor.dbfpy import utils 8 | 9 | 10 | class UtilsUnzfillTestCase(unittest.TestCase): 11 | """dbfpy.utils.unzfill test cases.""" 12 | 13 | def test_unzfill_with_nul(self): 14 | # Arrange 15 | text = b"abc\0xyz" 16 | 17 | # Act 18 | output = utils.unzfill(text) 19 | 20 | # Assert 21 | self.assertEqual(output, b"abc") 22 | 23 | def test_unzfill_without_nul(self): 24 | # Arrange 25 | text = b"abcxyz" 26 | 27 | # Act 28 | output = utils.unzfill(text) 29 | 30 | # Assert 31 | self.assertEqual(output, b"abcxyz") 32 | 33 | 34 | class UtilsGetDateTestCase(unittest.TestCase): 35 | """dbfpy.utils.getDate test cases.""" 36 | 37 | def test_getDate_none(self): 38 | # Arrange 39 | value = None 40 | 41 | # Act 42 | output = utils.getDate(value) 43 | 44 | # Assert 45 | self.assertIsInstance(output, dt.date) 46 | 47 | def test_getDate_datetime_date(self): 48 | # Arrange 49 | value = dt.date(2019, 10, 19) 50 | 51 | # Act 52 | output = utils.getDate(value) 53 | 54 | # Assert 55 | self.assertIsInstance(output, dt.date) 56 | self.assertEqual(output, value) 57 | 58 | def test_getDate_datetime_datetime(self): 59 | # Arrange 60 | value = dt.datetime(2019, 10, 19, 12, 00, 00) 61 | 62 | # Act 63 | output = utils.getDate(value) 64 | 65 | # Assert 66 | self.assertIsInstance(output, dt.date) 67 | self.assertEqual(output, value) 68 | 69 | def test_getDate_datetime_timestamp(self): 70 | # Arrange 71 | value = 1571515306 72 | 73 | # Act 74 | output = utils.getDate(value) 75 | 76 | # Assert 77 | self.assertIsInstance(output, dt.date) 78 | self.assertEqual(output, dt.date(2019, 10, 19)) 79 | 80 | def test_getDate_datetime_string_yyyy_mm_dd(self): 81 | # Arrange 82 | value = "20191019" 83 | 84 | # Act 85 | output = utils.getDate(value) 86 | 87 | # Assert 88 | self.assertIsInstance(output, dt.date) 89 | self.assertEqual(output, dt.date(2019, 10, 19)) 90 | 91 | 
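# NOTE: the two-digit year form below is expanded by the vendored dbfpy parser; the assertion documents its current pivot ('19' -> 2019) rather than a guaranteed contract.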
def test_getDate_datetime_string_yymmdd(self): 92 | # Arrange 93 | value = "191019" 94 | 95 | # Act 96 | output = utils.getDate(value) 97 | 98 | # Assert 99 | self.assertIsInstance(output, dt.date) 100 | self.assertEqual(output, dt.date(2019, 10, 19)) 101 | 102 | 103 | class UtilsGetDateTimeTestCase(unittest.TestCase): 104 | """dbfpy.utils.getDateTime test cases.""" 105 | 106 | def test_getDateTime_none(self): 107 | # Arrange 108 | value = None 109 | 110 | # Act 111 | output = utils.getDateTime(value) 112 | 113 | # Assert 114 | self.assertIsInstance(output, dt.datetime) 115 | 116 | def test_getDateTime_datetime_datetime(self): 117 | # Arrange 118 | value = dt.datetime(2019, 10, 19, 12, 00, 00) 119 | 120 | # Act 121 | output = utils.getDateTime(value) 122 | 123 | # Assert 124 | self.assertIsInstance(output, dt.date) 125 | self.assertEqual(output, value) 126 | 127 | def test_getDateTime_datetime_date(self): 128 | # Arrange 129 | value = dt.date(2019, 10, 19) 130 | 131 | # Act 132 | output = utils.getDateTime(value) 133 | 134 | # Assert 135 | self.assertIsInstance(output, dt.date) 136 | self.assertEqual(output, dt.datetime(2019, 10, 19, 00, 00)) 137 | 138 | def test_getDateTime_datetime_timestamp(self): 139 | # Arrange 140 | value = 1571515306 141 | 142 | # Act 143 | output = utils.getDateTime(value) 144 | 145 | # Assert 146 | self.assertIsInstance(output, dt.datetime) 147 | 148 | def test_getDateTime_datetime_string(self): 149 | # Arrange 150 | value = "20191019" 151 | 152 | # Act / Assert 153 | with self.assertRaises(NotImplementedError): 154 | utils.getDateTime(value) 155 | 156 | 157 | class InvalidValueTestCase(unittest.TestCase): 158 | """dbfpy.utils._InvalidValue test cases.""" 159 | 160 | def test_sanity(self): 161 | # Arrange 162 | INVALID_VALUE = utils.INVALID_VALUE 163 | 164 | # Act / Assert 165 | self.assertEqual(INVALID_VALUE, INVALID_VALUE) 166 | self.assertNotEqual(INVALID_VALUE, 123) 167 | self.assertEqual(int(INVALID_VALUE), 0) 168 | self.assertEqual(float(INVALID_VALUE), 0.0) 169 | self.assertEqual(str(INVALID_VALUE), "") 170 | self.assertEqual(repr(INVALID_VALUE), "") 171 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | usedevelop = true 3 | minversion = 2.4 4 | envlist = 5 | docs 6 | lint 7 | py{39,310,311,312,313,314} 8 | 9 | [testenv] 10 | deps = 11 | -rtests/requirements.txt 12 | commands_pre = 13 | - {envpython} -m pip install --only-binary :all: pandas 14 | passenv = 15 | FORCE_COLOR 16 | commands = 17 | pytest {posargs:tests} 18 | 19 | [testenv:docs] 20 | deps = 21 | sphinx 22 | commands = 23 | sphinx-build -b html -d {envtmpdir}/doctrees docs {envtmpdir}/html 24 | 25 | [testenv:lint] 26 | deps = 27 | pre-commit 28 | build 29 | twine 30 | commands = 31 | pre-commit run --all-files 32 | python -m build 33 | twine check dist/* 34 | skip_install = true 35 | --------------------------------------------------------------------------------
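A typical local workflow against the configuration above (illustrative; the chosen environment must match an installed interpreter):

    tox -e lint
    tox -e py312 -- tests -k rst

Arguments after '--' replace the default '{posargs:tests}' handed to pytest, so the second command runs only tests whose names match 'rst'.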