├── .carthorse.yml ├── .circleci └── config.yml ├── .coveragerc ├── .gitignore ├── .readthedocs.yml ├── CHANGELOG.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── docs ├── Makefile ├── acknowledgements.rst ├── api.rst ├── changes.rst ├── conf.py ├── dates.rst ├── development.rst ├── formatting.rst ├── index.rst ├── licenses.rst ├── make.bat ├── on_demand.rst ├── references.rst └── unicode.rst ├── scripts └── runxlrd.py ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── helpers.py ├── samples │ ├── Formate.xls │ ├── biff4_no_format_no_window2.xls │ ├── corrupted_error.xls │ ├── formula_test_names.xls │ ├── formula_test_sjmachin.xls │ ├── issue20.xls │ ├── namesdemo.xls │ ├── picture_in_cell.xls │ ├── profiles.xls │ ├── ragged.xls │ ├── sample.ods │ ├── sample.txt │ ├── sample.xlsb │ ├── sample.xlsx │ ├── sample.zip │ └── xf_class.xls ├── test_biffh.py ├── test_cell.py ├── test_formats.py ├── test_formulas.py ├── test_ignore_workbook_corruption_error.py ├── test_inspect.py ├── test_missing_records.py ├── test_open_workbook.py ├── test_sheet.py ├── test_workbook.py ├── test_xldate.py └── test_xldate_to_datetime.py └── xlrd ├── __init__.py ├── biffh.py ├── book.py ├── compdoc.py ├── formatting.py ├── formula.py ├── info.py ├── sheet.py ├── timemachine.py └── xldate.py /.carthorse.yml: -------------------------------------------------------------------------------- 1 | carthorse: 2 | version-from: setup.py 3 | tag-format: "{version}" 4 | when: 5 | - version-not-tagged 6 | actions: 7 | - run: "sudo pip install -e .[build]" 8 | - run: "twine upload -u __token__ -p $PYPI_TOKEN dist/*" 9 | - create-tag 10 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | orbs: 4 | python: cjw296/python-ci@2.1 5 | 6 | jobs: 7 | coverage: 8 | docker: 9 | - image: circleci/python:3.8 10 | steps: 11 | - checkout 12 | - 
attach_workspace: 13 | at: coverage_output 14 | - run: 15 | name: "Check coverage" 16 | command: | 17 | sudo pip install coverage 18 | coverage combine coverage_output/ 19 | bash <(curl -s https://codecov.io/bash) 20 | 21 | check-package: 22 | parameters: 23 | image: 24 | type: string 25 | docker: 26 | - image: << parameters.image >> 27 | steps: 28 | - python/check-package: 29 | package: "xlrd" 30 | test: 31 | - run: 32 | name: "Check Import" 33 | command: python -c "import xlrd" 34 | - run: 35 | name: "Check no XLS in wheel" 36 | command: "! unzip -l dist/*.whl | egrep '.xlsx?$'" 37 | - run: 38 | name: "Check no XLS in source dist" 39 | command: "! tar tzf dist/*.tar.gz | egrep '.xlsx?$'" 40 | 41 | common: &common 42 | jobs: 43 | 44 | - python/pip-run-tests: 45 | matrix: 46 | parameters: 47 | image: 48 | - circleci/python:2.7 49 | - circleci/python:3.6 50 | - circleci/python:3.9 51 | 52 | - coverage: 53 | name: coverage 54 | requires: 55 | - python/pip-run-tests 56 | 57 | - python/pip-docs: 58 | name: docs 59 | requires: 60 | - coverage 61 | 62 | - python/pip-setuptools-build-package: 63 | name: package 64 | requires: 65 | - docs 66 | filters: 67 | branches: 68 | only: master 69 | 70 | - check-package: 71 | matrix: 72 | parameters: 73 | image: 74 | - circleci/python:2.7 75 | - circleci/python:3.9 76 | requires: 77 | - package 78 | 79 | - python/release: 80 | name: release 81 | config: .carthorse.yml 82 | requires: 83 | - check-package 84 | filters: 85 | branches: 86 | only: master 87 | 88 | workflows: 89 | push: 90 | <<: *common 91 | periodic: 92 | <<: *common 93 | triggers: 94 | - schedule: 95 | cron: "0 0 11 * *" 96 | filters: 97 | branches: 98 | only: master 99 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = xlrd,scripts,tests 3 | 4 | [report] 5 | exclude_lines = 6 | # the original exclude 7 | pragma: no cover 
8 | 9 | # debug stuff 10 | if DEBUG: 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | /dist 3 | *.egg-info 4 | build/ 5 | _build/ 6 | *.pyc 7 | /.coverage 8 | /.tox 9 | /*.xml 10 | /htmlcov 11 | MANIFEST 12 | /bin 13 | .Python 14 | /include 15 | /lib 16 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | python: 3 | version: 3.8 4 | install: 5 | - method: pip 6 | path: . 7 | extra_requirements: 8 | - docs 9 | sphinx: 10 | fail_on_warning: true 11 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | Changes 2 | ======= 3 | 4 | 2.0.1 (11 December 2020) 5 | ------------------------ 6 | 7 | - Use the README as the long description on PyPI. 8 | 9 | 2.0.0 (11 December 2020) 10 | ------------------------ 11 | 12 | - Remove support for anything other than ``.xls`` files. 13 | - Remove support for ``psyco``. 14 | - Change the default encoding used when no ``CODEPAGE`` record can be found 15 | from ``ascii`` to ``iso-8859-1``. 16 | - Add support for iterating over :class:`~xlrd.book.Book` objects. 17 | - Add support for item access from :class:`~xlrd.book.Book` objects, 18 | where integer indices and string sheet names are supported. 19 | - Non-unicode spaces are now stripped from the "last author" information. 20 | - Workbook corruption errors can now be ignored using the 21 | ``ignore_workbook_corruption`` option to :class:`~xlrd.open_workbook`. 22 | - Handle ``WRITEACCESS`` records with invalid trailing characters. 23 | - Officially support Python 3.8 and 3.9. 
24 | 25 | Thanks to the following for their contributions to this release: 26 | 27 | - Jon Dufresne 28 | - Tore Lundqvist 29 | - nayyarv 30 | - Michael Davis 31 | - skonik 32 | 33 | 1.2.0 (15 December 2018) 34 | ------------------------ 35 | 36 | - Added support for Python 3.7. 37 | - Added optional support for defusedxml to help mitigate exploits. 38 | - Automatically convert ``~`` in file paths to the current user's home 39 | directory. 40 | - Removed ``examples`` directory from the installed package. They are still 41 | available in the source distribution. 42 | - Fixed ``time.clock()`` deprecation warning. 43 | 44 | 1.1.0 (22 August 2017) 45 | ---------------------- 46 | 47 | - Fix for parsing of merged cells containing a single cell reference in xlsx 48 | files. 49 | 50 | - Fix for "invalid literal for int() with base 10: 'true'" when reading some 51 | xlsx files. 52 | 53 | - Make xldate_as_datetime available to import direct from xlrd. 54 | 55 | - Build universal wheels. 56 | 57 | - Sphinx documentation. 58 | 59 | - Document the problem with XML vulnerabilities in xlsx files and mitigation 60 | measures. 61 | 62 | - Fix :class:`NameError` on ``has_defaults is not defined``. 63 | 64 | - Some whitespace and code style tweaks. 65 | 66 | - Make example in README compatible with both Python 2 and 3. 67 | 68 | - Add default value for cells containing errors that causeed parsing of some 69 | xlsx files to fail. 70 | 71 | - Add Python 3.6 to the list of supported Python versions, drop 3.3 and 2.6. 72 | 73 | - Use generator expressions to avoid unnecessary lists in memory. 74 | 75 | - Document unicode encoding used in Excel files from Excel 97 onwards. 76 | 77 | - Report hyperlink errors in R1C1 syntax. 
78 | 79 | Thanks to the following for their contributions to this release: 80 | 81 | - icereval@gmail.com 82 | - Daniel Rech 83 | - Ville Skyttä 84 | - Yegor Yefremov 85 | - Maxime Lorant 86 | - Alexandr N Zamaraev 87 | - Zhaorong Ma 88 | - Jon Dufresne 89 | - Chris McIntyre 90 | - coltleese@gmail.com 91 | - Ivan Masá 92 | 93 | 1.0.0 (2 June 2016) 94 | ------------------- 95 | 96 | - Official support, such as it is, is now for 2.6, 2.7, 3.3+ 97 | 98 | - Fixes a bug in looking up non-lowercase sheet filenames by ensuring that the 99 | sheet targets are transformed the same way as the component_names dict keys. 100 | 101 | - Fixes a bug for ``ragged_rows=False`` when merged cells increases the number 102 | of columns in the sheet. This requires all rows to be extended to ensure equal 103 | row lengths that match the number of columns in the sheet. 104 | 105 | - Fixes to enable reading of SAP-generated .xls files. 106 | 107 | - support BIFF4 files with missing FORMAT records. 108 | 109 | - support files with missing WINDOW2 record. 110 | 111 | - Empty cells are now always unicode strings, they were a bytestring on 112 | Python 2 and a unicode string on Python 3. 113 | 114 | - Fix for ```` ``inlineStr`` attribute without ```` child. 115 | 116 | - Fix for a zoom of ``None`` causing problems on Python 3. 117 | 118 | - Fix parsing of bad dimensions. 119 | 120 | - Fix xlsx sheet to comments relationship. 121 | 122 | Thanks to the following for their contributions to this release: 123 | 124 | - Lars-Erik Hannelius 125 | - Deshi Xiao 126 | - Stratos Moro 127 | - Volker Diels-Grabsch 128 | - John McNamara 129 | - Ville Skyttä 130 | - Patrick Fuller 131 | - Dragon Dave McKee 132 | - Gunnlaugur Þór Briem 133 | 134 | 0.9.4 (14 July 2015) 135 | -------------------- 136 | 137 | - Automated tests are now run on Python 3.4 138 | 139 | - Use ``ElementTree.iter()`` if available, instead of the deprecated 140 | ``getiterator()`` when parsing xlsx files. 
141 | 142 | - Fix #106 : Exception Value: unorderable types: Name() < Name() 143 | 144 | - Create row generator expression with Sheet.get_rows() 145 | 146 | - Fix for forward slash file separator and lowercase names within xlsx 147 | internals. 148 | 149 | Thanks to the following for their contributions to this release: 150 | 151 | - Corey Farwell 152 | - Jonathan Kamens 153 | - Deepak N 154 | - Brandon R. Stoner 155 | - John McNamara 156 | 157 | 0.9.3 (8 Apr 2014) 158 | ------------------ 159 | 160 | - Github issue #49 161 | 162 | - Github issue #64 - skip meaningless chunk of 4 zero bytes between two 163 | otherwise-valid BIFF records 164 | 165 | - Github issue #61 - fix updating of escapement attribute of Font objects read 166 | from workbooks. 167 | 168 | - Implemented ``Sheet.visibility`` for xlsx files 169 | 170 | - Ignore anchors (``$``) in cell references 171 | 172 | - Dropped support for Python 2.5 and earlier, Python 2.6 is now the earliest 173 | Python release supported 174 | 175 | - Read xlsx merged cell elements. 176 | 177 | - Read cell comments in .xlsx files. 178 | 179 | - Added xldate_as_datetime() function to convert from Excel 180 | serial date/time to datetime.datetime object. 181 | 182 | Thanks to the following for their contributions to this release: 183 | 184 | - John Machin 185 | - Caleb Epstein 186 | - Martin Panter 187 | - John McNamara 188 | - Gunnlaugur Þór Briem 189 | - Stephen Lewis 190 | 191 | 192 | 0.9.2 (9 Apr 2013) 193 | ------------------ 194 | 195 | - Fix some packaging issues that meant docs and examples were missing from the tarball. 196 | 197 | - Fixed a small but serious regression that caused problems opening .xlsx files. 198 | 199 | 0.9.1 (5 Apr 2013) 200 | ------------------ 201 | 202 | - Many fixes bugs in Python 3 support. 203 | - Fix bug where ragged rows needed fixing when formatting info was being parsed. 204 | - Improved handling of aberrant Excel 4.0 Worksheet files. 205 | - Various bug fixes. 
206 | - Simplify a lot of the distribution packaging. 207 | - Remove unused and duplicate imports. 208 | 209 | Thanks to the following for their contributions to this release: 210 | 211 | - Thomas Kluyver 212 | 213 | 0.9.0 (31 Jan 2013) 214 | ------------------- 215 | 216 | - Support for Python 3.2+ 217 | - Many new unit test added. 218 | - Continuous integration tests are now run. 219 | - Various bug fixes. 220 | 221 | Special thanks to Thomas Kluyver and Martin Panter for their work on 222 | Python 3 compatibility. 223 | 224 | Thanks to Manfred Moitzi for re-licensing his unit tests so we could include 225 | them. 226 | 227 | Thanks to the following for their contributions to this release: 228 | 229 | - "holm" 230 | - Victor Safronovich 231 | - Ross Jones 232 | 233 | 0.8.0 (22 Aug 2012) 234 | ------------------- 235 | 236 | - More work-arounds for broken source files. 237 | - Support for reading .xlsx files. 238 | - Drop support for Python 2.5 and older. 239 | 240 | 0.7.8 (7 June 2012) 241 | ------------------- 242 | 243 | - Ignore superfluous zero bytes at end of xls OBJECT record. 244 | - Fix assertion error when reading file with xlwt-written bitmap. 245 | 246 | 0.7.7 (13 Apr 2012) 247 | ------------------- 248 | 249 | - More packaging changes, this time to support 2to3. 250 | 251 | 0.7.6 (3 Apr 2012) 252 | ------------------ 253 | 254 | - Fix more packaging issues. 255 | 256 | 0.7.5 (3 Apr 2012) 257 | ------------------ 258 | - Fix packaging issue that missed ``version.txt`` from the distributions. 259 | 260 | 0.7.4 (2 Apr 2012) 261 | ------------------ 262 | 263 | - More tolerance of out-of-spec files. 264 | - Fix bugs reading long text formula results. 265 | 266 | 0.7.3 (28 Feb 2012) 267 | ------------------- 268 | 269 | - Packaging and documentation updates. 270 | 271 | 0.7.2 (21 Feb 2012) 272 | ------------------- 273 | 274 | - Tolerant handling of files with extra zero bytes at end of NUMBER record. 275 | Sample provided by Jan Kraus. 
276 | - Added access to cell notes/comments. Many cross-references added to Sheet 277 | class docs. 278 | - Added code to extract hyperlink (HLINK) records. Based on a patch supplied by 279 | John Morrisey. 280 | - Extraction of rich text formatting info based on code supplied by 281 | Nathan van Gheem. 282 | - added handling of BIFF2 WINDOW2 record. 283 | - Included modified version of page breaks patch from Sam Listopad. 284 | - Added reading of the PANE record. 285 | - Reading SCL record. New attribute ``Sheet.scl_mag_factor``. 286 | - Lots of bug fixes. 287 | - Added ``ragged_rows`` functionality. 288 | 289 | 0.7.1 (31 May 2009) 290 | ------------------- 291 | 292 | - Backed out "slash'n'burn" of sheet resources in unload_sheet(). 293 | Fixed problem with STYLE records on some Mac Excel files. 294 | - quieten warnings 295 | - Integrated on_demand patch by Armando Serrano Lombillo 296 | 297 | 0.7.0 (11 March 2009) 298 | --------------------- 299 | 300 | + colname utility function now supports more than 256 columns. 301 | + Fix bug where BIFF record type 0x806 was being regarded as a formula 302 | opcode. 303 | + Ignore PALETTE record when formatting_info is false. 304 | + Tolerate up to 4 bytes trailing junk on PALETTE record. 305 | + Fixed bug in unused utility function xldate_from_date_tuple which 306 | affected some years after 2099. 307 | + Added code for inspecting as-yet-unused record types: FILEPASS, TXO, 308 | NOTE. 309 | + Added inspection code for add_in function calls. 310 | + Added support for unnumbered biff_dump (better for doing diffs). 311 | + ignore distutils cruft 312 | + Avoid assertion error in compdoc when -1 used instead of -2 for 313 | first_SID of empty SCSS 314 | + Make version numbers match up. 315 | + Enhanced recovery from out-of-order/missing/wrong CODEPAGE record. 316 | + Added Name.area2d convenience method. 317 | + Avoided some checking of XF info when formatting_info is false. 
318 | + Minor changes in preparation for XLSX support. 319 | + remove duplicate files that were out of date. 320 | + Basic support for Excel 2.0 321 | + Decouple Book init & load. 322 | + runxlrd: minor fix for xfc. 323 | + More Excel 2.x work. 324 | + is_date_format() tweak. 325 | + Better detection of IronPython. 326 | + Better error message (including first 8 bytes of file) when file is 327 | not in a supported format. 328 | + More BIFF2 formatting: ROW, COLWIDTH, and COLUMNDEFAULT records; 329 | + finished stage 1 of XF records. 330 | + More work on supporting BIFF2 (Excel 2.x) files. 331 | + Added support for Excel 2.x (BIFF2) files. Data only, no formatting 332 | info. Alpha. 333 | + Wasn't coping with EXTERNSHEET record followed by CONTINUE 334 | record(s). 335 | + Allow for BIFF2/3-style FORMAT record in BIFF4/8 file 336 | + Avoid crash when zero-length Unicode string missing options byte. 337 | + Warning message if sector sizes are extremely large. 338 | + Work around corrupt STYLE record 339 | + Added missing entry for blank cell type to ctype_text 340 | + Added "fonts" command to runxlrd script 341 | + Warning: style XF whose parent XF index != 0xFFF 342 | + Logfile arg wasn't being passed from open_workbook to 343 | compdoc.CompDoc. 344 | 345 | 346 | 0.6.1 (10 June 2007) 347 | --------------------- 348 | 349 | + Version number updated to 0.6.1 350 | + Documented runxlrd.py commands in its usage message. Changed 351 | commands: dump to biff_dump, count_records to biff_count. 352 | 353 | 354 | 0.6.1a5 355 | ------- 356 | 357 | + Bug fixed: Missing "<" in a struct.unpack call means can't open 358 | files on bigendian platforms. Discovered by "Mihalis". 359 | + Removed antique undocumented Book.get_name_dict method and 360 | experimental "trimming" facility. 361 | + Meaningful exception instead of IndexError if a SAT (sector 362 | allocation table) is corrupted. 
363 | + If no CODEPAGE record in pre-8.0 file, assume ascii and keep going 364 | (instead of raising exception). 365 | 366 | 367 | 0.6.1a4 368 | ------- 369 | 370 | + At least one source of XLS files writes parent style XF records 371 | *after* the child cell XF records that refer to them, triggering 372 | IndexError in 0.5.2 and AssertionError in later versions. Reported 373 | with sample file by Todd O'Bryan. Fixed by changing to two-pass 374 | processing of XF records. 375 | + Formatting info in pre-BIFF8 files: Ensured appropriate defaults and 376 | lossless conversions to make the info BIFF8-compatible. Fixed bug in 377 | extracting the "used" flags. 378 | + Fixed problems discovered with opening test files from Planmaker 379 | 2006 (http://www.softmaker.com/english/ofwcomp_en.htm): (1) Four files 380 | have reduced size of PALETTE record (51 and 32 colours; Excel writes 381 | 56 always). xlrd now emits a NOTE to the logfile and continues. (2) 382 | FORMULA records use the Excel 2.x record code 0x0021 instead of 383 | 0x0221. xlrd now continues silently. (3) In two files, at the OLE2 384 | compound document level, the internal directory says that the length 385 | of the Short-Stream Container Stream is 16384 bytes, but the actual 386 | contents are 11264 and 9728 bytes respectively. xlrd now emits a 387 | WARNING to the logfile and continues. 388 | + After discussion with Daniel Rentz, the concept of two lists of XF 389 | (eXtended Format) objects (raw_xf_list and computed_xf_list) has been 390 | abandoned. There is now a single list, called xf_list 391 | 392 | 393 | 0.6.1a3 394 | ------- 395 | 396 | + Added Book.sheets ... for sheetx, sheet in enumerate(book.sheets): 397 | + Formatting info: extraction of sheet-level flags from WINDOW2 398 | record, and sheet.visibility from BOUNDSHEET record. Added Macintosh- 399 | only Font attributes "outline" and "shadow'. 400 | 401 | 402 | 0.6.1a2 403 | ------- 404 | 405 | + Added extraction of merged cells info. 
406 | + pyExcelerator uses "general" instead of "General" for the generic 407 | "number format". Worked around. 408 | + Crystal Reports writes "WORKBOOK" in the OLE2 Compound Document 409 | directory instead of "Workbook". Changed to case-insensitive directory 410 | search. Reported by Vic Simkus. 411 | 412 | 413 | 0.6.1a1 (18 Dec 2006) 414 | --------------------- 415 | 416 | + Added formatting information for cells (font, "number format", 417 | background, border, alignment and protection) and rows/columns 418 | (height/width etc). To save memory and time for those who don't need 419 | it, this information is extracted only if formatting_info=1 is 420 | supplied to the open_workbook() function. The cell records BLANK and 421 | MULBLANKS which contain no data, only formatting information, will 422 | continue to be ignored in the default (no formatting info) case. 423 | + Ralph Heimburger reported a problem with xlrd being intolerant about 424 | an Excel 4.0 file (created by "some web app") with a DIMENSIONS record 425 | that omitted Microsoft's usual padding with 2 unused bytes. Fixed. 426 | 427 | 428 | 0.6.0a4 (not released) 429 | ---------------------- 430 | 431 | + Added extraction of human-readable formulas from NAME records. 432 | + Worked around OOo Calc writing 9-byte BOOLERR records instead of 8. 433 | Reported by Rory Campbell-Lange. 434 | + This history file converted to descending chronological order and 435 | HTML format. 436 | 437 | 438 | 0.6.0a3 (19 Sept 2006) 439 | ---------------------- 440 | 441 | + Names: minor bugfixes; added script xlrdnameAPIdemo.py 442 | + ROW records were being used as additional hints for sizing memory 443 | requirements. In some files the ROW records overstate the number of 444 | used columns, and/or there are ROW records for rows that have no data 445 | in them. This would cause xlrd to report sheet.ncols and/or 446 | sheet.nrows as larger than reasonably expected. Change: ROW records 447 | are ignored. 
The number of columns/rows is based solely on the highest 448 | column/row index seen in non-empty data records. Empty data records 449 | (types BLANK and MULBLANKS) which contain no data, only formatting 450 | information, have always been ignored, and this will continue. 451 | Consequence: trailing rows and columns which contain only empty cells 452 | will vanish. 453 | 454 | 455 | 0.6.0a2 (13 Sept 2006) 456 | ---------------------- 457 | 458 | 459 | + Fixed a bug reported by Rory Campbell-Lange.: "open failed"; 460 | incorrect assumptions about the layout of array formulas which return 461 | strings. 462 | + Further work on defined names, especially the API. 463 | 464 | 465 | 0.6.0a1 (8 Sept 2006) 466 | --------------------- 467 | 468 | + Sheet objects have two new convenience methods: col_values(colx, 469 | start_rowx=0, end_rowx=None) and the corresponding col_types. 470 | Suggested by Dennis O'Brien. 471 | + BIFF 8 file missing its CODEPAGE record: xlrd will now assume 472 | utf_16_le encoding (the only possibility) and keep going. 473 | + Older files missing a CODEPAGE record: an exception will be raised. 474 | Thanks to Sergey Krushinsky for a sample file. The open_workbook() 475 | function has a new argument (encoding_override) which can be used if 476 | the CODEPAGE record is missing or incorrect (for example, 477 | codepage=1251 but the data is actually encoded in koi8_r). The 478 | runxlrd.py script takes a corresponding -e argument, for example -e 479 | cp1251 480 | + Further work done on parsing "number formats". Thanks to Chris 481 | Withers for the ``"General_)"`` example. 482 | + Excel 97 introduced the concept of row and column labels, defined by 483 | Insert > Name > Labels. The ranges containing the labels are now 484 | exposed as the Sheet attributes row_label_ranges and col_label_ranges. 
485 | + The major effort in this 0.6.0 release has been the provision of 486 | access to named cell ranges and named constants (Excel: 487 | Insert/Name/Define). Juan C. Mendez provided very useful real-world 488 | sample files. 489 | 490 | 491 | 0.5.3a1 (24 May 2006) 492 | --------------------- 493 | 494 | + John Popplewell and Richard Sharp provided sample files which caused 495 | any reliance at all on DIMENSIONS records and ROW records to be 496 | abandoned. 497 | + If the file size is not a whole number of OLE sectors, a warning 498 | message is logged. Previously this caused an exception to be raised. 499 | 500 | 501 | 0.5.2 (14 March 2006) 502 | --------------------- 503 | 504 | + public release 505 | + Updated version numbers, README, HISTORY. 506 | 507 | 508 | 0.5.2a3 (13 March 2006) 509 | ----------------------- 510 | 511 | + Gnumeric writes user-defined formats with format codes starting at 512 | 50 instead of 164; worked around. 513 | + Thanks to Didrik Pinte for reporting the need for xlrd to be more 514 | tolerant of the idiosyncracies of other software, for supplying sample 515 | files, and for performing alpha testing. 516 | + '_' character in a format should be treated like an escape 517 | character; fixed. 518 | + An "empty" formula result means a zero-length string, not an empty 519 | cell! Fixed. 520 | 521 | 522 | 0.5.2a2 (9 March 2006) 523 | ---------------------- 524 | 525 | + Found that Gnumeric writes all DIMENSIONS records with nrows and 526 | ncols each 1 less than they should be (except when it clamps ncols at 527 | 256!), and pyXLwriter doesn't write ROW records. Cell memory pre- 528 | allocation was generalised to use ROW records if available with fall- 529 | back to DIMENSIONS records. 
530 | 531 | 532 | 0.5.2a1 (6 March 2006) 533 | ---------------------- 534 | 535 | 536 | + pyXLwriter writes DIMENSIONS record with antique opcode 0x0000 537 | instead of 0x0200; worked around 538 | + A file written by Gnumeric had zeroes in DIMENSIONS record but data 539 | in cell A1; worked around 540 | 541 | 542 | 0.5.1 (18 Feb 2006) 543 | -------------------- 544 | 545 | + released to Journyx 546 | + Python 2.1 mmap requires file to be opened for update access. Added 547 | fall-back to read-only access without mmap if 2.1 open fails because 548 | "permission denied". 549 | 550 | 551 | 0.5 (7 Feb 2006) 552 | ---------------- 553 | 554 | + released to Journyx 555 | + Now works with Python 2.1. Backporting to Python 2.1 was partially 556 | funded by Journyx - provider of timesheet and project accounting 557 | solutions (http://journyx.com/) 558 | + open_workbook() can be given the contents of a file instead of its 559 | name. Thanks to Remco Boerma for the suggestion. 560 | + New module attribute __VERSION__ (as a string; for example "0.5") 561 | + Minor enhancements to classification of formats as date or not-date. 562 | + Added warnings about files with inconsistent OLE compound document 563 | structures. Thanks to Roman V. Kiseliov (author of pyExcelerator) for 564 | the tip-off. 565 | 566 | 567 | 0.4a1, (7 Sept 2005) 568 | -------------------- 569 | 570 | + released to Laurent T. 571 | + Book and sheet objects can now be pickled and unpickled. Instead of 572 | reading a large spreadsheet multiple times, consider pickling it once 573 | and loading the saved pickle; can be much faster. Thanks to Laurent 574 | Thioudellet for the enhancement request. 575 | + Using the mmap module can be turned off. But you would only do that 576 | for benchmarking purposes. 
577 | + Handling NUMBER records has been made faster 578 | 579 | 580 | 0.3a1 (15 May 2005) 581 | ------------------- 582 | 583 | - first public release 584 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | There are two licenses associated with xlrd. This one relates to the bulk of 2 | the work done on the library:: 3 | 4 | Portions copyright © 2005-2009, Stephen John Machin, Lingfo Pty Ltd 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | 1. Redistributions of source code must retain the above copyright notice, 11 | this list of conditions and the following disclaimer. 12 | 13 | 2. Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | 3. None of the names of Stephen John Machin, Lingfo Pty Ltd and any 18 | contributors may be used to endorse or promote products derived from this 19 | software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 23 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 | PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS 25 | BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 31 | THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | This one covers some earlier work:: 34 | 35 | /*- 36 | * Copyright (c) 2001 David Giffin. 37 | * All rights reserved. 38 | * 39 | * Based on the the Java version: Andrew Khan Copyright (c) 2000. 40 | * 41 | * 42 | * Redistribution and use in source and binary forms, with or without 43 | * modification, are permitted provided that the following conditions 44 | * are met: 45 | * 46 | * 1. Redistributions of source code must retain the above copyright 47 | * notice, this list of conditions and the following disclaimer. 48 | * 49 | * 2. Redistributions in binary form must reproduce the above copyright 50 | * notice, this list of conditions and the following disclaimer in 51 | * the documentation and/or other materials provided with the 52 | * distribution. 53 | * 54 | * 3. All advertising materials mentioning features or use of this 55 | * software must display the following acknowledgment: 56 | * "This product includes software developed by 57 | * David Giffin ." 58 | * 59 | * 4. Redistributions of any form whatsoever must retain the following 60 | * acknowledgment: 61 | * "This product includes software developed by 62 | * David Giffin ." 63 | * 64 | * THIS SOFTWARE IS PROVIDED BY DAVID GIFFIN ``AS IS'' AND ANY 65 | * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 66 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 67 | * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL DAVID GIFFIN OR 68 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 69 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 70 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 71 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 72 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 73 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 74 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 75 | * OF THE POSSIBILITY OF SUCH DAMAGE. 76 | */ 77 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include CHANGELOG.rst 2 | include LICENSE 3 | include README.md 4 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | xlrd 2 | ==== 3 | 4 | |Build Status|_ |Coverage Status|_ |Documentation|_ |PyPI version|_ 5 | 6 | .. |Build Status| image:: https://circleci.com/gh/python-excel/xlrd/tree/master.svg?style=shield 7 | .. _Build Status: https://circleci.com/gh/python-excel/xlrd/tree/master 8 | 9 | .. |Coverage Status| image:: https://codecov.io/gh/python-excel/xlrd/branch/master/graph/badge.svg?token=lNSqwBBbvk 10 | .. _Coverage Status: https://codecov.io/gh/python-excel/xlrd 11 | 12 | .. |Documentation| image:: https://readthedocs.org/projects/xlrd/badge/?version=latest 13 | .. _Documentation: http://xlrd.readthedocs.io/en/latest/?badge=latest 14 | 15 | .. |PyPI version| image:: https://badge.fury.io/py/xlrd.svg 16 | .. _PyPI version: https://badge.fury.io/py/xlrd 17 | 18 | 19 | xlrd is a library for reading data and formatting information from Excel 20 | files in the historical ``.xls`` format. 21 | 22 | .. warning:: 23 | 24 | This library will no longer read anything other than ``.xls`` files. 
For 25 | alternatives that read newer file formats, please see http://www.python-excel.org/. 26 | 27 | The following are also not supported but will safely and reliably be ignored: 28 | 29 | * Charts, Macros, Pictures, any other embedded object, **including** embedded worksheets. 30 | * VBA modules 31 | * Formulas, but results of formula calculations are extracted. 32 | * Comments 33 | * Hyperlinks 34 | * Autofilters, advanced filters, pivot tables, conditional formatting, data validation 35 | 36 | Password-protected files are not supported and cannot be read by this library. 37 | 38 | Quick start: 39 | 40 | .. code-block:: bash 41 | 42 | pip install xlrd 43 | 44 | .. code-block:: python 45 | 46 | import xlrd 47 | book = xlrd.open_workbook("myfile.xls") 48 | print("The number of worksheets is {0}".format(book.nsheets)) 49 | print("Worksheet name(s): {0}".format(book.sheet_names())) 50 | sh = book.sheet_by_index(0) 51 | print("{0} {1} {2}".format(sh.name, sh.nrows, sh.ncols)) 52 | print("Cell D30 is {0}".format(sh.cell_value(rowx=29, colx=3))) 53 | for rx in range(sh.nrows): 54 | print(sh.row(rx)) 55 | 56 | From the command line, this will show the first, second and last rows of each sheet in each file: 57 | 58 | .. code-block:: bash 59 | 60 | python PYDIR/scripts/runxlrd.py 3rows *blah*.xls 61 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make <target>' where <target> is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. 
The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | epub: 76 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 77 | @echo 78 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 79 | 80 | latex: 81 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 82 | @echo 83 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 84 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 85 | "(use \`make latexpdf' here to do that automatically)." 86 | 87 | latexpdf: 88 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 89 | @echo "Running LaTeX files through pdflatex..." 90 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 91 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 92 | 93 | text: 94 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 95 | @echo 96 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 97 | 98 | man: 99 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 100 | @echo 101 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 102 | 103 | texinfo: 104 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 105 | @echo 106 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 107 | @echo "Run \`make' in that directory to run these through makeinfo" \ 108 | "(use \`make info' here to do that automatically)." 
109 | 110 | info: 111 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 112 | @echo "Running Texinfo files through makeinfo..." 113 | make -C $(BUILDDIR)/texinfo info 114 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 115 | 116 | gettext: 117 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 118 | @echo 119 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 120 | 121 | changes: 122 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 123 | @echo 124 | @echo "The overview file is in $(BUILDDIR)/changes." 125 | 126 | linkcheck: 127 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 128 | @echo 129 | @echo "Link check complete; look for any errors in the above output " \ 130 | "or in $(BUILDDIR)/linkcheck/output.txt." 131 | 132 | doctest: 133 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 134 | @echo "Testing of doctests in the sources finished, look at the " \ 135 | "results in $(BUILDDIR)/doctest/output.txt." 136 | -------------------------------------------------------------------------------- /docs/acknowledgements.rst: -------------------------------------------------------------------------------- 1 | Acknowledgements 2 | ================ 3 | 4 | Many thanks to John Machin for originally writing :mod:`xlrd` and tirelessly 5 | supporting it for many years before retiring. 6 | 7 | * This package started life as a translation from C into Python of parts of a utility called "xlreader" developed by David Giffin. "This product includes software developed by David Giffin ." 8 | * OpenOffice.org has truly excellent documentation of the Microsoft Excel file formats and Compound Document file format, authored by Daniel Rentz. See http://sc.openoffice.org 9 | * U+5F20 U+654F: over a decade of inspiration, support, and interesting decoding opportunities. 
10 | * Ksenia Marasanova: sample Macintosh and non-Latin1 files, alpha testing 11 | * Backporting to Python 2.1 was partially funded by Journyx - provider of timesheet and project accounting solutions (http://journyx.com/). 12 | * Provision of formatting information in version 0.6.1 was funded by `Simplistix Ltd`__. 13 | 14 | __ http://www.simplistix.co.uk 15 | 16 | Development of this package would not have been possible without the document 17 | "OpenOffice.org's Documentation of the Microsoft Excel File Format" 18 | ("OOo docs" for short). 19 | The latest version is available from OpenOffice.org in 20 | `PDF format`__ and `ODT format`__. 21 | Small portions of the OOo docs are reproduced in this 22 | document. A study of the OOo docs is recommended for those who wish a 23 | deeper understanding of the Excel file layout than the xlrd docs can provide. 24 | 25 | __ http://sc.openoffice.org/excelfileformat.pdf 26 | 27 | __ http://sc.openoffice.org/excelfileformat.odt 28 | 29 | Backporting to Python 2.1 was partially funded by 30 | `Journyx - provider of timesheet and project accounting solutions`__. 31 | 32 | __ http://journyx.com/ 33 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | xlrd 5 | ---- 6 | 7 | .. automodule:: xlrd 8 | :members: 9 | 10 | xlrd.biffh 11 | ---------- 12 | 13 | .. automodule:: xlrd.biffh 14 | :members: 15 | 16 | xlrd.book 17 | --------- 18 | 19 | .. automodule:: xlrd.book 20 | :members: 21 | 22 | xlrd.compdoc 23 | ------------ 24 | 25 | .. automodule:: xlrd.compdoc 26 | :members: 27 | 28 | xlrd.formatting 29 | --------------- 30 | 31 | .. automodule:: xlrd.formatting 32 | :members: 33 | 34 | xlrd.formula 35 | ------------- 36 | 37 | .. automodule:: xlrd.formula 38 | :members: 39 | 40 | xlrd.sheet 41 | ---------- 42 | 43 | .. currentmodule:: xlrd.sheet 44 | 45 | .. 
autoclass:: xlrd.sheet.Sheet 46 | :members: 47 | :exclude-members: gcw, col 48 | 49 | .. method:: col(colx) 50 | 51 | Returns a sequence of the :class:`Cell` objects in the given column. 52 | 53 | .. autoattribute:: xlrd.sheet.Sheet.gcw 54 | :annotation: 55 | 56 | 57 | .. automodule:: xlrd.sheet 58 | :members: 59 | :exclude-members: Sheet 60 | 61 | xlrd.xldate 62 | ----------- 63 | 64 | .. currentmodule:: xlrd.xldate 65 | 66 | .. automodule:: xlrd.xldate 67 | :members: 68 | -------------------------------------------------------------------------------- /docs/changes.rst: -------------------------------------------------------------------------------- 1 | 2 | .. currentmodule:: xlrd 3 | 4 | .. include:: ../CHANGELOG.rst 5 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | from xlrd.info import __VERSION__ 5 | 6 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 7 | 8 | intersphinx_mapping = {'http://docs.python.org': None} 9 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx'] 10 | source_suffix = '.rst' 11 | master_doc = 'index' 12 | project = u'xlrd' 13 | copyright = ( 14 | '2005-2019 Stephen John Machin, Lingfo Pty Ltd. 
' 15 | '2019-%s Chris Withers' 16 | ) % datetime.datetime.now().year 17 | version = release = __VERSION__ 18 | exclude_patterns = ['_build'] 19 | pygments_style = 'sphinx' 20 | 21 | if on_rtd: 22 | html_theme = 'default' 23 | else: 24 | html_theme = 'classic' 25 | 26 | htmlhelp_basename = project+'doc' 27 | intersphinx_mapping = {'python': ('http://docs.python.org', None)} 28 | 29 | autodoc_member_order = 'bysource' 30 | -------------------------------------------------------------------------------- /docs/dates.rst: -------------------------------------------------------------------------------- 1 | Dates in Excel spreadsheets 2 | =========================== 3 | 4 | .. currentmodule:: xlrd.xldate 5 | 6 | In reality, there are no such things. What you have are floating point 7 | numbers and pious hope. 8 | There are several problems with Excel dates: 9 | 10 | 1. Dates are not stored as a separate data type; they are stored as 11 | floating point numbers and you have to rely on: 12 | 13 | - the "number format" applied to them in Excel and/or 14 | - knowing which cells are supposed to have dates in them. 15 | 16 | This module helps with the former by inspecting the 17 | format that has been applied to each number cell; 18 | if it appears to be a date format, the cell 19 | is classified as a date rather than a number. 20 | 21 | Feedback on this feature, especially from non-English-speaking locales, 22 | would be appreciated. 23 | 24 | 2. Excel for Windows stores dates by default as the number of 25 | days (or fraction thereof) since ``1899-12-31T00:00:00``. Excel for 26 | Macintosh uses a default start date of ``1904-01-01T00:00:00``. 27 | 28 | The date system can be changed in Excel on a per-workbook basis (for example: 29 | Tools -> Options -> Calculation, tick the "1904 date system" box). 30 | This is of course a bad idea if there are already dates in the 31 | workbook. There is no good reason to change it even if there are no 32 | dates in the workbook. 
33 | 34 | Which date system is in use is recorded in the 35 | workbook. A workbook transported from Windows to Macintosh (or vice 36 | versa) will work correctly with the host Excel. 37 | 38 | When using this package's :func:`xldate_as_tuple` function to convert numbers 39 | from a workbook, you must use the :attr:`~xlrd.Book.datemode` attribute of 40 | the :class:`~xlrd.Book` object. If you guess, or make a judgement depending 41 | on where you believe the workbook was created, you run the risk of being 1462 42 | days out of kilter. 43 | 44 | Reference: 45 | https://support.microsoft.com/en-us/help/180162/xl-the-1900-date-system-vs.-the-1904-date-system 46 | 47 | 48 | 3. The Excel implementation of the Windows-default 1900-based date system 49 | works on the incorrect premise that 1900 was a leap year. It interprets the 50 | number 60 as meaning ``1900-02-29``, which is not a valid date. 51 | 52 | Consequently, any number less than 61 is ambiguous. For example, is 59 the 53 | result of ``1900-02-28`` entered directly, or is it ``1900-03-01`` minus 2 54 | days? 55 | 56 | The OpenOffice.org Calc program "corrects" the Microsoft problem; 57 | entering ``1900-02-27`` causes the number 59 to be stored. 58 | Save as an XLS file, then open the file with Excel and you'll see 59 | ``1900-02-28`` displayed. 60 | 61 | Reference: https://support.microsoft.com/en-us/help/214326/excel-incorrectly-assumes-that-the-year-1900-is-a-leap-year 62 | 63 | 4. The Macintosh-default 1904-based date system counts ``1904-01-02`` as day 1 64 | and ``1904-01-01`` as day zero. Thus any number such that 65 | ``(0.0 <= number < 1.0)`` is ambiguous. Is 0.625 a time of day 66 | (``15:00:00``), independent of the calendar, or should it be interpreted as 67 | an instant on a particular day (``1904-01-01T15:00:00``)? 
68 | 69 | The functions in :mod:`~xlrd.xldate` take the view that such a number is a 70 | calendar-independent time of day (like Python's :class:`datetime.time` type) 71 | for both date systems. This is consistent with more recent Microsoft 72 | documentation: for example, the help file for Excel 2002 says that the 73 | first day in the 1904 date system is ``1904-01-02``. 74 | 75 | 5. Usage of the Excel ``DATE()`` function may leave strange dates in a 76 | spreadsheet. Quoting the help file in respect of the 1900 date system:: 77 | 78 | If year is between 0 (zero) and 1899 (inclusive), 79 | Excel adds that value to 1900 to calculate the year. 80 | For example, DATE(108,1,2) returns January 2, 2008 (1900+108). 81 | 82 | This gimmick, semi-defensible only for arguments up to 99 and only in the 83 | pre-Y2K-awareness era, means that ``DATE(1899, 12, 31)`` is interpreted as 84 | ``3799-12-31``. 85 | 86 | For further information, please refer to the documentation for the 87 | functions in :mod:`~xlrd.xldate`. 88 | -------------------------------------------------------------------------------- /docs/development.rst: -------------------------------------------------------------------------------- 1 | Development 2 | =========== 3 | 4 | .. highlight:: bash 5 | 6 | If you wish to contribute to this project, then you should fork the 7 | repository found here: 8 | 9 | https://github.com/python-excel/xlrd 10 | 11 | Once that has been done and you have a checkout, you can follow these 12 | instructions to perform various development tasks: 13 | 14 | Setting up a virtualenv 15 | ----------------------- 16 | 17 | The recommended way to set up a development environment is to turn 18 | your checkout into a virtualenv and then install the package in 19 | editable form as follows:: 20 | 21 | $ virtualenv . 
22 | $ bin/pip install -e .[test] 23 | 24 | Running the tests 25 | ----------------- 26 | 27 | Once you've set up a virtualenv, the tests can be run as follows:: 28 | 29 | $ source bin/activate 30 | $ pytest 31 | 32 | Building the documentation 33 | -------------------------- 34 | 35 | The Sphinx documentation is built by doing the following, having activated 36 | the virtualenv above, from the directory containing setup.py:: 37 | 38 | $ source bin/activate 39 | $ cd docs 40 | $ make html 41 | 42 | To check that the description that will be used on PyPI renders properly, 43 | do the following:: 44 | 45 | $ python setup.py --long-description | rst2html.py > desc.html 46 | 47 | Making a release 48 | ---------------- 49 | 50 | To make a release, just update the version in ``xlrd.info.__VERSION__``, update the change log 51 | and push to https://github.com/python-excel/xlrd 52 | and Carthorse should take care of the rest. 53 | -------------------------------------------------------------------------------- /docs/formatting.rst: -------------------------------------------------------------------------------- 1 | Formatting information in Excel Spreadsheets 2 | ============================================ 3 | 4 | Introduction 5 | ------------ 6 | 7 | This collection of features, new in xlrd version 0.6.1, is intended 8 | to provide the information needed to: 9 | 10 | - display/render spreadsheet contents (say) on a screen or in a PDF file 11 | - copy spreadsheet data to another file without losing the ability to 12 | display/render it. 13 | 14 | .. _palette: 15 | 16 | The Palette; Colour Indexes 17 | --------------------------- 18 | 19 | A colour is represented in Excel as a ``(red, green, blue)`` ("RGB") tuple 20 | with each component in ``range(256)``. However it is not possible to access an 21 | unlimited number of colours; each spreadsheet is limited to a palette of 64 22 | different colours (24 in Excel 3.0 and 4.0, 8 in Excel 2.0). 
23 | Colours are referenced by an index ("colour index") into this palette. 24 | 25 | Colour indexes 0 to 7 represent 8 fixed built-in colours: 26 | black, white, red, green, blue, yellow, magenta, and cyan. 27 | 28 | The remaining colours in the palette (8 to 63 in Excel 5.0 and later) 29 | can be changed by the user. In the Excel 2003 UI, 30 | Tools -> Options -> Color presents a palette 31 | of 7 rows of 8 colours. The last two rows are reserved for use in charts. 32 | 33 | The correspondence between this grid and the assigned 34 | colour indexes is NOT left-to-right top-to-bottom. 35 | 36 | Indexes 8 to 15 correspond to changeable 37 | parallels of the 8 fixed colours -- for example, index 7 is forever cyan; 38 | index 15 starts off being cyan but can be changed by the user. 39 | 40 | The default colour for each index depends on the file version; tables of the 41 | defaults are available in the source code. If the user changes one or more 42 | colours, a ``PALETTE`` record appears in the XLS file -- it gives the RGB values 43 | for *all* changeable 44 | indexes. 45 | 46 | Note that colours can be used in "number formats": ``[CYAN]....`` and 47 | ``[COLOR8]....`` refer to colour index 7; ``[COLOR16]....`` will produce cyan 48 | unless the user changes colour index 15 to something else. 
49 | 50 | In addition, there are several "magic" colour indexes used by Excel: 51 | 52 | ``0x18`` (BIFF3-BIFF4), ``0x40`` (BIFF5-BIFF8): 53 | System window text colour for border lines (used in ``XF``, ``CF``, and 54 | ``WINDOW2`` records) 55 | 56 | ``0x19`` (BIFF3-BIFF4), ``0x41`` (BIFF5-BIFF8): 57 | System window background colour for pattern background (used in ``XF`` and 58 | ``CF`` records ) 59 | 60 | ``0x43``: 61 | System face colour (dialogue background colour) 62 | 63 | ``0x4D``: 64 | System window text colour for chart border lines 65 | 66 | ``0x4E``: 67 | System window background colour for chart areas 68 | 69 | ``0x4F``: 70 | Automatic colour for chart border lines (seems to be always Black) 71 | 72 | ``0x50``: 73 | System ToolTip background colour (used in note objects) 74 | 75 | ``0x51``: 76 | System ToolTip text colour (used in note objects) 77 | 78 | ``0x7FFF``: 79 | System window text colour for fonts (used in ``FONT`` and ``CF`` records). 80 | 81 | .. note:: 82 | ``0x7FFF`` appears to be the *default* colour index. 83 | It appears quite often in ``FONT`` records. 84 | 85 | Default Formatting 86 | ------------------ 87 | 88 | Default formatting is applied to all empty cells (those not described by a cell 89 | record): 90 | 91 | - Firstly, row default information (``ROW`` record, :class:`~xlrd.sheet.Rowinfo` 92 | class) is used if available. 93 | 94 | - Failing that, column default information (``COLINFO`` record, 95 | :class:`~xlrd.sheet.Colinfo` class) is used if available. 96 | 97 | - As a last resort the worksheet/workbook default cell format will be used; this 98 | should always be present in an Excel file, 99 | described by the ``XF`` record with the fixed index 15 (0-based). 100 | By default, it uses the worksheet/workbook default cell style, 101 | described by the very first ``XF`` record (index 0). 
102 | 103 | Formatting features not included in xlrd 104 | ---------------------------------------- 105 | 106 | - Asian phonetic text (known as "ruby"), used for Japanese furigana. 107 | See OOo docs s3.4.2 (p15) 108 | 109 | - Conditional formatting. See OOo docs s5.12, s6.21 (CONDFMT record), s6.16 110 | (CF record) 111 | 112 | - Miscellaneous sheet-level and book-level items, e.g. printing layout, 113 | screen panes. 114 | 115 | - Modern Excel file versions don't keep most of the built-in 116 | "number formats" in the file; Excel loads formats according to the 117 | user's locale. Currently, xlrd's emulation of this is limited to 118 | a hard-wired table that applies to the US English locale. This may mean 119 | that currency symbols, date order, thousands separator, decimals separator, 120 | etc are inappropriate. 121 | 122 | .. note:: 123 | This does not affect users who are copying XLS 124 | files, only those who are visually rendering cells. 125 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | 3 | You may also wish to consult the `tutorial`__. 4 | 5 | __ https://github.com/python-excel/tutorial 6 | 7 | Details: 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | 12 | unicode.rst 13 | dates.rst 14 | references.rst 15 | formatting.rst 16 | on_demand.rst 17 | api.rst 18 | 19 | For details of how to get involved in development of this package, 20 | and other meta-information, please see the sections below: 21 | 22 | .. 
toctree:: 23 | :maxdepth: 1 24 | 25 | development.rst 26 | changes.rst 27 | acknowledgements.rst 28 | licenses.rst 29 | 30 | Indices and tables 31 | ================== 32 | 33 | * :ref:`genindex` 34 | * :ref:`modindex` 35 | * :ref:`search` 36 | -------------------------------------------------------------------------------- /docs/licenses.rst: -------------------------------------------------------------------------------- 1 | Licenses 2 | ======== 3 | 4 | .. literalinclude:: ../LICENSE 5 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^<target^>` where ^<target^> is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. 
changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo. doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 93 | goto end 94 | ) 95 | 96 | if "%1" == "devhelp" ( 97 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 98 | if errorlevel 1 exit /b 1 99 | echo. 100 | echo.Build finished. 
101 | goto end 102 | ) 103 | 104 | if "%1" == "epub" ( 105 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 106 | if errorlevel 1 exit /b 1 107 | echo. 108 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 109 | goto end 110 | ) 111 | 112 | if "%1" == "latex" ( 113 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 117 | goto end 118 | ) 119 | 120 | if "%1" == "text" ( 121 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 122 | if errorlevel 1 exit /b 1 123 | echo. 124 | echo.Build finished. The text files are in %BUILDDIR%/text. 125 | goto end 126 | ) 127 | 128 | if "%1" == "man" ( 129 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 130 | if errorlevel 1 exit /b 1 131 | echo. 132 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 133 | goto end 134 | ) 135 | 136 | if "%1" == "texinfo" ( 137 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 138 | if errorlevel 1 exit /b 1 139 | echo. 140 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 141 | goto end 142 | ) 143 | 144 | if "%1" == "gettext" ( 145 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 146 | if errorlevel 1 exit /b 1 147 | echo. 148 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 149 | goto end 150 | ) 151 | 152 | if "%1" == "changes" ( 153 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 154 | if errorlevel 1 exit /b 1 155 | echo. 156 | echo.The overview file is in %BUILDDIR%/changes. 157 | goto end 158 | ) 159 | 160 | if "%1" == "linkcheck" ( 161 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 162 | if errorlevel 1 exit /b 1 163 | echo. 164 | echo.Link check complete; look for any errors in the above output ^ 165 | or in %BUILDDIR%/linkcheck/output.txt. 
166 | goto end 167 | ) 168 | 169 | if "%1" == "doctest" ( 170 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 171 | if errorlevel 1 exit /b 1 172 | echo. 173 | echo.Testing of doctests in the sources finished, look at the ^ 174 | results in %BUILDDIR%/doctest/output.txt. 175 | goto end 176 | ) 177 | 178 | :end 179 | -------------------------------------------------------------------------------- /docs/on_demand.rst: -------------------------------------------------------------------------------- 1 | Loading worksheets on demand 2 | ============================= 3 | 4 | .. currentmodule:: xlrd.book 5 | 6 | This feature, new in version 0.7.1, is governed by the ``on_demand`` argument 7 | to the :func:`~xlrd.open_workbook` function and allows saving memory and time by 8 | loading only those sheets that the caller is interested in, and releasing sheets 9 | when no longer required. 10 | 11 | ``on_demand=False`` (default): 12 | No change. :func:`~xlrd.open_workbook` loads global data 13 | and all sheets, releases resources no longer required (principally the 14 | :class:`str` or :class:`mmap.mmap` object containing the Workbook stream), 15 | and returns. 16 | 17 | ``on_demand=True`` and BIFF version < 5.0: 18 | A warning message is emitted, 19 | ``on_demand`` is recorded as ``False``, and the old process is followed. 20 | 21 | ``on_demand=True`` and BIFF version >= 5.0: 22 | :func:`~xlrd.open_workbook` loads global 23 | data and returns without releasing resources. At this stage, the only 24 | information available about sheets is :attr:`Book.nsheets` and 25 | :meth:`Book.sheet_names`. 26 | 27 | :meth:`Book.sheet_by_name` and :meth:`Book.sheet_by_index` will load the 28 | requested sheet if it is not already loaded. 29 | 30 | :meth:`Book.sheets` will load all unloaded sheets. 31 | 32 | The caller may save memory by calling 33 | :meth:`Book.unload_sheet` when finished with the sheet. 34 | This applies irrespective of the state of ``on_demand``. 
35 | 36 | The caller may re-load an unloaded sheet by calling :meth:`Book.sheet_by_name` 37 | or :meth:`Book.sheet_by_index`, except if the required resources have been 38 | released (which will 39 | have happened automatically when ``on_demand`` is false). This is the only 40 | case where an exception will be raised. 41 | 42 | The caller may query the state of a sheet using :meth:`Book.sheet_loaded`. 43 | 44 | :meth:`Book.release_resources` may be used to save memory and close 45 | any memory-mapped file before proceeding to examine already-loaded 46 | sheets. Once resources are released, no further sheets can be loaded. 47 | 48 | When using on-demand, it is advisable to ensure that 49 | :meth:`Book.release_resources` is always called, even if an exception 50 | is raised in your own code; otherwise if the input file has been 51 | memory-mapped, the :class:`mmap.mmap` object will not be closed and you will 52 | not be able to access the physical file until your Python process 53 | terminates. This can be done by calling :meth:`Book.release_resources` 54 | explicitly in the finally part of a try/finally block. 55 | 56 | The Book object is also a context manager, so you can wrap your code in a 57 | ``with`` statement that will make sure underlying resources are closed. 58 | -------------------------------------------------------------------------------- /docs/references.rst: -------------------------------------------------------------------------------- 1 | Named references, constants, formulas, and macros 2 | ================================================= 3 | 4 | .. currentmodule:: xlrd.book 5 | 6 | 7 | A name is used to refer to a cell, a group of cells, a constant 8 | value, a formula, or a macro. Usually the scope of a name is global 9 | across the whole workbook. However it can be local to a worksheet. 10 | For example, if the sales figures are in different cells in 11 | different sheets, the user may define the name "Sales" in each 12 | sheet. 
There are built-in names, like "Print_Area" and 13 | "Print_Titles"; these two are naturally local to a sheet. 14 | 15 | To inspect the names with a user interface like MS Excel, OOo Calc, 16 | or Gnumeric, click on Insert -> Names -> Define. This will show the global 17 | names, plus those local to the currently selected sheet. 18 | 19 | A :class:`Book` object provides two dictionaries (:attr:`Book.name_map` and 20 | :attr:`Book.name_and_scope_map`) and a list (:attr:`Book.name_obj_list`) which 21 | allow various ways of accessing the :class:`Name` objects. 22 | There is one :class:`Name` object for each ``NAME`` record found in the workbook. 23 | :class:`Name` objects have many attributes, several of which are relevant only 24 | when ``obj.macro`` is ``1``. 25 | 26 | In the examples directory you will find ``namesdemo.xls`` which 27 | showcases the many different ways that names can be used, and 28 | ``xlrdnamesAPIdemo.py`` which offers 3 different queries for inspecting 29 | the names in your files, and shows how to extract whatever a name is 30 | referring to. There is currently one "convenience method", 31 | :meth:`Name.cell`, which extracts the value in the case where the name 32 | refers to a single cell. The source code for :meth:`Name.cell` is an extra 33 | source of information on how the :class:`Name` attributes hang together. 34 | 35 | .. note:: 36 | 37 | Name information is *not* extracted from files older than 38 | Excel 5.0 (``Book.biff_version < 50``). 39 | -------------------------------------------------------------------------------- /docs/unicode.rst: -------------------------------------------------------------------------------- 1 | Handling of Unicode 2 | =================== 3 | 4 | This package presents all text strings as Python unicode objects. 5 | From Excel 97 onwards, text in Excel spreadsheets has been stored as `UTF-16LE 6 | <https://en.wikipedia.org/wiki/UTF-16>`_ 7 | (a 16-bit Unicode Transformation Format).
8 | Older files (Excel 95 and earlier) don't keep strings in Unicode; 9 | a ``CODEPAGE`` record provides a codepage number (for example, 1252) which is 10 | used by xlrd to derive the encoding (for the same example: "cp1252") which is 11 | used to translate to Unicode. 12 | 13 | If the ``CODEPAGE`` record is missing (possible if the file was created 14 | by third-party software), ``xlrd`` will assume that the encoding is ascii, 15 | and keep going. If the actual encoding is not ascii, a 16 | :class:`UnicodeDecodeError` exception will be raised and 17 | you will need to determine the encoding yourself, and tell xlrd: 18 | 19 | .. code-block:: python 20 | 21 | book = xlrd.open_workbook(..., encoding_override="cp1252") 22 | 23 | If the ``CODEPAGE`` record exists but is wrong (for example, the codepage 24 | number is 1251, but the strings are actually encoded in koi8_r), 25 | it can be overridden using the same mechanism. 26 | 27 | The supplied ``runxlrd.py`` has a corresponding command-line argument, which 28 | may be used for experimentation: 29 | 30 | .. code-block:: bash 31 | 32 | runxlrd.py -e koi8_r 3rows myfile.xls 33 | 34 | The first place to look for an encoding, the "codec name", is 35 | `the Python documentation`__. 36 | 37 | __ https://docs.python.org/library/codecs.html#standard-encodings 38 | -------------------------------------------------------------------------------- /scripts/runxlrd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) 2005-2012 Stephen John Machin, Lingfo Pty Ltd 3 | # This script is part of the xlrd package, which is released under a 4 | # BSD-style licence.
5 | 6 | from __future__ import print_function 7 | 8 | cmd_doc = """ 9 | Commands: 10 | 11 | 2rows Print the contents of first and last row in each sheet 12 | 3rows Print the contents of first, second and last row in each sheet 13 | bench Same as "show", but doesn't print -- for profiling 14 | biff_count[1] Print a count of each type of BIFF record in the file 15 | biff_dump[1] Print a dump (char and hex) of the BIFF records in the file 16 | fonts hdr + print a dump of all font objects 17 | hdr Mini-overview of file (no per-sheet information) 18 | hotshot Do a hotshot profile run e.g. ... -f1 hotshot bench bigfile*.xls 19 | labels Dump of sheet.col_label_ranges and ...row... for each sheet 20 | name_dump Dump of each object in book.name_obj_list 21 | names Print brief information for each NAME record 22 | ov Overview of file 23 | profile Like "hotshot", but uses cProfile 24 | show Print the contents of all rows in each sheet 25 | version[0] Print versions of xlrd and Python and exit 26 | xfc Print "XF counts" and cell-type counts -- see code for details 27 | 28 | [0] means no file arg 29 | [1] means only one file arg i.e. 
no glob.glob pattern 30 | """ 31 | 32 | options = None 33 | if __name__ == "__main__": 34 | import xlrd 35 | import sys 36 | import time 37 | import glob 38 | import traceback 39 | import gc 40 | 41 | from xlrd.timemachine import xrange, REPR 42 | 43 | 44 | class LogHandler(object): 45 | 46 | def __init__(self, logfileobj): 47 | self.logfileobj = logfileobj 48 | self.fileheading = None 49 | self.shown = 0 50 | 51 | def setfileheading(self, fileheading): 52 | self.fileheading = fileheading 53 | self.shown = 0 54 | 55 | def write(self, text): 56 | if self.fileheading and not self.shown: 57 | self.logfileobj.write(self.fileheading) 58 | self.shown = 1 59 | self.logfileobj.write(text) 60 | 61 | null_cell = xlrd.empty_cell 62 | 63 | def show_row(bk, sh, rowx, colrange, printit): 64 | if bk.ragged_rows: 65 | colrange = range(sh.row_len(rowx)) 66 | if not colrange: return 67 | if printit: print() 68 | if bk.formatting_info: 69 | for colx, ty, val, cxfx in get_row_data(bk, sh, rowx, colrange): 70 | if printit: 71 | print("cell %s%d: type=%d, data: %r, xfx: %s" 72 | % (xlrd.colname(colx), rowx+1, ty, val, cxfx)) 73 | else: 74 | for colx, ty, val, _unused in get_row_data(bk, sh, rowx, colrange): 75 | if printit: 76 | print("cell %s%d: type=%d, data: %r" % (xlrd.colname(colx), rowx+1, ty, val)) 77 | 78 | def get_row_data(bk, sh, rowx, colrange): 79 | result = [] 80 | dmode = bk.datemode 81 | ctys = sh.row_types(rowx) 82 | cvals = sh.row_values(rowx) 83 | for colx in colrange: 84 | cty = ctys[colx] 85 | cval = cvals[colx] 86 | if bk.formatting_info: 87 | cxfx = str(sh.cell_xf_index(rowx, colx)) 88 | else: 89 | cxfx = '' 90 | if cty == xlrd.XL_CELL_DATE: 91 | try: 92 | showval = xlrd.xldate_as_tuple(cval, dmode) 93 | except xlrd.XLDateError as e: 94 | showval = "%s:%s" % (type(e).__name__, e) 95 | cty = xlrd.XL_CELL_ERROR 96 | elif cty == xlrd.XL_CELL_ERROR: 97 | showval = xlrd.error_text_from_code.get(cval, '' % cval) 98 | else: 99 | showval = cval 100 | result.append((colx, 
cty, showval, cxfx)) 101 | return result 102 | 103 | def bk_header(bk): 104 | print() 105 | print("BIFF version: %s; datemode: %s" 106 | % (xlrd.biff_text_from_num[bk.biff_version], bk.datemode)) 107 | print("codepage: %r (encoding: %s); countries: %r" 108 | % (bk.codepage, bk.encoding, bk.countries)) 109 | print("Last saved by: %r" % bk.user_name) 110 | print("Number of data sheets: %d" % bk.nsheets) 111 | print("Use mmap: %d; Formatting: %d; On demand: %d" 112 | % (bk.use_mmap, bk.formatting_info, bk.on_demand)) 113 | print("Ragged rows: %d" % bk.ragged_rows) 114 | if bk.formatting_info: 115 | print("FORMATs: %d, FONTs: %d, XFs: %d" 116 | % (len(bk.format_list), len(bk.font_list), len(bk.xf_list))) 117 | if not options.suppress_timing: 118 | print("Load time: %.2f seconds (stage 1) %.2f seconds (stage 2)" 119 | % (bk.load_time_stage_1, bk.load_time_stage_2)) 120 | print() 121 | 122 | def show_fonts(bk): 123 | print("Fonts:") 124 | for x in xrange(len(bk.font_list)): 125 | font = bk.font_list[x] 126 | font.dump(header='== Index %d ==' % x, indent=4) 127 | 128 | def show_names(bk, dump=0): 129 | bk_header(bk) 130 | if bk.biff_version < 50: 131 | print("Names not extracted in this BIFF version") 132 | return 133 | nlist = bk.name_obj_list 134 | print("Name list: %d entries" % len(nlist)) 135 | for nobj in nlist: 136 | if dump: 137 | nobj.dump(sys.stdout, 138 | header="\n=== Dump of name_obj_list[%d] ===" % nobj.name_index) 139 | else: 140 | print("[%d]\tName:%r macro:%r scope:%d\n\tresult:%r\n" 141 | % (nobj.name_index, nobj.name, nobj.macro, nobj.scope, nobj.result)) 142 | 143 | def print_labels(sh, labs, title): 144 | if not labs:return 145 | for rlo, rhi, clo, chi in labs: 146 | print("%s label range %s:%s contains:" 147 | % (title, xlrd.cellname(rlo, clo), xlrd.cellname(rhi-1, chi-1))) 148 | for rx in xrange(rlo, rhi): 149 | for cx in xrange(clo, chi): 150 | print(" %s: %r" % (xlrd.cellname(rx, cx), sh.cell_value(rx, cx))) 151 | 152 | def show_labels(bk): 153 | 
# bk_header(bk) 154 | hdr = 0 155 | for shx in range(bk.nsheets): 156 | sh = bk.sheet_by_index(shx) 157 | clabs = sh.col_label_ranges 158 | rlabs = sh.row_label_ranges 159 | if clabs or rlabs: 160 | if not hdr: 161 | bk_header(bk) 162 | hdr = 1 163 | print("sheet %d: name = %r; nrows = %d; ncols = %d" % 164 | (shx, sh.name, sh.nrows, sh.ncols)) 165 | print_labels(sh, clabs, 'Col') 166 | print_labels(sh, rlabs, 'Row') 167 | if bk.on_demand: bk.unload_sheet(shx) 168 | 169 | def show(bk, nshow=65535, printit=1): 170 | bk_header(bk) 171 | if 0: 172 | rclist = xlrd.sheet.rc_stats.items() 173 | rclist = sorted(rclist) 174 | print("rc stats") 175 | for k, v in rclist: 176 | print("0x%04x %7d" % (k, v)) 177 | if options.onesheet: 178 | try: 179 | shx = int(options.onesheet) 180 | except ValueError: 181 | shx = bk.sheet_by_name(options.onesheet).number 182 | shxrange = [shx] 183 | else: 184 | shxrange = range(bk.nsheets) 185 | # print("shxrange", list(shxrange)) 186 | for shx in shxrange: 187 | sh = bk.sheet_by_index(shx) 188 | nrows, ncols = sh.nrows, sh.ncols 189 | colrange = range(ncols) 190 | anshow = min(nshow, nrows) 191 | print("sheet %d: name = %s; nrows = %d; ncols = %d" % 192 | (shx, REPR(sh.name), sh.nrows, sh.ncols)) 193 | if nrows and ncols: 194 | # Beat the bounds 195 | for rowx in xrange(nrows): 196 | nc = sh.row_len(rowx) 197 | if nc: 198 | sh.row_types(rowx)[nc-1] 199 | sh.row_values(rowx)[nc-1] 200 | sh.cell(rowx, nc-1) 201 | for rowx in xrange(anshow-1): 202 | if not printit and rowx % 10000 == 1 and rowx > 1: 203 | print("done %d rows" % (rowx-1,)) 204 | show_row(bk, sh, rowx, colrange, printit) 205 | if anshow and nrows: 206 | show_row(bk, sh, nrows-1, colrange, printit) 207 | print() 208 | if bk.on_demand: bk.unload_sheet(shx) 209 | 210 | def count_xfs(bk): 211 | bk_header(bk) 212 | for shx in range(bk.nsheets): 213 | sh = bk.sheet_by_index(shx) 214 | nrows = sh.nrows 215 | print("sheet %d: name = %r; nrows = %d; ncols = %d" % 216 | (shx, sh.name, 
sh.nrows, sh.ncols)) 217 | # Access all xfindexes to force gathering stats 218 | type_stats = [0, 0, 0, 0, 0, 0, 0] 219 | for rowx in xrange(nrows): 220 | for colx in xrange(sh.row_len(rowx)): 221 | xfx = sh.cell_xf_index(rowx, colx) 222 | assert xfx >= 0 223 | cty = sh.cell_type(rowx, colx) 224 | type_stats[cty] += 1 225 | print("XF stats", sh._xf_index_stats) 226 | print("type stats", type_stats) 227 | print() 228 | if bk.on_demand: bk.unload_sheet(shx) 229 | 230 | def main(cmd_args): 231 | import optparse 232 | global options 233 | usage = "\n%prog [options] command [input-file-patterns]\n" + cmd_doc 234 | oparser = optparse.OptionParser(usage) 235 | oparser.add_option( 236 | "-l", "--logfilename", 237 | default="", 238 | help="contains error messages") 239 | oparser.add_option( 240 | "-v", "--verbosity", 241 | type="int", default=0, 242 | help="level of information and diagnostics provided") 243 | oparser.add_option( 244 | "-m", "--mmap", 245 | type="int", default=-1, 246 | help="1: use mmap; 0: don't use mmap; -1: accept heuristic") 247 | oparser.add_option( 248 | "-e", "--encoding", 249 | default="", 250 | help="encoding override") 251 | oparser.add_option( 252 | "-f", "--formatting", 253 | type="int", default=0, 254 | help="0 (default): no fmt info\n" 255 | "1: fmt info (all cells)\n", 256 | ) 257 | oparser.add_option( 258 | "-g", "--gc", 259 | type="int", default=0, 260 | help="0: auto gc enabled; 1: auto gc disabled, manual collect after each file; 2: no gc") 261 | oparser.add_option( 262 | "-s", "--onesheet", 263 | default="", 264 | help="restrict output to this sheet (name or index)") 265 | oparser.add_option( 266 | "-u", "--unnumbered", 267 | action="store_true", default=0, 268 | help="omit line numbers or offsets in biff_dump") 269 | oparser.add_option( 270 | "-d", "--on-demand", 271 | action="store_true", default=0, 272 | help="load sheets on demand instead of all at once") 273 | oparser.add_option( 274 | "-t", "--suppress-timing", 275 | 
action="store_true", default=0, 276 | help="don't print timings (diffs are less messy)") 277 | oparser.add_option( 278 | "-r", "--ragged-rows", 279 | action="store_true", default=0, 280 | help="open_workbook(..., ragged_rows=True)") 281 | options, args = oparser.parse_args(cmd_args) 282 | if len(args) == 1 and args[0] in ("version", ): 283 | pass 284 | elif len(args) < 2: 285 | oparser.error("Expected at least 2 args, found %d" % len(args)) 286 | cmd = args[0] 287 | xlrd_version = getattr(xlrd, "__VERSION__", "unknown; before 0.5") 288 | if cmd == 'biff_dump': 289 | xlrd.dump(args[1], unnumbered=options.unnumbered) 290 | sys.exit(0) 291 | if cmd == 'biff_count': 292 | xlrd.count_records(args[1]) 293 | sys.exit(0) 294 | if cmd == 'version': 295 | print("xlrd: %s, from %s" % (xlrd_version, xlrd.__file__)) 296 | print("Python:", sys.version) 297 | sys.exit(0) 298 | if options.logfilename: 299 | logfile = LogHandler(open(options.logfilename, 'w')) 300 | else: 301 | logfile = sys.stdout 302 | mmap_opt = options.mmap 303 | mmap_arg = xlrd.USE_MMAP 304 | if mmap_opt in (1, 0): 305 | mmap_arg = mmap_opt 306 | elif mmap_opt != -1: 307 | print('Unexpected value (%r) for mmap option -- assuming default' % mmap_opt) 308 | fmt_opt = options.formatting | (cmd in ('xfc', )) 309 | gc_mode = options.gc 310 | if gc_mode: 311 | gc.disable() 312 | for pattern in args[1:]: 313 | for fname in glob.glob(pattern): 314 | print("\n=== File: %s ===" % fname) 315 | if logfile != sys.stdout: 316 | logfile.setfileheading("\n=== File: %s ===\n" % fname) 317 | if gc_mode == 1: 318 | n_unreachable = gc.collect() 319 | if n_unreachable: 320 | print("GC before open:", n_unreachable, "unreachable objects") 321 | try: 322 | t0 = time.time() 323 | bk = xlrd.open_workbook( 324 | fname, 325 | verbosity=options.verbosity, logfile=logfile, 326 | use_mmap=mmap_arg, 327 | encoding_override=options.encoding, 328 | formatting_info=fmt_opt, 329 | on_demand=options.on_demand, 330 | 
ragged_rows=options.ragged_rows, 331 | ) 332 | t1 = time.time() 333 | if not options.suppress_timing: 334 | print("Open took %.2f seconds" % (t1-t0,)) 335 | except xlrd.XLRDError as e: 336 | print("*** Open failed: %s: %s" % (type(e).__name__, e)) 337 | continue 338 | except KeyboardInterrupt: 339 | print("*** KeyboardInterrupt ***") 340 | traceback.print_exc(file=sys.stdout) 341 | sys.exit(1) 342 | except BaseException as e: 343 | print("*** Open failed: %s: %s" % (type(e).__name__, e)) 344 | traceback.print_exc(file=sys.stdout) 345 | continue 346 | t0 = time.time() 347 | if cmd == 'hdr': 348 | bk_header(bk) 349 | elif cmd == 'ov': # OverView 350 | show(bk, 0) 351 | elif cmd == 'show': # all rows 352 | show(bk) 353 | elif cmd == '2rows': # first row and last row 354 | show(bk, 2) 355 | elif cmd == '3rows': # first row, 2nd row and last row 356 | show(bk, 3) 357 | elif cmd == 'bench': 358 | show(bk, printit=0) 359 | elif cmd == 'fonts': 360 | bk_header(bk) 361 | show_fonts(bk) 362 | elif cmd == 'names': # named reference list 363 | show_names(bk) 364 | elif cmd == 'name_dump': # named reference list 365 | show_names(bk, dump=1) 366 | elif cmd == 'labels': 367 | show_labels(bk) 368 | elif cmd == 'xfc': 369 | count_xfs(bk) 370 | else: 371 | print("*** Unknown command <%s>" % cmd) 372 | sys.exit(1) 373 | del bk 374 | if gc_mode == 1: 375 | n_unreachable = gc.collect() 376 | if n_unreachable: 377 | print("GC post cmd:", fname, "->", n_unreachable, "unreachable objects") 378 | if not options.suppress_timing: 379 | t1 = time.time() 380 | print("\ncommand took %.2f seconds\n" % (t1-t0,)) 381 | 382 | return None 383 | 384 | av = sys.argv[1:] 385 | if not av: 386 | main(av) 387 | firstarg = av[0].lower() 388 | if firstarg == "hotshot": 389 | import hotshot 390 | import hotshot.stats 391 | av = av[1:] 392 | prof_log_name = "XXXX.prof" 393 | prof = hotshot.Profile(prof_log_name) 394 | # benchtime, result = prof.runcall(main, *av) 395 | result = prof.runcall(main, *(av, )) 396 
| print("result", repr(result)) 397 | prof.close() 398 | stats = hotshot.stats.load(prof_log_name) 399 | stats.strip_dirs() 400 | stats.sort_stats('time', 'calls') 401 | stats.print_stats(20) 402 | elif firstarg == "profile": 403 | import cProfile 404 | av = av[1:] 405 | cProfile.run('main(av)', 'YYYY.prof') 406 | import pstats 407 | p = pstats.Stats('YYYY.prof') 408 | p.strip_dirs().sort_stats('cumulative').print_stats(30) 409 | else: 410 | main(av) 411 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [metadata] 5 | license_file = LICENSE 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | from xlrd.info import __VERSION__ 4 | 5 | setup( 6 | name='xlrd', 7 | version=__VERSION__, 8 | author='Chris Withers', 9 | author_email='chris@withers.org', 10 | url='http://www.python-excel.org/', 11 | packages=['xlrd'], 12 | scripts=[ 13 | 'scripts/runxlrd.py', 14 | ], 15 | description=( 16 | 'Library for developers to extract data from ' 17 | 'Microsoft Excel (tm) .xls spreadsheet files' 18 | ), 19 | long_description=open('README.rst').read(), 20 | license='BSD', 21 | keywords=['xls', 'excel', 'spreadsheet', 'workbook'], 22 | classifiers=[ 23 | 'Development Status :: 5 - Production/Stable', 24 | 'Intended Audience :: Developers', 25 | 'License :: OSI Approved :: BSD License', 26 | 'Programming Language :: Python', 27 | 'Programming Language :: Python :: 2', 28 | 'Programming Language :: Python :: 2.7', 29 | 'Programming Language :: Python :: 3', 30 | 'Programming Language :: Python :: 3.6', 31 | 'Programming Language :: Python :: 3.7', 32 | 'Programming Language :: Python :: 3.8', 33 | 'Programming Language :: Python :: 3.9', 34 | 'Operating 
System :: OS Independent', 35 | 'Topic :: Database', 36 | 'Topic :: Office/Business', 37 | 'Topic :: Software Development :: Libraries :: Python Modules', 38 | ], 39 | python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*", 40 | extras_require=dict( 41 | test=['pytest', 'pytest-cov'], 42 | docs=['sphinx'], 43 | build=['wheel', 'twine'] 44 | ) 45 | ) 46 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/__init__.py -------------------------------------------------------------------------------- /tests/helpers.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def from_sample(filename): 5 | return os.path.join(os.path.dirname(os.path.abspath(__file__)), 'samples', filename) 6 | -------------------------------------------------------------------------------- /tests/samples/Formate.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/Formate.xls -------------------------------------------------------------------------------- /tests/samples/biff4_no_format_no_window2.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/biff4_no_format_no_window2.xls -------------------------------------------------------------------------------- /tests/samples/corrupted_error.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/corrupted_error.xls 
-------------------------------------------------------------------------------- /tests/samples/formula_test_names.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/formula_test_names.xls -------------------------------------------------------------------------------- /tests/samples/formula_test_sjmachin.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/formula_test_sjmachin.xls -------------------------------------------------------------------------------- /tests/samples/issue20.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/issue20.xls -------------------------------------------------------------------------------- /tests/samples/namesdemo.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/namesdemo.xls -------------------------------------------------------------------------------- /tests/samples/picture_in_cell.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/picture_in_cell.xls -------------------------------------------------------------------------------- /tests/samples/profiles.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/profiles.xls 
-------------------------------------------------------------------------------- /tests/samples/ragged.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/ragged.xls -------------------------------------------------------------------------------- /tests/samples/sample.ods: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/sample.ods -------------------------------------------------------------------------------- /tests/samples/sample.txt: -------------------------------------------------------------------------------- 1 | This is a text file. 2 | -------------------------------------------------------------------------------- /tests/samples/sample.xlsb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/sample.xlsb -------------------------------------------------------------------------------- /tests/samples/sample.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/sample.xlsx -------------------------------------------------------------------------------- /tests/samples/sample.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/sample.zip -------------------------------------------------------------------------------- /tests/samples/xf_class.xls: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/xf_class.xls -------------------------------------------------------------------------------- /tests/test_biffh.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | 4 | from xlrd import biffh 5 | 6 | if sys.version_info[0] >= 3: 7 | from io import StringIO 8 | else: 9 | # Python 2.6+ does have the io module, but io.StringIO is strict about 10 | # unicode, which won't work for our test. 11 | from StringIO import StringIO 12 | 13 | 14 | class TestHexDump(unittest.TestCase): 15 | def test_hex_char_dump(self): 16 | sio = StringIO() 17 | biffh.hex_char_dump(b"abc\0e\01", 0, 6, fout=sio) 18 | s = sio.getvalue() 19 | assert "61 62 63 00 65 01" in s, s 20 | assert "abc~e?" in s, s 21 | 22 | if __name__=='__main__': 23 | unittest.main() 24 | -------------------------------------------------------------------------------- /tests/test_cell.py: -------------------------------------------------------------------------------- 1 | # Portions Copyright (C) 2010, Manfred Moitzi under a BSD licence 2 | 3 | import unittest 4 | 5 | import xlrd 6 | from xlrd.timemachine import UNICODE_LITERAL 7 | 8 | from .helpers import from_sample 9 | 10 | 11 | class TestCell(unittest.TestCase): 12 | 13 | def setUp(self): 14 | self.book = xlrd.open_workbook(from_sample('profiles.xls'), formatting_info=True) 15 | self.sheet = self.book.sheet_by_name('PROFILEDEF') 16 | 17 | def test_empty_cell(self): 18 | sheet = self.book.sheet_by_name('TRAVERSALCHAINAGE') 19 | cell = sheet.cell(0, 0) 20 | self.assertEqual(cell.ctype, xlrd.book.XL_CELL_EMPTY) 21 | self.assertEqual(cell.value, '') 22 | self.assertEqual(type(cell.value), type(UNICODE_LITERAL(''))) 23 | self.assertTrue(cell.xf_index > 0) 24 | 25 | def test_string_cell(self): 26 | cell = self.sheet.cell(0, 0) 27 | self.assertEqual(cell.ctype, xlrd.book.XL_CELL_TEXT) 28 | 
self.assertEqual(cell.value, 'PROFIL') 29 | self.assertEqual(type(cell.value), type(UNICODE_LITERAL(''))) 30 | self.assertTrue(cell.xf_index > 0) 31 | 32 | def test_number_cell(self): 33 | cell = self.sheet.cell(1, 1) 34 | self.assertEqual(cell.ctype, xlrd.book.XL_CELL_NUMBER) 35 | self.assertEqual(cell.value, 100) 36 | self.assertTrue(cell.xf_index > 0) 37 | 38 | def test_calculated_cell(self): 39 | sheet2 = self.book.sheet_by_name('PROFILELEVELS') 40 | cell = sheet2.cell(1, 3) 41 | self.assertEqual(cell.ctype, xlrd.book.XL_CELL_NUMBER) 42 | self.assertAlmostEqual(cell.value, 265.131, places=3) 43 | self.assertTrue(cell.xf_index > 0) 44 | 45 | def test_merged_cells(self): 46 | book = xlrd.open_workbook(from_sample('xf_class.xls'), formatting_info=True) 47 | sheet3 = book.sheet_by_name('table2') 48 | row_lo, row_hi, col_lo, col_hi = sheet3.merged_cells[0] 49 | self.assertEqual(sheet3.cell(row_lo, col_lo).value, 'MERGED') 50 | self.assertEqual((row_lo, row_hi, col_lo, col_hi), (3, 7, 2, 5)) 51 | -------------------------------------------------------------------------------- /tests/test_formats.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Portions Copyright (C) 2010, Manfred Moitzi under a BSD licence 3 | 4 | import sys 5 | from unittest import TestCase 6 | 7 | import xlrd 8 | 9 | from .helpers import from_sample 10 | 11 | if sys.version_info[0] >= 3: 12 | def u(s): return s 13 | else: 14 | def u(s): 15 | return s.decode('utf-8') 16 | 17 | 18 | class TestCellContent(TestCase): 19 | 20 | def setUp(self): 21 | self.book = xlrd.open_workbook(from_sample('Formate.xls'), formatting_info=True) 22 | self.sheet = self.book.sheet_by_name(u('Blätt1')) 23 | 24 | def test_text_cells(self): 25 | for row, name in enumerate([u('Huber'), u('Äcker'), u('Öcker')]): 26 | cell = self.sheet.cell(row, 0) 27 | self.assertEqual(cell.ctype, xlrd.book.XL_CELL_TEXT) 28 | self.assertEqual(cell.value, name) 29 | 
self.assertTrue(cell.xf_index > 0) 30 | 31 | def test_date_cells(self): 32 | # see also 'Dates in Excel spreadsheets' in the documentation 33 | # convert: xldate_as_tuple(float, book.datemode) -> (year, month, 34 | # day, hour, minutes, seconds) 35 | for row, date in [(0, 2741.), (1, 38406.), (2, 32266.)]: 36 | cell = self.sheet.cell(row, 1) 37 | self.assertEqual(cell.ctype, xlrd.book.XL_CELL_DATE) 38 | self.assertEqual(cell.value, date) 39 | self.assertTrue(cell.xf_index > 0) 40 | 41 | def test_time_cells(self): 42 | # see also 'Dates in Excel spreadsheets' in the documentation 43 | # convert: xldate_as_tuple(float, book.datemode) -> (year, month, 44 | # day, hour, minutes, seconds) 45 | for row, time in [(3, .273611), (4, .538889), (5, .741123)]: 46 | cell = self.sheet.cell(row, 1) 47 | self.assertEqual(cell.ctype, xlrd.book.XL_CELL_DATE) 48 | self.assertAlmostEqual(cell.value, time, places=6) 49 | self.assertTrue(cell.xf_index > 0) 50 | 51 | def test_percent_cells(self): 52 | for row, time in [(6, .974), (7, .124)]: 53 | cell = self.sheet.cell(row, 1) 54 | self.assertEqual(cell.ctype, xlrd.book.XL_CELL_NUMBER) 55 | self.assertAlmostEqual(cell.value, time, places=3) 56 | self.assertTrue(cell.xf_index > 0) 57 | 58 | def test_currency_cells(self): 59 | for row, time in [(8, 1000.30), (9, 1.20)]: 60 | cell = self.sheet.cell(row, 1) 61 | self.assertEqual(cell.ctype, xlrd.book.XL_CELL_NUMBER) 62 | self.assertAlmostEqual(cell.value, time, places=2) 63 | self.assertTrue(cell.xf_index > 0) 64 | 65 | def test_get_from_merged_cell(self): 66 | sheet = self.book.sheet_by_name(u('ÖÄÜ')) 67 | cell = sheet.cell(2, 2) 68 | self.assertEqual(cell.ctype, xlrd.book.XL_CELL_TEXT) 69 | self.assertEqual(cell.value, 'MERGED CELLS') 70 | self.assertTrue(cell.xf_index > 0) 71 | 72 | def test_ignore_diagram(self): 73 | sheet = self.book.sheet_by_name(u('Blätt3')) 74 | cell = sheet.cell(0, 0) 75 | self.assertEqual(cell.ctype, xlrd.book.XL_CELL_NUMBER) 76 | self.assertEqual(cell.value, 100) 
77 | self.assertTrue(cell.xf_index > 0) 78 | -------------------------------------------------------------------------------- /tests/test_formulas.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Portions Copyright (C) 2010, Manfred Moitzi under a BSD licence 3 | 4 | from unittest import TestCase 5 | 6 | import xlrd 7 | 8 | from .helpers import from_sample 9 | 10 | try: 11 | ascii 12 | except NameError: 13 | # For Python 2 14 | def ascii(s): 15 | a = repr(s) 16 | if a.startswith(('u"', "u'")): 17 | a = a[1:] 18 | return a 19 | 20 | class TestFormulas(TestCase): 21 | 22 | def setUp(self): 23 | book = xlrd.open_workbook(from_sample('formula_test_sjmachin.xls')) 24 | self.sheet = book.sheet_by_index(0) 25 | 26 | def get_value(self, col, row): 27 | return ascii(self.sheet.col_values(col)[row]) 28 | 29 | def test_cell_B2(self): 30 | self.assertEqual( 31 | self.get_value(1, 1), 32 | r"'\u041c\u041e\u0421\u041a\u0412\u0410 \u041c\u043e\u0441\u043a\u0432\u0430'", 33 | ) 34 | 35 | def test_cell_B3(self): 36 | self.assertEqual(self.get_value(1, 2), '0.14285714285714285') 37 | 38 | def test_cell_B4(self): 39 | self.assertEqual(self.get_value(1, 3), "'ABCDEF'") 40 | 41 | def test_cell_B5(self): 42 | self.assertEqual(self.get_value(1, 4), "''") 43 | 44 | def test_cell_B6(self): 45 | self.assertEqual(self.get_value(1, 5), '1') 46 | 47 | def test_cell_B7(self): 48 | self.assertEqual(self.get_value(1, 6), '7') 49 | 50 | def test_cell_B8(self): 51 | self.assertEqual( 52 | self.get_value(1, 7), 53 | r"'\u041c\u041e\u0421\u041a\u0412\u0410 \u041c\u043e\u0441\u043a\u0432\u0430'", 54 | ) 55 | 56 | class TestNameFormulas(TestCase): 57 | 58 | def setUp(self): 59 | book = xlrd.open_workbook(from_sample('formula_test_names.xls')) 60 | self.sheet = book.sheet_by_index(0) 61 | 62 | def get_value(self, col, row): 63 | return ascii(self.sheet.col_values(col)[row]) 64 | 65 | def test_unaryop(self): 66 | 
self.assertEqual(self.get_value(1, 1), '-7.0') 67 | 68 | def test_attrsum(self): 69 | self.assertEqual(self.get_value(1, 2), '4.0') 70 | 71 | def test_func(self): 72 | self.assertEqual(self.get_value(1, 3), '6.0') 73 | 74 | def test_func_var_args(self): 75 | self.assertEqual(self.get_value(1, 4), '3.0') 76 | 77 | def test_if(self): 78 | self.assertEqual(self.get_value(1, 5), "'b'") 79 | 80 | def test_choose(self): 81 | self.assertEqual(self.get_value(1, 6), "'C'") 82 | -------------------------------------------------------------------------------- /tests/test_ignore_workbook_corruption_error.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | import xlrd 4 | 5 | from .helpers import from_sample 6 | 7 | 8 | class TestIgnoreWorkbookCorruption(TestCase): 9 | 10 | def test_not_corrupted(self): 11 | with self.assertRaises(Exception) as context: 12 | xlrd.open_workbook(from_sample('corrupted_error.xls')) 13 | self.assertTrue('Workbook corruption' in str(context.exception)) 14 | 15 | xlrd.open_workbook(from_sample('corrupted_error.xls'), ignore_workbook_corruption=True) 16 | -------------------------------------------------------------------------------- /tests/test_inspect.py: -------------------------------------------------------------------------------- 1 | from xlrd import inspect_format 2 | 3 | from .helpers import from_sample 4 | 5 | 6 | def test_xlsx(): 7 | assert inspect_format(from_sample('sample.xlsx')) == 'xlsx' 8 | 9 | 10 | def test_xlsb(): 11 | assert inspect_format(from_sample('sample.xlsb')) == 'xlsb' 12 | 13 | 14 | def test_ods(): 15 | assert inspect_format(from_sample('sample.ods')) == 'ods' 16 | 17 | 18 | def test_zip(): 19 | assert inspect_format(from_sample('sample.zip')) == 'zip' 20 | 21 | 22 | def test_xls(): 23 | assert inspect_format(from_sample('namesdemo.xls')) == 'xls' 24 | 25 | 26 | def test_content(): 27 | with open(from_sample('sample.xlsx'), 'rb') as source: 28 | 
assert inspect_format(content=source.read()) == 'xlsx' 29 | 30 | 31 | def test_unknown(): 32 | assert inspect_format(from_sample('sample.txt')) is None 33 | -------------------------------------------------------------------------------- /tests/test_missing_records.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from xlrd import open_workbook 4 | from xlrd.biffh import XL_CELL_TEXT 5 | 6 | from .helpers import from_sample 7 | 8 | 9 | class TestMissingRecords(TestCase): 10 | 11 | def setUp(self): 12 | path = from_sample('biff4_no_format_no_window2.xls') 13 | self.book = open_workbook(path) 14 | self.sheet = self.book.sheet_by_index(0) 15 | 16 | def test_default_format(self): 17 | cell = self.sheet.cell(0, 0) 18 | self.assertEqual(cell.ctype, XL_CELL_TEXT) 19 | 20 | def test_default_window2_options(self): 21 | self.assertEqual(self.sheet.cached_page_break_preview_mag_factor, 0) 22 | self.assertEqual(self.sheet.cached_normal_view_mag_factor, 0) 23 | -------------------------------------------------------------------------------- /tests/test_open_workbook.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import tempfile 4 | from unittest import TestCase 5 | 6 | import pytest 7 | 8 | from xlrd import open_workbook, XLRDError 9 | 10 | from .helpers import from_sample 11 | 12 | 13 | class TestOpen(object): 14 | # test different uses of open_workbook 15 | 16 | def test_names_demo(self): 17 | # For now, we just check this doesn't raise an error. 18 | open_workbook(from_sample('namesdemo.xls')) 19 | 20 | def test_ragged_rows_tidied_with_formatting(self): 21 | # For now, we just check this doesn't raise an error. 22 | open_workbook(from_sample('issue20.xls'), 23 | formatting_info=True) 24 | 25 | def test_BYTES_X00(self): 26 | # For now, we just check this doesn't raise an error. 
class TestSheet(TestCase):
    """
    Tests for the cell, row and column accessors of a sheet.

    Every test opens ``profiles.xls`` with ``formatting_info=True`` and works
    on the sheet at index ``SHEETINDEX``, which is expected to hold ``NROWS``
    rows and ``NCOLS`` columns.
    """

    sheetnames = ['PROFILEDEF', 'AXISDEF', 'TRAVERSALCHAINAGE',
                  'AXISDATUMLEVELS', 'PROFILELEVELS']

    def setUp(self):
        self.book = xlrd.open_workbook(from_sample('profiles.xls'), formatting_info=True)

    def check_sheet_function(self, function):
        """Assert ``function(rowx, colx)`` is truthy at the first and last cell."""
        self.assertTrue(function(0, 0))
        self.assertTrue(function(NROWS-1, NCOLS-1))

    def check_sheet_function_index_error(self, function):
        """Assert ``function`` raises ``IndexError`` for an out-of-range row or column."""
        self.assertRaises(IndexError, function, ROW_ERR, 0)
        self.assertRaises(IndexError, function, 0, COL_ERR)

    def check_col_slice(self, col_function):
        """Assert a column accessor called with bounds ``2 .. NROWS-2`` yields ``NROWS-4`` items."""
        _slice = col_function(0, 2, NROWS-2)
        self.assertEqual(len(_slice), NROWS-4)

    def check_row_slice(self, row_function):
        """Assert a row accessor called with bounds ``2 .. NCOLS-2`` yields ``NCOLS-4`` items."""
        _slice = row_function(0, 2, NCOLS-2)
        self.assertEqual(len(_slice), NCOLS-4)

    def test_nrows(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        self.assertEqual(sheet.nrows, NROWS)

    def test_ncols(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        self.assertEqual(sheet.ncols, NCOLS)

    def test_cell(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        self.assertNotEqual(xlrd.empty_cell, sheet.cell(0, 0))
        self.assertNotEqual(xlrd.empty_cell, sheet.cell(NROWS-1, NCOLS-1))

    def test_cell_error(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        self.check_sheet_function_index_error(sheet.cell)

    def test_cell_type(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        self.check_sheet_function(sheet.cell_type)

    def test_cell_type_error(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        self.check_sheet_function_index_error(sheet.cell_type)

    def test_cell_value(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        self.check_sheet_function(sheet.cell_value)

    def test_cell_value_error(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        self.check_sheet_function_index_error(sheet.cell_value)

    def test_cell_xf_index(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        self.check_sheet_function(sheet.cell_xf_index)

    def test_cell_xf_index_error(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        self.check_sheet_function_index_error(sheet.cell_xf_index)

    def test_col(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        col = sheet.col(0)
        self.assertEqual(len(col), NROWS)

    def test_row(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        row = sheet.row(0)
        self.assertEqual(len(row), NCOLS)

    def test_getitem_int(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        row = sheet[0]
        self.assertEqual(len(row), NCOLS)

    def test_getitem_tuple(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        self.assertNotEqual(xlrd.empty_cell, sheet[0, 0])
        self.assertNotEqual(xlrd.empty_cell, sheet[NROWS-1, NCOLS-1])

    def test_getitem_failure(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        with self.assertRaises(ValueError):
            sheet[0, 0, 0]

        with self.assertRaises(TypeError):
            sheet["hi"]

    def test_get_rows(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        rows = sheet.get_rows()
        # assertIsInstance states the intent directly; the former
        # assertTrue(isinstance(...), True) passed ``True`` as the failure
        # message rather than as an expected value.
        self.assertIsInstance(rows, types.GeneratorType)
        self.assertEqual(len(list(rows)), sheet.nrows)

    def test_iter(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        rows = []
        # check syntax
        for row in sheet:
            rows.append(row)
        self.assertEqual(len(rows), sheet.nrows)

    def test_col_slice(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        self.check_col_slice(sheet.col_slice)

    def test_col_types(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        self.check_col_slice(sheet.col_types)

    def test_col_values(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        self.check_col_slice(sheet.col_values)

    def test_row_slice(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        self.check_row_slice(sheet.row_slice)

    def test_row_types(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        # Exercise the row accessor; this test previously passed
        # sheet.col_types, so row_types was never actually called.
        self.check_row_slice(sheet.row_types)

    def test_row_values(self):
        sheet = self.book.sheet_by_index(SHEETINDEX)
        # A row accessor must be checked against the NCOLS-based bounds;
        # this test previously used the NROWS-based column check.
        self.check_row_slice(sheet.row_values)
-------------------------------------------------------------------------------- 1 | # Portions Copyright (C) 2010, Manfred Moitzi under a BSD licence 2 | 3 | from unittest import TestCase 4 | 5 | import xlrd 6 | from xlrd import open_workbook 7 | from xlrd.book import Book 8 | from xlrd.sheet import Sheet 9 | 10 | from .helpers import from_sample 11 | 12 | SHEETINDEX = 0 13 | NROWS = 15 14 | NCOLS = 13 15 | 16 | 17 | class TestWorkbook(TestCase): 18 | sheetnames = ['PROFILEDEF', 'AXISDEF', 'TRAVERSALCHAINAGE', 19 | 'AXISDATUMLEVELS', 'PROFILELEVELS'] 20 | 21 | def setUp(self): 22 | self.book = open_workbook(from_sample('profiles.xls')) 23 | 24 | def test_open_workbook(self): 25 | self.assertTrue(isinstance(self.book, Book)) 26 | 27 | def test_nsheets(self): 28 | self.assertEqual(self.book.nsheets, 5) 29 | 30 | def test_sheet_by_name(self): 31 | for name in self.sheetnames: 32 | sheet = self.book.sheet_by_name(name) 33 | self.assertTrue(isinstance(sheet, Sheet)) 34 | self.assertEqual(name, sheet.name) 35 | 36 | def test_sheet_by_index(self): 37 | for index in range(5): 38 | sheet = self.book.sheet_by_index(index) 39 | self.assertTrue(isinstance(sheet, Sheet)) 40 | self.assertEqual(sheet.name, self.sheetnames[index]) 41 | 42 | def test_sheets(self): 43 | sheets = self.book.sheets() 44 | for index, sheet in enumerate(sheets): 45 | self.assertTrue(isinstance(sheet, Sheet)) 46 | self.assertEqual(sheet.name, self.sheetnames[index]) 47 | 48 | def test_sheet_names(self): 49 | self.assertEqual(self.sheetnames, self.book.sheet_names()) 50 | 51 | def test_getitem_ix(self): 52 | sheet = self.book[SHEETINDEX] 53 | self.assertNotEqual(xlrd.empty_cell, sheet.cell(0, 0)) 54 | self.assertNotEqual(xlrd.empty_cell, sheet.cell(NROWS - 1, NCOLS - 1)) 55 | 56 | def test_getitem_name(self): 57 | sheet = self.book[self.sheetnames[SHEETINDEX]] 58 | self.assertNotEqual(xlrd.empty_cell, sheet.cell(0, 0)) 59 | self.assertNotEqual(xlrd.empty_cell, sheet.cell(NROWS - 1, NCOLS - 1)) 60 | 61 | 
def test_iter(self): 62 | sheets = [sh.name for sh in self.book] 63 | self.assertEqual(sheets, self.sheetnames) 64 | -------------------------------------------------------------------------------- /tests/test_xldate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Author: mozman 3 | # Purpose: test xldate.py 4 | # Created: 04.12.2010 5 | # Copyright (C) 2010, Manfred Moitzi 6 | # License: BSD licence 7 | 8 | import unittest 9 | 10 | from xlrd import xldate 11 | 12 | DATEMODE = 0 # 1900-based 13 | 14 | class TestXLDate(unittest.TestCase): 15 | def test_date_as_tuple(self): 16 | date = xldate.xldate_as_tuple(2741., DATEMODE) 17 | self.assertEqual(date, (1907, 7, 3, 0, 0, 0)) 18 | date = xldate.xldate_as_tuple(38406., DATEMODE) 19 | self.assertEqual(date, (2005, 2, 23, 0, 0, 0)) 20 | date = xldate.xldate_as_tuple(32266., DATEMODE) 21 | self.assertEqual(date, (1988, 5, 3, 0, 0, 0)) 22 | 23 | def test_time_as_tuple(self): 24 | time = xldate.xldate_as_tuple(.273611, DATEMODE) 25 | self.assertEqual(time, (0, 0, 0, 6, 34, 0)) 26 | time = xldate.xldate_as_tuple(.538889, DATEMODE) 27 | self.assertEqual(time, (0, 0, 0, 12, 56, 0)) 28 | time = xldate.xldate_as_tuple(.741123, DATEMODE) 29 | self.assertEqual(time, (0, 0, 0, 17, 47, 13)) 30 | 31 | def test_xldate_from_date_tuple(self): 32 | date = xldate.xldate_from_date_tuple( (1907, 7, 3), DATEMODE ) 33 | self.assertAlmostEqual(date, 2741.) 34 | date = xldate.xldate_from_date_tuple( (2005, 2, 23), DATEMODE ) 35 | self.assertAlmostEqual(date, 38406.) 36 | date = xldate.xldate_from_date_tuple( (1988, 5, 3), DATEMODE ) 37 | self.assertAlmostEqual(date, 32266.) 
38 | 39 | def test_xldate_from_time_tuple(self): 40 | time = xldate.xldate_from_time_tuple( (6, 34, 0) ) 41 | self.assertAlmostEqual(time, .273611, places=6) 42 | time = xldate.xldate_from_time_tuple( (12, 56, 0) ) 43 | self.assertAlmostEqual(time, .538889, places=6) 44 | time = xldate.xldate_from_time_tuple( (17, 47, 13) ) 45 | self.assertAlmostEqual(time, .741123, places=6) 46 | 47 | def test_xldate_from_datetime_tuple(self): 48 | date = xldate.xldate_from_datetime_tuple( (1907, 7, 3, 6, 34, 0), DATEMODE) 49 | self.assertAlmostEqual(date, 2741.273611, places=6) 50 | date = xldate.xldate_from_datetime_tuple( (2005, 2, 23, 12, 56, 0), DATEMODE) 51 | self.assertAlmostEqual(date, 38406.538889, places=6) 52 | date = xldate.xldate_from_datetime_tuple( (1988, 5, 3, 17, 47, 13), DATEMODE) 53 | self.assertAlmostEqual(date, 32266.741123, places=6) 54 | 55 | if __name__=='__main__': 56 | unittest.main() 57 | -------------------------------------------------------------------------------- /tests/test_xldate_to_datetime.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # 3 | # Tests for the xlrd xldate.xldate_as_datetime() function. 4 | # 5 | 6 | import unittest 7 | from datetime import datetime 8 | 9 | from xlrd import xldate 10 | 11 | not_1904 = False 12 | is_1904 = True 13 | 14 | 15 | class TestConvertToDateTime(unittest.TestCase): 16 | """ 17 | Testcases to test the _xldate_to_datetime() function against dates 18 | extracted from Excel files, with 1900/1904 epochs. 19 | 20 | """ 21 | 22 | def test_dates_and_times_1900_epoch(self): 23 | """ 24 | Test the _xldate_to_datetime() function for dates and times in 25 | the Excel standard 1900 epoch. 26 | 27 | """ 28 | # Test Excel dates strings and corresponding serial date numbers taken 29 | # from an Excel file. 30 | excel_dates = [ 31 | # Excel's 0.0 date in the 1900 epoch is 1 day before 1900. 
32 | ('1899-12-31T00:00:00.000', 0), 33 | 34 | # Date/time before the false Excel 1900 leapday. 35 | ('1900-02-28T02:11:11.986', 59.09111094906), 36 | 37 | # Date/time after the false Excel 1900 leapday. 38 | ('1900-03-01T05:46:44.068', 61.24078782403), 39 | 40 | # Random date/times in Excel's 0-9999.9999+ range. 41 | ('1982-08-25T00:15:20.213', 30188.010650613425), 42 | ('2065-04-19T00:16:48.290', 60376.011670023145), 43 | ('3222-06-11T03:08:08.251', 483014.13065105322), 44 | ('4379-08-03T06:14:48.580', 905652.26028449077), 45 | ('5949-12-30T12:59:54.263', 1479232.5416002662), 46 | 47 | # End of Excel's date range. 48 | ('9999-12-31T23:59:59.000', 2958465.999988426), 49 | ] 50 | 51 | # Convert the Excel date strings to datetime objects and compare 52 | # against the dateitme return value of xldate.xldate_as_datetime(). 53 | for excel_date in excel_dates: 54 | exp = datetime.strptime(excel_date[0], "%Y-%m-%dT%H:%M:%S.%f") 55 | got = xldate.xldate_as_datetime(excel_date[1], not_1904) 56 | 57 | self.assertEqual(got, exp) 58 | 59 | def test_dates_only_1900_epoch(self): 60 | """ 61 | Test the _xldate_to_datetime() function for dates in the Excel 62 | standard 1900 epoch. 63 | 64 | """ 65 | # Test Excel dates strings and corresponding serial date numbers taken 66 | # from an Excel file. 67 | excel_dates = [ 68 | # Excel's day 0 in the 1900 epoch is 1 day before 1900. 69 | ('1899-12-31', 0), 70 | 71 | # Excel's day 1 in the 1900 epoch. 72 | ('1900-01-01', 1), 73 | 74 | # Date/time before the false Excel 1900 leapday. 75 | ('1900-02-28', 59), 76 | 77 | # Date/time after the false Excel 1900 leapday. 78 | ('1900-03-01', 61), 79 | 80 | # Random date/times in Excel's 0-9999.9999+ range. 81 | ('1902-09-27', 1001), 82 | ('1999-12-31', 36525), 83 | ('2000-01-01', 36526), 84 | ('4000-12-31', 767376), 85 | ('4321-01-01', 884254), 86 | ('9999-01-01', 2958101), 87 | 88 | # End of Excel's date range. 
89 | ('9999-12-31', 2958465), 90 | ] 91 | 92 | # Convert the Excel date strings to datetime objects and compare 93 | # against the dateitme return value of xldate.xldate_as_datetime(). 94 | for excel_date in excel_dates: 95 | exp = datetime.strptime(excel_date[0], "%Y-%m-%d") 96 | got = xldate.xldate_as_datetime(excel_date[1], not_1904) 97 | 98 | self.assertEqual(got, exp) 99 | 100 | def test_dates_only_1904_epoch(self): 101 | """ 102 | Test the _xldate_to_datetime() function for dates in the Excel 103 | Mac/1904 epoch. 104 | 105 | """ 106 | # Test Excel dates strings and corresponding serial date numbers taken 107 | # from an Excel file. 108 | excel_dates = [ 109 | # Excel's day 0 in the 1904 epoch. 110 | ('1904-01-01', 0), 111 | 112 | # Random date/times in Excel's 0-9999.9999+ range. 113 | ('1904-01-31', 30), 114 | ('1904-08-31', 243), 115 | ('1999-02-28', 34757), 116 | ('1999-12-31', 35063), 117 | ('2000-01-01', 35064), 118 | ('2400-12-31', 181526), 119 | ('4000-01-01', 765549), 120 | ('9999-01-01', 2956639), 121 | 122 | # End of Excel's date range. 123 | ('9999-12-31', 2957003), 124 | ] 125 | 126 | # Convert the Excel date strings to datetime objects and compare 127 | # against the dateitme return value of xldate.xldate_as_datetime(). 128 | for excel_date in excel_dates: 129 | exp = datetime.strptime(excel_date[0], "%Y-%m-%d") 130 | got = xldate.xldate_as_datetime(excel_date[1], is_1904) 131 | 132 | self.assertEqual(got, exp) 133 | 134 | def test_times_only(self): 135 | """ 136 | Test the _xldate_to_datetime() function for times only, i.e, the 137 | fractional part of the Excel date when the serial date is 0. 138 | 139 | """ 140 | # Test Excel dates strings and corresponding serial date numbers taken 141 | # from an Excel file. The 1899-12-31 date is Excel's day 0. 142 | excel_dates = [ 143 | # Random times in Excel's 0-0.9999+ range for 1 day. 
144 | ('1899-12-31T00:00:00.000', 0), 145 | ('1899-12-31T00:15:20.213', 1.0650613425925924E-2), 146 | ('1899-12-31T02:24:37.095', 0.10042934027777778), 147 | ('1899-12-31T04:56:35.792', 0.2059698148148148), 148 | ('1899-12-31T07:31:20.407', 0.31343063657407405), 149 | ('1899-12-31T09:37:23.945', 0.40097158564814817), 150 | ('1899-12-31T12:09:48.602', 0.50681252314814818), 151 | ('1899-12-31T14:37:57.451', 0.60969271990740748), 152 | ('1899-12-31T17:04:02.415', 0.71113906250000003), 153 | ('1899-12-31T19:14:24.673', 0.80167445601851861), 154 | ('1899-12-31T21:39:05.944', 0.90215212962962965), 155 | ('1899-12-31T23:17:12.632', 0.97028509259259266), 156 | ('1899-12-31T23:59:59.999', 0.99999998842592586), 157 | ] 158 | 159 | # Convert the Excel date strings to datetime objects and compare 160 | # against the dateitme return value of xldate.xldate_as_datetime(). 161 | for excel_date in excel_dates: 162 | exp = datetime.strptime(excel_date[0], "%Y-%m-%dT%H:%M:%S.%f") 163 | got = xldate.xldate_as_datetime(excel_date[1], not_1904) 164 | 165 | self.assertEqual(got, exp) 166 | -------------------------------------------------------------------------------- /xlrd/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2005-2012 Stephen John Machin, Lingfo Pty Ltd 2 | # This module is part of the xlrd package, which is released under a 3 | # BSD-style licence. 4 | import os 5 | import pprint 6 | import sys 7 | import zipfile 8 | 9 | from . 
def inspect_format(path=None, content=None):
    """
    Inspect the content at the supplied path or the :class:`bytes` content provided
    and return the file's type as a :class:`str`, or ``None`` if it cannot
    be determined.

    :param path:
      A :class:`str` path containing the content to inspect.
      ``~`` will be expanded. Only used when ``content`` is empty or ``None``.

    :param content:
      The :class:`bytes` content to inspect.

    :returns:
       A :class:`str`, or ``None`` if the format cannot be determined.
       The return value can always be looked up in :data:`FILE_FORMAT_DESCRIPTIONS`
       to return a human-readable description of the format found.
    """
    # Only the leading signature bytes are needed to tell the container type.
    if content:
        peek = content[:PEEK_SIZE]
    else:
        path = os.path.expanduser(path)
        with open(path, "rb") as f:
            peek = f.read(PEEK_SIZE)

    if peek.startswith(XLS_SIGNATURE):
        return 'xls'

    if peek.startswith(ZIP_SIGNATURE):
        source = timemachine.BYTES_IO(content) if content else path
        # Close the archive as soon as the member names have been read so the
        # underlying file handle is not left open until garbage collection.
        with zipfile.ZipFile(source) as zf:
            # Workaround for some third party files that use backslashes and
            # mixed-case names: compare against a normalised form (forward
            # slashes, lower case) of every member name.
            component_names = {name.replace('\\', '/').lower()
                               for name in zf.namelist()}

        if 'xl/workbook.xml' in component_names:
            return 'xlsx'
        if 'xl/workbook.bin' in component_names:
            return 'xlsb'
        if 'content.xml' in component_names:
            return 'ods'
        return 'zip'

    # Neither signature matched: the format is unknown.
    return None
117 | 118 | :param encoding_override: 119 | 120 | Used to overcome missing or bad codepage information 121 | in older-version files. See :doc:`unicode`. 122 | 123 | :param formatting_info: 124 | 125 | The default is ``False``, which saves memory. 126 | In this case, "Blank" cells, which are those with their own formatting 127 | information but no data, are treated as empty by ignoring the file's 128 | ``BLANK`` and ``MULBLANK`` records. 129 | This cuts off any bottom or right "margin" of rows of empty or blank 130 | cells. 131 | Only :meth:`~xlrd.sheet.Sheet.cell_value` and 132 | :meth:`~xlrd.sheet.Sheet.cell_type` are available. 133 | 134 | When ``True``, formatting information will be read from the spreadsheet 135 | file. This provides all cells, including empty and blank cells. 136 | Formatting information is available for each cell. 137 | 138 | Note that this will raise a NotImplementedError when used with an 139 | xlsx file. 140 | 141 | :param on_demand: 142 | 143 | Governs whether sheets are all loaded initially or when demanded 144 | by the caller. See :doc:`on_demand`. 145 | 146 | :param ragged_rows: 147 | 148 | The default of ``False`` means all rows are padded out with empty cells so 149 | that all rows have the same size as found in 150 | :attr:`~xlrd.sheet.Sheet.ncols`. 151 | 152 | ``True`` means that there are no empty cells at the ends of rows. 153 | This can result in substantial memory savings if rows are of widely 154 | varying sizes. See also the :meth:`~xlrd.sheet.Sheet.row_len` method. 155 | 156 | 157 | :param ignore_workbook_corruption: 158 | 159 | This option allows to read corrupted workbooks. 160 | When ``False`` you may face CompDocError: Workbook corruption. 161 | When ``True`` that exception will be ignored. 162 | 163 | :returns: An instance of the :class:`~xlrd.book.Book` class. 
164 | """ 165 | 166 | file_format = inspect_format(filename, file_contents) 167 | # We have to let unknown file formats pass through here, as some ancient 168 | # files that xlrd can parse don't start with the expected signature. 169 | if file_format and file_format != 'xls': 170 | raise XLRDError(FILE_FORMAT_DESCRIPTIONS[file_format]+'; not supported') 171 | 172 | bk = open_workbook_xls( 173 | filename=filename, 174 | logfile=logfile, 175 | verbosity=verbosity, 176 | use_mmap=use_mmap, 177 | file_contents=file_contents, 178 | encoding_override=encoding_override, 179 | formatting_info=formatting_info, 180 | on_demand=on_demand, 181 | ragged_rows=ragged_rows, 182 | ignore_workbook_corruption=ignore_workbook_corruption, 183 | ) 184 | 185 | return bk 186 | 187 | 188 | def dump(filename, outfile=sys.stdout, unnumbered=False): 189 | """ 190 | For debugging: dump an XLS file's BIFF records in char & hex. 191 | 192 | :param filename: The path to the file to be dumped. 193 | :param outfile: An open file, to which the dump is written. 194 | :param unnumbered: If true, omit offsets (for meaningful diffs). 195 | """ 196 | from .biffh import biff_dump 197 | bk = Book() 198 | bk.biff2_8_load(filename=filename, logfile=outfile, ) 199 | biff_dump(bk.mem, bk.base, bk.stream_len, 0, outfile, unnumbered) 200 | 201 | 202 | def count_records(filename, outfile=sys.stdout): 203 | """ 204 | For debugging and analysis: summarise the file's BIFF records. 205 | ie: produce a sorted file of ``(record_name, count)``. 206 | 207 | :param filename: The path to the file to be summarised. 208 | :param outfile: An open file, to which the summary is written. 
209 | """ 210 | from .biffh import biff_count_records 211 | bk = Book() 212 | bk.biff2_8_load(filename=filename, logfile=outfile, ) 213 | biff_count_records(bk.mem, bk.base, bk.stream_len, outfile) 214 | -------------------------------------------------------------------------------- /xlrd/biffh.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Portions copyright © 2005-2010 Stephen John Machin, Lingfo Pty Ltd 3 | # This module is part of the xlrd package, which is released under a 4 | # BSD-style licence. 5 | from __future__ import print_function 6 | 7 | import sys 8 | from struct import unpack 9 | 10 | from .timemachine import * 11 | 12 | DEBUG = 0 13 | 14 | 15 | 16 | class XLRDError(Exception): 17 | """ 18 | An exception indicating problems reading data from an Excel file. 19 | """ 20 | 21 | 22 | class BaseObject(object): 23 | """ 24 | Parent of almost all other classes in the package. Defines a common 25 | :meth:`dump` method for debugging. 
26 | """ 27 | 28 | _repr_these = [] 29 | 30 | 31 | def dump(self, f=None, header=None, footer=None, indent=0): 32 | """ 33 | :param f: open file object, to which the dump is written 34 | :param header: text to write before the dump 35 | :param footer: text to write after the dump 36 | :param indent: number of leading spaces (for recursive calls) 37 | """ 38 | if f is None: 39 | f = sys.stderr 40 | if hasattr(self, "__slots__"): 41 | alist = [] 42 | for attr in self.__slots__: 43 | alist.append((attr, getattr(self, attr))) 44 | else: 45 | alist = self.__dict__.items() 46 | alist = sorted(alist) 47 | pad = " " * indent 48 | if header is not None: print(header, file=f) 49 | list_type = type([]) 50 | dict_type = type({}) 51 | for attr, value in alist: 52 | if getattr(value, 'dump', None) and attr != 'book': 53 | value.dump(f, 54 | header="%s%s (%s object):" % (pad, attr, value.__class__.__name__), 55 | indent=indent+4) 56 | elif (attr not in self._repr_these and 57 | (isinstance(value, list_type) or isinstance(value, dict_type))): 58 | print("%s%s: %s, len = %d" % (pad, attr, type(value), len(value)), file=f) 59 | else: 60 | fprintf(f, "%s%s: %r\n", pad, attr, value) 61 | if footer is not None: print(footer, file=f) 62 | 63 | FUN, FDT, FNU, FGE, FTX = range(5) # unknown, date, number, general, text 64 | DATEFORMAT = FDT 65 | NUMBERFORMAT = FNU 66 | 67 | ( 68 | XL_CELL_EMPTY, 69 | XL_CELL_TEXT, 70 | XL_CELL_NUMBER, 71 | XL_CELL_DATE, 72 | XL_CELL_BOOLEAN, 73 | XL_CELL_ERROR, 74 | XL_CELL_BLANK, # for use in debugging, gathering stats, etc 75 | ) = range(7) 76 | 77 | biff_text_from_num = { 78 | 0: "(not BIFF)", 79 | 20: "2.0", 80 | 21: "2.1", 81 | 30: "3", 82 | 40: "4S", 83 | 45: "4W", 84 | 50: "5", 85 | 70: "7", 86 | 80: "8", 87 | 85: "8X", 88 | } 89 | 90 | #: This dictionary can be used to produce a text version of the internal codes 91 | #: that Excel uses for error cells. 
92 | error_text_from_code = { 93 | 0x00: '#NULL!', # Intersection of two cell ranges is empty 94 | 0x07: '#DIV/0!', # Division by zero 95 | 0x0F: '#VALUE!', # Wrong type of operand 96 | 0x17: '#REF!', # Illegal or deleted cell reference 97 | 0x1D: '#NAME?', # Wrong function or range name 98 | 0x24: '#NUM!', # Value range overflow 99 | 0x2A: '#N/A', # Argument or function not available 100 | } 101 | 102 | BIFF_FIRST_UNICODE = 80 103 | 104 | XL_WORKBOOK_GLOBALS = WBKBLOBAL = 0x5 105 | XL_WORKBOOK_GLOBALS_4W = 0x100 106 | XL_WORKSHEET = WRKSHEET = 0x10 107 | 108 | XL_BOUNDSHEET_WORKSHEET = 0x00 109 | XL_BOUNDSHEET_CHART = 0x02 110 | XL_BOUNDSHEET_VB_MODULE = 0x06 111 | 112 | # XL_RK2 = 0x7e 113 | XL_ARRAY = 0x0221 114 | XL_ARRAY2 = 0x0021 115 | XL_BLANK = 0x0201 116 | XL_BLANK_B2 = 0x01 117 | XL_BOF = 0x809 118 | XL_BOOLERR = 0x205 119 | XL_BOOLERR_B2 = 0x5 120 | XL_BOUNDSHEET = 0x85 121 | XL_BUILTINFMTCOUNT = 0x56 122 | XL_CF = 0x01B1 123 | XL_CODEPAGE = 0x42 124 | XL_COLINFO = 0x7D 125 | XL_COLUMNDEFAULT = 0x20 # BIFF2 only 126 | XL_COLWIDTH = 0x24 # BIFF2 only 127 | XL_CONDFMT = 0x01B0 128 | XL_CONTINUE = 0x3c 129 | XL_COUNTRY = 0x8C 130 | XL_DATEMODE = 0x22 131 | XL_DEFAULTROWHEIGHT = 0x0225 132 | XL_DEFCOLWIDTH = 0x55 133 | XL_DIMENSION = 0x200 134 | XL_DIMENSION2 = 0x0 135 | XL_EFONT = 0x45 136 | XL_EOF = 0x0a 137 | XL_EXTERNNAME = 0x23 138 | XL_EXTERNSHEET = 0x17 139 | XL_EXTSST = 0xff 140 | XL_FEAT11 = 0x872 141 | XL_FILEPASS = 0x2f 142 | XL_FONT = 0x31 143 | XL_FONT_B3B4 = 0x231 144 | XL_FORMAT = 0x41e 145 | XL_FORMAT2 = 0x1E # BIFF2, BIFF3 146 | XL_FORMULA = 0x6 147 | XL_FORMULA3 = 0x206 148 | XL_FORMULA4 = 0x406 149 | XL_GCW = 0xab 150 | XL_HLINK = 0x01B8 151 | XL_QUICKTIP = 0x0800 152 | XL_HORIZONTALPAGEBREAKS = 0x1b 153 | XL_INDEX = 0x20b 154 | XL_INTEGER = 0x2 # BIFF2 only 155 | XL_IXFE = 0x44 # BIFF2 only 156 | XL_LABEL = 0x204 157 | XL_LABEL_B2 = 0x04 158 | XL_LABELRANGES = 0x15f 159 | XL_LABELSST = 0xfd 160 | XL_LEFTMARGIN = 0x26 161 | XL_TOPMARGIN = 
0x28 162 | XL_RIGHTMARGIN = 0x27 163 | XL_BOTTOMMARGIN = 0x29 164 | XL_HEADER = 0x14 165 | XL_FOOTER = 0x15 166 | XL_HCENTER = 0x83 167 | XL_VCENTER = 0x84 168 | XL_MERGEDCELLS = 0xE5 169 | XL_MSO_DRAWING = 0x00EC 170 | XL_MSO_DRAWING_GROUP = 0x00EB 171 | XL_MSO_DRAWING_SELECTION = 0x00ED 172 | XL_MULRK = 0xbd 173 | XL_MULBLANK = 0xbe 174 | XL_NAME = 0x18 175 | XL_NOTE = 0x1c 176 | XL_NUMBER = 0x203 177 | XL_NUMBER_B2 = 0x3 178 | XL_OBJ = 0x5D 179 | XL_PAGESETUP = 0xA1 180 | XL_PALETTE = 0x92 181 | XL_PANE = 0x41 182 | XL_PRINTGRIDLINES = 0x2B 183 | XL_PRINTHEADERS = 0x2A 184 | XL_RK = 0x27e 185 | XL_ROW = 0x208 186 | XL_ROW_B2 = 0x08 187 | XL_RSTRING = 0xd6 188 | XL_SCL = 0x00A0 189 | XL_SHEETHDR = 0x8F # BIFF4W only 190 | XL_SHEETPR = 0x81 191 | XL_SHEETSOFFSET = 0x8E # BIFF4W only 192 | XL_SHRFMLA = 0x04bc 193 | XL_SST = 0xfc 194 | XL_STANDARDWIDTH = 0x99 195 | XL_STRING = 0x207 196 | XL_STRING_B2 = 0x7 197 | XL_STYLE = 0x293 198 | XL_SUPBOOK = 0x1AE # aka EXTERNALBOOK in OOo docs 199 | XL_TABLEOP = 0x236 200 | XL_TABLEOP2 = 0x37 201 | XL_TABLEOP_B2 = 0x36 202 | XL_TXO = 0x1b6 203 | XL_UNCALCED = 0x5e 204 | XL_UNKNOWN = 0xffff 205 | XL_VERTICALPAGEBREAKS = 0x1a 206 | XL_WINDOW2 = 0x023E 207 | XL_WINDOW2_B2 = 0x003E 208 | XL_WRITEACCESS = 0x5C 209 | XL_WSBOOL = XL_SHEETPR 210 | XL_XF = 0xe0 211 | XL_XF2 = 0x0043 # BIFF2 version of XF record 212 | XL_XF3 = 0x0243 # BIFF3 version of XF record 213 | XL_XF4 = 0x0443 # BIFF4 version of XF record 214 | 215 | boflen = {0x0809: 8, 0x0409: 6, 0x0209: 6, 0x0009: 4} 216 | bofcodes = (0x0809, 0x0409, 0x0209, 0x0009) 217 | 218 | XL_FORMULA_OPCODES = (0x0006, 0x0406, 0x0206) 219 | 220 | _cell_opcode_list = [ 221 | XL_BOOLERR, 222 | XL_FORMULA, 223 | XL_FORMULA3, 224 | XL_FORMULA4, 225 | XL_LABEL, 226 | XL_LABELSST, 227 | XL_MULRK, 228 | XL_NUMBER, 229 | XL_RK, 230 | XL_RSTRING, 231 | ] 232 | _cell_opcode_dict = {} 233 | for _cell_opcode in _cell_opcode_list: 234 | _cell_opcode_dict[_cell_opcode] = 1 235 | 236 | def 
is_cell_opcode(c): 237 | return c in _cell_opcode_dict 238 | 239 | def upkbits(tgt_obj, src, manifest, local_setattr=setattr): 240 | for n, mask, attr in manifest: 241 | local_setattr(tgt_obj, attr, (src & mask) >> n) 242 | 243 | def upkbitsL(tgt_obj, src, manifest, local_setattr=setattr, local_int=int): 244 | for n, mask, attr in manifest: 245 | local_setattr(tgt_obj, attr, local_int((src & mask) >> n)) 246 | 247 | def unpack_string(data, pos, encoding, lenlen=1): 248 | nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0] 249 | pos += lenlen 250 | return unicode(data[pos:pos+nchars], encoding) 251 | 252 | def unpack_string_update_pos(data, pos, encoding, lenlen=1, known_len=None): 253 | if known_len is not None: 254 | # On a NAME record, the length byte is detached from the front of the string. 255 | nchars = known_len 256 | else: 257 | nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0] 258 | pos += lenlen 259 | newpos = pos + nchars 260 | return (unicode(data[pos:newpos], encoding), newpos) 261 | 262 | def unpack_unicode(data, pos, lenlen=2): 263 | "Return unicode_strg" 264 | nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0] 265 | if not nchars: 266 | # Ambiguous whether 0-length string should have an "options" byte. 267 | # Avoid crash if missing. 268 | return UNICODE_LITERAL("") 269 | pos += lenlen 270 | options = BYTES_ORD(data[pos]) 271 | pos += 1 272 | # phonetic = options & 0x04 273 | # richtext = options & 0x08 274 | if options & 0x08: 275 | # rt = unpack(' endpos=%d pos=%d endsub=%d substrg=%r\n', 534 | ofs, dlen, base, endpos, pos, endsub, substrg) 535 | break 536 | hexd = ''.join("%02x " % BYTES_ORD(c) for c in substrg) 537 | 538 | chard = '' 539 | for c in substrg: 540 | c = chr(BYTES_ORD(c)) 541 | if c == '\0': 542 | c = '~' 543 | elif not (' ' <= c <= '~'): 544 | c = '?' 
545 | chard += c 546 | if numbered: 547 | num_prefix = "%5d: " % (base+pos-ofs) 548 | 549 | fprintf(fout, "%s %-48s %s\n", num_prefix, hexd, chard) 550 | pos = endsub 551 | 552 | def biff_dump(mem, stream_offset, stream_len, base=0, fout=sys.stdout, unnumbered=False): 553 | pos = stream_offset 554 | stream_end = stream_offset + stream_len 555 | adj = base - stream_offset 556 | dummies = 0 557 | numbered = not unnumbered 558 | num_prefix = '' 559 | while stream_end - pos >= 4: 560 | rc, length = unpack('') 580 | if numbered: 581 | num_prefix = "%5d: " % (adj + pos) 582 | fprintf(fout, "%s%04x %s len = %04x (%d)\n", num_prefix, rc, recname, length, length) 583 | pos += 4 584 | hex_char_dump(mem, pos, length, adj+pos, fout, unnumbered) 585 | pos += length 586 | if dummies: 587 | if numbered: 588 | num_prefix = "%5d: " % (adj + savpos) 589 | fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies) 590 | if pos < stream_end: 591 | if numbered: 592 | num_prefix = "%5d: " % (adj + pos) 593 | fprintf(fout, "%s---- Misc bytes at end ----\n", num_prefix) 594 | hex_char_dump(mem, pos, stream_end-pos, adj + pos, fout, unnumbered) 595 | elif pos > stream_end: 596 | fprintf(fout, "Last dumped record has length (%d) that is too large\n", length) 597 | 598 | def biff_count_records(mem, stream_offset, stream_len, fout=sys.stdout): 599 | pos = stream_offset 600 | stream_end = stream_offset + stream_len 601 | tally = {} 602 | while stream_end - pos >= 4: 603 | rc, length = unpack(' 20: # allows for 2**20 bytes i.e. 1MB 98 | print("WARNING: sector size (2**%d) is preposterous; assuming 512 and continuing ..." 99 | % ssz, file=logfile) 100 | ssz = 9 101 | if sssz > ssz: 102 | print("WARNING: short stream sector size (2**%d) is preposterous; assuming 64 and continuing ..." 
103 | % sssz, file=logfile) 104 | sssz = 6 105 | self.sec_size = sec_size = 1 << ssz 106 | self.short_sec_size = 1 << sssz 107 | if self.sec_size != 512 or self.short_sec_size != 64: 108 | print("@@@@ sec_size=%d short_sec_size=%d" % (self.sec_size, self.short_sec_size), file=logfile) 109 | ( 110 | SAT_tot_secs, self.dir_first_sec_sid, _unused, self.min_size_std_stream, 111 | SSAT_first_sec_sid, SSAT_tot_secs, 112 | MSATX_first_sec_sid, MSATX_tot_secs, 113 | ) = unpack(' 1: 153 | print('MSATX: sid=%d (0x%08X)' % (sid, sid), file=logfile) 154 | if sid >= mem_data_secs: 155 | msg = "MSAT extension: accessing sector %d but only %d in file" % (sid, mem_data_secs) 156 | if DEBUG > 1: 157 | print(msg, file=logfile) 158 | break 159 | raise CompDocError(msg) 160 | elif sid < 0: 161 | raise CompDocError("MSAT extension: invalid sector id: %d" % sid) 162 | if seen[sid]: 163 | raise CompDocError("MSAT corruption: seen[%d] == %d" % (sid, seen[sid])) 164 | seen[sid] = 1 165 | actual_MSATX_sectors += 1 166 | if DEBUG and actual_MSATX_sectors > expected_MSATX_sectors: 167 | print("[1]===>>>", mem_data_secs, nent, SAT_sectors_reqd, expected_MSATX_sectors, actual_MSATX_sectors, file=logfile) 168 | offset = 512 + sec_size * sid 169 | MSAT.extend(unpack(fmt, mem[offset:offset+sec_size])) 170 | sid = MSAT.pop() # last sector id is sid of next sector in the chain 171 | 172 | if DEBUG and actual_MSATX_sectors != expected_MSATX_sectors: 173 | print("[2]===>>>", mem_data_secs, nent, SAT_sectors_reqd, expected_MSATX_sectors, actual_MSATX_sectors, file=logfile) 174 | if DEBUG: 175 | print("MSAT: len =", len(MSAT), file=logfile) 176 | dump_list(MSAT, 10, logfile) 177 | # 178 | # === build the SAT === 179 | # 180 | self.SAT = [] 181 | actual_SAT_sectors = 0 182 | dump_again = 0 183 | for msidx in xrange(len(MSAT)): 184 | msid = MSAT[msidx] 185 | if msid in (FREESID, EOCSID): 186 | # Specification: the MSAT array may be padded with trailing FREESID entries. 
187 | # Toleration: a FREESID or EOCSID entry anywhere in the MSAT array will be ignored. 188 | continue 189 | if msid >= mem_data_secs: 190 | if not trunc_warned: 191 | print("WARNING *** File is truncated, or OLE2 MSAT is corrupt!!", file=logfile) 192 | print("INFO: Trying to access sector %d but only %d available" 193 | % (msid, mem_data_secs), file=logfile) 194 | trunc_warned = 1 195 | MSAT[msidx] = EVILSID 196 | dump_again = 1 197 | continue 198 | elif msid < -2: 199 | raise CompDocError("MSAT: invalid sector id: %d" % msid) 200 | if seen[msid]: 201 | raise CompDocError("MSAT extension corruption: seen[%d] == %d" % (msid, seen[msid])) 202 | seen[msid] = 2 203 | actual_SAT_sectors += 1 204 | if DEBUG and actual_SAT_sectors > SAT_sectors_reqd: 205 | print("[3]===>>>", mem_data_secs, nent, SAT_sectors_reqd, expected_MSATX_sectors, actual_MSATX_sectors, actual_SAT_sectors, msid, file=logfile) 206 | offset = 512 + sec_size * msid 207 | self.SAT.extend(unpack(fmt, mem[offset:offset+sec_size])) 208 | 209 | if DEBUG: 210 | print("SAT: len =", len(self.SAT), file=logfile) 211 | dump_list(self.SAT, 10, logfile) 212 | # print >> logfile, "SAT ", 213 | # for i, s in enumerate(self.SAT): 214 | # print >> logfile, "entry: %4d offset: %6d, next entry: %4d" % (i, 512 + sec_size * i, s) 215 | # print >> logfile, "%d:%d " % (i, s), 216 | print(file=logfile) 217 | if DEBUG and dump_again: 218 | print("MSAT: len =", len(MSAT), file=logfile) 219 | dump_list(MSAT, 10, logfile) 220 | for satx in xrange(mem_data_secs, len(self.SAT)): 221 | self.SAT[satx] = EVILSID 222 | print("SAT: len =", len(self.SAT), file=logfile) 223 | dump_list(self.SAT, 10, logfile) 224 | # 225 | # === build the directory === 226 | # 227 | dbytes = self._get_stream( 228 | self.mem, 512, self.SAT, self.sec_size, self.dir_first_sec_sid, 229 | name="directory", seen_id=3) 230 | dirlist = [] 231 | did = -1 232 | for pos in xrange(0, len(dbytes), 128): 233 | did += 1 234 | dirlist.append(DirNode(did, 
dbytes[pos:pos+128], 0, logfile)) 235 | self.dirlist = dirlist 236 | _build_family_tree(dirlist, 0, dirlist[0].root_DID) # and stand well back ... 237 | if DEBUG: 238 | for d in dirlist: 239 | d.dump(DEBUG) 240 | # 241 | # === get the SSCS === 242 | # 243 | sscs_dir = self.dirlist[0] 244 | assert sscs_dir.etype == 5 # root entry 245 | if sscs_dir.first_SID < 0 or sscs_dir.tot_size == 0: 246 | # Problem reported by Frank Hoffsuemmer: some software was 247 | # writing -1 instead of -2 (EOCSID) for the first_SID 248 | # when the SCCS was empty. Not having EOCSID caused assertion 249 | # failure in _get_stream. 250 | # Solution: avoid calling _get_stream in any case when the 251 | # SCSS appears to be empty. 252 | self.SSCS = "" 253 | else: 254 | self.SSCS = self._get_stream( 255 | self.mem, 512, self.SAT, sec_size, sscs_dir.first_SID, 256 | sscs_dir.tot_size, name="SSCS", seen_id=4) 257 | # if DEBUG: print >> logfile, "SSCS", repr(self.SSCS) 258 | # 259 | # === build the SSAT === 260 | # 261 | self.SSAT = [] 262 | if SSAT_tot_secs > 0 and sscs_dir.tot_size == 0: 263 | print("WARNING *** OLE2 inconsistency: SSCS size is 0 but SSAT size is non-zero", file=logfile) 264 | if sscs_dir.tot_size > 0: 265 | sid = SSAT_first_sec_sid 266 | nsecs = SSAT_tot_secs 267 | while sid >= 0 and nsecs > 0: 268 | if seen[sid]: 269 | raise CompDocError("SSAT corruption: seen[%d] == %d" % (sid, seen[sid])) 270 | seen[sid] = 5 271 | nsecs -= 1 272 | start_pos = 512 + sid * sec_size 273 | news = list(unpack(fmt, mem[start_pos:start_pos+sec_size])) 274 | self.SSAT.extend(news) 275 | sid = self.SAT[sid] 276 | if DEBUG: print("SSAT last sid %d; remaining sectors %d" % (sid, nsecs), file=logfile) 277 | assert nsecs == 0 and sid == EOCSID 278 | if DEBUG: 279 | print("SSAT", file=logfile) 280 | dump_list(self.SSAT, 10, logfile) 281 | if DEBUG: 282 | print("seen", file=logfile) 283 | dump_list(seen, 20, logfile) 284 | 285 | def _get_stream(self, mem, base, sat, sec_size, start_sid, size=None, 
name='', seen_id=None): 286 | # print >> self.logfile, "_get_stream", base, sec_size, start_sid, size 287 | sectors = [] 288 | s = start_sid 289 | if size is None: 290 | # nothing to check against 291 | while s >= 0: 292 | if seen_id is not None: 293 | if self.seen[s]: 294 | raise CompDocError("%s corruption: seen[%d] == %d" % (name, s, self.seen[s])) 295 | self.seen[s] = seen_id 296 | start_pos = base + s * sec_size 297 | sectors.append(mem[start_pos:start_pos+sec_size]) 298 | try: 299 | s = sat[s] 300 | except IndexError: 301 | raise CompDocError( 302 | "OLE2 stream %r: sector allocation table invalid entry (%d)" % 303 | (name, s) 304 | ) 305 | assert s == EOCSID 306 | else: 307 | todo = size 308 | while s >= 0: 309 | if seen_id is not None: 310 | if self.seen[s]: 311 | raise CompDocError("%s corruption: seen[%d] == %d" % (name, s, self.seen[s])) 312 | self.seen[s] = seen_id 313 | start_pos = base + s * sec_size 314 | grab = sec_size 315 | if grab > todo: 316 | grab = todo 317 | todo -= grab 318 | sectors.append(mem[start_pos:start_pos+grab]) 319 | try: 320 | s = sat[s] 321 | except IndexError: 322 | raise CompDocError( 323 | "OLE2 stream %r: sector allocation table invalid entry (%d)" % 324 | (name, s) 325 | ) 326 | assert s == EOCSID 327 | if todo != 0: 328 | fprintf(self.logfile, 329 | "WARNING *** OLE2 stream %r: expected size %d, actual size %d\n", 330 | name, size, size - todo) 331 | 332 | return b''.join(sectors) 333 | 334 | def _dir_search(self, path, storage_DID=0): 335 | # Return matching DirNode instance, or None 336 | head = path[0] 337 | tail = path[1:] 338 | dl = self.dirlist 339 | for child in dl[storage_DID].children: 340 | if dl[child].name.lower() == head.lower(): 341 | et = dl[child].etype 342 | if et == 2: 343 | return dl[child] 344 | if et == 1: 345 | if not tail: 346 | raise CompDocError("Requested component is a 'storage'") 347 | return self._dir_search(tail, child) 348 | dl[child].dump(1) 349 | raise CompDocError("Requested stream is not a 
'user stream'") 350 | return None 351 | 352 | 353 | def get_named_stream(self, qname): 354 | """ 355 | Interrogate the compound document's directory; return the stream as a 356 | string if found, otherwise return ``None``. 357 | 358 | :param qname: 359 | Name of the desired stream e.g. ``'Workbook'``. 360 | Should be in Unicode or convertible thereto. 361 | """ 362 | d = self._dir_search(qname.split("/")) 363 | if d is None: 364 | return None 365 | if d.tot_size >= self.min_size_std_stream: 366 | return self._get_stream( 367 | self.mem, 512, self.SAT, self.sec_size, d.first_SID, 368 | d.tot_size, name=qname, seen_id=d.DID+6) 369 | else: 370 | return self._get_stream( 371 | self.SSCS, 0, self.SSAT, self.short_sec_size, d.first_SID, 372 | d.tot_size, name=qname + " (from SSCS)", seen_id=None) 373 | 374 | def locate_named_stream(self, qname): 375 | """ 376 | Interrogate the compound document's directory. 377 | 378 | If the named stream is not found, ``(None, 0, 0)`` will be returned. 379 | 380 | If the named stream is found and is contiguous within the original 381 | byte sequence (``mem``) used when the document was opened, 382 | then ``(mem, offset_to_start_of_stream, length_of_stream)`` is returned. 383 | 384 | Otherwise a new string is built from the fragments and 385 | ``(new_string, 0, length_of_stream)`` is returned. 386 | 387 | :param qname: 388 | Name of the desired stream e.g. ``'Workbook'``. 389 | Should be in Unicode or convertible thereto. 
390 | """ 391 | d = self._dir_search(qname.split("/")) 392 | if d is None: 393 | return (None, 0, 0) 394 | if d.tot_size > self.mem_data_len: 395 | raise CompDocError("%r stream length (%d bytes) > file data size (%d bytes)" 396 | % (qname, d.tot_size, self.mem_data_len)) 397 | if d.tot_size >= self.min_size_std_stream: 398 | result = self._locate_stream( 399 | self.mem, 512, self.SAT, self.sec_size, d.first_SID, 400 | d.tot_size, qname, d.DID+6) 401 | if self.DEBUG: 402 | print("\nseen", file=self.logfile) 403 | dump_list(self.seen, 20, self.logfile) 404 | return result 405 | else: 406 | return ( 407 | self._get_stream( 408 | self.SSCS, 0, self.SSAT, self.short_sec_size, d.first_SID, 409 | d.tot_size, qname + " (from SSCS)", None), 410 | 0, 411 | d.tot_size, 412 | ) 413 | 414 | def _locate_stream(self, mem, base, sat, sec_size, start_sid, expected_stream_size, qname, seen_id): 415 | # print >> self.logfile, "_locate_stream", base, sec_size, start_sid, expected_stream_size 416 | s = start_sid 417 | if s < 0: 418 | raise CompDocError("_locate_stream: start_sid (%d) is -ve" % start_sid) 419 | p = -99 # dummy previous SID 420 | start_pos = -9999 421 | end_pos = -8888 422 | slices = [] 423 | tot_found = 0 424 | found_limit = (expected_stream_size + sec_size - 1) // sec_size 425 | while s >= 0: 426 | if self.seen[s]: 427 | if not self.ignore_workbook_corruption: 428 | print("_locate_stream(%s): seen" % qname, file=self.logfile); dump_list(self.seen, 20, self.logfile) 429 | raise CompDocError("%s corruption: seen[%d] == %d" % (qname, s, self.seen[s])) 430 | self.seen[s] = seen_id 431 | tot_found += 1 432 | if tot_found > found_limit: 433 | # Note: expected size rounded up to higher sector 434 | raise CompDocError( 435 | "%s: size exceeds expected %d bytes; corrupt?" 
436 | % (qname, found_limit * sec_size) 437 | ) 438 | if s == p+1: 439 | # contiguous sectors 440 | end_pos += sec_size 441 | else: 442 | # start new slice 443 | if p >= 0: 444 | # not first time 445 | slices.append((start_pos, end_pos)) 446 | start_pos = base + s * sec_size 447 | end_pos = start_pos + sec_size 448 | p = s 449 | s = sat[s] 450 | assert s == EOCSID 451 | assert tot_found == found_limit 452 | # print >> self.logfile, "_locate_stream(%s): seen" % qname; dump_list(self.seen, 20, self.logfile) 453 | if not slices: 454 | # The stream is contiguous ... just what we like! 455 | return (mem, start_pos, expected_stream_size) 456 | slices.append((start_pos, end_pos)) 457 | # print >> self.logfile, "+++>>> %d fragments" % len(slices) 458 | return (b''.join(mem[start_pos:end_pos] for start_pos, end_pos in slices), 0, expected_stream_size) 459 | 460 | # ========================================================================================== 461 | def x_dump_line(alist, stride, f, dpos, equal=0): 462 | print("%5d%s" % (dpos, " ="[equal]), end=' ', file=f) 463 | for value in alist[dpos:dpos + stride]: 464 | print(str(value), end=' ', file=f) 465 | print(file=f) 466 | 467 | def dump_list(alist, stride, f=sys.stdout): 468 | def _dump_line(dpos, equal=0): 469 | print("%5d%s" % (dpos, " ="[equal]), end=' ', file=f) 470 | for value in alist[dpos:dpos + stride]: 471 | print(str(value), end=' ', file=f) 472 | print(file=f) 473 | pos = None 474 | oldpos = None 475 | for pos in xrange(0, len(alist), stride): 476 | if oldpos is None: 477 | _dump_line(pos) 478 | oldpos = pos 479 | elif alist[pos:pos+stride] != alist[oldpos:oldpos+stride]: 480 | if pos - oldpos > stride: 481 | _dump_line(pos - stride, equal=1) 482 | _dump_line(pos) 483 | oldpos = pos 484 | if oldpos is not None and pos is not None and pos != oldpos: 485 | _dump_line(pos, equal=1) 486 | -------------------------------------------------------------------------------- /xlrd/info.py: 
-------------------------------------------------------------------------------- 1 | __version__ = __VERSION__ = "2.0.1" 2 | -------------------------------------------------------------------------------- /xlrd/timemachine.py: -------------------------------------------------------------------------------- 1 | ## 2 | #

Copyright (c) 2006-2012 Stephen John Machin, Lingfo Pty Ltd

3 | #

This module is part of the xlrd package, which is released under a BSD-style licence.

4 | ## 5 | 6 | # timemachine.py -- adaptation for single codebase. 7 | # Currently supported: 2.6 to 2.7, 3.2+ 8 | # usage: from timemachine import * 9 | 10 | from __future__ import print_function 11 | 12 | import sys 13 | 14 | python_version = sys.version_info[:2] # e.g. version 2.6 -> (2, 6) 15 | 16 | if python_version >= (3, 0): 17 | # Python 3 18 | BYTES_LITERAL = lambda x: x.encode('latin1') 19 | UNICODE_LITERAL = lambda x: x 20 | BYTES_ORD = lambda byte: byte 21 | from io import BytesIO as BYTES_IO 22 | def fprintf(f, fmt, *vargs): 23 | fmt = fmt.replace("%r", "%a") 24 | if fmt.endswith('\n'): 25 | print(fmt[:-1] % vargs, file=f) 26 | else: 27 | print(fmt % vargs, end=' ', file=f) 28 | EXCEL_TEXT_TYPES = (str, bytes, bytearray) # xlwt: isinstance(obj, EXCEL_TEXT_TYPES) 29 | REPR = ascii 30 | xrange = range 31 | unicode = lambda b, enc: b.decode(enc) 32 | ensure_unicode = lambda s: s 33 | unichr = chr 34 | else: 35 | # Python 2 36 | BYTES_LITERAL = lambda x: x 37 | UNICODE_LITERAL = lambda x: x.decode('latin1') 38 | BYTES_ORD = ord 39 | from cStringIO import StringIO as BYTES_IO 40 | def fprintf(f, fmt, *vargs): 41 | if fmt.endswith('\n'): 42 | print(fmt[:-1] % vargs, file=f) 43 | else: 44 | print(fmt % vargs, end=' ', file=f) 45 | try: 46 | EXCEL_TEXT_TYPES = basestring # xlwt: isinstance(obj, EXCEL_TEXT_TYPES) 47 | except NameError: 48 | EXCEL_TEXT_TYPES = (str, unicode) 49 | REPR = repr 50 | xrange = xrange 51 | # following used only to overcome 2.x ElementTree gimmick which 52 | # returns text as `str` if it's ascii, otherwise `unicode` 53 | ensure_unicode = unicode # used only in xlsx.py 54 | -------------------------------------------------------------------------------- /xlrd/xldate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) 2005-2008 Stephen John Machin, Lingfo Pty Ltd 3 | # This module is part of the xlrd package, which is released under a 4 | # BSD-style 
# licence.
# No part of the content of this file was derived from the works of David Giffin.
"""
Tools for working with dates and times in Excel files.

The conversion from ``days`` to ``(year, month, day)`` starts with
an integral "julian day number" aka JDN.
FWIW:

- JDN 0 corresponds to noon on Monday November 24 in Gregorian year -4713.

More importantly:

- Noon on Gregorian 1900-03-01 (day 61 in the 1900-based system) is JDN 2415080.0
- Noon on Gregorian 1904-01-02 (day 1 in the 1904-based system) is JDN 2416482.0

"""
import datetime

# Offset to add to an Excel day number to obtain a JDN, indexed by datemode
# (0: 1900-based, 1: 1904-based).
_JDN_delta = (2415080 - 61, 2416482 - 1)
assert _JDN_delta[1] - _JDN_delta[0] == 1462

# Pre-calculate the datetime epochs for efficiency.
epoch_1904 = datetime.datetime(1904, 1, 1)
epoch_1900 = datetime.datetime(1899, 12, 31)
epoch_1900_minus_1 = datetime.datetime(1899, 12, 30)

# This is equivalent to 10000-01-01 (indexed by datemode):
_XLDAYS_TOO_LARGE = (2958466, 2958466 - 1462)


class XLDateError(ValueError):
    "A base class for all datetime-related errors."


class XLDateNegative(XLDateError):
    "``xldate < 0.00``"


class XLDateAmbiguous(XLDateError):
    "The 1900 leap-year problem ``(datemode == 0 and 1.0 <= xldate < 61.0)``"


class XLDateTooLarge(XLDateError):
    "Gregorian year 10000 or later"


class XLDateBadDatemode(XLDateError):
    "``datemode`` arg is neither 0 nor 1"


class XLDateBadTuple(XLDateError):
    "A date or time tuple has an out-of-range or otherwise invalid component"


def xldate_as_tuple(xldate, datemode):
    """
    Convert an Excel number (presumed to represent a date, a datetime or a time) into
    a tuple suitable for feeding to datetime or mx.DateTime constructors.

    :param xldate: The Excel number
    :param datemode: 0: 1900-based, 1: 1904-based.
    :raises xlrd.xldate.XLDateNegative:
    :raises xlrd.xldate.XLDateAmbiguous:
    :raises xlrd.xldate.XLDateTooLarge:
    :raises xlrd.xldate.XLDateBadDatemode:
    :raises xlrd.xldate.XLDateError:
    :returns: Gregorian ``(year, month, day, hour, minute, nearest_second)``.

    .. warning::

      When using this function to interpret the contents of a workbook, you
      should pass in the :attr:`~xlrd.book.Book.datemode`
      attribute of that workbook. Whether the workbook has ever been anywhere
      near a Macintosh is irrelevant.

    .. admonition:: Special case

        If ``0.0 <= xldate < 1.0``, it is assumed to represent a time;
        ``(0, 0, 0, hour, minute, second)`` will be returned.

    .. note::

        ``1904-01-01`` is not regarded as a valid date in the ``datemode==1``
        system; its "serial number" is zero.
    """
    if datemode not in (0, 1):
        raise XLDateBadDatemode(datemode)
    if xldate == 0.00:
        return (0, 0, 0, 0, 0, 0)
    if xldate < 0.00:
        raise XLDateNegative(xldate)

    # Split into whole days and a day fraction rounded to the nearest second.
    xldays = int(xldate)
    frac = xldate - xldays
    seconds = int(round(frac * 86400.0))
    assert 0 <= seconds <= 86400
    if seconds == 86400:
        # The fraction rounded up to a full day: carry it into the day count.
        hour = minute = second = 0
        xldays += 1
    else:
        minutes, second = divmod(seconds, 60)
        hour, minute = divmod(minutes, 60)

    if xldays >= _XLDAYS_TOO_LARGE[datemode]:
        raise XLDateTooLarge(xldate)

    if xldays == 0:
        # Time-only value.
        return (0, 0, 0, hour, minute, second)

    if xldays < 61 and datemode == 0:
        raise XLDateAmbiguous(xldate)

    # Integer-only conversion from julian day number to Gregorian y/m/d.
    jdn = xldays + _JDN_delta[datemode]
    yreg = ((((jdn * 4 + 274277) // 146097) * 3 // 4) + jdn + 1363) * 4 + 3
    mp = ((yreg % 1461) // 4) * 535 + 333
    d = ((mp % 16384) // 535) + 1
    mp >>= 14  # same as mp //= 16384
    if mp >= 10:
        return ((yreg // 1461) - 4715, mp - 9, d, hour, minute, second)
    else:
        return ((yreg // 1461) - 4716, mp + 3, d, hour, minute, second)


def xldate_as_datetime(xldate, datemode):
    """
    Convert an Excel date/time number into a :class:`datetime.datetime` object.

    :param xldate: The Excel number
    :param datemode: 0: 1900-based, 1: 1904-based.

    :returns: A :class:`datetime.datetime` object.
    """

    # Set the epoch based on the 1900/1904 datemode.
    if datemode:
        epoch = epoch_1904
    else:
        if xldate < 60:
            epoch = epoch_1900
        else:
            # Workaround Excel 1900 leap year bug by adjusting the epoch.
            epoch = epoch_1900_minus_1

    # The integer part of the Excel date stores the number of days since
    # the epoch and the fractional part stores the percentage of the day.
    days = int(xldate)
    fraction = xldate - days

    # Get the integer and decimal seconds in Excel's millisecond resolution.
    seconds = int(round(fraction * 86400000.0))
    seconds, milliseconds = divmod(seconds, 1000)

    # timedelta(days, seconds, microseconds, milliseconds)
    return epoch + datetime.timedelta(days, seconds, 0, milliseconds)


# === conversions from date/time to xl numbers

def _leap(y):
    "Return 1 if ``y`` is a Gregorian leap year, otherwise 0."
    if y % 4:
        return 0
    if y % 100:
        return 1
    if y % 400:
        return 0
    return 1


# Days per month for a non-leap year; index 0 is unused so that the
# month number can be used directly as the index.
_days_in_month = (None, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)


def xldate_from_date_tuple(date_tuple, datemode):
    """
    Convert a date tuple (year, month, day) to an Excel date.

    :param date_tuple: ``(year, month, day)`` where ``year`` is the Gregorian
                       year, ``1 <= month <= 12`` and
                       ``1 <= day <= last day of that (year, month)``.
                       ``(0, 0, 0)`` is accepted and converts to ``0.0``.
    :param datemode: 0: 1900-based, 1: 1904-based.
    :raises xlrd.xldate.XLDateAmbiguous:
    :raises xlrd.xldate.XLDateBadDatemode:
    :raises xlrd.xldate.XLDateBadTuple:
      ``(year, month, day)`` is too early/late or has invalid component(s)
    :raises xlrd.xldate.XLDateError:
    :returns: The Excel day number as a ``float``.
    """
    year, month, day = date_tuple

    if datemode not in (0, 1):
        raise XLDateBadDatemode(datemode)

    if year == 0 and month == 0 and day == 0:
        return 0.00

    if not (1900 <= year <= 9999):
        raise XLDateBadTuple("Invalid year: %r" % ((year, month, day),))
    if not (1 <= month <= 12):
        raise XLDateBadTuple("Invalid month: %r" % ((year, month, day),))
    if (day < 1 or
            (day > _days_in_month[month] and not (day == 29 and month == 2 and _leap(year)))):
        raise XLDateBadTuple("Invalid day: %r" % ((year, month, day),))

    # Integer-only conversion from Gregorian y/m/d to julian day number,
    # using a year that starts in March (so January/February belong to
    # the previous year).
    Yp = year + 4716
    M = month
    if M <= 2:
        Yp = Yp - 1
        Mp = M + 9
    else:
        Mp = M - 3
    jdn = (1461 * Yp // 4) + ((979 * Mp + 16) // 32) + \
        day - 1364 - (((Yp + 184) // 100) * 3 // 4)
    xldays = jdn - _JDN_delta[datemode]
    if xldays <= 0:
        raise XLDateBadTuple("Invalid (year, month, day): %r" % ((year, month, day),))
    if xldays < 61 and datemode == 0:
        raise XLDateAmbiguous("Before 1900-03-01: %r" % ((year, month, day),))
    return float(xldays)


def xldate_from_time_tuple(time_tuple):
    """
    Convert a time tuple ``(hour, minute, second)`` to an Excel "date" value
    (fraction of a day).

    :param time_tuple: ``(hour, minute, second)`` where ``0 <= hour < 24``,
                       ``0 <= minute < 60`` and ``0 <= second < 60``.
    :raises xlrd.xldate.XLDateBadTuple: Out-of-range hour, minute, or second
    :returns: The fraction of a day as a ``float``.
    """
    hour, minute, second = time_tuple
    if 0 <= hour < 24 and 0 <= minute < 60 and 0 <= second < 60:
        return ((second / 60.0 + minute) / 60.0 + hour) / 24.0
    raise XLDateBadTuple("Invalid (hour, minute, second): %r" % ((hour, minute, second),))


def xldate_from_datetime_tuple(datetime_tuple, datemode):
    """
    Convert a datetime tuple ``(year, month, day, hour, minute, second)`` to an
    Excel date value.
    For more details, refer to other xldate_from_*_tuple functions.

    :param datetime_tuple: ``(year, month, day, hour, minute, second)``
    :param datemode: 0: 1900-based, 1: 1904-based.
    """
    return (
        xldate_from_date_tuple(datetime_tuple[:3], datemode) +
        xldate_from_time_tuple(datetime_tuple[3:])
    )