├── .carthorse.yml
├── .circleci
    └── config.yml
├── .coveragerc
├── .gitignore
├── .readthedocs.yml
├── CHANGELOG.rst
├── LICENSE
├── MANIFEST.in
├── README.rst
├── docs
    ├── Makefile
    ├── acknowledgements.rst
    ├── api.rst
    ├── changes.rst
    ├── conf.py
    ├── dates.rst
    ├── development.rst
    ├── formatting.rst
    ├── index.rst
    ├── licenses.rst
    ├── make.bat
    ├── on_demand.rst
    ├── references.rst
    └── unicode.rst
├── scripts
    └── runxlrd.py
├── setup.cfg
├── setup.py
├── tests
    ├── __init__.py
    ├── helpers.py
    ├── samples
    │   ├── Formate.xls
    │   ├── biff4_no_format_no_window2.xls
    │   ├── corrupted_error.xls
    │   ├── formula_test_names.xls
    │   ├── formula_test_sjmachin.xls
    │   ├── issue20.xls
    │   ├── namesdemo.xls
    │   ├── picture_in_cell.xls
    │   ├── profiles.xls
    │   ├── ragged.xls
    │   ├── sample.ods
    │   ├── sample.txt
    │   ├── sample.xlsb
    │   ├── sample.xlsx
    │   ├── sample.zip
    │   └── xf_class.xls
    ├── test_biffh.py
    ├── test_cell.py
    ├── test_formats.py
    ├── test_formulas.py
    ├── test_ignore_workbook_corruption_error.py
    ├── test_inspect.py
    ├── test_missing_records.py
    ├── test_open_workbook.py
    ├── test_sheet.py
    ├── test_workbook.py
    ├── test_xldate.py
    └── test_xldate_to_datetime.py
└── xlrd
    ├── __init__.py
    ├── biffh.py
    ├── book.py
    ├── compdoc.py
    ├── formatting.py
    ├── formula.py
    ├── info.py
    ├── sheet.py
    ├── timemachine.py
    └── xldate.py


/.carthorse.yml:
--------------------------------------------------------------------------------
 1 | carthorse:
 2 |   version-from: setup.py
 3 |   tag-format: "{version}"
 4 |   when:
 5 |     - version-not-tagged
 6 |   actions:
 7 |     - run: "sudo pip install -e .[build]"
 8 |     - run: "twine upload -u __token__ -p $PYPI_TOKEN dist/*"
 9 |     - create-tag
10 | 


--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
 1 | version: 2.1
 2 | 
 3 | orbs:
 4 |   python: cjw296/python-ci@2.1
 5 | 
 6 | jobs:
 7 |   coverage:
 8 |     docker:
 9 |       - image: circleci/python:3.8
10 |     steps:
11 |       - checkout
12 |       - attach_workspace:
13 |           at: coverage_output
14 |       - run:
15 |           name: "Check coverage"
16 |           command: |
17 |             sudo pip install coverage
18 |             coverage combine coverage_output/
19 |             bash <(curl -s https://codecov.io/bash)
20 | 
21 |   check-package:
22 |     parameters:
23 |       image:
24 |         type: string
25 |     docker:
26 |       - image: << parameters.image >>
27 |     steps:
28 |       - python/check-package:
29 |           package: "xlrd"
30 |           test:
31 |           - run:
32 |               name: "Check Import"
33 |               command: python -c "import xlrd"
34 |           - run:
35 |               name: "Check no XLS in wheel"
36 |               command: "! unzip -l dist/*.whl | egrep '.xlsx?$'"
37 |           - run:
38 |               name: "Check no XLS in source dist"
39 |               command: "! tar tzf dist/*.tar.gz | egrep '.xlsx?$'"
40 | 
41 | common: &common
42 |   jobs:
43 | 
44 |     - python/pip-run-tests:
45 |         matrix:
46 |           parameters:
47 |             image:
48 |               - circleci/python:2.7
49 |               - circleci/python:3.6
50 |               - circleci/python:3.9
51 | 
52 |     - coverage:
53 |         name: coverage
54 |         requires:
55 |           - python/pip-run-tests
56 | 
57 |     - python/pip-docs:
58 |         name: docs
59 |         requires:
60 |           - coverage
61 | 
62 |     - python/pip-setuptools-build-package:
63 |         name: package
64 |         requires:
65 |           - docs
66 |         filters:
67 |           branches:
68 |             only: master
69 | 
70 |     - check-package:
71 |         matrix:
72 |           parameters:
73 |             image:
74 |               - circleci/python:2.7
75 |               - circleci/python:3.9
76 |         requires:
77 |           - package
78 | 
79 |     - python/release:
80 |         name: release
81 |         config: .carthorse.yml
82 |         requires:
83 |           - check-package
84 |         filters:
85 |           branches:
86 |             only: master
87 | 
88 | workflows:
89 |   push:
90 |     <<: *common
91 |   periodic:
92 |     <<: *common
93 |     triggers:
94 |       - schedule:
95 |           cron: "0 0 11 * *"
96 |           filters:
97 |             branches:
98 |               only: master
99 | 


--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
 1 | [run]
 2 | source = xlrd,scripts,tests
 3 | 
 4 | [report]
 5 | exclude_lines =
 6 |   # the original exclude
 7 |   pragma: no cover
 8 | 
 9 |   # debug stuff
10 |   if DEBUG:
11 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /build
 2 | /dist
 3 | *.egg-info
 4 | build/
 5 | _build/
 6 | *.pyc
 7 | /.coverage
 8 | /.tox
 9 | /*.xml
10 | /htmlcov
11 | MANIFEST
12 | /bin
13 | .Python
14 | /include
15 | /lib
16 | 


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | python:
 3 |   version: 3.8
 4 |   install:
 5 |     - method: pip
 6 |       path: .
 7 |       extra_requirements:
 8 |         - docs
 9 | sphinx:
10 |   fail_on_warning: true
11 | 


--------------------------------------------------------------------------------
/CHANGELOG.rst:
--------------------------------------------------------------------------------
  1 | Changes
  2 | =======
  3 | 
  4 | 2.0.1 (11 December 2020)
  5 | ------------------------
  6 | 
  7 | - Use the README as the long description on PyPI.
  8 | 
  9 | 2.0.0 (11 December 2020)
 10 | ------------------------
 11 | 
 12 | - Remove support for anything other than ``.xls`` files.
 13 | - Remove support for ``psyco``.
 14 | - Change the default encoding used when no ``CODEPAGE`` record can be found
 15 |   from ``ascii`` to ``iso-8859-1``.
 16 | - Add support for iterating over :class:`~xlrd.book.Book` objects.
 17 | - Add support for item access from :class:`~xlrd.book.Book` objects,
 18 |   where integer indices and string sheet names are supported.
 19 | - Non-unicode spaces are now stripped from the "last author" information.
 20 | - Workbook corruption errors can now be ignored using the
 21 |   ``ignore_workbook_corruption`` option to :class:`~xlrd.open_workbook`.
 22 | - Handle ``WRITEACCESS`` records with invalid trailing characters.
 23 | - Officially support Python 3.8 and 3.9.
 24 | 
 25 | Thanks to the following for their contributions to this release:
 26 | 
 27 | - Jon Dufresne
 28 | - Tore Lundqvist
 29 | - nayyarv
 30 | - Michael Davis
 31 | - skonik
 32 | 
 33 | 1.2.0 (15 December 2018)
 34 | ------------------------
 35 | 
 36 | - Added support for Python 3.7.
 37 | - Added optional support for defusedxml to help mitigate exploits.
 38 | - Automatically convert ``~`` in file paths to the current user's home
 39 |   directory.
 40 | - Removed ``examples`` directory from the installed package. They are still
 41 |   available in the source distribution.
 42 | - Fixed ``time.clock()`` deprecation warning.
 43 | 
 44 | 1.1.0 (22 August 2017)
 45 | ----------------------
 46 | 
 47 | - Fix for parsing of merged cells containing a single cell reference in xlsx
 48 |   files.
 49 | 
 50 | - Fix for "invalid literal for int() with base 10: 'true'" when reading some
 51 |   xlsx files.
 52 | 
 53 | - Make xldate_as_datetime available to import direct from xlrd.
 54 | 
 55 | - Build universal wheels.
 56 | 
 57 | - Sphinx documentation.
 58 | 
 59 | - Document the problem with XML vulnerabilities in xlsx files and mitigation
 60 |   measures.
 61 | 
 62 | - Fix :class:`NameError` on ``has_defaults is not defined``.
 63 | 
 64 | - Some whitespace and code style tweaks.
 65 | 
 66 | - Make example in README compatible with both Python 2 and 3.
 67 | 
 68 | - Add default value for cells containing errors that caused parsing of some
 69 |   xlsx files to fail.
 70 | 
 71 | - Add Python 3.6 to the list of supported Python versions, drop 3.3 and 2.6.
 72 | 
 73 | - Use generator expressions to avoid unnecessary lists in memory.
 74 | 
 75 | - Document unicode encoding used in Excel files from Excel 97 onwards.
 76 | 
 77 | - Report hyperlink errors in R1C1 syntax.
 78 | 
 79 | Thanks to the following for their contributions to this release:
 80 | 
 81 | - icereval@gmail.com
 82 | - Daniel Rech
 83 | - Ville Skyttä
 84 | - Yegor Yefremov
 85 | - Maxime Lorant
 86 | - Alexandr N Zamaraev
 87 | - Zhaorong Ma
 88 | - Jon Dufresne
 89 | - Chris McIntyre
 90 | - coltleese@gmail.com
 91 | - Ivan Masá
 92 | 
 93 | 1.0.0 (2 June 2016)
 94 | -------------------
 95 | 
 96 | - Official support, such as it is, is now for 2.6, 2.7, 3.3+
 97 | 
 98 | - Fixes a bug in looking up non-lowercase sheet filenames by ensuring that the
 99 |   sheet targets are transformed the same way as the component_names dict keys.
100 | 
101 | - Fixes a bug for ``ragged_rows=False`` when merged cells increases the number
102 |   of columns in the sheet. This requires all rows to be extended to ensure equal
103 |   row lengths that match the number of columns in the sheet.
104 | 
105 | - Fixes to enable reading of SAP-generated .xls files.
106 | 
107 | - support BIFF4 files with missing FORMAT records.
108 | 
109 | - support files with missing WINDOW2 record.
110 | 
111 | - Empty cells are now always unicode strings, they were a bytestring on
112 |   Python 2 and a unicode string on Python 3.
113 | 
114 | - Fix for ``<cell>`` ``inlineStr`` attribute without ``<si>`` child.
115 | 
116 | - Fix for a zoom of ``None`` causing problems on Python 3.
117 | 
118 | - Fix parsing of bad dimensions.
119 | 
120 | - Fix xlsx sheet to comments relationship.
121 | 
122 | Thanks to the following for their contributions to this release:
123 | 
124 | - Lars-Erik Hannelius
125 | - Deshi Xiao
126 | - Stratos Moro
127 | - Volker Diels-Grabsch
128 | - John McNamara
129 | - Ville Skyttä
130 | - Patrick Fuller
131 | - Dragon Dave McKee
132 | - Gunnlaugur Þór Briem
133 | 
134 | 0.9.4 (14 July 2015)
135 | --------------------
136 | 
137 | - Automated tests are now run on Python 3.4
138 | 
139 | - Use ``ElementTree.iter()`` if available, instead of the deprecated
140 |   ``getiterator()`` when parsing xlsx files.
141 | 
142 | - Fix #106 : Exception Value: unorderable types: Name() < Name()
143 | 
144 | - Create row generator expression with Sheet.get_rows()
145 | 
146 | - Fix for forward slash file separator and lowercase names within xlsx
147 |   internals.
148 | 
149 | Thanks to the following for their contributions to this release:
150 | 
151 | - Corey Farwell
152 | - Jonathan Kamens
153 | - Deepak N
154 | - Brandon R. Stoner
155 | - John McNamara
156 | 
157 | 0.9.3 (8 Apr 2014)
158 | ------------------
159 | 
160 | - Github issue #49
161 | 
162 | - Github issue #64 - skip meaningless chunk of 4 zero bytes between two
163 |   otherwise-valid BIFF records
164 | 
165 | - Github issue #61 - fix updating of escapement attribute of Font objects read
166 |   from workbooks.
167 | 
168 | - Implemented ``Sheet.visibility`` for xlsx files
169 | 
170 | - Ignore anchors (``$``) in cell references
171 | 
172 | - Dropped support for Python 2.5 and earlier, Python 2.6 is now the earliest
173 |   Python release supported
174 | 
175 | - Read xlsx merged cell elements.
176 | 
177 | - Read cell comments in .xlsx files.
178 | 
179 | - Added xldate_as_datetime() function to convert from Excel
180 |   serial date/time to datetime.datetime object.
181 | 
182 | Thanks to the following for their contributions to this release:
183 | 
184 | - John Machin
185 | - Caleb Epstein
186 | - Martin Panter
187 | - John McNamara
188 | - Gunnlaugur Þór Briem
189 | - Stephen Lewis
190 | 
191 | 
192 | 0.9.2 (9 Apr 2013)
193 | ------------------
194 | 
195 | - Fix some packaging issues that meant docs and examples were missing from the tarball.
196 | 
197 | - Fixed a small but serious regression that caused problems opening .xlsx files.
198 | 
199 | 0.9.1 (5 Apr 2013)
200 | ------------------
201 | 
202 | - Many bug fixes in Python 3 support.
203 | - Fix bug where ragged rows needed fixing when formatting info was being parsed.
204 | - Improved handling of aberrant Excel 4.0 Worksheet files.
205 | - Various bug fixes.
206 | - Simplify a lot of the distribution packaging.
207 | - Remove unused and duplicate imports.
208 | 
209 | Thanks to the following for their contributions to this release:
210 | 
211 | - Thomas Kluyver
212 | 
213 | 0.9.0 (31 Jan 2013)
214 | -------------------
215 | 
216 | - Support for Python 3.2+
217 | - Many new unit tests added.
218 | - Continuous integration tests are now run.
219 | - Various bug fixes.
220 | 
221 | Special thanks to Thomas Kluyver and Martin Panter for their work on
222 | Python 3 compatibility.
223 | 
224 | Thanks to Manfred Moitzi for re-licensing his unit tests so we could include
225 | them.
226 | 
227 | Thanks to the following for their contributions to this release:
228 | 
229 | - "holm"
230 | - Victor Safronovich
231 | - Ross Jones
232 | 
233 | 0.8.0 (22 Aug 2012)
234 | -------------------
235 | 
236 | - More work-arounds for broken source files.
237 | - Support for reading .xlsx files.
238 | - Drop support for Python 2.5 and older.
239 | 
240 | 0.7.8 (7 June 2012)
241 | -------------------
242 | 
243 | - Ignore superfluous zero bytes at end of xls OBJECT record.
244 | - Fix assertion error when reading file with xlwt-written bitmap.
245 | 
246 | 0.7.7 (13 Apr 2012)
247 | -------------------
248 | 
249 | - More packaging changes, this time to support 2to3.
250 | 
251 | 0.7.6 (3 Apr 2012)
252 | ------------------
253 | 
254 | - Fix more packaging issues.
255 | 
256 | 0.7.5 (3 Apr 2012)
257 | ------------------
258 | - Fix packaging issue that missed ``version.txt`` from the distributions.
259 | 
260 | 0.7.4 (2 Apr 2012)
261 | ------------------
262 | 
263 | - More tolerance of out-of-spec files.
264 | - Fix bugs reading long text formula results.
265 | 
266 | 0.7.3 (28 Feb 2012)
267 | -------------------
268 | 
269 | - Packaging and documentation updates.
270 | 
271 | 0.7.2 (21 Feb 2012)
272 | -------------------
273 | 
274 | - Tolerant handling of files with extra zero bytes at end of NUMBER record.
275 |   Sample provided by Jan Kraus.
276 | - Added access to cell notes/comments. Many cross-references added to Sheet
277 |   class docs.
278 | - Added code to extract hyperlink (HLINK) records. Based on a patch supplied by
279 |   John Morrisey.
280 | - Extraction of rich text formatting info based on code supplied by
281 |   Nathan van Gheem.
282 | - added handling of BIFF2 WINDOW2 record.
283 | - Included modified version of page breaks patch from Sam Listopad.
284 | - Added reading of the PANE record.
285 | - Reading SCL record. New attribute ``Sheet.scl_mag_factor``.
286 | - Lots of bug fixes.
287 | - Added ``ragged_rows`` functionality.
288 | 
289 | 0.7.1 (31 May 2009)
290 | -------------------
291 | 
292 | - Backed out "slash'n'burn" of sheet resources in unload_sheet().
293 |   Fixed problem with STYLE records on some Mac Excel files.
294 | - quieten warnings
295 | - Integrated on_demand patch by Armando Serrano Lombillo
296 | 
297 | 0.7.0 (11 March 2009)
298 | ---------------------
299 | 
300 | + colname utility function now supports more than 256 columns.
301 | + Fix bug where BIFF record type 0x806 was being regarded as a formula
302 |   opcode.
303 | + Ignore PALETTE record when formatting_info is false.
304 | + Tolerate up to 4 bytes trailing junk on PALETTE record.
305 | + Fixed bug in unused utility function xldate_from_date_tuple which
306 |   affected some years after 2099.
307 | + Added code for inspecting as-yet-unused record types: FILEPASS, TXO,
308 |   NOTE.
309 | + Added inspection code for add_in function calls.
310 | + Added support for unnumbered biff_dump (better for doing diffs).
311 | + ignore distutils cruft
312 | + Avoid assertion error in compdoc when -1 used instead of -2 for
313 |   first_SID of empty SCSS
314 | + Make version numbers match up.
315 | + Enhanced recovery from out-of-order/missing/wrong CODEPAGE record.
316 | + Added Name.area2d convenience method.
317 | + Avoided some checking of XF info when formatting_info is false.
318 | + Minor changes in preparation for XLSX support.
319 | + remove duplicate files that were out of date.
320 | + Basic support for Excel 2.0
321 | + Decouple Book init & load.
322 | + runxlrd: minor fix for xfc.
323 | + More Excel 2.x work.
324 | + is_date_format() tweak.
325 | + Better detection of IronPython.
326 | + Better error message (including first 8 bytes of file) when file is
327 |   not in a supported format.
328 | + More BIFF2 formatting: ROW, COLWIDTH, and COLUMNDEFAULT records;
329 | + finished stage 1 of XF records.
330 | + More work on supporting BIFF2 (Excel 2.x) files.
331 | + Added support for Excel 2.x (BIFF2) files. Data only, no formatting
332 |   info. Alpha.
333 | + Wasn't coping with EXTERNSHEET record followed by CONTINUE
334 |   record(s).
335 | + Allow for BIFF2/3-style FORMAT record in BIFF4/8 file
336 | + Avoid crash when zero-length Unicode string missing options byte.
337 | + Warning message if sector sizes are extremely large.
338 | + Work around corrupt STYLE record
339 | + Added missing entry for blank cell type to ctype_text
340 | + Added "fonts" command to runxlrd script
341 | + Warning: style XF whose parent XF index != 0xFFF
342 | + Logfile arg wasn't being passed from open_workbook to
343 |   compdoc.CompDoc.
344 | 
345 | 
346 | 0.6.1  (10 June 2007)
347 | ---------------------
348 | 
349 | + Version number updated to 0.6.1
350 | + Documented runxlrd.py commands in its usage message. Changed
351 |   commands: dump to biff_dump, count_records to biff_count.
352 | 
353 | 
354 | 0.6.1a5
355 | -------
356 | 
357 | + Bug fixed: Missing "<" in a struct.unpack call means can't open
358 |   files on bigendian platforms. Discovered by "Mihalis".
359 | + Removed antique undocumented Book.get_name_dict method and
360 |   experimental "trimming" facility.
361 | + Meaningful exception instead of IndexError if a SAT (sector
362 |   allocation table) is corrupted.
363 | + If no CODEPAGE record in pre-8.0 file, assume ascii and keep going
364 |   (instead of raising exception).
365 | 
366 | 
367 | 0.6.1a4
368 | -------
369 | 
370 | + At least one source of XLS files writes parent style XF records
371 |   *after* the child cell XF records that refer to them, triggering
372 |   IndexError in 0.5.2 and AssertionError in later versions. Reported
373 |   with sample file by Todd O'Bryan. Fixed by changing to two-pass
374 |   processing of XF records.
375 | + Formatting info in pre-BIFF8 files: Ensured appropriate defaults and
376 |   lossless conversions to make the info BIFF8-compatible. Fixed bug in
377 |   extracting the "used" flags.
378 | + Fixed problems discovered with opening test files from Planmaker
379 |   2006 (http://www.softmaker.com/english/ofwcomp_en.htm): (1) Four files
380 |   have reduced size of PALETTE record (51 and 32 colours; Excel writes
381 |   56 always). xlrd now emits a NOTE to the logfile and continues. (2)
382 |   FORMULA records use the Excel 2.x record code 0x0021 instead of
383 |   0x0221. xlrd now continues silently. (3) In two files, at the OLE2
384 |   compound document level, the internal directory says that the length
385 |   of the Short-Stream Container Stream is 16384 bytes, but the actual
386 |   contents are 11264 and 9728 bytes respectively. xlrd now emits a
387 |   WARNING to the logfile and continues.
388 | + After discussion with Daniel Rentz, the concept of two lists of XF
389 |   (eXtended Format) objects (raw_xf_list and computed_xf_list) has been
390 |   abandoned. There is now a single list, called xf_list
391 | 
392 | 
393 | 0.6.1a3
394 | -------
395 | 
396 | + Added Book.sheets ... for sheetx, sheet in enumerate(book.sheets):
397 | + Formatting info: extraction of sheet-level flags from WINDOW2
398 |   record, and sheet.visibility from BOUNDSHEET record. Added Macintosh-
399 |   only Font attributes "outline" and "shadow".
400 | 
401 | 
402 | 0.6.1a2
403 | -------
404 | 
405 | + Added extraction of merged cells info.
406 | + pyExcelerator uses "general" instead of "General" for the generic
407 |   "number format". Worked around.
408 | + Crystal Reports writes "WORKBOOK" in the OLE2 Compound Document
409 |   directory instead of "Workbook". Changed to case-insensitive directory
410 |   search. Reported by Vic Simkus.
411 | 
412 | 
413 | 0.6.1a1 (18 Dec 2006)
414 | ---------------------
415 | 
416 | + Added formatting information for cells (font, "number format",
417 |   background, border, alignment and protection) and rows/columns
418 |   (height/width etc). To save memory and time for those who don't need
419 |   it, this information is extracted only if formatting_info=1 is
420 |   supplied to the open_workbook() function. The cell records BLANK and
421 |   MULBLANKS which contain no data, only formatting information, will
422 |   continue to be ignored in the default (no formatting info) case.
423 | + Ralph Heimburger reported a problem with xlrd being intolerant about
424 |   an Excel 4.0 file (created by "some web app") with a DIMENSIONS record
425 |   that omitted Microsoft's usual padding with 2 unused bytes. Fixed.
426 | 
427 | 
428 | 0.6.0a4 (not released)
429 | ----------------------
430 | 
431 | + Added extraction of human-readable formulas from NAME records.
432 | + Worked around OOo Calc writing 9-byte BOOLERR records instead of 8.
433 |   Reported by Rory Campbell-Lange.
434 | + This history file converted to descending chronological order and
435 |   HTML format.
436 | 
437 | 
438 | 0.6.0a3 (19 Sept 2006)
439 | ----------------------
440 | 
441 | + Names: minor bugfixes; added script xlrdnameAPIdemo.py
442 | + ROW records were being used as additional hints for sizing memory
443 |   requirements. In some files the ROW records overstate the number of
444 |   used columns, and/or there are ROW records for rows that have no data
445 |   in them. This would cause xlrd to report sheet.ncols and/or
446 |   sheet.nrows as larger than reasonably expected. Change: ROW records
447 |   are ignored. The number of columns/rows is based solely on the highest
448 |   column/row index seen in non-empty data records. Empty data records
449 |   (types BLANK and MULBLANKS) which contain no data, only formatting
450 |   information, have always been ignored, and this will continue.
451 |   Consequence: trailing rows and columns which contain only empty cells
452 |   will vanish.
453 | 
454 | 
455 | 0.6.0a2 (13 Sept 2006)
456 | ----------------------
457 | 
458 | 
459 | + Fixed a bug reported by Rory Campbell-Lange: "open failed";
460 |   incorrect assumptions about the layout of array formulas which return
461 |   strings.
462 | + Further work on defined names, especially the API.
463 | 
464 | 
465 | 0.6.0a1 (8 Sept 2006)
466 | ---------------------
467 | 
468 | + Sheet objects have two new convenience methods: col_values(colx,
469 |   start_rowx=0, end_rowx=None) and the corresponding col_types.
470 |   Suggested by Dennis O'Brien.
471 | + BIFF 8 file missing its CODEPAGE record: xlrd will now assume
472 |   utf_16_le encoding (the only possibility) and keep going.
473 | + Older files missing a CODEPAGE record: an exception will be raised.
474 |   Thanks to Sergey Krushinsky for a sample file. The open_workbook()
475 |   function has a new argument (encoding_override) which can be used if
476 |   the CODEPAGE record is missing or incorrect (for example,
477 |   codepage=1251 but the data is actually encoded in koi8_r). The
478 |   runxlrd.py script takes a corresponding -e argument, for example -e
479 |   cp1251
480 | + Further work done on parsing "number formats". Thanks to Chris
481 |   Withers for the ``"General_)"`` example.
482 | + Excel 97 introduced the concept of row and column labels, defined by
483 |   Insert > Name > Labels. The ranges containing the labels are now
484 |   exposed as the Sheet attributes row_label_ranges and col_label_ranges.
485 | + The major effort in this 0.6.0 release has been the provision of
486 |   access to named cell ranges and named constants (Excel:
487 |   Insert/Name/Define). Juan C. Mendez provided very useful real-world
488 |   sample files.
489 | 
490 | 
491 | 0.5.3a1 (24 May 2006)
492 | ---------------------
493 | 
494 | + John Popplewell and Richard Sharp provided sample files which caused
495 |   any reliance at all on DIMENSIONS records and ROW records to be
496 |   abandoned.
497 | + If the file size is not a whole number of OLE sectors, a warning
498 |   message is logged. Previously this caused an exception to be raised.
499 | 
500 | 
501 | 0.5.2 (14 March 2006)
502 | ---------------------
503 | 
504 | + public release
505 | + Updated version numbers, README, HISTORY.
506 | 
507 | 
508 | 0.5.2a3 (13 March 2006)
509 | -----------------------
510 | 
511 | + Gnumeric writes user-defined formats with format codes starting at
512 |   50 instead of 164; worked around.
513 | + Thanks to Didrik Pinte for reporting the need for xlrd to be more
514 |   tolerant of the idiosyncrasies of other software, for supplying sample
515 |   files, and for performing alpha testing.
516 | + '_' character in a format should be treated like an escape
517 |   character; fixed.
518 | + An "empty" formula result means a zero-length string, not an empty
519 |   cell! Fixed.
520 | 
521 | 
522 | 0.5.2a2 (9 March 2006)
523 | ----------------------
524 | 
525 | + Found that Gnumeric writes all DIMENSIONS records with nrows and
526 |   ncols each 1 less than they should be (except when it clamps ncols at
527 |   256!), and pyXLwriter doesn't write ROW records. Cell memory pre-
528 |   allocation was generalised to use ROW records if available with fall-
529 |   back to DIMENSIONS records.
530 | 
531 | 
532 | 0.5.2a1 (6 March 2006)
533 | ----------------------
534 | 
535 | 
536 | + pyXLwriter writes DIMENSIONS record with antique opcode 0x0000
537 |   instead of 0x0200; worked around
538 | + A file written by Gnumeric had zeroes in DIMENSIONS record but data
539 |   in cell A1; worked around
540 | 
541 | 
542 | 0.5.1 (18 Feb 2006)
543 | --------------------
544 | 
545 | + released to Journyx
546 | + Python 2.1 mmap requires file to be opened for update access. Added
547 |   fall-back to read-only access without mmap if 2.1 open fails because
548 |   "permission denied".
549 | 
550 | 
551 | 0.5 (7 Feb 2006)
552 | ----------------
553 | 
554 | + released to Journyx
555 | + Now works with Python 2.1. Backporting to Python 2.1 was partially
556 |   funded by Journyx - provider of timesheet and project accounting
557 |   solutions (http://journyx.com/)
558 | + open_workbook() can be given the contents of a file instead of its
559 |   name. Thanks to Remco Boerma for the suggestion.
560 | + New module attribute __VERSION__ (as a string; for example "0.5")
561 | + Minor enhancements to classification of formats as date or not-date.
562 | + Added warnings about files with inconsistent OLE compound document
563 |   structures. Thanks to Roman V. Kiseliov (author of pyExcelerator) for
564 |   the tip-off.
565 | 
566 | 
567 | 0.4a1, (7 Sept 2005)
568 | --------------------
569 | 
570 | + released to Laurent T.
571 | + Book and sheet objects can now be pickled and unpickled. Instead of
572 |   reading a large spreadsheet multiple times, consider pickling it once
573 |   and loading the saved pickle; can be much faster. Thanks to Laurent
574 |   Thioudellet for the enhancement request.
575 | + Using the mmap module can be turned off. But you would only do that
576 |   for benchmarking purposes.
577 | + Handling NUMBER records has been made faster
578 | 
579 | 
580 | 0.3a1 (15 May 2005)
581 | -------------------
582 | 
583 | - first public release
584 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | There are two licenses associated with xlrd. This one relates to the bulk of
 2 | the work done on the library::
 3 | 
 4 |     Portions copyright © 2005-2009, Stephen John Machin, Lingfo Pty Ltd
 5 |     All rights reserved.
 6 | 
 7 |     Redistribution and use in source and binary forms, with or without
 8 |     modification, are permitted provided that the following conditions are met:
 9 | 
10 |     1. Redistributions of source code must retain the above copyright notice,
11 |     this list of conditions and the following disclaimer.
12 | 
13 |     2. Redistributions in binary form must reproduce the above copyright notice,
14 |     this list of conditions and the following disclaimer in the documentation
15 |     and/or other materials provided with the distribution.
16 | 
17 |     3. None of the names of Stephen John Machin, Lingfo Pty Ltd and any
18 |     contributors may be used to endorse or promote products derived from this
19 |     software without specific prior written permission.
20 | 
21 |     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 |     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23 |     THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 |     PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
25 |     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 |     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 |     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 |     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 |     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 |     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31 |     THE POSSIBILITY OF SUCH DAMAGE.
32 | 
33 | This one covers some earlier work::
34 | 
35 |     /*-
36 |      * Copyright (c) 2001 David Giffin.
37 |      * All rights reserved.
38 |      *
39 |      * Based on the the Java version: Andrew Khan Copyright (c) 2000.
40 |      *
41 |      *
42 |      * Redistribution and use in source and binary forms, with or without
43 |      * modification, are permitted provided that the following conditions
44 |      * are met:
45 |      *
46 |      * 1. Redistributions of source code must retain the above copyright
47 |      *    notice, this list of conditions and the following disclaimer.
48 |      *
49 |      * 2. Redistributions in binary form must reproduce the above copyright
50 |      *    notice, this list of conditions and the following disclaimer in
51 |      *    the documentation and/or other materials provided with the
52 |      *    distribution.
53 |      *
54 |      * 3. All advertising materials mentioning features or use of this
55 |      *    software must display the following acknowledgment:
56 |      *    "This product includes software developed by
57 |      *     David Giffin <david@giffin.org>."
58 |      *
59 |      * 4. Redistributions of any form whatsoever must retain the following
60 |      *    acknowledgment:
61 |      *    "This product includes software developed by
62 |      *     David Giffin <david@giffin.org>."
63 |      *
64 |      * THIS SOFTWARE IS PROVIDED BY DAVID GIFFIN ``AS IS'' AND ANY
65 |      * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
66 |      * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
67 |      * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL DAVID GIFFIN OR
68 |      * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
69 |      * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
70 |      * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
71 |      * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
72 |      * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
73 |      * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 |      * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
75 |      * OF THE POSSIBILITY OF SUCH DAMAGE.
76 |      */
77 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include CHANGELOG.rst
2 | include LICENSE
3 | include README.rst
4 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | xlrd
 2 | ====
 3 | 
 4 | |Build Status|_ |Coverage Status|_ |Documentation|_ |PyPI version|_
 5 | 
 6 | .. |Build Status| image:: https://circleci.com/gh/python-excel/xlrd/tree/master.svg?style=shield
 7 | .. _Build Status: https://circleci.com/gh/python-excel/xlrd/tree/master
 8 | 
 9 | .. |Coverage Status| image:: https://codecov.io/gh/python-excel/xlrd/branch/master/graph/badge.svg?token=lNSqwBBbvk
10 | .. _Coverage Status: https://codecov.io/gh/python-excel/xlrd
11 | 
12 | .. |Documentation| image:: https://readthedocs.org/projects/xlrd/badge/?version=latest
13 | .. _Documentation: http://xlrd.readthedocs.io/en/latest/?badge=latest
14 | 
15 | .. |PyPI version| image:: https://badge.fury.io/py/xlrd.svg
16 | .. _PyPI version: https://badge.fury.io/py/xlrd
17 | 
18 | 
19 | xlrd is a library for reading data and formatting information from Excel
20 | files in the historical ``.xls`` format.
21 | 
22 | .. warning::
23 | 
24 |   This library will no longer read anything other than ``.xls`` files. For
25 |   alternatives that read newer file formats, please see http://www.python-excel.org/.
26 | 
27 | The following are also not supported but will safely and reliably be ignored:
28 | 
29 | *   Charts, Macros, Pictures, any other embedded object, **including** embedded worksheets.
30 | *   VBA modules
31 | *   Formulas, but results of formula calculations are extracted.
32 | *   Comments
33 | *   Hyperlinks
34 | *   Autofilters, advanced filters, pivot tables, conditional formatting, data validation
35 | 
36 | Password-protected files are not supported and cannot be read by this library.
37 | 
38 | Quick start:
39 | 
40 | .. code-block:: bash
41 | 
42 |     pip install xlrd
43 |     
44 | .. code-block:: python
45 | 
46 |     import xlrd
47 |     book = xlrd.open_workbook("myfile.xls")
48 |     print("The number of worksheets is {0}".format(book.nsheets))
49 |     print("Worksheet name(s): {0}".format(book.sheet_names()))
50 |     sh = book.sheet_by_index(0)
51 |     print("{0} {1} {2}".format(sh.name, sh.nrows, sh.ncols))
52 |     print("Cell D30 is {0}".format(sh.cell_value(rowx=29, colx=3)))
53 |     for rx in range(sh.nrows):
54 |         print(sh.row(rx))
55 | 
56 | From the command line, this will show the first, second and last rows of each sheet in each file:
57 | 
58 | .. code-block:: bash
59 | 
60 |     python PYDIR/scripts/runxlrd.py 3rows *blah*.xls
61 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = _build
  9 | 
 10 | # Internal variables.
 11 | PAPEROPT_a4     = -D latex_paper_size=a4
 12 | PAPEROPT_letter = -D latex_paper_size=letter
 13 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 14 | # the i18n builder cannot share the environment and doctrees with the others
 15 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 16 | 
 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
 18 | 
 19 | help:
 20 | 	@echo "Please use \`make <target>' where <target> is one of"
 21 | 	@echo "  html       to make standalone HTML files"
 22 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 23 | 	@echo "  singlehtml to make a single large HTML file"
 24 | 	@echo "  pickle     to make pickle files"
 25 | 	@echo "  json       to make JSON files"
 26 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 27 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 28 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 29 | 	@echo "  epub       to make an epub"
 30 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 31 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 32 | 	@echo "  text       to make text files"
 33 | 	@echo "  man        to make manual pages"
 34 | 	@echo "  texinfo    to make Texinfo files"
 35 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 36 | 	@echo "  gettext    to make PO message catalogs"
 37 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 38 | 	@echo "  linkcheck  to check all external links for integrity"
 39 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 40 | 
 41 | clean:
 42 | 	-rm -rf $(BUILDDIR)/*
 43 | 
 44 | html:
 45 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 46 | 	@echo
 47 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 48 | 
 49 | dirhtml:
 50 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 51 | 	@echo
 52 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 53 | 
 54 | singlehtml:
 55 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 56 | 	@echo
 57 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 58 | 
 59 | pickle:
 60 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 61 | 	@echo
 62 | 	@echo "Build finished; now you can process the pickle files."
 63 | 
 64 | json:
 65 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 66 | 	@echo
 67 | 	@echo "Build finished; now you can process the JSON files."
 68 | 
 69 | htmlhelp:
 70 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 71 | 	@echo
 72 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 73 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 74 | 
 75 | epub:
 76 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
 77 | 	@echo
 78 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
 79 | 
 80 | latex:
 81 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
 82 | 	@echo
 83 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
 84 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
 85 | 	      "(use \`make latexpdf' here to do that automatically)."
 86 | 
 87 | latexpdf:
 88 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
 89 | 	@echo "Running LaTeX files through pdflatex..."
 90 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
 91 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
 92 | 
 93 | text:
 94 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
 95 | 	@echo
 96 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
 97 | 
 98 | man:
 99 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
100 | 	@echo
101 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
102 | 
103 | texinfo:
104 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
105 | 	@echo
106 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
107 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
108 | 	      "(use \`make info' here to do that automatically)."
109 | 
110 | info:
111 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
112 | 	@echo "Running Texinfo files through makeinfo..."
113 | 	make -C $(BUILDDIR)/texinfo info
114 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
115 | 
116 | gettext:
117 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
118 | 	@echo
119 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
120 | 
121 | changes:
122 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
123 | 	@echo
124 | 	@echo "The overview file is in $(BUILDDIR)/changes."
125 | 
126 | linkcheck:
127 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
128 | 	@echo
129 | 	@echo "Link check complete; look for any errors in the above output " \
130 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
131 | 
132 | doctest:
133 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
134 | 	@echo "Testing of doctests in the sources finished, look at the " \
135 | 	      "results in $(BUILDDIR)/doctest/output.txt."
136 | 


--------------------------------------------------------------------------------
/docs/acknowledgements.rst:
--------------------------------------------------------------------------------
 1 | Acknowledgements
 2 | ================
 3 | 
 4 | Many thanks to John Machin for originally writing :mod:`xlrd` and tirelessly
 5 | supporting it for many years before retiring.
 6 | 
 7 | *   This package started life as a translation from C into Python of parts of a utility called "xlreader" developed by David Giffin. "This product includes software developed by David Giffin <david@giffin.org>."
 8 | *   OpenOffice.org has truly excellent documentation of the Microsoft Excel file formats and Compound Document file format, authored by Daniel Rentz. See http://sc.openoffice.org
 9 | *   U+5F20 U+654F: over a decade of inspiration, support, and interesting decoding opportunities.
10 | *   Ksenia Marasanova: sample Macintosh and non-Latin1 files, alpha testing
11 | *   Backporting to Python 2.1 was partially funded by Journyx - provider of timesheet and project accounting solutions (http://journyx.com/).
12 | *   Provision of formatting information in version 0.6.1 was funded by `Simplistix Ltd`__.
13 | 
14 | __ http://www.simplistix.co.uk
15 | 
16 | Development of this package would not have been possible without the document
17 | "OpenOffice.org's Documentation of the Microsoft Excel File Format"
18 | ("OOo docs" for short).
19 | The latest version is available from OpenOffice.org in
20 | `PDF format`__ and `ODT format`__.
21 | Small portions of the OOo docs are reproduced in this
22 | document. A study of the OOo docs is recommended for those who wish a
23 | deeper understanding of the Excel file layout than the xlrd docs can provide.
24 | 
25 | __ http://sc.openoffice.org/excelfileformat.pdf
26 | 
27 | __ http://sc.openoffice.org/excelfileformat.odt
28 | 
29 | Backporting to Python 2.1 was partially funded by
30 | `Journyx - provider of timesheet and project accounting solutions`__.
31 | 
32 | __ http://journyx.com/
33 | 


--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
 1 | API Reference
 2 | =============
 3 | 
 4 | xlrd
 5 | ----
 6 | 
 7 | .. automodule:: xlrd
 8 |    :members:
 9 | 
10 | xlrd.biffh
11 | ----------
12 | 
13 | .. automodule:: xlrd.biffh
14 |    :members:
15 | 
16 | xlrd.book
17 | ---------
18 | 
19 | .. automodule:: xlrd.book
20 |    :members:
21 | 
22 | xlrd.compdoc
23 | ------------
24 | 
25 | .. automodule:: xlrd.compdoc
26 |    :members:
27 | 
28 | xlrd.formatting
29 | ---------------
30 | 
31 | .. automodule:: xlrd.formatting
32 |    :members:
33 | 
34 | xlrd.formula
35 | -------------
36 | 
37 | .. automodule:: xlrd.formula
38 |    :members:
39 | 
40 | xlrd.sheet
41 | ----------
42 | 
43 | .. currentmodule:: xlrd.sheet
44 | 
45 | .. autoclass:: xlrd.sheet.Sheet
46 |    :members:
47 |    :exclude-members: gcw, col
48 | 
49 |    .. method:: col(colx)
50 | 
51 |      Returns a sequence of the :class:`Cell` objects in the given column.
52 | 
53 |    .. autoattribute:: xlrd.sheet.Sheet.gcw
54 |       :annotation:
55 | 
56 | 
57 | .. automodule:: xlrd.sheet
58 |    :members:
59 |    :exclude-members: Sheet
60 | 
61 | xlrd.xldate
62 | -----------
63 | 
64 | .. currentmodule:: xlrd.xldate
65 | 
66 | .. automodule:: xlrd.xldate
67 |    :members:
68 | 


--------------------------------------------------------------------------------
/docs/changes.rst:
--------------------------------------------------------------------------------
1 | 
2 | .. currentmodule:: xlrd
3 | 
4 | .. include:: ../CHANGELOG.rst
5 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | import os
 3 | 
 4 | from xlrd.info import __VERSION__
 5 | 
 6 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
 7 | 
 8 | intersphinx_mapping = {'http://docs.python.org': None}
 9 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx']
10 | source_suffix = '.rst'
11 | master_doc = 'index'
12 | project = u'xlrd'
13 | copyright = (
14 |     '2005-2019 Stephen John Machin, Lingfo Pty Ltd. '
15 |     '2019-%s Chris Withers'
16 | ) % datetime.datetime.now().year
17 | version = release = __VERSION__
18 | exclude_patterns = ['_build']
19 | pygments_style = 'sphinx'
20 | 
21 | if on_rtd:
22 |     html_theme = 'default'
23 | else:
24 |     html_theme = 'classic'
25 | 
26 | htmlhelp_basename = project+'doc'
27 | intersphinx_mapping = {'python': ('http://docs.python.org', None)}
28 | 
29 | autodoc_member_order = 'bysource'
30 | 


--------------------------------------------------------------------------------
/docs/dates.rst:
--------------------------------------------------------------------------------
 1 | Dates in Excel spreadsheets
 2 | ===========================
 3 | 
 4 | .. currentmodule:: xlrd.xldate
 5 | 
 6 | In reality, there are no such things. What you have are floating point
 7 | numbers and pious hope.
 8 | There are several problems with Excel dates:
 9 | 
10 | 1. Dates are not stored as a separate data type; they are stored as
11 |    floating point numbers and you have to rely on:
12 | 
13 |    - the "number format" applied to them in Excel and/or
14 |    - knowing which cells are supposed to have dates in them.
15 | 
16 |    This module helps with the former by inspecting the
17 |    format that has been applied to each number cell;
18 |    if it appears to be a date format, the cell
19 |    is classified as a date rather than a number.
20 | 
21 |    Feedback on this feature, especially from non-English-speaking locales,
22 |    would be appreciated.
23 | 
24 | 2. Excel for Windows stores dates by default as the number of
25 |    days (or fraction thereof) since ``1899-12-31T00:00:00``. Excel for
26 |    Macintosh uses a default start date of ``1904-01-01T00:00:00``.
27 | 
28 |    The date system can be changed in Excel on a per-workbook basis (for example:
29 |    Tools -> Options -> Calculation, tick the "1904 date system" box).
30 |    This is of course a bad idea if there are already dates in the
31 |    workbook. There is no good reason to change it even if there are no
32 |    dates in the workbook.
33 | 
34 |    Which date system is in use is recorded in the
35 |    workbook. A workbook transported from Windows to Macintosh (or vice
36 |    versa) will work correctly with the host Excel.
37 | 
38 |    When using this package's :func:`xldate_as_tuple` function to convert numbers
39 |    from a workbook, you must use the :attr:`~xlrd.Book.datemode` attribute of
40 |    the :class:`~xlrd.Book` object. If you guess, or make a judgement depending
41 |    on where you believe the workbook was created, you run the risk of being 1462
42 |    days out of kilter.
43 | 
44 |    Reference:
45 |    https://support.microsoft.com/en-us/help/180162/xl-the-1900-date-system-vs.-the-1904-date-system
46 | 
47 | 
48 | 3. The Excel implementation of the Windows-default 1900-based date system
49 |    works on the incorrect premise that 1900 was a leap year. It interprets the
50 |    number 60 as meaning ``1900-02-29``, which is not a valid date.
51 | 
52 |    Consequently, any number less than 61 is ambiguous. For example, is 59 the
53 |    result of ``1900-02-28`` entered directly, or is it ``1900-03-01`` minus 2
54 |    days?
55 | 
56 |    The OpenOffice.org Calc program "corrects" the Microsoft problem;
57 |    entering ``1900-02-27`` causes the number 59 to be stored.
58 |    Save as an XLS file, then open the file with Excel and you'll see
59 |    ``1900-02-28`` displayed.
60 | 
61 |    Reference: https://support.microsoft.com/en-us/help/214326/excel-incorrectly-assumes-that-the-year-1900-is-a-leap-year
62 | 
63 | 4. The Macintosh-default 1904-based date system counts ``1904-01-02`` as day 1
64 |    and ``1904-01-01`` as day zero. Thus any number such that
65 |    ``(0.0 <= number < 1.0)`` is ambiguous. Is 0.625 a time of day
66 |    (``15:00:00``), independent of the calendar, or should it be interpreted as
67 |    an instant on a particular day (``1904-01-01T15:00:00``)?
68 | 
69 |    The functions in :mod:`~xlrd.xldate` take the view that such a number is a
70 |    calendar-independent time of day (like Python's :class:`datetime.time` type)
71 |    for both date systems. This is consistent with more recent Microsoft
72 |    documentation; for example, the help file for Excel 2002 says that the
73 |    first day in the 1904 date system is ``1904-01-02``.
74 | 
75 | 5. Usage of the Excel ``DATE()`` function may leave strange dates in a
76 |    spreadsheet. Quoting the help file in respect of the 1900 date system::
77 | 
78 |      If year is between 0 (zero) and 1899 (inclusive),
79 |      Excel adds that value to 1900 to calculate the year.
80 |      For example, DATE(108,1,2) returns January 2, 2008 (1900+108).
81 | 
82 |    This gimmick, semi-defensible only for arguments up to 99 and only in the
83 |    pre-Y2K-awareness era, means that ``DATE(1899, 12, 31)`` is interpreted as
84 |    ``3799-12-31``.
85 | 
86 |    For further information, please refer to the documentation for the
87 |    functions in :mod:`~xlrd.xldate`.
88 | 


--------------------------------------------------------------------------------
/docs/development.rst:
--------------------------------------------------------------------------------
 1 | Development
 2 | ===========
 3 | 
 4 | .. highlight:: bash
 5 | 
 6 | If you wish to contribute to this project, then you should fork the
 7 | repository found here:
 8 | 
 9 | https://github.com/python-excel/xlrd
10 | 
11 | Once that has been done and you have a checkout, you can follow these
12 | instructions to perform various development tasks:
13 | 
14 | Setting up a virtualenv
15 | -----------------------
16 | 
17 | The recommended way to set up a development environment is to turn
18 | your checkout into a virtualenv and then install the package in
19 | editable form as follows::
20 | 
21 |   $ virtualenv .
22 |   $ bin/pip install -e .[test]
23 | 
24 | Running the tests
25 | -----------------
26 | 
27 | Once you've set up a virtualenv, the tests can be run as follows::
28 | 
29 |   $ source bin/activate
30 |   $ pytest
31 | 
32 | Building the documentation
33 | --------------------------
34 | 
35 | The Sphinx documentation is built by doing the following, having activated
36 | the virtualenv above, from the directory containing setup.py::
37 | 
38 |   $ source bin/activate
39 |   $ cd docs
40 |   $ make html
41 | 
42 | To check that the description that will be used on PyPI renders properly,
43 | do the following::
44 | 
45 |   $ python setup.py --long-description | rst2html.py > desc.html
46 | 
47 | Making a release
48 | ----------------
49 | 
50 | To make a release, just update the version in ``xlrd.info.__VERSION__``, update the change log
51 | and push to https://github.com/python-excel/xlrd
52 | and Carthorse should take care of the rest.
53 | 


--------------------------------------------------------------------------------
/docs/formatting.rst:
--------------------------------------------------------------------------------
  1 | Formatting information in Excel Spreadsheets
  2 | ============================================
  3 | 
  4 | Introduction
  5 | ------------
  6 | 
  7 | This collection of features, new in xlrd version 0.6.1, is intended
  8 | to provide the information needed to:
  9 | 
 10 | - display/render spreadsheet contents (say) on a screen or in a PDF file
 11 | - copy spreadsheet data to another file without losing the ability to
 12 |   display/render it.
 13 | 
 14 | .. _palette:
 15 | 
 16 | The Palette; Colour Indexes
 17 | ---------------------------
 18 | 
 19 | A colour is represented in Excel as a ``(red, green, blue)`` ("RGB") tuple
 20 | with each component in ``range(256)``. However it is not possible to access an
 21 | unlimited number of colours; each spreadsheet is limited to a palette of 64
 22 | different colours (24 in Excel 3.0 and 4.0, 8 in Excel 2.0).
 23 | Colours are referenced by an index ("colour index") into this palette.
 24 | 
 25 | Colour indexes 0 to 7 represent 8 fixed built-in colours:
 26 | black, white, red, green, blue, yellow, magenta, and cyan.
 27 | 
 28 | The remaining colours in the palette (8 to 63 in Excel 5.0 and later)
 29 | can be changed by the user. In the Excel 2003 UI,
 30 | Tools -> Options -> Color presents a palette
 31 | of 7 rows of 8 colours. The last two rows are reserved for use in charts.
 32 | 
 33 | The correspondence between this grid and the assigned
 34 | colour indexes is NOT left-to-right top-to-bottom.
 35 | 
 36 | Indexes 8 to 15 correspond to changeable
 37 | parallels of the 8 fixed colours -- for example, index 7 is forever cyan;
 38 | index 15 starts off being cyan but can be changed by the user.
 39 | 
 40 | The default colour for each index depends on the file version; tables of the
 41 | defaults are available in the source code. If the user changes one or more
 42 | colours, a ``PALETTE`` record appears in the XLS file -- it gives the RGB values
 43 | for *all* changeable
 44 | indexes.
 45 | 
 46 | Note that colours can be used in "number formats": ``[CYAN]....`` and
 47 | ``[COLOR8]....`` refer to colour index 7; ``[COLOR16]....`` will produce cyan
 48 | unless the user changes colour index 15 to something else.
 49 | 
 50 | In addition, there are several "magic" colour indexes used by Excel:
 51 | 
 52 | ``0x18`` (BIFF3-BIFF4), ``0x40`` (BIFF5-BIFF8):
 53 |   System window text colour for border lines (used in ``XF``, ``CF``, and
 54 |   ``WINDOW2`` records)
 55 | 
 56 | ``0x19`` (BIFF3-BIFF4), ``0x41`` (BIFF5-BIFF8):
 57 |   System window background colour for pattern background (used in ``XF`` and
 58 |   ``CF`` records)
 59 | 
 60 | ``0x43``:
 61 |   System face colour (dialogue background colour)
 62 | 
 63 | ``0x4D``:
 64 |   System window text colour for chart border lines
 65 | 
 66 | ``0x4E``:
 67 |   System window background colour for chart areas
 68 | 
 69 | ``0x4F``:
 70 |   Automatic colour for chart border lines (seems to be always Black)
 71 | 
 72 | ``0x50``:
 73 |   System ToolTip background colour (used in note objects)
 74 | 
 75 | ``0x51``:
 76 |   System ToolTip text colour (used in note objects)
 77 | 
 78 | ``0x7FFF``:
 79 |   System window text colour for fonts (used in ``FONT`` and ``CF`` records).
 80 | 
 81 |   .. note::
 82 |     ``0x7FFF`` appears to be the *default* colour index.
 83 |     It appears quite often in ``FONT`` records.
 84 | 
 85 | Default Formatting
 86 | ------------------
 87 | 
 88 | Default formatting is applied to all empty cells (those not described by a cell
 89 | record):
 90 | 
 91 | - Firstly, row default information (``ROW`` record, :class:`~xlrd.sheet.Rowinfo`
 92 |   class) is used if available.
 93 | 
 94 | - Failing that, column default information (``COLINFO`` record,
 95 |   :class:`~xlrd.sheet.Colinfo` class) is used if available.
 96 | 
 97 | - As a last resort the worksheet/workbook default cell format will be used; this
 98 |   should always be present in an Excel file,
 99 |   described by the ``XF`` record with the fixed index 15 (0-based).
100 |   By default, it uses the worksheet/workbook default cell style,
101 |   described by the very first ``XF`` record (index 0).
102 | 
103 | Formatting features not included in xlrd
104 | ----------------------------------------
105 | 
106 | - Asian phonetic text (known as "ruby"), used for Japanese furigana.
107 |   See OOo docs s3.4.2 (p15)
108 | 
109 | - Conditional formatting. See OOo docs s5.12, s6.21 (CONDFMT record), s6.16
110 |   (CF record)
111 | 
112 | - Miscellaneous sheet-level and book-level items, e.g. printing layout,
113 |   screen panes.
114 | 
115 | - Modern Excel file versions don't keep most of the built-in
116 |   "number formats" in the file; Excel loads formats according to the
117 |   user's locale. Currently, xlrd's emulation of this is limited to
118 |   a hard-wired table that applies to the US English locale. This may mean
119 |   that currency symbols, date order, thousands separator, decimal separator,
120 |   etc. are inappropriate.
121 | 
122 |   .. note::
123 |     This does not affect users who are copying XLS
124 |     files, only those who are visually rendering cells.
125 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. include:: ../README.rst
 2 | 
 3 | You may also wish to consult the `tutorial`__.
 4 | 
 5 | __ https://github.com/python-excel/tutorial
 6 | 
 7 | Details:
 8 | 
 9 | .. toctree::
10 |    :maxdepth: 1
11 | 
12 |    unicode.rst
13 |    dates.rst
14 |    references.rst
15 |    formatting.rst
16 |    on_demand.rst
17 |    api.rst
18 | 
19 | For details of how to get involved in development of this package,
20 | and other meta-information, please see the sections below:
21 | 
22 | .. toctree::
23 |    :maxdepth: 1
24 | 
25 |    development.rst
26 |    changes.rst
27 |    acknowledgements.rst
28 |    licenses.rst
29 | 
30 | Indices and tables
31 | ==================
32 | 
33 | * :ref:`genindex`
34 | * :ref:`modindex`
35 | * :ref:`search`
36 | 


--------------------------------------------------------------------------------
/docs/licenses.rst:
--------------------------------------------------------------------------------
1 | Licenses
2 | ========
3 | 
4 | .. literalinclude:: ../LICENSE
5 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
  1 | @ECHO OFF
  2 | 
  3 | REM Command file for Sphinx documentation
  4 | 
  5 | if "%SPHINXBUILD%" == "" (
  6 | 	set SPHINXBUILD=sphinx-build
  7 | )
  8 | set BUILDDIR=_build
  9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
 10 | set I18NSPHINXOPTS=%SPHINXOPTS% .
 11 | if NOT "%PAPER%" == "" (
 12 | 	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
 13 | 	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
 14 | )
 15 | 
 16 | if "%1" == "" goto help
 17 | 
 18 | if "%1" == "help" (
 19 | 	:help
 20 | 	echo.Please use `make ^<target^>` where ^<target^> is one of
 21 | 	echo.  html       to make standalone HTML files
 22 | 	echo.  dirhtml    to make HTML files named index.html in directories
 23 | 	echo.  singlehtml to make a single large HTML file
 24 | 	echo.  pickle     to make pickle files
 25 | 	echo.  json       to make JSON files
 26 | 	echo.  htmlhelp   to make HTML files and a HTML help project
 27 | 	echo.  qthelp     to make HTML files and a qthelp project
 28 | 	echo.  devhelp    to make HTML files and a Devhelp project
 29 | 	echo.  epub       to make an epub
 30 | 	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
 31 | 	echo.  text       to make text files
 32 | 	echo.  man        to make manual pages
 33 | 	echo.  texinfo    to make Texinfo files
 34 | 	echo.  gettext    to make PO message catalogs
 35 | 	echo.  changes    to make an overview over all changed/added/deprecated items
 36 | 	echo.  linkcheck  to check all external links for integrity
 37 | 	echo.  doctest    to run all doctests embedded in the documentation if enabled
 38 | 	goto end
 39 | )
 40 | 
 41 | if "%1" == "clean" (
 42 | 	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
 43 | 	del /q /s %BUILDDIR%\*
 44 | 	goto end
 45 | )
 46 | 
 47 | if "%1" == "html" (
 48 | 	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
 49 | 	if errorlevel 1 exit /b 1
 50 | 	echo.
 51 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
 52 | 	goto end
 53 | )
 54 | 
 55 | if "%1" == "dirhtml" (
 56 | 	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
 57 | 	if errorlevel 1 exit /b 1
 58 | 	echo.
 59 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
 60 | 	goto end
 61 | )
 62 | 
 63 | if "%1" == "singlehtml" (
 64 | 	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
 65 | 	if errorlevel 1 exit /b 1
 66 | 	echo.
 67 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
 68 | 	goto end
 69 | )
 70 | 
 71 | if "%1" == "pickle" (
 72 | 	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
 73 | 	if errorlevel 1 exit /b 1
 74 | 	echo.
 75 | 	echo.Build finished; now you can process the pickle files.
 76 | 	goto end
 77 | )
 78 | 
 79 | if "%1" == "json" (
 80 | 	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
 81 | 	if errorlevel 1 exit /b 1
 82 | 	echo.
 83 | 	echo.Build finished; now you can process the JSON files.
 84 | 	goto end
 85 | )
 86 | 
 87 | if "%1" == "htmlhelp" (
 88 | 	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
 89 | 	if errorlevel 1 exit /b 1
 90 | 	echo.
 91 | 	echo.Build finished; now you can run HTML Help Workshop with the ^
 92 | .hhp project file in %BUILDDIR%/htmlhelp.
 93 | 	goto end
 94 | )
 95 | 
 96 | if "%1" == "devhelp" (
 97 | 	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
 98 | 	if errorlevel 1 exit /b 1
 99 | 	echo.
100 | 	echo.Build finished.
101 | 	goto end
102 | )
103 | 
104 | if "%1" == "epub" (
105 | 	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
106 | 	if errorlevel 1 exit /b 1
107 | 	echo.
108 | 	echo.Build finished. The epub file is in %BUILDDIR%/epub.
109 | 	goto end
110 | )
111 | 
112 | if "%1" == "latex" (
113 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
114 | 	if errorlevel 1 exit /b 1
115 | 	echo.
116 | 	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
117 | 	goto end
118 | )
119 | 
120 | if "%1" == "text" (
121 | 	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
122 | 	if errorlevel 1 exit /b 1
123 | 	echo.
124 | 	echo.Build finished. The text files are in %BUILDDIR%/text.
125 | 	goto end
126 | )
127 | 
128 | if "%1" == "man" (
129 | 	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
130 | 	if errorlevel 1 exit /b 1
131 | 	echo.
132 | 	echo.Build finished. The manual pages are in %BUILDDIR%/man.
133 | 	goto end
134 | )
135 | 
136 | if "%1" == "texinfo" (
137 | 	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
138 | 	if errorlevel 1 exit /b 1
139 | 	echo.
140 | 	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
141 | 	goto end
142 | )
143 | 
144 | if "%1" == "gettext" (
145 | 	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
146 | 	if errorlevel 1 exit /b 1
147 | 	echo.
148 | 	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
149 | 	goto end
150 | )
151 | 
152 | if "%1" == "changes" (
153 | 	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
154 | 	if errorlevel 1 exit /b 1
155 | 	echo.
156 | 	echo.The overview file is in %BUILDDIR%/changes.
157 | 	goto end
158 | )
159 | 
160 | if "%1" == "linkcheck" (
161 | 	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
162 | 	if errorlevel 1 exit /b 1
163 | 	echo.
164 | 	echo.Link check complete; look for any errors in the above output ^
165 | or in %BUILDDIR%/linkcheck/output.txt.
166 | 	goto end
167 | )
168 | 
169 | if "%1" == "doctest" (
170 | 	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
171 | 	if errorlevel 1 exit /b 1
172 | 	echo.
173 | 	echo.Testing of doctests in the sources finished, look at the ^
174 | results in %BUILDDIR%/doctest/output.txt.
175 | 	goto end
176 | )
177 | 
178 | :end
179 | 


--------------------------------------------------------------------------------
/docs/on_demand.rst:
--------------------------------------------------------------------------------
 1 | Loading worksheets on demand
 2 | =============================
 3 | 
 4 | .. currentmodule:: xlrd.book
 5 | 
 6 | This feature, new in version 0.7.1, is governed by the ``on_demand`` argument
 7 | to the :func:`~xlrd.open_workbook` function and allows saving memory and time by
 8 | loading only those sheets that the caller is interested in, and releasing sheets
 9 | when no longer required.
10 | 
11 | ``on_demand=False`` (default):
12 |   No change. :func:`~xlrd.open_workbook` loads global data
13 |   and all sheets, releases resources no longer required (principally the
14 |   :class:`str` or :class:`mmap.mmap` object containing the Workbook stream),
15 |   and returns.
16 | 
17 | ``on_demand=True`` and BIFF version < 5.0:
18 |   A warning message is emitted,
19 |   ``on_demand`` is recorded as ``False``, and the old process is followed.
20 | 
21 | ``on_demand=True`` and BIFF version >= 5.0:
22 |   :func:`~xlrd.open_workbook` loads global
23 |   data and returns without releasing resources. At this stage, the only
24 |   information available about sheets is :attr:`Book.nsheets` and
25 |   :meth:`Book.sheet_names`.
26 | 
27 | :meth:`Book.sheet_by_name` and :meth:`Book.sheet_by_index` will load the
28 | requested sheet if it is not already loaded.
29 | 
30 | :meth:`Book.sheets` will load all unloaded sheets.
31 | 
32 | The caller may save memory by calling
33 | :meth:`Book.unload_sheet` when finished with the sheet.
34 | This applies irrespective of the state of ``on_demand``.
35 | 
36 | The caller may re-load an unloaded sheet by calling :meth:`Book.sheet_by_name`
37 | or :meth:`Book.sheet_by_index`, except if the required resources have been
38 | released (which will
39 | have happened automatically when ``on_demand`` is false). This is the only
40 | case where an exception will be raised.
41 | 
42 | The caller may query the state of a sheet using :meth:`Book.sheet_loaded`.
43 | 
44 | :meth:`Book.release_resources` may be used to save memory and close
45 | any memory-mapped file before proceeding to examine already-loaded
46 | sheets. Once resources are released, no further sheets can be loaded.
47 | 
48 | When using on-demand, it is advisable to ensure that
49 | :meth:`Book.release_resources` is always called, even if an exception
50 | is raised in your own code; otherwise if the input file has been
51 | memory-mapped, the :class:`mmap.mmap` object will not be closed and you will
52 | not be able to access the physical file until your Python process
53 | terminates. This can be done by calling :meth:`Book.release_resources`
54 | explicitly in the finally part of a try/finally block.
55 | 
56 | The Book object is also a context manager, so you can wrap your code in a
57 | ``with`` statement that will make sure underlying resources are closed.
58 | 


--------------------------------------------------------------------------------
/docs/references.rst:
--------------------------------------------------------------------------------
 1 | Named references, constants, formulas, and macros
 2 | =================================================
 3 | 
 4 | .. currentmodule:: xlrd.book
 5 | 
 6 | 
 7 | A name is used to refer to a cell, a group of cells, a constant
 8 | value, a formula, or a macro. Usually the scope of a name is global
 9 | across the whole workbook. However it can be local to a worksheet.
10 | For example, if the sales figures are in different cells in
11 | different sheets, the user may define the name "Sales" in each
12 | sheet. There are built-in names, like "Print_Area" and
13 | "Print_Titles"; these two are naturally local to a sheet.
14 | 
15 | To inspect the names with a user interface like MS Excel, OOo Calc,
16 | or Gnumeric, click on Insert -> Names -> Define. This will show the global
17 | names, plus those local to the currently selected sheet.
18 | 
19 | A :class:`Book` object provides two dictionaries (:attr:`Book.name_map` and
20 | :attr:`Book.name_and_scope_map`) and a list (:attr:`Book.name_obj_list`) which
21 | allow various ways of accessing the :class:`Name` objects.
22 | There is one :class:`Name` object for each ``NAME`` record found in the workbook.
23 | :class:`Name` objects have many attributes, several of which are relevant only
24 | when ``obj.macro`` is ``1``.
25 | 
26 | In the examples directory you will find ``namesdemo.xls`` which
27 | showcases the many different ways that names can be used, and
28 | ``xlrdnamesAPIdemo.py`` which offers 3 different queries for inspecting
29 | the names in your files, and shows how to extract whatever a name is
30 | referring to. There is currently one "convenience method",
31 | :meth:`Name.cell`, which extracts the value in the case where the name
32 | refers to a single cell. The source code for :meth:`Name.cell` is an extra
33 | source of information on how the :class:`Name` attributes hang together.
34 | 
35 | .. note::
36 | 
37 |   Name information is *not* extracted from files older than
38 |   Excel 5.0 (``Book.biff_version < 50``).
39 | 


--------------------------------------------------------------------------------
/docs/unicode.rst:
--------------------------------------------------------------------------------
 1 | Handling of Unicode
 2 | ===================
 3 | 
 4 | This package presents all text strings as Python unicode objects.
 5 | From Excel 97 onwards, text in Excel spreadsheets has been stored as `UTF-16LE
 6 | <http://unicode.org/faq/utf_bom.html>`_
 7 | (a 16-bit Unicode Transformation Format).
 8 | Older files (Excel 95 and earlier) don't keep strings in Unicode;
 9 | a ``CODEPAGE`` record provides a codepage number (for example, 1252) which is
10 | used by xlrd to derive the encoding (for the same example: "cp1252") which is
11 | used to translate to Unicode.
12 | 
13 | If the ``CODEPAGE`` record is missing (possible if the file was created
14 | by third-party software), ``xlrd`` will assume that the encoding is ascii,
15 | and keep going. If the actual encoding is not ascii, a
16 | :class:`UnicodeDecodeError` exception will be raised and
17 | you will need to determine the encoding yourself, and tell xlrd:
18 | 
19 | .. code-block:: python
20 | 
21 |   book = xlrd.open_workbook(..., encoding_override="cp1252")
22 | 
23 | If the ``CODEPAGE`` record exists but is wrong (for example, the codepage
24 | number is 1251, but the strings are actually encoded in koi8_r),
25 | it can be overridden using the same mechanism.
26 | 
27 | The supplied ``runxlrd.py`` has a corresponding command-line argument, which
28 | may be used for experimentation:
29 | 
30 | .. code-block:: bash
31 | 
32 |     runxlrd.py -e koi8_r 3rows myfile.xls
33 | 
34 | The first place to look for an encoding, the "codec name", is
35 | `the Python documentation`__.
36 | 
37 | __ https://docs.python.org/library/codecs.html#standard-encodings
38 | 


--------------------------------------------------------------------------------
/scripts/runxlrd.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # Copyright (c) 2005-2012 Stephen John Machin, Lingfo Pty Ltd
  3 | # This script is part of the xlrd package, which is released under a
  4 | # BSD-style licence.
  5 | 
  6 | from __future__ import print_function
  7 | 
  8 | cmd_doc = """
  9 | Commands:
 10 | 
 11 | 2rows           Print the contents of first and last row in each sheet
 12 | 3rows           Print the contents of first, second and last row in each sheet
 13 | bench           Same as "show", but doesn't print -- for profiling
 14 | biff_count[1]   Print a count of each type of BIFF record in the file
 15 | biff_dump[1]    Print a dump (char and hex) of the BIFF records in the file
 16 | fonts           hdr + print a dump of all font objects
 17 | hdr             Mini-overview of file (no per-sheet information)
 18 | hotshot         Do a hotshot profile run e.g. ... -f1 hotshot bench bigfile*.xls
 19 | labels          Dump of sheet.col_label_ranges and ...row... for each sheet
 20 | name_dump       Dump of each object in book.name_obj_list
 21 | names           Print brief information for each NAME record
 22 | ov              Overview of file
 23 | profile         Like "hotshot", but uses cProfile
 24 | show            Print the contents of all rows in each sheet
 25 | version[0]      Print versions of xlrd and Python and exit
 26 | xfc             Print "XF counts" and cell-type counts -- see code for details
 27 | 
 28 | [0] means no file arg
 29 | [1] means only one file arg i.e. no glob.glob pattern
 30 | """
 31 | 
 32 | options = None
 33 | if __name__ == "__main__":
 34 |     import xlrd
 35 |     import sys
 36 |     import time
 37 |     import glob
 38 |     import traceback
 39 |     import gc
 40 | 
 41 |     from xlrd.timemachine import xrange, REPR
 42 | 
 43 | 
 44 |     class LogHandler(object):
 45 | 
 46 |         def __init__(self, logfileobj):
 47 |             self.logfileobj = logfileobj
 48 |             self.fileheading = None
 49 |             self.shown = 0
 50 | 
 51 |         def setfileheading(self, fileheading):
 52 |             self.fileheading = fileheading
 53 |             self.shown = 0
 54 | 
 55 |         def write(self, text):
 56 |             if self.fileheading and not self.shown:
 57 |                 self.logfileobj.write(self.fileheading)
 58 |                 self.shown = 1
 59 |             self.logfileobj.write(text)
 60 | 
 61 |     null_cell = xlrd.empty_cell
 62 | 
 63 |     def show_row(bk, sh, rowx, colrange, printit):
 64 |         if bk.ragged_rows:
 65 |             colrange = range(sh.row_len(rowx))
 66 |         if not colrange: return
 67 |         if printit: print()
 68 |         if bk.formatting_info:
 69 |             for colx, ty, val, cxfx in get_row_data(bk, sh, rowx, colrange):
 70 |                 if printit:
 71 |                     print("cell %s%d: type=%d, data: %r, xfx: %s"
 72 |                         % (xlrd.colname(colx), rowx+1, ty, val, cxfx))
 73 |         else:
 74 |             for colx, ty, val, _unused in get_row_data(bk, sh, rowx, colrange):
 75 |                 if printit:
 76 |                     print("cell %s%d: type=%d, data: %r" % (xlrd.colname(colx), rowx+1, ty, val))
 77 | 
 78 |     def get_row_data(bk, sh, rowx, colrange):
 79 |         result = []
 80 |         dmode = bk.datemode
 81 |         ctys = sh.row_types(rowx)
 82 |         cvals = sh.row_values(rowx)
 83 |         for colx in colrange:
 84 |             cty = ctys[colx]
 85 |             cval = cvals[colx]
 86 |             if bk.formatting_info:
 87 |                 cxfx = str(sh.cell_xf_index(rowx, colx))
 88 |             else:
 89 |                 cxfx = ''
 90 |             if cty == xlrd.XL_CELL_DATE:
 91 |                 try:
 92 |                     showval = xlrd.xldate_as_tuple(cval, dmode)
 93 |                 except xlrd.XLDateError as e:
 94 |                     showval = "%s:%s" % (type(e).__name__, e)
 95 |                     cty = xlrd.XL_CELL_ERROR
 96 |             elif cty == xlrd.XL_CELL_ERROR:
 97 |                 showval = xlrd.error_text_from_code.get(cval, '<Unknown error code 0x%02x>' % cval)
 98 |             else:
 99 |                 showval = cval
100 |             result.append((colx, cty, showval, cxfx))
101 |         return result
102 | 
103 |     def bk_header(bk):
104 |         print()
105 |         print("BIFF version: %s; datemode: %s"
106 |             % (xlrd.biff_text_from_num[bk.biff_version], bk.datemode))
107 |         print("codepage: %r (encoding: %s); countries: %r"
108 |             % (bk.codepage, bk.encoding, bk.countries))
109 |         print("Last saved by: %r" % bk.user_name)
110 |         print("Number of data sheets: %d" % bk.nsheets)
111 |         print("Use mmap: %d; Formatting: %d; On demand: %d"
112 |             % (bk.use_mmap, bk.formatting_info, bk.on_demand))
113 |         print("Ragged rows: %d" % bk.ragged_rows)
114 |         if bk.formatting_info:
115 |             print("FORMATs: %d, FONTs: %d, XFs: %d"
116 |                 % (len(bk.format_list), len(bk.font_list), len(bk.xf_list)))
117 |         if not options.suppress_timing:
118 |             print("Load time: %.2f seconds (stage 1) %.2f seconds (stage 2)"
119 |                 % (bk.load_time_stage_1, bk.load_time_stage_2))
120 |         print()
121 | 
122 |     def show_fonts(bk):
123 |         print("Fonts:")
124 |         for x in xrange(len(bk.font_list)):
125 |             font = bk.font_list[x]
126 |             font.dump(header='== Index %d ==' % x, indent=4)
127 | 
128 |     def show_names(bk, dump=0):
129 |         bk_header(bk)
130 |         if bk.biff_version < 50:
131 |             print("Names not extracted in this BIFF version")
132 |             return
133 |         nlist = bk.name_obj_list
134 |         print("Name list: %d entries" % len(nlist))
135 |         for nobj in nlist:
136 |             if dump:
137 |                 nobj.dump(sys.stdout,
138 |                     header="\n=== Dump of name_obj_list[%d] ===" % nobj.name_index)
139 |             else:
140 |                 print("[%d]\tName:%r macro:%r scope:%d\n\tresult:%r\n"
141 |                     % (nobj.name_index, nobj.name, nobj.macro, nobj.scope, nobj.result))
142 | 
143 |     def print_labels(sh, labs, title):
144 |         if not labs:return
145 |         for rlo, rhi, clo, chi in labs:
146 |             print("%s label range %s:%s contains:"
147 |                 % (title, xlrd.cellname(rlo, clo), xlrd.cellname(rhi-1, chi-1)))
148 |             for rx in xrange(rlo, rhi):
149 |                 for cx in xrange(clo, chi):
150 |                     print("    %s: %r" % (xlrd.cellname(rx, cx), sh.cell_value(rx, cx)))
151 | 
152 |     def show_labels(bk):
153 |         # bk_header(bk)
154 |         hdr = 0
155 |         for shx in range(bk.nsheets):
156 |             sh = bk.sheet_by_index(shx)
157 |             clabs = sh.col_label_ranges
158 |             rlabs = sh.row_label_ranges
159 |             if clabs or rlabs:
160 |                 if not hdr:
161 |                     bk_header(bk)
162 |                     hdr = 1
163 |                 print("sheet %d: name = %r; nrows = %d; ncols = %d" %
164 |                     (shx, sh.name, sh.nrows, sh.ncols))
165 |                 print_labels(sh, clabs, 'Col')
166 |                 print_labels(sh, rlabs, 'Row')
167 |             if bk.on_demand: bk.unload_sheet(shx)
168 | 
169 |     def show(bk, nshow=65535, printit=1):
170 |         bk_header(bk)
171 |         if 0:
172 |             rclist = xlrd.sheet.rc_stats.items()
173 |             rclist = sorted(rclist)
174 |             print("rc stats")
175 |             for k, v in rclist:
176 |                 print("0x%04x %7d" % (k, v))
177 |         if options.onesheet:
178 |             try:
179 |                 shx = int(options.onesheet)
180 |             except ValueError:
181 |                 shx = bk.sheet_by_name(options.onesheet).number
182 |             shxrange = [shx]
183 |         else:
184 |             shxrange = range(bk.nsheets)
185 |         # print("shxrange", list(shxrange))
186 |         for shx in shxrange:
187 |             sh = bk.sheet_by_index(shx)
188 |             nrows, ncols = sh.nrows, sh.ncols
189 |             colrange = range(ncols)
190 |             anshow = min(nshow, nrows)
191 |             print("sheet %d: name = %s; nrows = %d; ncols = %d" %
192 |                 (shx, REPR(sh.name), sh.nrows, sh.ncols))
193 |             if nrows and ncols:
194 |                 # Beat the bounds
195 |                 for rowx in xrange(nrows):
196 |                     nc = sh.row_len(rowx)
197 |                     if nc:
198 |                         sh.row_types(rowx)[nc-1]
199 |                         sh.row_values(rowx)[nc-1]
200 |                         sh.cell(rowx, nc-1)
201 |             for rowx in xrange(anshow-1):
202 |                 if not printit and rowx % 10000 == 1 and rowx > 1:
203 |                     print("done %d rows" % (rowx-1,))
204 |                 show_row(bk, sh, rowx, colrange, printit)
205 |             if anshow and nrows:
206 |                 show_row(bk, sh, nrows-1, colrange, printit)
207 |             print()
208 |             if bk.on_demand: bk.unload_sheet(shx)
209 | 
210 |     def count_xfs(bk):
211 |         bk_header(bk)
212 |         for shx in range(bk.nsheets):
213 |             sh = bk.sheet_by_index(shx)
214 |             nrows = sh.nrows
215 |             print("sheet %d: name = %r; nrows = %d; ncols = %d" %
216 |                 (shx, sh.name, sh.nrows, sh.ncols))
217 |             # Access all xfindexes to force gathering stats
218 |             type_stats = [0, 0, 0, 0, 0, 0, 0]
219 |             for rowx in xrange(nrows):
220 |                 for colx in xrange(sh.row_len(rowx)):
221 |                     xfx = sh.cell_xf_index(rowx, colx)
222 |                     assert xfx >= 0
223 |                     cty = sh.cell_type(rowx, colx)
224 |                     type_stats[cty] += 1
225 |             print("XF stats", sh._xf_index_stats)
226 |             print("type stats", type_stats)
227 |             print()
228 |             if bk.on_demand: bk.unload_sheet(shx)
229 | 
230 |     def main(cmd_args):
231 |         import optparse
232 |         global options
233 |         usage = "\n%prog [options] command [input-file-patterns]\n" + cmd_doc
234 |         oparser = optparse.OptionParser(usage)
235 |         oparser.add_option(
236 |             "-l", "--logfilename",
237 |             default="",
238 |             help="contains error messages")
239 |         oparser.add_option(
240 |             "-v", "--verbosity",
241 |             type="int", default=0,
242 |             help="level of information and diagnostics provided")
243 |         oparser.add_option(
244 |             "-m", "--mmap",
245 |             type="int", default=-1,
246 |             help="1: use mmap; 0: don't use mmap; -1: accept heuristic")
247 |         oparser.add_option(
248 |             "-e", "--encoding",
249 |             default="",
250 |             help="encoding override")
251 |         oparser.add_option(
252 |             "-f", "--formatting",
253 |             type="int", default=0,
254 |             help="0 (default): no fmt info\n"
255 |                  "1: fmt info (all cells)\n",
256 |         )
257 |         oparser.add_option(
258 |             "-g", "--gc",
259 |             type="int", default=0,
260 |             help="0: auto gc enabled; 1: auto gc disabled, manual collect after each file; 2: no gc")
261 |         oparser.add_option(
262 |             "-s", "--onesheet",
263 |             default="",
264 |             help="restrict output to this sheet (name or index)")
265 |         oparser.add_option(
266 |             "-u", "--unnumbered",
267 |             action="store_true", default=0,
268 |             help="omit line numbers or offsets in biff_dump")
269 |         oparser.add_option(
270 |             "-d", "--on-demand",
271 |             action="store_true", default=0,
272 |             help="load sheets on demand instead of all at once")
273 |         oparser.add_option(
274 |             "-t", "--suppress-timing",
275 |             action="store_true", default=0,
276 |             help="don't print timings (diffs are less messy)")
277 |         oparser.add_option(
278 |             "-r", "--ragged-rows",
279 |             action="store_true", default=0,
280 |             help="open_workbook(..., ragged_rows=True)")
281 |         options, args = oparser.parse_args(cmd_args)
282 |         if len(args) == 1 and args[0] in ("version", ):
283 |             pass
284 |         elif len(args) < 2:
285 |             oparser.error("Expected at least 2 args, found %d" % len(args))
286 |         cmd = args[0]
287 |         xlrd_version = getattr(xlrd, "__VERSION__", "unknown; before 0.5")
288 |         if cmd == 'biff_dump':
289 |             xlrd.dump(args[1], unnumbered=options.unnumbered)
290 |             sys.exit(0)
291 |         if cmd == 'biff_count':
292 |             xlrd.count_records(args[1])
293 |             sys.exit(0)
294 |         if cmd == 'version':
295 |             print("xlrd: %s, from %s" % (xlrd_version, xlrd.__file__))
296 |             print("Python:", sys.version)
297 |             sys.exit(0)
298 |         if options.logfilename:
299 |             logfile = LogHandler(open(options.logfilename, 'w'))
300 |         else:
301 |             logfile = sys.stdout
302 |         mmap_opt = options.mmap
303 |         mmap_arg = xlrd.USE_MMAP
304 |         if mmap_opt in (1, 0):
305 |             mmap_arg = mmap_opt
306 |         elif mmap_opt != -1:
307 |             print('Unexpected value (%r) for mmap option -- assuming default' % mmap_opt)
308 |         fmt_opt = options.formatting | (cmd in ('xfc', ))
309 |         gc_mode = options.gc
310 |         if gc_mode:
311 |             gc.disable()
312 |         for pattern in args[1:]:
313 |             for fname in glob.glob(pattern):
314 |                 print("\n=== File: %s ===" % fname)
315 |                 if logfile != sys.stdout:
316 |                     logfile.setfileheading("\n=== File: %s ===\n" % fname)
317 |                 if gc_mode == 1:
318 |                     n_unreachable = gc.collect()
319 |                     if n_unreachable:
320 |                         print("GC before open:", n_unreachable, "unreachable objects")
321 |                 try:
322 |                     t0 = time.time()
323 |                     bk = xlrd.open_workbook(
324 |                         fname,
325 |                         verbosity=options.verbosity, logfile=logfile,
326 |                         use_mmap=mmap_arg,
327 |                         encoding_override=options.encoding,
328 |                         formatting_info=fmt_opt,
329 |                         on_demand=options.on_demand,
330 |                         ragged_rows=options.ragged_rows,
331 |                     )
332 |                     t1 = time.time()
333 |                     if not options.suppress_timing:
334 |                         print("Open took %.2f seconds" % (t1-t0,))
335 |                 except xlrd.XLRDError as e:
336 |                     print("*** Open failed: %s: %s" % (type(e).__name__, e))
337 |                     continue
338 |                 except KeyboardInterrupt:
339 |                     print("*** KeyboardInterrupt ***")
340 |                     traceback.print_exc(file=sys.stdout)
341 |                     sys.exit(1)
342 |                 except BaseException as e:
343 |                     print("*** Open failed: %s: %s" % (type(e).__name__, e))
344 |                     traceback.print_exc(file=sys.stdout)
345 |                     continue
346 |                 t0 = time.time()
347 |                 if cmd == 'hdr':
348 |                     bk_header(bk)
349 |                 elif cmd == 'ov': # OverView
350 |                     show(bk, 0)
351 |                 elif cmd == 'show': # all rows
352 |                     show(bk)
353 |                 elif cmd == '2rows': # first row and last row
354 |                     show(bk, 2)
355 |                 elif cmd == '3rows': # first row, 2nd row and last row
356 |                     show(bk, 3)
357 |                 elif cmd == 'bench':
358 |                     show(bk, printit=0)
359 |                 elif cmd == 'fonts':
360 |                     bk_header(bk)
361 |                     show_fonts(bk)
362 |                 elif cmd == 'names': # named reference list
363 |                     show_names(bk)
364 |                 elif cmd == 'name_dump': # named reference list
365 |                     show_names(bk, dump=1)
366 |                 elif cmd == 'labels':
367 |                     show_labels(bk)
368 |                 elif cmd == 'xfc':
369 |                     count_xfs(bk)
370 |                 else:
371 |                     print("*** Unknown command <%s>" % cmd)
372 |                     sys.exit(1)
373 |                 del bk
374 |                 if gc_mode == 1:
375 |                     n_unreachable = gc.collect()
376 |                     if n_unreachable:
377 |                         print("GC post cmd:", fname, "->", n_unreachable, "unreachable objects")
378 |                 if not options.suppress_timing:
379 |                     t1 = time.time()
380 |                     print("\ncommand took %.2f seconds\n" % (t1-t0,))
381 | 
382 |         return None
383 | 
384 |     av = sys.argv[1:]
385 |     if not av:
386 |         main(av)
387 |     firstarg = av[0].lower()
388 |     if firstarg == "hotshot":
389 |         import hotshot
390 |         import hotshot.stats
391 |         av = av[1:]
392 |         prof_log_name = "XXXX.prof"
393 |         prof = hotshot.Profile(prof_log_name)
394 |         # benchtime, result = prof.runcall(main, *av)
395 |         result = prof.runcall(main, *(av, ))
396 |         print("result", repr(result))
397 |         prof.close()
398 |         stats = hotshot.stats.load(prof_log_name)
399 |         stats.strip_dirs()
400 |         stats.sort_stats('time', 'calls')
401 |         stats.print_stats(20)
402 |     elif firstarg == "profile":
403 |         import cProfile
404 |         av = av[1:]
405 |         cProfile.run('main(av)', 'YYYY.prof')
406 |         import pstats
407 |         p = pstats.Stats('YYYY.prof')
408 |         p.strip_dirs().sort_stats('cumulative').print_stats(30)
409 |     else:
410 |         main(av)
411 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal = 1
3 | 
4 | [metadata]
5 | license_file = LICENSE
6 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | from xlrd.info import __VERSION__
 4 | 
 5 | setup(
 6 |     name='xlrd',
 7 |     version=__VERSION__,
 8 |     author='Chris Withers',
 9 |     author_email='chris@withers.org',
10 |     url='http://www.python-excel.org/',
11 |     packages=['xlrd'],
12 |     scripts=[
13 |         'scripts/runxlrd.py',
14 |     ],
15 |     description=(
16 |         'Library for developers to extract data from '
17 |         'Microsoft Excel (tm) .xls spreadsheet files'
18 |     ),
19 |     long_description=open('README.rst').read(),
20 |     license='BSD',
21 |     keywords=['xls', 'excel', 'spreadsheet', 'workbook'],
22 |     classifiers=[
23 |         'Development Status :: 5 - Production/Stable',
24 |         'Intended Audience :: Developers',
25 |         'License :: OSI Approved :: BSD License',
26 |         'Programming Language :: Python',
27 |         'Programming Language :: Python :: 2',
28 |         'Programming Language :: Python :: 2.7',
29 |         'Programming Language :: Python :: 3',
30 |         'Programming Language :: Python :: 3.6',
31 |         'Programming Language :: Python :: 3.7',
32 |         'Programming Language :: Python :: 3.8',
33 |         'Programming Language :: Python :: 3.9',
34 |         'Operating System :: OS Independent',
35 |         'Topic :: Database',
36 |         'Topic :: Office/Business',
37 |         'Topic :: Software Development :: Libraries :: Python Modules',
38 |     ],
39 |     python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*",
40 |     extras_require=dict(
41 |         test=['pytest', 'pytest-cov'],
42 |         docs=['sphinx'],
43 |         build=['wheel', 'twine']
44 |     )
45 | )
46 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/__init__.py


--------------------------------------------------------------------------------
/tests/helpers.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | 
def from_sample(filename):
    """Return the absolute path of ``filename`` inside the ``samples`` directory next to this module."""
    here = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(here, 'samples', filename)
6 | 


--------------------------------------------------------------------------------
/tests/samples/Formate.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/Formate.xls


--------------------------------------------------------------------------------
/tests/samples/biff4_no_format_no_window2.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/biff4_no_format_no_window2.xls


--------------------------------------------------------------------------------
/tests/samples/corrupted_error.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/corrupted_error.xls


--------------------------------------------------------------------------------
/tests/samples/formula_test_names.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/formula_test_names.xls


--------------------------------------------------------------------------------
/tests/samples/formula_test_sjmachin.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/formula_test_sjmachin.xls


--------------------------------------------------------------------------------
/tests/samples/issue20.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/issue20.xls


--------------------------------------------------------------------------------
/tests/samples/namesdemo.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/namesdemo.xls


--------------------------------------------------------------------------------
/tests/samples/picture_in_cell.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/picture_in_cell.xls


--------------------------------------------------------------------------------
/tests/samples/profiles.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/profiles.xls


--------------------------------------------------------------------------------
/tests/samples/ragged.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/ragged.xls


--------------------------------------------------------------------------------
/tests/samples/sample.ods:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/sample.ods


--------------------------------------------------------------------------------
/tests/samples/sample.txt:
--------------------------------------------------------------------------------
1 | This is a text file.
2 | 


--------------------------------------------------------------------------------
/tests/samples/sample.xlsb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/sample.xlsb


--------------------------------------------------------------------------------
/tests/samples/sample.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/sample.xlsx


--------------------------------------------------------------------------------
/tests/samples/sample.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/sample.zip


--------------------------------------------------------------------------------
/tests/samples/xf_class.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/python-excel/xlrd/0c4e80b3d48dfe2250ac4e514c8231a742fee221/tests/samples/xf_class.xls


--------------------------------------------------------------------------------
/tests/test_biffh.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import unittest
 3 | 
 4 | from xlrd import biffh
 5 | 
 6 | if sys.version_info[0] >= 3:
 7 |     from io import StringIO
 8 | else:
 9 |     # Python 2.6+ does have the io module, but io.StringIO is strict about
10 |     # unicode, which won't work for our test.
11 |     from StringIO import StringIO
12 | 
13 | 
14 | class TestHexDump(unittest.TestCase):
15 |     def test_hex_char_dump(self):
16 |         sio = StringIO()
17 |         biffh.hex_char_dump(b"abc\0e\01", 0, 6, fout=sio)
18 |         s = sio.getvalue()
19 |         assert "61 62 63 00 65 01" in s, s
20 |         assert "abc~e?" in s, s
21 | 
22 | if __name__=='__main__':
23 |     unittest.main()
24 | 


--------------------------------------------------------------------------------
/tests/test_cell.py:
--------------------------------------------------------------------------------
 1 | # Portions Copyright (C) 2010, Manfred Moitzi under a BSD licence
 2 | 
 3 | import unittest
 4 | 
 5 | import xlrd
 6 | from xlrd.timemachine import UNICODE_LITERAL
 7 | 
 8 | from .helpers import from_sample
 9 | 
10 | 
11 | class TestCell(unittest.TestCase):
12 | 
13 |     def setUp(self):
14 |         self.book = xlrd.open_workbook(from_sample('profiles.xls'), formatting_info=True)
15 |         self.sheet = self.book.sheet_by_name('PROFILEDEF')
16 | 
17 |     def test_empty_cell(self):
18 |         sheet = self.book.sheet_by_name('TRAVERSALCHAINAGE')
19 |         cell = sheet.cell(0, 0)
20 |         self.assertEqual(cell.ctype, xlrd.book.XL_CELL_EMPTY)
21 |         self.assertEqual(cell.value, '')
22 |         self.assertEqual(type(cell.value), type(UNICODE_LITERAL('')))
23 |         self.assertTrue(cell.xf_index > 0)
24 | 
25 |     def test_string_cell(self):
26 |         cell = self.sheet.cell(0, 0)
27 |         self.assertEqual(cell.ctype, xlrd.book.XL_CELL_TEXT)
28 |         self.assertEqual(cell.value, 'PROFIL')
29 |         self.assertEqual(type(cell.value), type(UNICODE_LITERAL('')))
30 |         self.assertTrue(cell.xf_index > 0)
31 | 
32 |     def test_number_cell(self):
33 |         cell = self.sheet.cell(1, 1)
34 |         self.assertEqual(cell.ctype, xlrd.book.XL_CELL_NUMBER)
35 |         self.assertEqual(cell.value, 100)
36 |         self.assertTrue(cell.xf_index > 0)
37 | 
38 |     def test_calculated_cell(self):
39 |         sheet2 = self.book.sheet_by_name('PROFILELEVELS')
40 |         cell = sheet2.cell(1, 3)
41 |         self.assertEqual(cell.ctype, xlrd.book.XL_CELL_NUMBER)
42 |         self.assertAlmostEqual(cell.value, 265.131, places=3)
43 |         self.assertTrue(cell.xf_index > 0)
44 | 
45 |     def test_merged_cells(self):
46 |         book = xlrd.open_workbook(from_sample('xf_class.xls'), formatting_info=True)
47 |         sheet3 = book.sheet_by_name('table2')
48 |         row_lo, row_hi, col_lo, col_hi = sheet3.merged_cells[0]
49 |         self.assertEqual(sheet3.cell(row_lo, col_lo).value, 'MERGED')
50 |         self.assertEqual((row_lo, row_hi, col_lo, col_hi), (3, 7, 2, 5))
51 | 


--------------------------------------------------------------------------------
/tests/test_formats.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Portions Copyright (C) 2010, Manfred Moitzi under a BSD licence
 3 | 
 4 | import sys
 5 | from unittest import TestCase
 6 | 
 7 | import xlrd
 8 | 
 9 | from .helpers import from_sample
10 | 
11 | if sys.version_info[0] >= 3:
12 |     def u(s): return s
13 | else:
14 |     def u(s):
15 |         return s.decode('utf-8')
16 | 
17 | 
18 | class TestCellContent(TestCase):
19 | 
20 |     def setUp(self):
21 |         self.book = xlrd.open_workbook(from_sample('Formate.xls'), formatting_info=True)
22 |         self.sheet = self.book.sheet_by_name(u('Blätt1'))
23 | 
24 |     def test_text_cells(self):
25 |         for row, name in enumerate([u('Huber'), u('Äcker'), u('Öcker')]):
26 |             cell = self.sheet.cell(row, 0)
27 |             self.assertEqual(cell.ctype, xlrd.book.XL_CELL_TEXT)
28 |             self.assertEqual(cell.value, name)
29 |             self.assertTrue(cell.xf_index > 0)
30 | 
31 |     def test_date_cells(self):
32 |         # see also 'Dates in Excel spreadsheets' in the documentation
33 |         # convert: xldate_as_tuple(float, book.datemode) -> (year, month,
34 |         # day, hour, minutes, seconds)
35 |         for row, date in [(0, 2741.), (1, 38406.), (2, 32266.)]:
36 |             cell = self.sheet.cell(row, 1)
37 |             self.assertEqual(cell.ctype, xlrd.book.XL_CELL_DATE)
38 |             self.assertEqual(cell.value, date)
39 |             self.assertTrue(cell.xf_index > 0)
40 | 
41 |     def test_time_cells(self):
42 |         # see also 'Dates in Excel spreadsheets' in the documentation
43 |         # convert: xldate_as_tuple(float, book.datemode) -> (year, month,
44 |         # day, hour, minutes, seconds)
45 |         for row, time in [(3, .273611), (4, .538889), (5, .741123)]:
46 |             cell = self.sheet.cell(row, 1)
47 |             self.assertEqual(cell.ctype, xlrd.book.XL_CELL_DATE)
48 |             self.assertAlmostEqual(cell.value, time, places=6)
49 |             self.assertTrue(cell.xf_index > 0)
50 | 
51 |     def test_percent_cells(self):
52 |         for row, time in [(6, .974), (7, .124)]:
53 |             cell = self.sheet.cell(row, 1)
54 |             self.assertEqual(cell.ctype, xlrd.book.XL_CELL_NUMBER)
55 |             self.assertAlmostEqual(cell.value, time, places=3)
56 |             self.assertTrue(cell.xf_index > 0)
57 | 
58 |     def test_currency_cells(self):
59 |         for row, time in [(8, 1000.30), (9, 1.20)]:
60 |             cell = self.sheet.cell(row, 1)
61 |             self.assertEqual(cell.ctype, xlrd.book.XL_CELL_NUMBER)
62 |             self.assertAlmostEqual(cell.value, time, places=2)
63 |             self.assertTrue(cell.xf_index > 0)
64 | 
65 |     def test_get_from_merged_cell(self):
66 |         sheet = self.book.sheet_by_name(u('ÖÄÜ'))
67 |         cell = sheet.cell(2, 2)
68 |         self.assertEqual(cell.ctype, xlrd.book.XL_CELL_TEXT)
69 |         self.assertEqual(cell.value, 'MERGED CELLS')
70 |         self.assertTrue(cell.xf_index > 0)
71 | 
72 |     def test_ignore_diagram(self):
73 |         sheet = self.book.sheet_by_name(u('Blätt3'))
74 |         cell = sheet.cell(0, 0)
75 |         self.assertEqual(cell.ctype, xlrd.book.XL_CELL_NUMBER)
76 |         self.assertEqual(cell.value, 100)
77 |         self.assertTrue(cell.xf_index > 0)
78 | 


--------------------------------------------------------------------------------
/tests/test_formulas.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Portions Copyright (C) 2010, Manfred Moitzi under a BSD licence
 3 | 
 4 | from unittest import TestCase
 5 | 
 6 | import xlrd
 7 | 
 8 | from .helpers import from_sample
 9 | 
10 | try:
11 |     ascii
12 | except NameError:
13 |     # Python 2 has no ascii() builtin: fall back to repr() minus a leading u prefix.
14 |     def ascii(s):
15 |         a = repr(s)
16 |         if a.startswith(('u"', "u'")):
17 |             a = a[1:]
18 |         return a
19 | 
20 | class TestFormulas(TestCase):
21 | 
22 |     def setUp(self):
23 |         book = xlrd.open_workbook(from_sample('formula_test_sjmachin.xls'))
24 |         self.sheet = book.sheet_by_index(0)
25 | 
26 |     def get_value(self, col, row):
27 |         return ascii(self.sheet.col_values(col)[row])
28 | 
29 |     def test_cell_B2(self):
30 |         self.assertEqual(
31 |             self.get_value(1, 1),
32 |             r"'\u041c\u041e\u0421\u041a\u0412\u0410 \u041c\u043e\u0441\u043a\u0432\u0430'",
33 |         )
34 | 
35 |     def test_cell_B3(self):
36 |         self.assertEqual(self.get_value(1, 2), '0.14285714285714285')
37 | 
38 |     def test_cell_B4(self):
39 |         self.assertEqual(self.get_value(1, 3), "'ABCDEF'")
40 | 
41 |     def test_cell_B5(self):
42 |         self.assertEqual(self.get_value(1, 4), "''")
43 | 
44 |     def test_cell_B6(self):
45 |         self.assertEqual(self.get_value(1, 5), '1')
46 | 
47 |     def test_cell_B7(self):
48 |         self.assertEqual(self.get_value(1, 6), '7')
49 | 
50 |     def test_cell_B8(self):
51 |         self.assertEqual(
52 |             self.get_value(1, 7),
53 |             r"'\u041c\u041e\u0421\u041a\u0412\u0410 \u041c\u043e\u0441\u043a\u0432\u0430'",
54 |         )
55 | 
56 | class TestNameFormulas(TestCase):
57 | 
58 |     def setUp(self):
59 |         book = xlrd.open_workbook(from_sample('formula_test_names.xls'))
60 |         self.sheet = book.sheet_by_index(0)
61 | 
62 |     def get_value(self, col, row):
63 |         return ascii(self.sheet.col_values(col)[row])
64 | 
65 |     def test_unaryop(self):
66 |         self.assertEqual(self.get_value(1, 1), '-7.0')
67 | 
68 |     def test_attrsum(self):
69 |         self.assertEqual(self.get_value(1, 2), '4.0')
70 | 
71 |     def test_func(self):
72 |         self.assertEqual(self.get_value(1, 3), '6.0')
73 | 
74 |     def test_func_var_args(self):
75 |         self.assertEqual(self.get_value(1, 4), '3.0')
76 | 
77 |     def test_if(self):
78 |         self.assertEqual(self.get_value(1, 5), "'b'")
79 | 
80 |     def test_choose(self):
81 |         self.assertEqual(self.get_value(1, 6), "'C'")
82 | 


--------------------------------------------------------------------------------
/tests/test_ignore_workbook_corruption_error.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | 
 3 | import xlrd
 4 | 
 5 | from .helpers import from_sample
 6 | 
 7 | 
 8 | class TestIgnoreWorkbookCorruption(TestCase):
 9 | 
10 |     def test_not_corrupted(self):
11 |         with self.assertRaises(Exception) as context:
12 |             xlrd.open_workbook(from_sample('corrupted_error.xls'))
13 |         self.assertTrue('Workbook corruption' in str(context.exception))
14 | 
15 |         xlrd.open_workbook(from_sample('corrupted_error.xls'), ignore_workbook_corruption=True)
16 | 


--------------------------------------------------------------------------------
/tests/test_inspect.py:
--------------------------------------------------------------------------------
 1 | from xlrd import inspect_format
 2 | 
 3 | from .helpers import from_sample
 4 | 
 5 | 
 6 | def test_xlsx():
 7 |     assert inspect_format(from_sample('sample.xlsx')) == 'xlsx'
 8 | 
 9 | 
10 | def test_xlsb():
11 |     assert inspect_format(from_sample('sample.xlsb')) == 'xlsb'
12 | 
13 | 
14 | def test_ods():
15 |     assert inspect_format(from_sample('sample.ods')) == 'ods'
16 | 
17 | 
18 | def test_zip():
19 |     assert inspect_format(from_sample('sample.zip')) == 'zip'
20 | 
21 | 
22 | def test_xls():
23 |     assert inspect_format(from_sample('namesdemo.xls')) == 'xls'
24 | 
25 | 
26 | def test_content():
27 |     with open(from_sample('sample.xlsx'), 'rb') as source:
28 |         assert inspect_format(content=source.read()) == 'xlsx'
29 | 
30 | 
31 | def test_unknown():
32 |     assert inspect_format(from_sample('sample.txt')) is None
33 | 


--------------------------------------------------------------------------------
/tests/test_missing_records.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | 
 3 | from xlrd import open_workbook
 4 | from xlrd.biffh import XL_CELL_TEXT
 5 | 
 6 | from .helpers import from_sample
 7 | 
 8 | 
 9 | class TestMissingRecords(TestCase):
10 | 
11 |     def setUp(self):
12 |         path = from_sample('biff4_no_format_no_window2.xls')
13 |         self.book = open_workbook(path)
14 |         self.sheet = self.book.sheet_by_index(0)
15 | 
16 |     def test_default_format(self):
17 |         cell = self.sheet.cell(0, 0)
18 |         self.assertEqual(cell.ctype, XL_CELL_TEXT)
19 | 
20 |     def test_default_window2_options(self):
21 |         self.assertEqual(self.sheet.cached_page_break_preview_mag_factor, 0)
22 |         self.assertEqual(self.sheet.cached_normal_view_mag_factor, 0)
23 | 


--------------------------------------------------------------------------------
/tests/test_open_workbook.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | import tempfile
 4 | from unittest import TestCase
 5 | 
 6 | import pytest
 7 | 
 8 | from xlrd import open_workbook, XLRDError
 9 | 
10 | from .helpers import from_sample
11 | 
12 | 
13 | class TestOpen(object):
14 |     # test different uses of open_workbook
15 | 
16 |     def test_names_demo(self):
17 |         # For now, we just check this doesn't raise an error.
18 |         open_workbook(from_sample('namesdemo.xls'))
19 | 
20 |     def test_ragged_rows_tidied_with_formatting(self):
21 |         # For now, we just check this doesn't raise an error.
22 |         open_workbook(from_sample('issue20.xls'),
23 |                       formatting_info=True)
24 | 
25 |     def test_BYTES_X00(self):
26 |         # For now, we just check this doesn't raise an error.
27 |         open_workbook(from_sample('picture_in_cell.xls'),
28 |                       formatting_info=True)
29 | 
30 |     def test_open_xlsx(self):
31 |         with pytest.raises(XLRDError, match='Excel xlsx file; not supported'):
32 |             open_workbook(from_sample('sample.xlsx'))
33 | 
34 |     def test_open_unknown(self):
35 |         with pytest.raises(XLRDError, match="Unsupported format, or corrupt file"):
36 |             open_workbook(from_sample('sample.txt'))
37 | 


--------------------------------------------------------------------------------
/tests/test_sheet.py:
--------------------------------------------------------------------------------
  1 | # Portions Copyright (C) 2010, Manfred Moitzi under a BSD licence
  2 | 
  3 | import types
  4 | from unittest import TestCase
  5 | 
  6 | import xlrd
  7 | from xlrd.timemachine import xrange
  8 | 
  9 | from .helpers import from_sample
 10 | 
 11 | SHEETINDEX = 0
 12 | NROWS = 15
 13 | NCOLS = 13
 14 | 
 15 | ROW_ERR = NROWS + 10
 16 | COL_ERR = NCOLS + 10
 17 | 
 18 | 
 19 | class TestSheet(TestCase):
 20 | 
 21 |     sheetnames = ['PROFILEDEF', 'AXISDEF', 'TRAVERSALCHAINAGE',
 22 |                   'AXISDATUMLEVELS', 'PROFILELEVELS']
 23 | 
 24 |     def setUp(self):
 25 |         self.book = xlrd.open_workbook(from_sample('profiles.xls'), formatting_info=True)
 26 | 
 27 |     def check_sheet_function(self, function):
 28 |         self.assertTrue(function(0, 0))
 29 |         self.assertTrue(function(NROWS-1, NCOLS-1))
 30 | 
 31 |     def check_sheet_function_index_error(self, function):
 32 |         self.assertRaises(IndexError, function, ROW_ERR, 0)
 33 |         self.assertRaises(IndexError, function, 0, COL_ERR)
 34 | 
 35 |     def check_col_slice(self, col_function):
 36 |         _slice = col_function(0, 2, NROWS-2)
 37 |         self.assertEqual(len(_slice), NROWS-4)
 38 | 
 39 |     def check_row_slice(self, row_function):
 40 |         _slice = row_function(0, 2, NCOLS-2)
 41 |         self.assertEqual(len(_slice), NCOLS-4)
 42 | 
 43 |     def test_nrows(self):
 44 |         sheet = self.book.sheet_by_index(SHEETINDEX)
 45 |         self.assertEqual(sheet.nrows, NROWS)
 46 | 
 47 |     def test_ncols(self):
 48 |         sheet = self.book.sheet_by_index(SHEETINDEX)
 49 |         self.assertEqual(sheet.ncols, NCOLS)
 50 | 
 51 |     def test_cell(self):
 52 |         sheet = self.book.sheet_by_index(SHEETINDEX)
 53 |         self.assertNotEqual(xlrd.empty_cell, sheet.cell(0, 0))
 54 |         self.assertNotEqual(xlrd.empty_cell, sheet.cell(NROWS-1, NCOLS-1))
 55 | 
 56 |     def test_cell_error(self):
 57 |         sheet = self.book.sheet_by_index(SHEETINDEX)
 58 |         self.check_sheet_function_index_error(sheet.cell)
 59 | 
 60 |     def test_cell_type(self):
 61 |         sheet = self.book.sheet_by_index(SHEETINDEX)
 62 |         self.check_sheet_function(sheet.cell_type)
 63 | 
 64 |     def test_cell_type_error(self):
 65 |         sheet = self.book.sheet_by_index(SHEETINDEX)
 66 |         self.check_sheet_function_index_error(sheet.cell_type)
 67 | 
 68 |     def test_cell_value(self):
 69 |         sheet = self.book.sheet_by_index(SHEETINDEX)
 70 |         self.check_sheet_function(sheet.cell_value)
 71 | 
 72 |     def test_cell_value_error(self):
 73 |         sheet = self.book.sheet_by_index(SHEETINDEX)
 74 |         self.check_sheet_function_index_error(sheet.cell_value)
 75 | 
 76 |     def test_cell_xf_index(self):
 77 |         sheet = self.book.sheet_by_index(SHEETINDEX)
 78 |         self.check_sheet_function(sheet.cell_xf_index)
 79 | 
 80 |     def test_cell_xf_index_error(self):
 81 |         sheet = self.book.sheet_by_index(SHEETINDEX)
 82 |         self.check_sheet_function_index_error(sheet.cell_xf_index)
 83 | 
 84 |     def test_col(self):
 85 |         sheet = self.book.sheet_by_index(SHEETINDEX)
 86 |         col = sheet.col(0)
 87 |         self.assertEqual(len(col), NROWS)
 88 | 
 89 |     def test_row(self):
 90 |         sheet = self.book.sheet_by_index(SHEETINDEX)
 91 |         row = sheet.row(0)
 92 |         self.assertEqual(len(row), NCOLS)
 93 | 
 94 |     def test_getitem_int(self):
 95 |         sheet = self.book.sheet_by_index(SHEETINDEX)
 96 |         row = sheet[0]
 97 |         self.assertEqual(len(row), NCOLS)
 98 | 
 99 |     def test_getitem_tuple(self):
100 |         sheet = self.book.sheet_by_index(SHEETINDEX)
101 |         self.assertNotEqual(xlrd.empty_cell, sheet[0, 0])
102 |         self.assertNotEqual(xlrd.empty_cell, sheet[NROWS-1, NCOLS-1])
103 | 
104 |     def test_getitem_failure(self):
105 |         sheet = self.book.sheet_by_index(SHEETINDEX)
106 |         with self.assertRaises(ValueError):
107 |             sheet[0, 0, 0]
108 | 
109 |         with self.assertRaises(TypeError):
110 |             sheet["hi"]
111 | 
112 |     def test_get_rows(self):
113 |         sheet = self.book.sheet_by_index(SHEETINDEX)
114 |         rows = sheet.get_rows()
115 |         self.assertTrue(isinstance(rows, types.GeneratorType), True)
116 |         self.assertEqual(len(list(rows)), sheet.nrows)
117 | 
118 |     def test_iter(self):
119 |         sheet = self.book.sheet_by_index(SHEETINDEX)
120 |         rows = []
121 |         # check syntax
122 |         for row in sheet:
123 |             rows.append(row)
124 |         self.assertEqual(len(rows), sheet.nrows)
125 | 
126 |     def test_col_slice(self):
127 |         sheet = self.book.sheet_by_index(SHEETINDEX)
128 |         self.check_col_slice(sheet.col_slice)
129 | 
130 |     def test_col_types(self):
131 |         sheet = self.book.sheet_by_index(SHEETINDEX)
132 |         self.check_col_slice(sheet.col_types)
133 | 
134 |     def test_col_values(self):
135 |         sheet = self.book.sheet_by_index(SHEETINDEX)
136 |         self.check_col_slice(sheet.col_values)
137 | 
138 |     def test_row_slice(self):
139 |         sheet = self.book.sheet_by_index(SHEETINDEX)
140 |         self.check_row_slice(sheet.row_slice)
141 | 
142 |     def test_row_types(self):
143 |         sheet = self.book.sheet_by_index(SHEETINDEX)
144 |         self.check_row_slice(sheet.col_types)
145 | 
146 |     def test_row_values(self):
147 |         sheet = self.book.sheet_by_index(SHEETINDEX)
148 |         self.check_col_slice(sheet.row_values)
149 | 
150 | 
151 | class TestSheetRagged(TestCase):
152 | 
153 |     def test_read_ragged(self):
154 |         book = xlrd.open_workbook(from_sample('ragged.xls'), ragged_rows=True)
155 |         sheet = book.sheet_by_index(0)
156 |         self.assertEqual(sheet.row_len(0), 3)
157 |         self.assertEqual(sheet.row_len(1), 2)
158 |         self.assertEqual(sheet.row_len(2), 1)
159 |         self.assertEqual(sheet.row_len(3), 4)
160 |         self.assertEqual(sheet.row_len(4), 4)
161 | 


--------------------------------------------------------------------------------
/tests/test_workbook.py:
--------------------------------------------------------------------------------
 1 | # Portions Copyright (C) 2010, Manfred Moitzi under a BSD licence
 2 | 
 3 | from unittest import TestCase
 4 | 
 5 | import xlrd
 6 | from xlrd import open_workbook
 7 | from xlrd.book import Book
 8 | from xlrd.sheet import Sheet
 9 | 
10 | from .helpers import from_sample
11 | 
12 | SHEETINDEX = 0
13 | NROWS = 15
14 | NCOLS = 13
15 | 
16 | 
17 | class TestWorkbook(TestCase):
18 |     sheetnames = ['PROFILEDEF', 'AXISDEF', 'TRAVERSALCHAINAGE',
19 |                   'AXISDATUMLEVELS', 'PROFILELEVELS']
20 | 
21 |     def setUp(self):
22 |         self.book = open_workbook(from_sample('profiles.xls'))
23 | 
24 |     def test_open_workbook(self):
25 |         self.assertTrue(isinstance(self.book, Book))
26 | 
27 |     def test_nsheets(self):
28 |         self.assertEqual(self.book.nsheets, 5)
29 | 
30 |     def test_sheet_by_name(self):
31 |         for name in self.sheetnames:
32 |             sheet = self.book.sheet_by_name(name)
33 |             self.assertTrue(isinstance(sheet, Sheet))
34 |             self.assertEqual(name, sheet.name)
35 | 
36 |     def test_sheet_by_index(self):
37 |         for index in range(5):
38 |             sheet = self.book.sheet_by_index(index)
39 |             self.assertTrue(isinstance(sheet, Sheet))
40 |             self.assertEqual(sheet.name, self.sheetnames[index])
41 | 
42 |     def test_sheets(self):
43 |         sheets = self.book.sheets()
44 |         for index, sheet in enumerate(sheets):
45 |             self.assertTrue(isinstance(sheet, Sheet))
46 |             self.assertEqual(sheet.name, self.sheetnames[index])
47 | 
48 |     def test_sheet_names(self):
49 |         self.assertEqual(self.sheetnames, self.book.sheet_names())
50 | 
51 |     def test_getitem_ix(self):
52 |         sheet = self.book[SHEETINDEX]
53 |         self.assertNotEqual(xlrd.empty_cell, sheet.cell(0, 0))
54 |         self.assertNotEqual(xlrd.empty_cell, sheet.cell(NROWS - 1, NCOLS - 1))
55 | 
56 |     def test_getitem_name(self):
57 |         sheet = self.book[self.sheetnames[SHEETINDEX]]
58 |         self.assertNotEqual(xlrd.empty_cell, sheet.cell(0, 0))
59 |         self.assertNotEqual(xlrd.empty_cell, sheet.cell(NROWS - 1, NCOLS - 1))
60 | 
61 |     def test_iter(self):
62 |         sheets = [sh.name for sh in self.book]
63 |         self.assertEqual(sheets, self.sheetnames)
64 | 


--------------------------------------------------------------------------------
/tests/test_xldate.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # Author:  mozman <mozman@gmx.at>
 3 | # Purpose: test xldate.py
 4 | # Created: 04.12.2010
 5 | # Copyright (C) 2010, Manfred Moitzi
 6 | # License: BSD licence
 7 | 
 8 | import unittest
 9 | 
10 | from xlrd import xldate
11 | 
12 | DATEMODE = 0 # 1900-based
13 | 
14 | class TestXLDate(unittest.TestCase):
15 |     def test_date_as_tuple(self):
16 |         date = xldate.xldate_as_tuple(2741., DATEMODE)
17 |         self.assertEqual(date, (1907, 7, 3, 0, 0, 0))
18 |         date = xldate.xldate_as_tuple(38406., DATEMODE)
19 |         self.assertEqual(date, (2005, 2, 23, 0, 0, 0))
20 |         date = xldate.xldate_as_tuple(32266., DATEMODE)
21 |         self.assertEqual(date, (1988, 5, 3, 0, 0, 0))
22 | 
23 |     def test_time_as_tuple(self):
24 |         time = xldate.xldate_as_tuple(.273611, DATEMODE)
25 |         self.assertEqual(time, (0, 0, 0, 6, 34, 0))
26 |         time = xldate.xldate_as_tuple(.538889, DATEMODE)
27 |         self.assertEqual(time, (0, 0, 0, 12, 56, 0))
28 |         time = xldate.xldate_as_tuple(.741123, DATEMODE)
29 |         self.assertEqual(time, (0, 0, 0, 17, 47, 13))
30 | 
31 |     def test_xldate_from_date_tuple(self):
32 |         date = xldate.xldate_from_date_tuple( (1907, 7, 3), DATEMODE )
33 |         self.assertAlmostEqual(date, 2741.)
34 |         date = xldate.xldate_from_date_tuple( (2005, 2, 23), DATEMODE )
35 |         self.assertAlmostEqual(date, 38406.)
36 |         date = xldate.xldate_from_date_tuple( (1988, 5, 3), DATEMODE )
37 |         self.assertAlmostEqual(date, 32266.)
38 | 
39 |     def test_xldate_from_time_tuple(self):
40 |         time = xldate.xldate_from_time_tuple( (6, 34, 0) )
41 |         self.assertAlmostEqual(time, .273611, places=6)
42 |         time = xldate.xldate_from_time_tuple( (12, 56, 0) )
43 |         self.assertAlmostEqual(time, .538889, places=6)
44 |         time = xldate.xldate_from_time_tuple( (17, 47, 13) )
45 |         self.assertAlmostEqual(time, .741123, places=6)
46 | 
47 |     def test_xldate_from_datetime_tuple(self):
48 |         date = xldate.xldate_from_datetime_tuple( (1907, 7, 3, 6, 34, 0), DATEMODE)
49 |         self.assertAlmostEqual(date, 2741.273611, places=6)
50 |         date = xldate.xldate_from_datetime_tuple( (2005, 2, 23, 12, 56, 0), DATEMODE)
51 |         self.assertAlmostEqual(date, 38406.538889, places=6)
52 |         date = xldate.xldate_from_datetime_tuple( (1988, 5, 3, 17, 47, 13), DATEMODE)
53 |         self.assertAlmostEqual(date, 32266.741123, places=6)
54 | 
55 | if __name__=='__main__':
56 |     unittest.main()
57 | 


--------------------------------------------------------------------------------
/tests/test_xldate_to_datetime.py:
--------------------------------------------------------------------------------
  1 | ###############################################################################
  2 | #
  3 | # Tests for the xlrd xldate.xldate_as_datetime() function.
  4 | #
  5 | 
  6 | import unittest
  7 | from datetime import datetime
  8 | 
  9 | from xlrd import xldate
 10 | 
 11 | not_1904 = False
 12 | is_1904 = True
 13 | 
 14 | 
 15 | class TestConvertToDateTime(unittest.TestCase):
 16 |     """
 17 |     Testcases to test the _xldate_to_datetime() function against dates
 18 |     extracted from Excel files, with 1900/1904 epochs.
 19 | 
 20 |     """
 21 | 
 22 |     def test_dates_and_times_1900_epoch(self):
 23 |         """
 24 |         Test the _xldate_to_datetime() function for dates and times in
 25 |         the Excel standard 1900 epoch.
 26 | 
 27 |         """
 28 |         # Test Excel dates strings and corresponding serial date numbers taken
 29 |         # from an Excel file.
 30 |         excel_dates = [
 31 |             # Excel's 0.0 date in the 1900 epoch is 1 day before 1900.
 32 |             ('1899-12-31T00:00:00.000', 0),
 33 | 
 34 |             # Date/time before the false Excel 1900 leapday.
 35 |             ('1900-02-28T02:11:11.986', 59.09111094906),
 36 | 
 37 |             # Date/time after the false Excel 1900 leapday.
 38 |             ('1900-03-01T05:46:44.068', 61.24078782403),
 39 | 
 40 |             # Random date/times in Excel's 0-9999.9999+ range.
 41 |             ('1982-08-25T00:15:20.213', 30188.010650613425),
 42 |             ('2065-04-19T00:16:48.290', 60376.011670023145),
 43 |             ('3222-06-11T03:08:08.251', 483014.13065105322),
 44 |             ('4379-08-03T06:14:48.580', 905652.26028449077),
 45 |             ('5949-12-30T12:59:54.263', 1479232.5416002662),
 46 | 
 47 |             # End of Excel's date range.
 48 |             ('9999-12-31T23:59:59.000', 2958465.999988426),
 49 |         ]
 50 | 
 51 |         # Convert the Excel date strings to datetime objects and compare
 52 |         # against the dateitme return value of xldate.xldate_as_datetime().
 53 |         for excel_date in excel_dates:
 54 |             exp = datetime.strptime(excel_date[0], "%Y-%m-%dT%H:%M:%S.%f")
 55 |             got = xldate.xldate_as_datetime(excel_date[1], not_1904)
 56 | 
 57 |             self.assertEqual(got, exp)
 58 | 
 59 |     def test_dates_only_1900_epoch(self):
 60 |         """
 61 |         Test the _xldate_to_datetime() function for dates in the Excel
 62 |         standard 1900 epoch.
 63 | 
 64 |         """
 65 |         # Test Excel dates strings and corresponding serial date numbers taken
 66 |         # from an Excel file.
 67 |         excel_dates = [
 68 |             # Excel's day 0 in the 1900 epoch is 1 day before 1900.
 69 |             ('1899-12-31', 0),
 70 | 
 71 |             # Excel's day 1 in the 1900 epoch.
 72 |             ('1900-01-01', 1),
 73 | 
 74 |             # Date/time before the false Excel 1900 leapday.
 75 |             ('1900-02-28', 59),
 76 | 
 77 |             # Date/time after the false Excel 1900 leapday.
 78 |             ('1900-03-01', 61),
 79 | 
 80 |             # Random date/times in Excel's 0-9999.9999+ range.
 81 |             ('1902-09-27', 1001),
 82 |             ('1999-12-31', 36525),
 83 |             ('2000-01-01', 36526),
 84 |             ('4000-12-31', 767376),
 85 |             ('4321-01-01', 884254),
 86 |             ('9999-01-01', 2958101),
 87 | 
 88 |             # End of Excel's date range.
 89 |             ('9999-12-31', 2958465),
 90 |         ]
 91 | 
 92 |         # Convert the Excel date strings to datetime objects and compare
 93 |         # against the dateitme return value of xldate.xldate_as_datetime().
 94 |         for excel_date in excel_dates:
 95 |             exp = datetime.strptime(excel_date[0], "%Y-%m-%d")
 96 |             got = xldate.xldate_as_datetime(excel_date[1], not_1904)
 97 | 
 98 |             self.assertEqual(got, exp)
 99 | 
100 |     def test_dates_only_1904_epoch(self):
101 |         """
102 |         Test the _xldate_to_datetime() function for dates in the Excel
103 |         Mac/1904 epoch.
104 | 
105 |         """
106 |         # Test Excel dates strings and corresponding serial date numbers taken
107 |         # from an Excel file.
108 |         excel_dates = [
109 |             # Excel's day 0 in the 1904 epoch.
110 |             ('1904-01-01', 0),
111 | 
112 |             # Random date/times in Excel's 0-9999.9999+ range.
113 |             ('1904-01-31', 30),
114 |             ('1904-08-31', 243),
115 |             ('1999-02-28', 34757),
116 |             ('1999-12-31', 35063),
117 |             ('2000-01-01', 35064),
118 |             ('2400-12-31', 181526),
119 |             ('4000-01-01', 765549),
120 |             ('9999-01-01', 2956639),
121 | 
122 |             # End of Excel's date range.
123 |             ('9999-12-31', 2957003),
124 |         ]
125 | 
126 |         # Convert the Excel date strings to datetime objects and compare
127 |         # against the dateitme return value of xldate.xldate_as_datetime().
128 |         for excel_date in excel_dates:
129 |             exp = datetime.strptime(excel_date[0], "%Y-%m-%d")
130 |             got = xldate.xldate_as_datetime(excel_date[1], is_1904)
131 | 
132 |             self.assertEqual(got, exp)
133 | 
134 |     def test_times_only(self):
135 |         """
136 |         Test the _xldate_to_datetime() function for times only, i.e, the
137 |         fractional part of the Excel date when the serial date is 0.
138 | 
139 |         """
140 |         # Test Excel dates strings and corresponding serial date numbers taken
141 |         # from an Excel file. The 1899-12-31 date is Excel's day 0.
142 |         excel_dates = [
143 |             # Random times in Excel's 0-0.9999+ range for 1 day.
144 |             ('1899-12-31T00:00:00.000', 0),
145 |             ('1899-12-31T00:15:20.213', 1.0650613425925924E-2),
146 |             ('1899-12-31T02:24:37.095', 0.10042934027777778),
147 |             ('1899-12-31T04:56:35.792', 0.2059698148148148),
148 |             ('1899-12-31T07:31:20.407', 0.31343063657407405),
149 |             ('1899-12-31T09:37:23.945', 0.40097158564814817),
150 |             ('1899-12-31T12:09:48.602', 0.50681252314814818),
151 |             ('1899-12-31T14:37:57.451', 0.60969271990740748),
152 |             ('1899-12-31T17:04:02.415', 0.71113906250000003),
153 |             ('1899-12-31T19:14:24.673', 0.80167445601851861),
154 |             ('1899-12-31T21:39:05.944', 0.90215212962962965),
155 |             ('1899-12-31T23:17:12.632', 0.97028509259259266),
156 |             ('1899-12-31T23:59:59.999', 0.99999998842592586),
157 |         ]
158 | 
159 |         # Convert the Excel date strings to datetime objects and compare
160 |         # against the dateitme return value of xldate.xldate_as_datetime().
161 |         for excel_date in excel_dates:
162 |             exp = datetime.strptime(excel_date[0], "%Y-%m-%dT%H:%M:%S.%f")
163 |             got = xldate.xldate_as_datetime(excel_date[1], not_1904)
164 | 
165 |             self.assertEqual(got, exp)
166 | 


--------------------------------------------------------------------------------
/xlrd/__init__.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2005-2012 Stephen John Machin, Lingfo Pty Ltd
  2 | # This module is part of the xlrd package, which is released under a
  3 | # BSD-style licence.
  4 | import os
  5 | import pprint
  6 | import sys
  7 | import zipfile
  8 | 
  9 | from . import timemachine
 10 | from .biffh import (
 11 |     XL_CELL_BLANK, XL_CELL_BOOLEAN, XL_CELL_DATE, XL_CELL_EMPTY, XL_CELL_ERROR,
 12 |     XL_CELL_NUMBER, XL_CELL_TEXT, XLRDError, biff_text_from_num,
 13 |     error_text_from_code,
 14 | )
 15 | from .book import Book, colname, open_workbook_xls
 16 | from .compdoc import SIGNATURE as XLS_SIGNATURE
 17 | from .formula import *  # is constrained by __all__
 18 | from .info import __VERSION__, __version__
 19 | from .sheet import empty_cell
 20 | from .xldate import XLDateError, xldate_as_datetime, xldate_as_tuple
 21 | 
 22 | 
#: descriptions of the file types :mod:`xlrd` can :func:`inspect <inspect_format>`.
#: Keys are the values returned by :func:`inspect_format` (including ``None``).
FILE_FORMAT_DESCRIPTIONS = {
    'xls': 'Excel xls',
    'xlsb': 'Excel 2007 xlsb file',
    'xlsx': 'Excel xlsx file',
    'ods': 'Openoffice.org ODS file',
    'zip': 'Unknown ZIP file',
    None: 'Unknown file type',
}

# Leading bytes that inspect_format() treats as marking a ZIP container.
ZIP_SIGNATURE = b"PK\x03\x04"

# Number of leading bytes inspect_format() looks at: long enough to hold the
# longer of the two signatures it compares against.
PEEK_SIZE = max(len(XLS_SIGNATURE), len(ZIP_SIGNATURE))
 36 | 
 37 | 
 38 | def inspect_format(path=None, content=None):
 39 |     """
 40 |     Inspect the content at the supplied path or the :class:`bytes` content provided
 41 |     and return the file's type as a :class:`str`, or ``None`` if it cannot
 42 |     be determined.
 43 | 
 44 |     :param path:
 45 |       A :class:`string <str>` path containing the content to inspect.
 46 |       ``~`` will be expanded.
 47 | 
 48 |     :param content:
 49 |       The :class:`bytes` content to inspect.
 50 | 
 51 |     :returns:
 52 |        A :class:`str`, or ``None`` if the format cannot be determined.
 53 |        The return value can always be looked up in :data:`FILE_FORMAT_DESCRIPTIONS`
 54 |        to return a human-readable description of the format found.
 55 |     """
 56 |     if content:
 57 |         peek = content[:PEEK_SIZE]
 58 |     else:
 59 |         path = os.path.expanduser(path)
 60 |         with open(path, "rb") as f:
 61 |             peek = f.read(PEEK_SIZE)
 62 | 
 63 |     if peek.startswith(XLS_SIGNATURE):
 64 |         return 'xls'
 65 | 
 66 |     if peek.startswith(ZIP_SIGNATURE):
 67 |         zf = zipfile.ZipFile(timemachine.BYTES_IO(content) if content else path)
 68 | 
 69 |         # Workaround for some third party files that use forward slashes and
 70 |         # lower case names. We map the expected name in lowercase to the
 71 |         # actual filename in the zip container.
 72 |         component_names = {name.replace('\\', '/').lower(): name
 73 |                            for name in zf.namelist()}
 74 | 
 75 |         if 'xl/workbook.xml' in component_names:
 76 |             return 'xlsx'
 77 |         if 'xl/workbook.bin' in component_names:
 78 |             return 'xlsb'
 79 |         if 'content.xml' in component_names:
 80 |             return 'ods'
 81 |         return 'zip'
 82 | 
 83 | 
 84 | def open_workbook(filename=None,
 85 |                   logfile=sys.stdout,
 86 |                   verbosity=0,
 87 |                   use_mmap=True,
 88 |                   file_contents=None,
 89 |                   encoding_override=None,
 90 |                   formatting_info=False,
 91 |                   on_demand=False,
 92 |                   ragged_rows=False,
 93 |                   ignore_workbook_corruption=False
 94 |                   ):
 95 |     """
 96 |     Open a spreadsheet file for data extraction.
 97 | 
 98 |     :param filename: The path to the spreadsheet file to be opened.
 99 | 
100 |     :param logfile: An open file to which messages and diagnostics are written.
101 | 
102 |     :param verbosity: Increases the volume of trace material written to the
103 |                       logfile.
104 | 
105 |     :param use_mmap:
106 | 
107 |       Whether to use the mmap module is determined heuristically.
108 |       Use this arg to override the result.
109 | 
110 |       Current heuristic: mmap is used if it exists.
111 | 
112 |     :param file_contents:
113 | 
114 |       A string or an :class:`mmap.mmap` object or some other behave-alike
115 |       object. If ``file_contents`` is supplied, ``filename`` will not be used,
116 |       except (possibly) in messages.
117 | 
118 |     :param encoding_override:
119 | 
120 |       Used to overcome missing or bad codepage information
121 |       in older-version files. See :doc:`unicode`.
122 | 
123 |     :param formatting_info:
124 | 
125 |       The default is ``False``, which saves memory.
126 |       In this case, "Blank" cells, which are those with their own formatting
127 |       information but no data, are treated as empty by ignoring the file's
128 |       ``BLANK`` and ``MULBLANK`` records.
129 |       This cuts off any bottom or right "margin" of rows of empty or blank
130 |       cells.
131 |       Only :meth:`~xlrd.sheet.Sheet.cell_value` and
132 |       :meth:`~xlrd.sheet.Sheet.cell_type` are available.
133 | 
134 |       When ``True``, formatting information will be read from the spreadsheet
135 |       file. This provides all cells, including empty and blank cells.
136 |       Formatting information is available for each cell.
137 | 
138 |       Note that this will raise a NotImplementedError when used with an
139 |       xlsx file.
140 | 
141 |     :param on_demand:
142 | 
143 |       Governs whether sheets are all loaded initially or when demanded
144 |       by the caller. See :doc:`on_demand`.
145 | 
146 |     :param ragged_rows:
147 | 
148 |       The default of ``False`` means all rows are padded out with empty cells so
149 |       that all rows have the same size as found in
150 |       :attr:`~xlrd.sheet.Sheet.ncols`.
151 | 
152 |       ``True`` means that there are no empty cells at the ends of rows.
153 |       This can result in substantial memory savings if rows are of widely
154 |       varying sizes. See also the :meth:`~xlrd.sheet.Sheet.row_len` method.
155 | 
156 | 
157 |     :param ignore_workbook_corruption:
158 | 
159 |       This option allows to read corrupted workbooks.
160 |       When ``False`` you may face CompDocError: Workbook corruption.
161 |       When ``True`` that exception will be ignored.
162 | 
163 |     :returns: An instance of the :class:`~xlrd.book.Book` class.
164 |     """
165 | 
166 |     file_format = inspect_format(filename, file_contents)
167 |     # We have to let unknown file formats pass through here, as some ancient
168 |     # files that xlrd can parse don't start with the expected signature.
169 |     if file_format and file_format != 'xls':
170 |         raise XLRDError(FILE_FORMAT_DESCRIPTIONS[file_format]+'; not supported')
171 | 
172 |     bk = open_workbook_xls(
173 |         filename=filename,
174 |         logfile=logfile,
175 |         verbosity=verbosity,
176 |         use_mmap=use_mmap,
177 |         file_contents=file_contents,
178 |         encoding_override=encoding_override,
179 |         formatting_info=formatting_info,
180 |         on_demand=on_demand,
181 |         ragged_rows=ragged_rows,
182 |         ignore_workbook_corruption=ignore_workbook_corruption,
183 |     )
184 | 
185 |     return bk
186 | 
187 | 
def dump(filename, outfile=sys.stdout, unnumbered=False):
    """
    Debugging aid: write an XLS file's BIFF records to ``outfile`` in
    char & hex form.

    :param filename: The path to the file to be dumped.
    :param outfile: An open file, to which the dump is written. It is also
                    passed to the loader as its log file.
    :param unnumbered: If true, omit offsets (for meaningful diffs).
    """
    from .biffh import biff_dump
    book = Book()
    book.biff2_8_load(filename=filename, logfile=outfile)
    biff_dump(
        book.mem, book.base, book.stream_len,
        base=0, fout=outfile, unnumbered=unnumbered,
    )
200 | 
201 | 
def count_records(filename, outfile=sys.stdout):
    """
    Debugging and analysis aid: summarise the file's BIFF records,
    i.e. write a sorted list of ``(record_name, count)`` to ``outfile``.

    :param filename: The path to the file to be summarised.
    :param outfile: An open file, to which the summary is written. It is
                    also passed to the loader as its log file.
    """
    from .biffh import biff_count_records
    book = Book()
    book.biff2_8_load(filename=filename, logfile=outfile)
    biff_count_records(book.mem, book.base, book.stream_len, fout=outfile)
214 | 


--------------------------------------------------------------------------------
/xlrd/biffh.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Portions copyright © 2005-2010 Stephen John Machin, Lingfo Pty Ltd
  3 | # This module is part of the xlrd package, which is released under a
  4 | # BSD-style licence.
  5 | from __future__ import print_function
  6 | 
  7 | import sys
  8 | from struct import unpack
  9 | 
 10 | from .timemachine import *
 11 | 
 12 | DEBUG = 0
 13 | 
 14 | 
 15 | 
 16 | class XLRDError(Exception):
 17 |     """
 18 |     An exception indicating problems reading data from an Excel file.
 19 |     """
 20 | 
 21 | 
 22 | class BaseObject(object):
 23 |     """
 24 |     Parent of almost all other classes in the package. Defines a common
 25 |     :meth:`dump` method for debugging.
 26 |     """
 27 | 
 28 |     _repr_these = []
 29 | 
 30 | 
 31 |     def dump(self, f=None, header=None, footer=None, indent=0):
 32 |         """
 33 |         :param f: open file object, to which the dump is written
 34 |         :param header: text to write before the dump
 35 |         :param footer: text to write after the dump
 36 |         :param indent: number of leading spaces (for recursive calls)
 37 |         """
 38 |         if f is None:
 39 |             f = sys.stderr
 40 |         if hasattr(self, "__slots__"):
 41 |             alist = []
 42 |             for attr in self.__slots__:
 43 |                 alist.append((attr, getattr(self, attr)))
 44 |         else:
 45 |             alist = self.__dict__.items()
 46 |         alist = sorted(alist)
 47 |         pad = " " * indent
 48 |         if header is not None: print(header, file=f)
 49 |         list_type = type([])
 50 |         dict_type = type({})
 51 |         for attr, value in alist:
 52 |             if getattr(value, 'dump', None) and attr != 'book':
 53 |                 value.dump(f,
 54 |                     header="%s%s (%s object):" % (pad, attr, value.__class__.__name__),
 55 |                     indent=indent+4)
 56 |             elif (attr not in self._repr_these and
 57 |                   (isinstance(value, list_type) or isinstance(value, dict_type))):
 58 |                 print("%s%s: %s, len = %d" % (pad, attr, type(value), len(value)), file=f)
 59 |             else:
 60 |                 fprintf(f, "%s%s: %r\n", pad, attr, value)
 61 |         if footer is not None: print(footer, file=f)
 62 | 
# Format classification codes, numbered 0-4; the trailing comment gives the
# meaning of each in order. DATEFORMAT and NUMBERFORMAT are aliases for the
# date and number codes.
FUN, FDT, FNU, FGE, FTX = range(5) # unknown, date, number, general, text
DATEFORMAT = FDT
NUMBERFORMAT = FNU

# Cell type codes, numbered 0-6 in the order listed below.
(
    XL_CELL_EMPTY,
    XL_CELL_TEXT,
    XL_CELL_NUMBER,
    XL_CELL_DATE,
    XL_CELL_BOOLEAN,
    XL_CELL_ERROR,
    XL_CELL_BLANK, # for use in debugging, gathering stats, etc
) = range(7)
 76 | 
 77 | biff_text_from_num = {
 78 |     0:  "(not BIFF)",
 79 |     20: "2.0",
 80 |     21: "2.1",
 81 |     30: "3",
 82 |     40: "4S",
 83 |     45: "4W",
 84 |     50: "5",
 85 |     70: "7",
 86 |     80: "8",
 87 |     85: "8X",
 88 | }
 89 | 
 90 | #: This dictionary can be used to produce a text version of the internal codes
 91 | #: that Excel uses for error cells.
 92 | error_text_from_code = {
 93 |     0x00: '#NULL!',  # Intersection of two cell ranges is empty
 94 |     0x07: '#DIV/0!', # Division by zero
 95 |     0x0F: '#VALUE!', # Wrong type of operand
 96 |     0x17: '#REF!',   # Illegal or deleted cell reference
 97 |     0x1D: '#NAME?',  # Wrong function or range name
 98 |     0x24: '#NUM!',   # Value range overflow
 99 |     0x2A: '#N/A',    # Argument or function not available
100 | }
101 | 
102 | BIFF_FIRST_UNICODE = 80
103 | 
104 | XL_WORKBOOK_GLOBALS = WBKBLOBAL = 0x5
105 | XL_WORKBOOK_GLOBALS_4W = 0x100
106 | XL_WORKSHEET = WRKSHEET = 0x10
107 | 
108 | XL_BOUNDSHEET_WORKSHEET = 0x00
109 | XL_BOUNDSHEET_CHART     = 0x02
110 | XL_BOUNDSHEET_VB_MODULE = 0x06
111 | 
112 | # XL_RK2 = 0x7e
113 | XL_ARRAY  = 0x0221
114 | XL_ARRAY2 = 0x0021
115 | XL_BLANK = 0x0201
116 | XL_BLANK_B2 = 0x01
117 | XL_BOF = 0x809
118 | XL_BOOLERR = 0x205
119 | XL_BOOLERR_B2 = 0x5
120 | XL_BOUNDSHEET = 0x85
121 | XL_BUILTINFMTCOUNT = 0x56
122 | XL_CF = 0x01B1
123 | XL_CODEPAGE = 0x42
124 | XL_COLINFO = 0x7D
125 | XL_COLUMNDEFAULT = 0x20 # BIFF2 only
126 | XL_COLWIDTH = 0x24 # BIFF2 only
127 | XL_CONDFMT = 0x01B0
128 | XL_CONTINUE = 0x3c
129 | XL_COUNTRY = 0x8C
130 | XL_DATEMODE = 0x22
131 | XL_DEFAULTROWHEIGHT = 0x0225
132 | XL_DEFCOLWIDTH = 0x55
133 | XL_DIMENSION = 0x200
134 | XL_DIMENSION2 = 0x0
135 | XL_EFONT = 0x45
136 | XL_EOF = 0x0a
137 | XL_EXTERNNAME = 0x23
138 | XL_EXTERNSHEET = 0x17
139 | XL_EXTSST = 0xff
140 | XL_FEAT11 = 0x872
141 | XL_FILEPASS = 0x2f
142 | XL_FONT = 0x31
143 | XL_FONT_B3B4 = 0x231
144 | XL_FORMAT = 0x41e
145 | XL_FORMAT2 = 0x1E # BIFF2, BIFF3
146 | XL_FORMULA = 0x6
147 | XL_FORMULA3 = 0x206
148 | XL_FORMULA4 = 0x406
149 | XL_GCW = 0xab
150 | XL_HLINK = 0x01B8
151 | XL_QUICKTIP = 0x0800
152 | XL_HORIZONTALPAGEBREAKS = 0x1b
153 | XL_INDEX = 0x20b
154 | XL_INTEGER = 0x2 # BIFF2 only
155 | XL_IXFE = 0x44 # BIFF2 only
156 | XL_LABEL = 0x204
157 | XL_LABEL_B2 = 0x04
158 | XL_LABELRANGES = 0x15f
159 | XL_LABELSST = 0xfd
160 | XL_LEFTMARGIN = 0x26
161 | XL_TOPMARGIN = 0x28
162 | XL_RIGHTMARGIN = 0x27
163 | XL_BOTTOMMARGIN = 0x29
164 | XL_HEADER = 0x14
165 | XL_FOOTER = 0x15
166 | XL_HCENTER = 0x83
167 | XL_VCENTER = 0x84
168 | XL_MERGEDCELLS = 0xE5
169 | XL_MSO_DRAWING = 0x00EC
170 | XL_MSO_DRAWING_GROUP = 0x00EB
171 | XL_MSO_DRAWING_SELECTION = 0x00ED
172 | XL_MULRK = 0xbd
173 | XL_MULBLANK = 0xbe
174 | XL_NAME = 0x18
175 | XL_NOTE = 0x1c
176 | XL_NUMBER = 0x203
177 | XL_NUMBER_B2 = 0x3
178 | XL_OBJ = 0x5D
179 | XL_PAGESETUP = 0xA1
180 | XL_PALETTE = 0x92
181 | XL_PANE = 0x41
182 | XL_PRINTGRIDLINES = 0x2B
183 | XL_PRINTHEADERS = 0x2A
184 | XL_RK = 0x27e
185 | XL_ROW = 0x208
186 | XL_ROW_B2 = 0x08
187 | XL_RSTRING = 0xd6
188 | XL_SCL = 0x00A0
189 | XL_SHEETHDR = 0x8F # BIFF4W only
190 | XL_SHEETPR = 0x81
191 | XL_SHEETSOFFSET = 0x8E # BIFF4W only
192 | XL_SHRFMLA = 0x04bc
193 | XL_SST = 0xfc
194 | XL_STANDARDWIDTH = 0x99
195 | XL_STRING = 0x207
196 | XL_STRING_B2 = 0x7
197 | XL_STYLE = 0x293
198 | XL_SUPBOOK = 0x1AE # aka EXTERNALBOOK in OOo docs
199 | XL_TABLEOP = 0x236
200 | XL_TABLEOP2 = 0x37
201 | XL_TABLEOP_B2 = 0x36
202 | XL_TXO = 0x1b6
203 | XL_UNCALCED = 0x5e
204 | XL_UNKNOWN = 0xffff
205 | XL_VERTICALPAGEBREAKS = 0x1a
206 | XL_WINDOW2    = 0x023E
207 | XL_WINDOW2_B2 = 0x003E
208 | XL_WRITEACCESS = 0x5C
209 | XL_WSBOOL = XL_SHEETPR
210 | XL_XF = 0xe0
211 | XL_XF2 = 0x0043 # BIFF2 version of XF record
212 | XL_XF3 = 0x0243 # BIFF3 version of XF record
213 | XL_XF4 = 0x0443 # BIFF4 version of XF record
214 | 
215 | boflen = {0x0809: 8, 0x0409: 6, 0x0209: 6, 0x0009: 4}
216 | bofcodes = (0x0809, 0x0409, 0x0209, 0x0009)
217 | 
218 | XL_FORMULA_OPCODES = (0x0006, 0x0406, 0x0206)
219 | 
220 | _cell_opcode_list = [
221 |     XL_BOOLERR,
222 |     XL_FORMULA,
223 |     XL_FORMULA3,
224 |     XL_FORMULA4,
225 |     XL_LABEL,
226 |     XL_LABELSST,
227 |     XL_MULRK,
228 |     XL_NUMBER,
229 |     XL_RK,
230 |     XL_RSTRING,
231 | ]
232 | _cell_opcode_dict = {}
233 | for _cell_opcode in _cell_opcode_list:
234 |     _cell_opcode_dict[_cell_opcode] = 1
235 | 
def is_cell_opcode(c):
    """Return True if record code ``c`` is listed in ``_cell_opcode_dict``."""
    return c in _cell_opcode_dict
238 | 
def upkbits(tgt_obj, src, manifest, local_setattr=setattr):
    """
    Unpack bit fields from the integer ``src`` into attributes of ``tgt_obj``.

    :param tgt_obj: object that receives one attribute per manifest entry
    :param src: integer holding the packed bit fields
    :param manifest: iterable of ``(shift, mask, attribute_name)`` triples;
                     each value is ``(src & mask) >> shift``
    :param local_setattr: callable used to store each value
                          (defaults to :func:`setattr`)
    """
    for shift, mask, name in manifest:
        field = (src & mask) >> shift
        local_setattr(tgt_obj, name, field)
242 | 
def upkbitsL(tgt_obj, src, manifest, local_setattr=setattr, local_int=int):
    """
    Like :func:`upkbits`, but each extracted value is passed through
    ``local_int`` before being stored.

    :param tgt_obj: object that receives one attribute per manifest entry
    :param src: integer holding the packed bit fields
    :param manifest: iterable of ``(shift, mask, attribute_name)`` triples;
                     each value is ``local_int((src & mask) >> shift)``
    :param local_setattr: callable used to store each value
                          (defaults to :func:`setattr`)
    :param local_int: conversion applied to each value (defaults to
                      :class:`int`)
    """
    for shift, mask, name in manifest:
        field = local_int((src & mask) >> shift)
        local_setattr(tgt_obj, name, field)
246 | 
def unpack_string(data, pos, encoding, lenlen=1):
    """
    Decode a length-prefixed byte string found at ``data[pos:]``.

    :param data: the bytes holding the record data
    :param pos: offset of the length prefix
    :param encoding: codec name used to decode the string's bytes
    :param lenlen: size of the length prefix in bytes: 1 (unsigned byte)
                   or 2 (little-endian unsigned short)
    :returns: the decoded string
    """
    length_code = 'BH'[lenlen-1]
    (nchars,) = unpack('<' + length_code, data[pos:pos+lenlen])
    start = pos + lenlen
    return unicode(data[start:start+nchars], encoding)
251 | 
def unpack_string_update_pos(data, pos, encoding, lenlen=1, known_len=None):
    """
    Decode a byte string at ``data[pos:]`` and report where it ends.

    :param data: the bytes holding the record data
    :param pos: offset of the length prefix, or of the string itself when
                ``known_len`` is given
    :param encoding: codec name used to decode the string's bytes
    :param lenlen: size of the length prefix in bytes: 1 (unsigned byte)
                   or 2 (little-endian unsigned short)
    :param known_len: length of the string when it is not stored immediately
                      in front of it; no prefix is read in that case
    :returns: ``(decoded_string, position_just_past_the_string)``
    """
    if known_len is None:
        (nchars,) = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])
        pos += lenlen
    else:
        # On a NAME record, the length byte is detached from the front of the string.
        nchars = known_len
    end = pos + nchars
    return (unicode(data[pos:end], encoding), end)
261 | 
def unpack_unicode(data, pos, lenlen=2):
    """
    Return the unicode string stored at ``data[pos:]``.

    The layout read here is: a character count (``lenlen`` bytes), an
    options byte, an optional 2-byte field (options bit 0x08), an optional
    4-byte field (options bit 0x04), then the characters. Options bit 0x01
    selects UTF-16-LE (2 bytes per character); otherwise there is one byte
    per character, decoded as latin_1.

    :param data: the bytes holding the record data
    :param pos: offset of the character count
    :param lenlen: size of the character count in bytes: 1 or 2
    :returns: the decoded string
    """
    (nchars,) = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])
    if not nchars:
        # Ambiguous whether 0-length string should have an "options" byte.
        # Avoid crash if missing.
        return UNICODE_LITERAL("")
    pos += lenlen
    options = BYTES_ORD(data[pos])
    pos += 1
    if options & 0x08:
        # Rich-text flag: skip the 2-byte field without reading it.
        pos += 2
    if options & 0x04:
        # Phonetic flag: skip the 4-byte field without reading it.
        pos += 4
    if options & 0x01:
        # Uncompressed UTF-16-LE
        return unicode(data[pos:pos+2*nchars], 'utf_16_le')
    # Note: this is COMPRESSED (not ASCII!) encoding!!!
    # Merely returning the raw bytes would work OK 99.99% of the time
    # if the local codepage was cp1252 -- however this would rapidly go pear-shaped
    # for other codepages so we grit our Anglocentric teeth and return Unicode :-)
    return unicode(data[pos:pos+nchars], "latin_1")
300 | 
def unpack_unicode_update_pos(data, pos, lenlen=2, known_len=None):
    """
    Return ``(unicode_strg, updated value of pos)``.

    Reads the same layout as :func:`unpack_unicode`, but also advances past
    the trailing rich-text (4 bytes per run) and phonetic data whose sizes
    are given by the optional fields that follow the options byte.

    :param data: the bytes holding the record data
    :param pos: offset of the character count, or of the options byte when
                ``known_len`` is given
    :param lenlen: size of the character count in bytes: 1 or 2
    :param known_len: character count when it is not stored immediately in
                      front of the string; no count is read in that case
    """
    if known_len is None:
        (nchars,) = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])
        pos += lenlen
    else:
        # On a NAME record, the length byte is detached from the front of the string.
        nchars = known_len
    if not nchars and not data[pos:]:
        # Zero-length string with no options byte
        return (UNICODE_LITERAL(""), pos)
    options = BYTES_ORD(data[pos])
    pos += 1
    has_phonetic = options & 0x04
    has_richtext = options & 0x08
    # Sizes of the trailing blocks; they stay 0 when the flag is not set.
    richtext_runs = 0
    phonetic_size = 0
    if has_richtext:
        (richtext_runs,) = unpack('<H', data[pos:pos+2])
        pos += 2
    if has_phonetic:
        (phonetic_size,) = unpack('<i', data[pos:pos+4])
        pos += 4
    if options & 0x01:
        # Uncompressed UTF-16-LE
        nbytes = 2 * nchars
        strg = unicode(data[pos:pos+nbytes], 'utf_16_le')
    else:
        # Note: this is COMPRESSED (not ASCII!) encoding!!!
        nbytes = nchars
        strg = unicode(data[pos:pos+nbytes], "latin_1")
    pos += nbytes
    # Skip the rich-text runs and the phonetic block that follow the text.
    pos += 4 * richtext_runs
    pos += phonetic_size
    return (strg, pos)
335 | 
def unpack_cell_range_address_list_update_pos(output_list, data, pos, biff_version, addr_size=6):
    """
    Read a counted list of cell range addresses starting at ``data[pos:]``.

    The list starts with a 2-byte little-endian count, followed by that many
    addresses of ``addr_size`` bytes each. Every address unpacks to four
    numbers; the second and fourth are incremented by one before the tuple
    is stored (presumably turning inclusive last row/column into exclusive
    upper bounds -- confirm against callers).

    :param output_list: list that is updated in situ with one 4-tuple per
                        address
    :param data: the bytes holding the record data
    :param pos: offset of the 2-byte count
    :param biff_version: not used by this function
    :param addr_size: bytes per address: 6 (``<HHBB``) or 8 (``<HHHH``)
    :returns: the position just past the last address read
    """
    assert addr_size in (6, 8)
    # Used to assert size == 6 if not BIFF8, but pyWLWriter writes
    # BIFF8-only MERGEDCELLS records in a BIFF5 file!
    (count,) = unpack("<H", data[pos:pos+2])
    pos += 2
    if not count:
        return pos
    fmt = "<HHBB" if addr_size == 6 else "<HHHH"
    for _unused in xrange(count):
        row_lo, row_hi, col_lo, col_hi = unpack(fmt, data[pos:pos+addr_size])
        output_list.append((row_lo, row_hi+1, col_lo, col_hi+1))
        pos += addr_size
    return pos
353 | 
354 | _brecstrg = """\
355 | 0000 DIMENSIONS_B2
356 | 0001 BLANK_B2
357 | 0002 INTEGER_B2_ONLY
358 | 0003 NUMBER_B2
359 | 0004 LABEL_B2
360 | 0005 BOOLERR_B2
361 | 0006 FORMULA
362 | 0007 STRING_B2
363 | 0008 ROW_B2
364 | 0009 BOF_B2
365 | 000A EOF
366 | 000B INDEX_B2_ONLY
367 | 000C CALCCOUNT
368 | 000D CALCMODE
369 | 000E PRECISION
370 | 000F REFMODE
371 | 0010 DELTA
372 | 0011 ITERATION
373 | 0012 PROTECT
374 | 0013 PASSWORD
375 | 0014 HEADER
376 | 0015 FOOTER
377 | 0016 EXTERNCOUNT
378 | 0017 EXTERNSHEET
379 | 0018 NAME_B2,5+
380 | 0019 WINDOWPROTECT
381 | 001A VERTICALPAGEBREAKS
382 | 001B HORIZONTALPAGEBREAKS
383 | 001C NOTE
384 | 001D SELECTION
385 | 001E FORMAT_B2-3
386 | 001F BUILTINFMTCOUNT_B2
387 | 0020 COLUMNDEFAULT_B2_ONLY
388 | 0021 ARRAY_B2_ONLY
389 | 0022 DATEMODE
390 | 0023 EXTERNNAME
391 | 0024 COLWIDTH_B2_ONLY
392 | 0025 DEFAULTROWHEIGHT_B2_ONLY
393 | 0026 LEFTMARGIN
394 | 0027 RIGHTMARGIN
395 | 0028 TOPMARGIN
396 | 0029 BOTTOMMARGIN
397 | 002A PRINTHEADERS
398 | 002B PRINTGRIDLINES
399 | 002F FILEPASS
400 | 0031 FONT
401 | 0032 FONT2_B2_ONLY
402 | 0036 TABLEOP_B2
403 | 0037 TABLEOP2_B2
404 | 003C CONTINUE
405 | 003D WINDOW1
406 | 003E WINDOW2_B2
407 | 0040 BACKUP
408 | 0041 PANE
409 | 0042 CODEPAGE
410 | 0043 XF_B2
411 | 0044 IXFE_B2_ONLY
412 | 0045 EFONT_B2_ONLY
413 | 004D PLS
414 | 0051 DCONREF
415 | 0055 DEFCOLWIDTH
416 | 0056 BUILTINFMTCOUNT_B3-4
417 | 0059 XCT
418 | 005A CRN
419 | 005B FILESHARING
420 | 005C WRITEACCESS
421 | 005D OBJECT
422 | 005E UNCALCED
423 | 005F SAVERECALC
424 | 0063 OBJECTPROTECT
425 | 007D COLINFO
426 | 007E RK2_mythical_?
427 | 0080 GUTS
428 | 0081 WSBOOL
429 | 0082 GRIDSET
430 | 0083 HCENTER
431 | 0084 VCENTER
432 | 0085 BOUNDSHEET
433 | 0086 WRITEPROT
434 | 008C COUNTRY
435 | 008D HIDEOBJ
436 | 008E SHEETSOFFSET
437 | 008F SHEETHDR
438 | 0090 SORT
439 | 0092 PALETTE
440 | 0099 STANDARDWIDTH
441 | 009B FILTERMODE
442 | 009C FNGROUPCOUNT
443 | 009D AUTOFILTERINFO
444 | 009E AUTOFILTER
445 | 00A0 SCL
446 | 00A1 SETUP
447 | 00AB GCW
448 | 00BD MULRK
449 | 00BE MULBLANK
450 | 00C1 MMS
451 | 00D6 RSTRING
452 | 00D7 DBCELL
453 | 00DA BOOKBOOL
454 | 00DD SCENPROTECT
455 | 00E0 XF
456 | 00E1 INTERFACEHDR
457 | 00E2 INTERFACEEND
458 | 00E5 MERGEDCELLS
459 | 00E9 BITMAP
460 | 00EB MSO_DRAWING_GROUP
461 | 00EC MSO_DRAWING
462 | 00ED MSO_DRAWING_SELECTION
463 | 00EF PHONETIC
464 | 00FC SST
465 | 00FD LABELSST
466 | 00FF EXTSST
467 | 013D TABID
468 | 015F LABELRANGES
469 | 0160 USESELFS
470 | 0161 DSF
471 | 01AE SUPBOOK
472 | 01AF PROTECTIONREV4
473 | 01B0 CONDFMT
474 | 01B1 CF
475 | 01B2 DVAL
476 | 01B6 TXO
477 | 01B7 REFRESHALL
478 | 01B8 HLINK
479 | 01BC PASSWORDREV4
480 | 01BE DV
481 | 01C0 XL9FILE
482 | 01C1 RECALCID
483 | 0200 DIMENSIONS
484 | 0201 BLANK
485 | 0203 NUMBER
486 | 0204 LABEL
487 | 0205 BOOLERR
488 | 0206 FORMULA_B3
489 | 0207 STRING
490 | 0208 ROW
491 | 0209 BOF
492 | 020B INDEX_B3+
493 | 0218 NAME
494 | 0221 ARRAY
495 | 0223 EXTERNNAME_B3-4
496 | 0225 DEFAULTROWHEIGHT
497 | 0231 FONT_B3B4
498 | 0236 TABLEOP
499 | 023E WINDOW2
500 | 0243 XF_B3
501 | 027E RK
502 | 0293 STYLE
503 | 0406 FORMULA_B4
504 | 0409 BOF
505 | 041E FORMAT
506 | 0443 XF_B4
507 | 04BC SHRFMLA
508 | 0800 QUICKTIP
509 | 0809 BOF
510 | 0862 SHEETLAYOUT
511 | 0867 SHEETPROTECTION
512 | 0868 RANGEPROTECTION
513 | """
514 | 
# Build the opcode -> record-name lookup from the text in _brecstrg: each
# line holds a hexadecimal opcode, whitespace, then the record name.
biff_rec_name_dict = {}
for _buff in _brecstrg.splitlines():
    _numh, _name = _buff.split()
    biff_rec_name_dict[int(_numh, 16)] = _name
# Remove the module-level temporaries (note: _numh is not deleted here).
del _buff, _name, _brecstrg
520 | 
def hex_char_dump(strg, ofs, dlen, base=0, fout=sys.stdout, unnumbered=False):
    """
    Write a hex-plus-character dump of ``strg[ofs:ofs+dlen]``, 16 bytes
    per output line.

    In the character column a zero byte is shown as ``~``, other bytes
    outside the printable range ``' '``..``'~'`` as ``?``.

    :param strg: the bytes to dump from
    :param ofs: offset of the first byte to dump
    :param dlen: number of bytes to dump (clipped to the end of ``strg``)
    :param base: number shown as the offset of the first dumped byte
    :param fout: open file to which the dump is written
    :param unnumbered: if true, omit the offset prefix from each line
    """
    endpos = min(ofs + dlen, len(strg))
    num_prefix = ''
    pos = ofs
    while pos < endpos:
        endsub = min(pos + 16, endpos)
        substrg = strg[pos:endsub]
        lensub = endsub - pos
        if lensub <= 0 or lensub != len(substrg):
            # NOTE(review): this diagnostic goes to sys.stdout, not fout --
            # confirm whether that is intentional.
            fprintf(
                sys.stdout,
                '??? hex_char_dump: ofs=%d dlen=%d base=%d -> endpos=%d pos=%d endsub=%d substrg=%r\n',
                ofs, dlen, base, endpos, pos, endsub, substrg)
            break
        codes = [BYTES_ORD(c) for c in substrg]
        hexd = ''.join(["%02x " % code for code in codes])

        shown = []
        for code in codes:
            if code == 0:
                shown.append('~')
            elif 0x20 <= code <= 0x7e:
                shown.append(chr(code))
            else:
                shown.append('?')
        chard = ''.join(shown)
        if not unnumbered:
            num_prefix = "%5d: " %  (base+pos-ofs)

        fprintf(fout, "%s     %-48s %s\n", num_prefix, hexd, chard)
        pos = endsub
551 | 
def biff_dump(mem, stream_offset, stream_len, base=0, fout=sys.stdout, unnumbered=False):
    """
    Write a record-by-record dump of a BIFF stream held in ``mem``.

    Each record is read as a 4-byte header (two little-endian unsigned
    shorts: record code and data length) followed by ``length`` bytes of
    data, which are shown with :func:`hex_char_dump`. Headers that are all
    zero are not dumped individually; they are reported as a single
    "zero bytes skipped" line.

    :param mem: the bytes containing the stream
    :param stream_offset: offset in ``mem`` at which the stream starts
    :param stream_len: length of the stream in bytes
    :param base: number shown as the offset of the first byte of the stream
    :param fout: open file to which the dump is written
    :param unnumbered: if true, omit the offset prefix from each line
    """
    pos = stream_offset
    stream_end = stream_offset + stream_len
    # Added to a position in mem to get the offset that is displayed.
    adj = base - stream_offset
    # Number of zero bytes skipped so far and not yet reported.
    dummies = 0
    numbered = not unnumbered
    num_prefix = ''
    while stream_end - pos >= 4:
        rc, length = unpack('<HH', mem[pos:pos+4])
        if rc == 0 and length == 0:
            # Compares everything from pos to the end of mem against a run
            # of zeros as long as the rest of the stream.
            if mem[pos:] == b'\0' * (stream_end - pos):
                # Only zeros remain: report them all after the loop.
                dummies = stream_end - pos
                savpos = pos
                pos = stream_end
                break
            if dummies:
                dummies += 4
            else:
                # First zero header of a run: remember where the run began.
                savpos = pos
                dummies = 4
            pos += 4
        else:
            if dummies:
                # A real record ends the run of zero headers; report the run.
                if numbered:
                    num_prefix =  "%5d: " % (adj + savpos)
                fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies)
                dummies = 0
            recname = biff_rec_name_dict.get(rc, '<UNKNOWN>')
            if numbered:
                num_prefix = "%5d: " % (adj + pos)
            fprintf(fout, "%s%04x %s len = %04x (%d)\n", num_prefix, rc, recname, length, length)
            pos += 4
            hex_char_dump(mem, pos, length, adj+pos, fout, unnumbered)
            pos += length
    if dummies:
        # Report a run of zero bytes that reached the end of the loop.
        if numbered:
            num_prefix =  "%5d: " % (adj + savpos)
        fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies)
    if pos < stream_end:
        # Fewer than 4 bytes were left over: too short to hold a header.
        if numbered:
            num_prefix = "%5d: " % (adj + pos)
        fprintf(fout, "%s---- Misc bytes at end ----\n", num_prefix)
        hex_char_dump(mem, pos, stream_end-pos, adj + pos, fout, unnumbered)
    elif pos > stream_end:
        # The last record's declared length ran past the end of the stream.
        fprintf(fout, "Last dumped record has length (%d) that is too large\n", length)
597 | 
def biff_count_records(mem, stream_offset, stream_len, fout=sys.stdout):
    """
    Count the records in a BIFF stream by record name and write the tally,
    sorted by name, one ``count name`` line per record type.

    All-zero headers are counted as ``<Dummy (zero)>`` unless nothing but
    zeros follows, in which case counting stops. Record codes missing from
    ``biff_rec_name_dict`` are counted as ``Unknown_0xNNNN``.

    :param mem: the bytes containing the stream
    :param stream_offset: offset in ``mem`` at which the stream starts
    :param stream_len: length of the stream in bytes
    :param fout: open file to which the summary is written
    """
    stream_end = stream_offset + stream_len
    counts = {}
    pos = stream_offset
    while stream_end - pos >= 4:
        rc, length = unpack('<HH', mem[pos:pos+4])
        if rc == 0 and length == 0:
            if mem[pos:] == b'\0' * (stream_end - pos):
                break
            recname = "<Dummy (zero)>"
        else:
            recname = biff_rec_name_dict.get(rc, None)
            if recname is None:
                recname = "Unknown_0x%04X" % rc
        counts[recname] = counts.get(recname, 0) + 1
        # Step over the 4-byte header and the record's data.
        pos += 4 + length
    for recname in sorted(counts):
        print("%8d %s" % (counts[recname], recname), file=fout)
620 | 
# Maps a codepage number to the name of the Python codec used to decode it.
# Entries marked "guess" are unverified assumptions.
encoding_from_codepage = dict([
    (1200, 'utf_16_le'),
    (10000, 'mac_roman'),
    (10006, 'mac_greek'),     # guess
    (10007, 'mac_cyrillic'),  # guess
    (10029, 'mac_latin2'),    # guess
    (10079, 'mac_iceland'),   # guess
    (10081, 'mac_turkish'),   # guess
    (32768, 'mac_roman'),
    (32769, 'cp1252'),
])
# some more guessing, for Indic scripts
# codepage 57000 range:
# 2 Devanagari [0]
# 3 Bengali [1]
# 4 Tamil [5]
# 5 Telugu [6]
# 6 Assamese [1] c.f. Bengali
# 7 Oriya [4]
# 8 Kannada [7]
# 9 Malayalam [8]
# 10 Gujarati [3]
# 11 Gurmukhi [2]
644 | 


--------------------------------------------------------------------------------
/xlrd/compdoc.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Copyright (c) 2005-2012 Stephen John Machin, Lingfo Pty Ltd
  3 | # This module is part of the xlrd package, which is released under a
  4 | # BSD-style licence.
  5 | # No part of the content of this file was derived from the works of
  6 | # David Giffin.
  7 | """
  8 | Implements the minimal functionality required
  9 | to extract a "Workbook" or "Book" stream (as one big string)
 10 | from an OLE2 Compound Document file.
 11 | """
 12 | from __future__ import print_function
 13 | 
 14 | import array
 15 | import sys
 16 | from struct import unpack
 17 | 
 18 | from .timemachine import *
 19 | 
#: Magic cookie that should appear in the first 8 bytes of the file.
SIGNATURE = b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"

# Special (negative) sector ids. Real sector numbers are >= 0; the stream
# walkers in this module loop "while sid >= 0" and so stop on any of these.
EOCSID = -2  # end-of-chain marker; walkers assert that a chain ends with it
FREESID = -1  # free/unused entry; skipped when found in the MSAT
SATSID = -3  # NOTE(review): not referenced in this module; presumably marks a sector used by the SAT
MSATSID = -4  # also accepted as a terminator of the MSAT extension chain
EVILSID = -5  # written by this module over MSAT/SAT entries that point beyond the file
 28 | 
 29 | class CompDocError(Exception):
 30 |     pass
 31 | 
 32 | class DirNode(object):
 33 | 
 34 |     def __init__(self, DID, dent, DEBUG=0, logfile=sys.stdout):
 35 |         # dent is the 128-byte directory entry
 36 |         self.DID = DID
 37 |         self.logfile = logfile
 38 |         (cbufsize, self.etype, self.colour, self.left_DID, self.right_DID,
 39 |         self.root_DID) = \
 40 |             unpack('<HBBiii', dent[64:80])
 41 |         (self.first_SID, self.tot_size) = \
 42 |             unpack('<ii', dent[116:124])
 43 |         if cbufsize == 0:
 44 |             self.name = UNICODE_LITERAL('')
 45 |         else:
 46 |             self.name = unicode(dent[0:cbufsize-2], 'utf_16_le') # omit the trailing U+0000
 47 |         self.children = [] # filled in later
 48 |         self.parent = -1 # indicates orphan; fixed up later
 49 |         self.tsinfo = unpack('<IIII', dent[100:116])
 50 |         if DEBUG:
 51 |             self.dump(DEBUG)
 52 | 
 53 |     def dump(self, DEBUG=1):
 54 |         fprintf(
 55 |             self.logfile,
 56 |             "DID=%d name=%r etype=%d DIDs(left=%d right=%d root=%d parent=%d kids=%r) first_SID=%d tot_size=%d\n",
 57 |             self.DID, self.name, self.etype, self.left_DID,
 58 |             self.right_DID, self.root_DID, self.parent, self.children, self.first_SID, self.tot_size
 59 |         )
 60 |         if DEBUG == 2:
 61 |             # cre_lo, cre_hi, mod_lo, mod_hi = tsinfo
 62 |             print("timestamp info", self.tsinfo, file=self.logfile)
 63 | 
 64 | def _build_family_tree(dirlist, parent_DID, child_DID):
 65 |     if child_DID < 0: return
 66 |     _build_family_tree(dirlist, parent_DID, dirlist[child_DID].left_DID)
 67 |     dirlist[parent_DID].children.append(child_DID)
 68 |     dirlist[child_DID].parent = parent_DID
 69 |     _build_family_tree(dirlist, parent_DID, dirlist[child_DID].right_DID)
 70 |     if dirlist[child_DID].etype == 1: # storage
 71 |         _build_family_tree(dirlist, child_DID, dirlist[child_DID].root_DID)
 72 | 
 73 | 
 74 | class CompDoc(object):
 75 |     """
 76 |     Compound document handler.
 77 | 
 78 |     :param mem:
 79 |       The raw contents of the file, as a string, or as an :class:`mmap.mmap`
 80 |       object. The only operation it needs to support is slicing.
 81 |     """
 82 | 
 83 | 
    def __init__(self, mem, logfile=sys.stdout, DEBUG=0, ignore_workbook_corruption=False):
        """
        Parse the compound document header and build, in this order: the
        MSAT, the SAT (``self.SAT``), the directory (``self.dirlist``), the
        short-stream container stream (``self.SSCS``) and the SSAT
        (``self.SSAT``).

        Every sector claimed by one of these structures is marked in
        ``self.seen``; meeting a sector that is already marked is treated
        as corruption.

        :param mem: The raw contents of the file; only slicing and ``len`` are used.
        :param logfile: File-like object that receives warnings and debug output.
        :param DEBUG: 0 for no debug output; higher values print more.
        :param ignore_workbook_corruption:
          Stored on the instance; when true, ``_locate_stream`` does not
          raise on a sector that has already been seen.
        :raises CompDocError:
          If the signature or byte-order marker is wrong, or if the
          sector tables are inconsistent.
        """
        self.logfile = logfile
        self.ignore_workbook_corruption = ignore_workbook_corruption
        self.DEBUG = DEBUG
        if mem[0:8] != SIGNATURE:
            raise CompDocError('Not an OLE2 compound document')
        if mem[28:30] != b'\xFE\xFF':
            raise CompDocError('Expected "little-endian" marker, found %r' % mem[28:30])
        revision, version = unpack('<HH', mem[24:28])
        if DEBUG:
            print("\nCompDoc format: version=0x%04x revision=0x%04x" % (version, revision), file=logfile)
        self.mem = mem
        # Sector sizes are stored as powers of two.
        ssz, sssz = unpack('<HH', mem[30:34])
        if ssz > 20: # allows for 2**20 bytes i.e. 1MB
            print("WARNING: sector size (2**%d) is preposterous; assuming 512 and continuing ..."
                % ssz, file=logfile)
            ssz = 9
        if sssz > ssz:
            print("WARNING: short stream sector size (2**%d) is preposterous; assuming 64 and continuing ..."
                % sssz, file=logfile)
            sssz = 6
        self.sec_size = sec_size = 1 << ssz
        self.short_sec_size = 1 << sssz
        if self.sec_size != 512 or self.short_sec_size != 64:
            print("@@@@ sec_size=%d short_sec_size=%d" % (self.sec_size, self.short_sec_size), file=logfile)
        (
            SAT_tot_secs, self.dir_first_sec_sid, _unused, self.min_size_std_stream,
            SSAT_first_sec_sid, SSAT_tot_secs,
            MSATX_first_sec_sid, MSATX_tot_secs,
        ) = unpack('<iiiiiiii', mem[44:76])
        # Everything after the 512-byte header is sector data.
        mem_data_len = len(mem) - 512
        mem_data_secs, left_over = divmod(mem_data_len, sec_size)
        if left_over:
            #### raise CompDocError("Not a whole number of sectors")
            mem_data_secs += 1
            print("WARNING *** file size (%d) not 512 + multiple of sector size (%d)"
                % (len(mem), sec_size), file=logfile)
        self.mem_data_secs = mem_data_secs # use for checking later
        self.mem_data_len = mem_data_len
        # One byte per sector: 0 = unclaimed, otherwise an id for the
        # structure or stream that claimed the sector.
        seen = self.seen = array.array('B', [0]) * mem_data_secs

        if DEBUG:
            print('sec sizes', ssz, sssz, sec_size, self.short_sec_size, file=logfile)
            print("mem data: %d bytes == %d sectors" % (mem_data_len, mem_data_secs), file=logfile)
            print("SAT_tot_secs=%d, dir_first_sec_sid=%d, min_size_std_stream=%d"
                % (SAT_tot_secs, self.dir_first_sec_sid, self.min_size_std_stream,), file=logfile)
            print("SSAT_first_sec_sid=%d, SSAT_tot_secs=%d" % (SSAT_first_sec_sid, SSAT_tot_secs,), file=logfile)
            print("MSATX_first_sec_sid=%d, MSATX_tot_secs=%d" % (MSATX_first_sec_sid, MSATX_tot_secs,), file=logfile)
        nent = sec_size // 4 # number of SID entries in a sector
        fmt = "<%di" % nent
        trunc_warned = 0
        #
        # === build the MSAT ===
        #
        # The first 109 MSAT entries live in the header (bytes 76..511).
        MSAT = list(unpack('<109i', mem[76:512]))
        SAT_sectors_reqd = (mem_data_secs + nent - 1) // nent
        expected_MSATX_sectors = max(0, (SAT_sectors_reqd - 109 + nent - 2) // (nent - 1))
        actual_MSATX_sectors = 0
        if MSATX_tot_secs == 0 and MSATX_first_sec_sid in (EOCSID, FREESID, 0):
            # Strictly, if there is no MSAT extension, then MSATX_first_sec_sid
            # should be set to EOCSID ... FREESID and 0 have been met in the wild.
            pass # Presuming no extension
        else:
            sid = MSATX_first_sec_sid
            while sid not in (EOCSID, FREESID, MSATSID):
                # Above should be only EOCSID according to MS & OOo docs
                # but Excel doesn't complain about FREESID. Zero is a valid
                # sector number, not a sentinel.
                if DEBUG > 1:
                    print('MSATX: sid=%d (0x%08X)' % (sid, sid), file=logfile)
                if sid >= mem_data_secs:
                    msg = "MSAT extension: accessing sector %d but only %d in file" % (sid, mem_data_secs)
                    if DEBUG > 1:
                        # At high debug levels, log and carry on with what we have.
                        print(msg, file=logfile)
                        break
                    raise CompDocError(msg)
                elif sid < 0:
                    raise CompDocError("MSAT extension: invalid sector id: %d" % sid)
                if seen[sid]:
                    raise CompDocError("MSAT corruption: seen[%d] == %d" % (sid, seen[sid]))
                seen[sid] = 1
                actual_MSATX_sectors += 1
                if DEBUG and actual_MSATX_sectors > expected_MSATX_sectors:
                    print("[1]===>>>", mem_data_secs, nent, SAT_sectors_reqd, expected_MSATX_sectors, actual_MSATX_sectors, file=logfile)
                offset = 512 + sec_size * sid
                MSAT.extend(unpack(fmt, mem[offset:offset+sec_size]))
                sid = MSAT.pop() # last sector id is sid of next sector in the chain

        if DEBUG and actual_MSATX_sectors != expected_MSATX_sectors:
            print("[2]===>>>", mem_data_secs, nent, SAT_sectors_reqd, expected_MSATX_sectors, actual_MSATX_sectors, file=logfile)
        if DEBUG:
            print("MSAT: len =", len(MSAT), file=logfile)
            dump_list(MSAT, 10, logfile)
        #
        # === build the SAT ===
        #
        # Each usable MSAT entry names a sector holding nent SAT entries.
        self.SAT = []
        actual_SAT_sectors = 0
        dump_again = 0
        for msidx in xrange(len(MSAT)):
            msid = MSAT[msidx]
            if msid in (FREESID, EOCSID):
                # Specification: the MSAT array may be padded with trailing FREESID entries.
                # Toleration: a FREESID or EOCSID entry anywhere in the MSAT array will be ignored.
                continue
            if msid >= mem_data_secs:
                if not trunc_warned:
                    print("WARNING *** File is truncated, or OLE2 MSAT is corrupt!!", file=logfile)
                    print("INFO: Trying to access sector %d but only %d available"
                        % (msid, mem_data_secs), file=logfile)
                    trunc_warned = 1
                MSAT[msidx] = EVILSID
                dump_again = 1
                continue
            elif msid < -2:
                raise CompDocError("MSAT: invalid sector id: %d" % msid)
            if seen[msid]:
                raise CompDocError("MSAT extension corruption: seen[%d] == %d" % (msid, seen[msid]))
            seen[msid] = 2
            actual_SAT_sectors += 1
            if DEBUG and actual_SAT_sectors > SAT_sectors_reqd:
                print("[3]===>>>", mem_data_secs, nent, SAT_sectors_reqd, expected_MSATX_sectors, actual_MSATX_sectors, actual_SAT_sectors, msid, file=logfile)
            offset = 512 + sec_size * msid
            self.SAT.extend(unpack(fmt, mem[offset:offset+sec_size]))

        if DEBUG:
            print("SAT: len =", len(self.SAT), file=logfile)
            dump_list(self.SAT, 10, logfile)
            # print >> logfile, "SAT ",
            # for i, s in enumerate(self.SAT):
            #     print >> logfile, "entry: %4d offset: %6d, next entry: %4d" % (i, 512 + sec_size * i, s)
            #     print >> logfile, "%d:%d " % (i, s),
            print(file=logfile)
        if DEBUG and dump_again:
            print("MSAT: len =", len(MSAT), file=logfile)
            dump_list(MSAT, 10, logfile)
            # NOTE: this overwrite of self.SAT only happens when DEBUG is set.
            for satx in xrange(mem_data_secs, len(self.SAT)):
                self.SAT[satx] = EVILSID
            print("SAT: len =", len(self.SAT), file=logfile)
            dump_list(self.SAT, 10, logfile)
        #
        # === build the directory ===
        #
        dbytes = self._get_stream(
            self.mem, 512, self.SAT, self.sec_size, self.dir_first_sec_sid,
            name="directory", seen_id=3)
        dirlist = []
        did = -1
        # The directory stream is a sequence of 128-byte entries.
        for pos in xrange(0, len(dbytes), 128):
            did += 1
            dirlist.append(DirNode(did, dbytes[pos:pos+128], 0, logfile))
        self.dirlist = dirlist
        _build_family_tree(dirlist, 0, dirlist[0].root_DID) # and stand well back ...
        if DEBUG:
            for d in dirlist:
                d.dump(DEBUG)
        #
        # === get the SSCS ===
        #
        # The root entry (directory entry 0) describes the short-stream
        # container stream.
        sscs_dir = self.dirlist[0]
        assert sscs_dir.etype == 5 # root entry
        if sscs_dir.first_SID < 0 or sscs_dir.tot_size == 0:
            # Problem reported by Frank Hoffsuemmer: some software was
            # writing -1 instead of -2 (EOCSID) for the first_SID
            # when the SSCS was empty. Not having EOCSID caused assertion
            # failure in _get_stream.
            # Solution: avoid calling _get_stream in any case when the
            # SSCS appears to be empty.
            self.SSCS = ""
        else:
            self.SSCS = self._get_stream(
                self.mem, 512, self.SAT, sec_size, sscs_dir.first_SID,
                sscs_dir.tot_size, name="SSCS", seen_id=4)
        # if DEBUG: print >> logfile, "SSCS", repr(self.SSCS)
        #
        # === build the SSAT ===
        #
        self.SSAT = []
        if SSAT_tot_secs > 0 and sscs_dir.tot_size == 0:
            print("WARNING *** OLE2 inconsistency: SSCS size is 0 but SSAT size is non-zero", file=logfile)
        if sscs_dir.tot_size > 0:
            # Follow the SSAT's own sector chain through the SAT.
            sid = SSAT_first_sec_sid
            nsecs = SSAT_tot_secs
            while sid >= 0 and nsecs > 0:
                if seen[sid]:
                    raise CompDocError("SSAT corruption: seen[%d] == %d" % (sid, seen[sid]))
                seen[sid] = 5
                nsecs -= 1
                start_pos = 512 + sid * sec_size
                news = list(unpack(fmt, mem[start_pos:start_pos+sec_size]))
                self.SSAT.extend(news)
                sid = self.SAT[sid]
            if DEBUG: print("SSAT last sid %d; remaining sectors %d" % (sid, nsecs), file=logfile)
            # The chain must end exactly when the declared sector count runs out.
            assert nsecs == 0 and sid == EOCSID
        if DEBUG:
            print("SSAT", file=logfile)
            dump_list(self.SSAT, 10, logfile)
        if DEBUG:
            print("seen", file=logfile)
            dump_list(seen, 20, logfile)
284 | 
285 |     def _get_stream(self, mem, base, sat, sec_size, start_sid, size=None, name='', seen_id=None):
286 |         # print >> self.logfile, "_get_stream", base, sec_size, start_sid, size
287 |         sectors = []
288 |         s = start_sid
289 |         if size is None:
290 |             # nothing to check against
291 |             while s >= 0:
292 |                 if seen_id is not None:
293 |                     if self.seen[s]:
294 |                         raise CompDocError("%s corruption: seen[%d] == %d" % (name, s, self.seen[s]))
295 |                     self.seen[s] = seen_id
296 |                 start_pos = base + s * sec_size
297 |                 sectors.append(mem[start_pos:start_pos+sec_size])
298 |                 try:
299 |                     s = sat[s]
300 |                 except IndexError:
301 |                     raise CompDocError(
302 |                         "OLE2 stream %r: sector allocation table invalid entry (%d)" %
303 |                         (name, s)
304 |                     )
305 |             assert s == EOCSID
306 |         else:
307 |             todo = size
308 |             while s >= 0:
309 |                 if seen_id is not None:
310 |                     if self.seen[s]:
311 |                         raise CompDocError("%s corruption: seen[%d] == %d" % (name, s, self.seen[s]))
312 |                     self.seen[s] = seen_id
313 |                 start_pos = base + s * sec_size
314 |                 grab = sec_size
315 |                 if grab > todo:
316 |                     grab = todo
317 |                 todo -= grab
318 |                 sectors.append(mem[start_pos:start_pos+grab])
319 |                 try:
320 |                     s = sat[s]
321 |                 except IndexError:
322 |                     raise CompDocError(
323 |                         "OLE2 stream %r: sector allocation table invalid entry (%d)" %
324 |                         (name, s)
325 |                     )
326 |             assert s == EOCSID
327 |             if todo != 0:
328 |                 fprintf(self.logfile,
329 |                     "WARNING *** OLE2 stream %r: expected size %d, actual size %d\n",
330 |                     name, size, size - todo)
331 | 
332 |         return b''.join(sectors)
333 | 
334 |     def _dir_search(self, path, storage_DID=0):
335 |         # Return matching DirNode instance, or None
336 |         head = path[0]
337 |         tail = path[1:]
338 |         dl = self.dirlist
339 |         for child in dl[storage_DID].children:
340 |             if dl[child].name.lower() == head.lower():
341 |                 et = dl[child].etype
342 |                 if et == 2:
343 |                     return dl[child]
344 |                 if et == 1:
345 |                     if not tail:
346 |                         raise CompDocError("Requested component is a 'storage'")
347 |                     return self._dir_search(tail, child)
348 |                 dl[child].dump(1)
349 |                 raise CompDocError("Requested stream is not a 'user stream'")
350 |         return None
351 | 
352 | 
353 |     def get_named_stream(self, qname):
354 |         """
355 |         Interrogate the compound document's directory; return the stream as a
356 |         string if found, otherwise return ``None``.
357 | 
358 |         :param qname:
359 |           Name of the desired stream e.g. ``'Workbook'``.
360 |           Should be in Unicode or convertible thereto.
361 |         """
362 |         d = self._dir_search(qname.split("/"))
363 |         if d is None:
364 |             return None
365 |         if d.tot_size >= self.min_size_std_stream:
366 |             return self._get_stream(
367 |                 self.mem, 512, self.SAT, self.sec_size, d.first_SID,
368 |                 d.tot_size, name=qname, seen_id=d.DID+6)
369 |         else:
370 |             return self._get_stream(
371 |                 self.SSCS, 0, self.SSAT, self.short_sec_size, d.first_SID,
372 |                 d.tot_size, name=qname + " (from SSCS)", seen_id=None)
373 | 
374 |     def locate_named_stream(self, qname):
375 |         """
376 |         Interrogate the compound document's directory.
377 | 
378 |         If the named stream is not found, ``(None, 0, 0)`` will be returned.
379 | 
380 |         If the named stream is found and is contiguous within the original
381 |         byte sequence (``mem``) used when the document was opened,
382 |         then ``(mem, offset_to_start_of_stream, length_of_stream)`` is returned.
383 | 
384 |         Otherwise a new string is built from the fragments and
385 |         ``(new_string, 0, length_of_stream)`` is returned.
386 | 
387 |         :param qname:
388 |           Name of the desired stream e.g. ``'Workbook'``.
389 |           Should be in Unicode or convertible thereto.
390 |         """
391 |         d = self._dir_search(qname.split("/"))
392 |         if d is None:
393 |             return (None, 0, 0)
394 |         if d.tot_size > self.mem_data_len:
395 |             raise CompDocError("%r stream length (%d bytes) > file data size (%d bytes)"
396 |                 % (qname, d.tot_size, self.mem_data_len))
397 |         if d.tot_size >= self.min_size_std_stream:
398 |             result = self._locate_stream(
399 |                 self.mem, 512, self.SAT, self.sec_size, d.first_SID,
400 |                 d.tot_size, qname, d.DID+6)
401 |             if self.DEBUG:
402 |                 print("\nseen", file=self.logfile)
403 |                 dump_list(self.seen, 20, self.logfile)
404 |             return result
405 |         else:
406 |             return (
407 |                 self._get_stream(
408 |                     self.SSCS, 0, self.SSAT, self.short_sec_size, d.first_SID,
409 |                     d.tot_size, qname + " (from SSCS)", None),
410 |                 0,
411 |                 d.tot_size,
412 |             )
413 | 
414 |     def _locate_stream(self, mem, base, sat, sec_size, start_sid, expected_stream_size, qname, seen_id):
415 |         # print >> self.logfile, "_locate_stream", base, sec_size, start_sid, expected_stream_size
416 |         s = start_sid
417 |         if s < 0:
418 |             raise CompDocError("_locate_stream: start_sid (%d) is -ve" % start_sid)
419 |         p = -99 # dummy previous SID
420 |         start_pos = -9999
421 |         end_pos = -8888
422 |         slices = []
423 |         tot_found = 0
424 |         found_limit = (expected_stream_size + sec_size - 1) // sec_size
425 |         while s >= 0:
426 |             if self.seen[s]:
427 |                 if not self.ignore_workbook_corruption:
428 |                     print("_locate_stream(%s): seen" % qname, file=self.logfile); dump_list(self.seen, 20, self.logfile)
429 |                     raise CompDocError("%s corruption: seen[%d] == %d" % (qname, s, self.seen[s]))
430 |             self.seen[s] = seen_id
431 |             tot_found += 1
432 |             if tot_found > found_limit:
433 |                 # Note: expected size rounded up to higher sector
434 |                 raise CompDocError(
435 |                     "%s: size exceeds expected %d bytes; corrupt?"
436 |                     % (qname, found_limit * sec_size)
437 |                 )
438 |             if s == p+1:
439 |                 # contiguous sectors
440 |                 end_pos += sec_size
441 |             else:
442 |                 # start new slice
443 |                 if p >= 0:
444 |                     # not first time
445 |                     slices.append((start_pos, end_pos))
446 |                 start_pos = base + s * sec_size
447 |                 end_pos = start_pos + sec_size
448 |             p = s
449 |             s = sat[s]
450 |         assert s == EOCSID
451 |         assert tot_found == found_limit
452 |         # print >> self.logfile, "_locate_stream(%s): seen" % qname; dump_list(self.seen, 20, self.logfile)
453 |         if not slices:
454 |             # The stream is contiguous ... just what we like!
455 |             return (mem, start_pos, expected_stream_size)
456 |         slices.append((start_pos, end_pos))
457 |         # print >> self.logfile, "+++>>> %d fragments" % len(slices)
458 |         return (b''.join(mem[start_pos:end_pos] for start_pos, end_pos in slices), 0, expected_stream_size)
459 | 
460 | # ==========================================================================================
def x_dump_line(alist, stride, f, dpos, equal=0):
    """
    Print one line to ``f``: the position ``dpos`` (followed by ``=`` when
    ``equal`` is set, else a space) and then the ``stride`` values of
    ``alist`` starting at that position, space-separated.
    """
    fields = ["%5d%s" % (dpos, " ="[equal])]
    fields.extend(str(item) for item in alist[dpos:dpos + stride])
    print(' '.join(fields), end=' \n', file=f)
466 | 
def dump_list(alist, stride, f=sys.stdout):
    """
    Print ``alist`` to ``f`` in lines of ``stride`` values, collapsing runs
    of identical lines: only the first line of a run is printed, and a run
    of more than two lines is closed by its last line marked with ``=``.
    """
    def emit(start, marker=' '):
        fields = ["%5d%s" % (start, marker)]
        fields.extend(str(item) for item in alist[start:start + stride])
        print(' '.join(fields), end=' \n', file=f)

    cursor = None
    last_shown = None
    for cursor in xrange(0, len(alist), stride):
        if last_shown is None:
            # the very first line is always printed
            emit(cursor)
            last_shown = cursor
            continue
        if alist[cursor:cursor+stride] == alist[last_shown:last_shown+stride]:
            # same as the line last printed: part of a run
            continue
        if cursor - last_shown > stride:
            # close the run that has just ended
            emit(cursor - stride, '=')
        emit(cursor)
        last_shown = cursor
    if last_shown is not None and cursor is not None and cursor != last_shown:
        # the list ended in the middle of a run
        emit(cursor, '=')
486 | 


--------------------------------------------------------------------------------
/xlrd/info.py:
--------------------------------------------------------------------------------
1 | __version__ = __VERSION__ = "2.0.1"
2 | 


--------------------------------------------------------------------------------
/xlrd/timemachine.py:
--------------------------------------------------------------------------------
 1 | ##
 2 | # <p>Copyright (c) 2006-2012 Stephen John Machin, Lingfo Pty Ltd</p>
 3 | # <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
 4 | ##
 5 | 
 6 | # timemachine.py -- adaptation for single codebase.
 7 | # Currently supported: 2.6 to 2.7, 3.2+
 8 | # usage: from timemachine import *
 9 | 
10 | from __future__ import print_function
11 | 
12 | import sys
13 | 
python_version = sys.version_info[:2] # e.g. version 2.6 -> (2, 6)

if python_version >= (3, 0):
    # Python 3
    from io import BytesIO as BYTES_IO

    def BYTES_LITERAL(x):
        return x.encode('latin1')

    def UNICODE_LITERAL(x):
        return x

    def BYTES_ORD(byte):
        return byte

    def fprintf(f, fmt, *vargs):
        # %a gives the ASCII-only repr that %r produced on Python 2
        fmt = fmt.replace("%r", "%a")
        if fmt.endswith('\n'):
            text, terminator = fmt[:-1] % vargs, '\n'
        else:
            text, terminator = fmt % vargs, ' '
        print(text, end=terminator, file=f)

    EXCEL_TEXT_TYPES = (str, bytes, bytearray) # xlwt: isinstance(obj, EXCEL_TEXT_TYPES)
    REPR = ascii
    xrange = range

    def unicode(b, enc):
        return b.decode(enc)

    def ensure_unicode(s):
        return s

    unichr = chr
else:
    # Python 2
    from cStringIO import StringIO as BYTES_IO

    def BYTES_LITERAL(x):
        return x

    def UNICODE_LITERAL(x):
        return x.decode('latin1')

    BYTES_ORD = ord

    def fprintf(f, fmt, *vargs):
        if fmt.endswith('\n'):
            text, terminator = fmt[:-1] % vargs, '\n'
        else:
            text, terminator = fmt % vargs, ' '
        print(text, end=terminator, file=f)

    try:
        EXCEL_TEXT_TYPES = basestring # xlwt: isinstance(obj, EXCEL_TEXT_TYPES)
    except NameError:
        EXCEL_TEXT_TYPES = (str, unicode)
    REPR = repr
    xrange = xrange
    # following used only to overcome 2.x ElementTree gimmick which
    # returns text as `str` if it's ascii, otherwise `unicode`
    ensure_unicode = unicode # used only in xlsx.py
54 | 


--------------------------------------------------------------------------------
/xlrd/xldate.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Copyright (c) 2005-2008 Stephen John Machin, Lingfo Pty Ltd
  3 | # This module is part of the xlrd package, which is released under a
  4 | # BSD-style licence.
  5 | # No part of the content of this file was derived from the works of David Giffin.
  6 | """
  7 | Tools for working with dates and times in Excel files.
  8 | 
  9 | The conversion from ``days`` to ``(year, month, day)`` starts with
 10 | an integral "julian day number" aka JDN.
 11 | FWIW:
 12 | 
 13 | - JDN 0 corresponds to noon on Monday November 24 in Gregorian year -4713.
 14 | 
 15 | More importantly:
 16 | 
 17 | - Noon on Gregorian 1900-03-01 (day 61 in the 1900-based system) is JDN 2415080.0
 18 | - Noon on Gregorian 1904-01-02 (day  1 in the 1904-based system) is JDN 2416482.0
 19 | 
 20 | """
 21 | import datetime
 22 | 
# Offset to add to an Excel day number to get a julian day number,
# indexed by datemode (0: 1900-based, 1: 1904-based).
_JDN_delta = (2415080 - 61, 2416482 - 1)
# Sanity check: the two date systems are 1462 days apart.
assert _JDN_delta[1] - _JDN_delta[0] == 1462

# Pre-calculate the datetime epochs for efficiency.
epoch_1904 = datetime.datetime(1904, 1, 1)
epoch_1900 = datetime.datetime(1899, 12, 31)
# Used for 1900-based day numbers of 60 and above, to compensate for
# Excel's non-existent 1900-02-29.
epoch_1900_minus_1 = datetime.datetime(1899, 12, 30)

# This is equivalent to 10000-01-01:
# (first out-of-range day number, indexed by datemode)
_XLDAYS_TOO_LARGE = (2958466, 2958466 - 1462)
 33 | 
 34 | 
 35 | class XLDateError(ValueError):
 36 |     "A base class for all datetime-related errors."
 37 | 
 38 | 
 39 | class XLDateNegative(XLDateError):
 40 |     "``xldate < 0.00``"
 41 | 
 42 | 
 43 | class XLDateAmbiguous(XLDateError):
 44 |     "The 1900 leap-year problem ``(datemode == 0 and 1.0 <= xldate < 61.0)``"
 45 | 
 46 | 
 47 | class XLDateTooLarge(XLDateError):
 48 |     "Gregorian year 10000 or later"
 49 | 
 50 | 
 51 | class XLDateBadDatemode(XLDateError):
 52 |     "``datemode`` arg is neither 0 nor 1"
 53 | 
 54 | 
 55 | class XLDateBadTuple(XLDateError):
 56 |     pass
 57 | 
 58 | 
 59 | def xldate_as_tuple(xldate, datemode):
 60 |     """
 61 |     Convert an Excel number (presumed to represent a date, a datetime or a time) into
 62 |     a tuple suitable for feeding to datetime or mx.DateTime constructors.
 63 | 
 64 |     :param xldate: The Excel number
 65 |     :param datemode: 0: 1900-based, 1: 1904-based.
 66 |     :raises xlrd.xldate.XLDateNegative:
 67 |     :raises xlrd.xldate.XLDateAmbiguous:
 68 | 
 69 |     :raises xlrd.xldate.XLDateTooLarge:
 70 |     :raises xlrd.xldate.XLDateBadDatemode:
 71 |     :raises xlrd.xldate.XLDateError:
 72 |     :returns: Gregorian ``(year, month, day, hour, minute, nearest_second)``.
 73 | 
 74 |     .. warning::
 75 | 
 76 |       When using this function to interpret the contents of a workbook, you
 77 |       should pass in the :attr:`~xlrd.book.Book.datemode`
 78 |       attribute of that workbook. Whether the workbook has ever been anywhere
 79 |       near a Macintosh is irrelevant.
 80 | 
 81 |     .. admonition:: Special case
 82 | 
 83 |         If ``0.0 <= xldate < 1.0``, it is assumed to represent a time;
 84 |         ``(0, 0, 0, hour, minute, second)`` will be returned.
 85 | 
 86 |     .. note::
 87 | 
 88 |         ``1904-01-01`` is not regarded as a valid date in the ``datemode==1``
 89 |         system; its "serial number" is zero.
 90 |     """
 91 |     if datemode not in (0, 1):
 92 |         raise XLDateBadDatemode(datemode)
 93 |     if xldate == 0.00:
 94 |         return (0, 0, 0, 0, 0, 0)
 95 |     if xldate < 0.00:
 96 |         raise XLDateNegative(xldate)
 97 |     xldays = int(xldate)
 98 |     frac = xldate - xldays
 99 |     seconds = int(round(frac * 86400.0))
100 |     assert 0 <= seconds <= 86400
101 |     if seconds == 86400:
102 |         hour = minute = second = 0
103 |         xldays += 1
104 |     else:
105 |         # second = seconds % 60; minutes = seconds // 60
106 |         minutes, second = divmod(seconds, 60)
107 |         # minute = minutes % 60; hour    = minutes // 60
108 |         hour, minute = divmod(minutes, 60)
109 |     if xldays >= _XLDAYS_TOO_LARGE[datemode]:
110 |         raise XLDateTooLarge(xldate)
111 | 
112 |     if xldays == 0:
113 |         return (0, 0, 0, hour, minute, second)
114 | 
115 |     if xldays < 61 and datemode == 0:
116 |         raise XLDateAmbiguous(xldate)
117 | 
118 |     jdn = xldays + _JDN_delta[datemode]
119 |     yreg = ((((jdn * 4 + 274277) // 146097) * 3 // 4) + jdn + 1363) * 4 + 3
120 |     mp = ((yreg % 1461) // 4) * 535 + 333
121 |     d = ((mp % 16384) // 535) + 1
122 |     # mp /= 16384
123 |     mp >>= 14
124 |     if mp >= 10:
125 |         return ((yreg // 1461) - 4715, mp - 9, d, hour, minute, second)
126 |     else:
127 |         return ((yreg // 1461) - 4716, mp + 3, d, hour, minute, second)
128 | 
129 | 
def xldate_as_datetime(xldate, datemode):
    """
    Turn an Excel date/time serial number into a
    :class:`datetime.datetime`.

    :param xldate: The Excel number; whole part is a day count, fractional
                   part is the portion of a day.
    :param datemode: 0: 1900-based, 1: 1904-based. Any truthy value selects
                     the 1904 epoch.

    :returns: A :class:`datetime.datetime` object, resolved to the nearest
              millisecond.
    """
    # Choose the starting point. In the 1900 system, serial numbers from 60
    # upwards use a shifted epoch to work around Excel's 1900 leap year bug.
    if datemode:
        base = epoch_1904
    elif xldate < 60:
        base = epoch_1900
    else:
        base = epoch_1900_minus_1

    # Split the serial number into whole days and the fraction of a day.
    whole_days = int(xldate)
    day_fraction = xldate - whole_days

    # Express the fraction in milliseconds (Excel's resolution), then break
    # that into whole seconds plus the leftover milliseconds.
    total_ms = int(round(day_fraction * 86400000.0))
    whole_seconds, leftover_ms = divmod(total_ms, 1000)

    offset = datetime.timedelta(
        days=whole_days,
        seconds=whole_seconds,
        microseconds=0,
        milliseconds=leftover_ms,
    )
    return base + offset
160 | 
161 | 
162 | # === conversions from date/time to xl numbers
163 | 
164 | def _leap(y):
165 |     if y % 4: return 0
166 |     if y % 100: return 1
167 |     if y % 400: return 0
168 |     return 1
169 | 
# Number of days in each month of a non-leap year, indexed directly by
# month number (1-12); slot 0 is an unused placeholder. 29 February is
# handled separately by the code that consults this table.
_days_in_month = (None, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
171 | 
172 | 
def xldate_from_date_tuple(date_tuple, datemode):
    """
    Convert a ``(year, month, day)`` date tuple to an Excel date number.

    The all-zero tuple ``(0, 0, 0)`` is accepted and maps to ``0.0``.

    :param date_tuple: ``(year, month, day)`` where ``year`` is a Gregorian
                       year in ``1900..9999``, ``1 <= month <= 12`` and
                       ``1 <= day <= last day of that (year, month)``.
    :param datemode: 0: 1900-based, 1: 1904-based.
    :raises xlrd.xldate.XLDateBadDatemode: ``datemode`` is neither 0 nor 1.
    :raises xlrd.xldate.XLDateBadTuple:
      ``(year, month, day)`` is too early/late or has invalid component(s)
    :raises xlrd.xldate.XLDateAmbiguous:
      The date is before 1900-03-01 in the 1900-based system.
    :returns: The Excel day number as a ``float``.
    """
    year, month, day = date_tuple
    # Error messages always show the components as a plain tuple.
    ymd = (year, month, day)

    if datemode not in (0, 1):
        raise XLDateBadDatemode(datemode)

    if all(part == 0 for part in ymd):
        return 0.00

    if not 1900 <= year <= 9999:
        raise XLDateBadTuple("Invalid year: %r" % (ymd,))
    if not 1 <= month <= 12:
        raise XLDateBadTuple("Invalid month: %r" % (ymd,))

    day_invalid = day < 1
    if not day_invalid and day > _days_in_month[month]:
        # The only day allowed to exceed the table entry is 29 February
        # in a leap year.
        day_invalid = not (day == 29 and month == 2 and _leap(year))
    if day_invalid:
        raise XLDateBadTuple("Invalid day: %r" % (ymd,))

    # Shift the calendar so the year starts in March: January and February
    # are treated as months 10 and 11 of the previous year.
    shifted_year = year + 4716
    if month <= 2:
        shifted_year = shifted_year - 1
        shifted_month = month + 9
    else:
        shifted_month = month - 3

    # Julian Day Number, assembled from its year, month and century terms.
    year_term = 1461 * shifted_year // 4
    month_term = (979 * shifted_month + 16) // 32
    century_term = ((shifted_year + 184) // 100) * 3 // 4
    jdn = year_term + month_term + day - 1364 - century_term

    xldays = jdn - _JDN_delta[datemode]
    if xldays <= 0:
        raise XLDateBadTuple("Invalid (year, month, day): %r" % (ymd,))
    if datemode == 0 and xldays < 61:
        raise XLDateAmbiguous("Before 1900-03-01: %r" % (ymd,))
    return float(xldays)
218 | 
219 | 
def xldate_from_time_tuple(time_tuple):
    """
    Convert an ``(hour, minute, second)`` time tuple to an Excel "date"
    value, i.e. the fraction of a day that has elapsed.

    :param time_tuple: ``(hour, minute, second)`` with ``0 <= hour < 24``,
                       ``0 <= minute < 60`` and ``0 <= second < 60``.
    :raises xlrd.xldate.XLDateBadTuple: Out-of-range hour, minute, or second
    :returns: A ``float`` in the range ``0.0 <= value < 1.0``.
    """
    hour, minute, second = time_tuple

    in_range = 0 <= hour < 24 and 0 <= minute < 60 and 0 <= second < 60
    if not in_range:
        raise XLDateBadTuple("Invalid (hour, minute, second): %r" % ((hour, minute, second),))

    # Fold seconds into minutes, minutes into hours, hours into days.
    as_minutes = second / 60.0 + minute
    as_hours = as_minutes / 60.0 + hour
    return as_hours / 24.0
234 | 
235 | 
def xldate_from_datetime_tuple(datetime_tuple, datemode):
    """
    Convert a ``(year, month, day, hour, minute, second)`` tuple to an Excel
    date value.

    The first three items are converted with
    :func:`xldate_from_date_tuple` and the remaining items with
    :func:`xldate_from_time_tuple`; the result is the sum of the two, so
    validation and exceptions are those of these two functions.

    :param datetime_tuple: ``(year, month, day, hour, minute, second)``
    :param datemode: 0: 1900-based, 1: 1904-based.
    :returns: Day number plus fraction of a day.
    """
    # Date part first, then time part.
    day_number = xldate_from_date_tuple(datetime_tuple[:3], datemode)
    day_fraction = xldate_from_time_tuple(datetime_tuple[3:])
    return day_number + day_fraction
249 | 


--------------------------------------------------------------------------------