├── .gitignore ├── CHANGELOG ├── LICENSE ├── README.md ├── copytext.py ├── docs ├── Makefile ├── conf.py └── index.rst ├── examples ├── from_google.xlsx └── test_copy.xlsx ├── requirements-dev.txt ├── requirements.txt ├── setup.py ├── tests ├── __init__.py └── test_copytext.py └── tox.ini /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | .DS_Store 4 | build 5 | copytext.egg-info 6 | reference 7 | dist 8 | *.swo 9 | docs/_build 10 | .tox 11 | .coverage 12 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | 0.2.1 2 | ----- 3 | 4 | * Add support for Python 3.6, remove support for Python 2.6 5 | 6 | 0.2.0 7 | ----- 8 | 9 | * Add support for Python 3.3, 3.4, and 3.5 10 | 11 | 0.1.9 12 | ----- 13 | 14 | 0.1.8 15 | ----- 16 | 17 | * Better handling of e.g. Google XLSX files with empty columns. (#20) 18 | * Update openpyxl and fix related errors. 19 | 20 | 0.1.7 21 | ----- 22 | 23 | * Maintain column order in JSON keys 24 | 25 | 0.1.6 26 | ----- 27 | 28 | * Explicitly convert cells to text internally. (#16) 29 | * Support serializing a Copy or Sheet object to JSON. (#17) 30 | 31 | 0.1.5 32 | ----- 33 | 34 | * Properly serialize rows with keys, but not values. 35 | 36 | 0.1.4 37 | ----- 38 | 39 | * Skip header row when iterating over rows in a sheet. 40 | 41 | 0.1.3 42 | ----- 43 | 44 | * Fixes for Markup handling. 45 | 46 | 0.1.2 47 | ----- 48 | 49 | * Support null checking on cells. (#15) 50 | * Integrate markupsafe and eliminate cell_class_wrapper. 51 | * Errors and rows can now test false. (#14) 52 | 53 | 0.1.1 54 | ----- 55 | 56 | * First draft of docs. (#4) 57 | * Explicit handling of fields with dates/numbers. (#8) 58 | * Tests for unicode support. (#1) 59 | 60 | 0.1.0 61 | ----- 62 | 63 | * Initial import. 
64 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2018 NPR 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | copytext is a library for accessing a spreadsheet as a native Python object suitable for templating. 2 | 3 | On the NPR Visuals team we use this as part of our app-template. Whenever a project is rendered we fetch a Google Spreadsheet containing all the project's editable text. This spreadsheet is passed to copytext, which produces an object suitable for using in our Flask templates. 
#!/usr/bin/env python
"""
Access the sheets of an XLSX workbook as objects suitable for templating.

Failed lookups (missing sheet, key or column, index out of range) return a
falsy :class:`Error` placeholder instead of raising, so a template keeps
rendering and shows which piece of copy is missing.
"""
from collections import OrderedDict

import json
import six

from openpyxl.reader.excel import load_workbook


def _to_text(value):
    """
    Return ``value`` as text (``str`` on Python 3, ``unicode`` on Python 2).

    Falsy values, including the ``None`` stored for an empty cell, become
    the empty string.
    """
    return six.text_type(value or '')


def _in_range(index, length):
    """
    Tell whether ``index`` is a valid, possibly negative, sequence index.
    """
    return -length <= index < length


def _read_header(cells):
    """
    Return the column names found in the header row ``cells``.
    """
    columns = []

    for cell in cells:
        title = cell.internal_value

        # Columns cease once an empty header is found
        if title is None:
            break

        columns.append(six.text_type(title))

    return columns


def _read_row(cells, columns):
    """
    Return a ``{column: text-or-None}`` dict for one data row.

    Returns ``None`` when every cell under a named column is empty.
    """
    values = []

    for cell in cells[:len(columns)]:
        raw = cell.internal_value
        values.append(None if raw is None else six.text_type(raw))

    # If nothing in a row then it doesn't matter
    if all(value is None for value in values):
        return None

    # A row shorter than the header is padded so every column has an entry.
    values.extend([None] * (len(columns) - len(values)))

    return dict(zip(columns, values))


class CopyException(Exception):
    """
    Raised when the workbook file cannot be opened.
    """
    pass


class Error(object):
    """
    An error object that can mimic the structure of the COPY data,
    whether the error happens at the Copy, Sheet or Row level.
    Will print the error whenever it gets repr'ed.
    """
    _error = ''

    def __init__(self, error):
        self._error = error

    def __getitem__(self, i):
        # Any further lookup on an error keeps yielding the same error.
        return self

    def __iter__(self):
        return iter([self])

    def __len__(self):
        return 1

    def __repr__(self):
        return self._error

    def __bool__(self):
        return False

    # Python 2 spelling of __bool__.
    __nonzero__ = __bool__


@six.python_2_unicode_compatible
class Row(object):
    """
    Wraps a row of copy for error handling.
    """

    def __init__(self, sheet, row, columns, index):
        self._sheet = sheet
        self._row = row
        self._columns = columns
        self._index = index

    def __getitem__(self, i):
        """
        Allow dict-style item access by index (column id), or by column name.

        Returns the cell as text ('' for an empty cell), or an ``Error``
        when the index is out of range or the column does not exist.
        """
        if isinstance(i, int):
            # Negative indexes count from the end; anything past either
            # end yields an Error rather than an IndexError.
            if not _in_range(i, len(self._row)):
                return Error('COPY.%s.%i.%i [column index outside range]' % (
                    self._sheet.name,
                    self._index,
                    i
                ))

            return _to_text(self._row[i])

        if i not in self._columns:
            return Error('COPY.%s.%i.%s [column does not exist in sheet]' % (
                self._sheet.name,
                self._index,
                i
            ))

        return _to_text(self._row[self._columns.index(i)])

    def __iter__(self):
        # Yields the raw stored values (None for empty cells).
        return iter(self._row)

    def __len__(self):
        return len(self._row)

    def __str__(self):
        """
        Return the text of the "value" column.

        Without a "value" column, return the error message as text.
        __str__ must return a string, so an Error object cannot be
        returned here.
        """
        if 'value' in self._columns:
            return _to_text(self._row[self._columns.index('value')])

        # Identify the row by its key when there is one, else by position.
        if 'key' in self._columns:
            label = self._row[self._columns.index('key')]
        else:
            label = self._index

        return six.text_type('COPY.%s.%s [no value column in sheet]' % (
            self._sheet.name,
            label
        ))

    def __html__(self):
        # Exposing __html__ lets template engines treat the row as safe
        # markup. Always hand back text, never encoded bytes.
        return six.text_type(self)

    def __bool__(self):
        """
        A row with a "value" column is truthy only when that value is
        non-empty; a row without one is always truthy.
        """
        if 'value' in self._columns:
            return bool(self._row[self._columns.index('value')])

        return True

    # Python 2 spelling of __bool__.
    __nonzero__ = __bool__


class Sheet(object):
    """
    Wrap copy text, for a single worksheet, for error handling.
    """
    name = None

    def __init__(self, name, data, columns):
        self.name = name
        self._columns = columns
        self._sheet = [
            Row(self, [row[c] for c in columns], columns, i)
            for i, row in enumerate(data)
        ]

    def __getitem__(self, i):
        """
        Allow dict-style item access by index (row id), or by
        row name ("key" column).

        Returns the matching ``Row``, or an ``Error`` when the index is out
        of range, the sheet has no "key" column, or no row has that key.
        """
        if isinstance(i, int):
            if not _in_range(i, len(self._sheet)):
                return Error('COPY.%s.%i [row index outside range]' % (
                    self.name,
                    i
                ))

            return self._sheet[i]

        if 'key' not in self._columns:
            return Error('COPY.%s.%s [no key column in sheet]' % (
                self.name,
                i
            ))

        # First row whose key matches wins.
        for row in self._sheet:
            if row['key'] == i:
                return row

        return Error('COPY.%s.%s [key does not exist in sheet]' % (
            self.name,
            i
        ))

    def __iter__(self):
        return iter(self._sheet)

    def __len__(self):
        return len(self._sheet)

    def _serialize(self):
        """
        Serialize the sheet in a JSON-ready format.

        * "key" and "value" columns: ``{key: value}``
        * "key" column only: ``{key: {column: value, ...}}``
        * no "key" column: ``[{column: value, ...}, ...]``
        """
        has_key = 'key' in self._columns

        if has_key and 'value' in self._columns:
            return OrderedDict((row['key'], row['value']) for row in self)

        if has_key:
            obj = OrderedDict()

            for row in self:
                obj[row['key']] = OrderedDict(
                    (column, row[column])
                    for column in self._columns
                    if column != 'key'
                )

            return obj

        # Iterating a row yields raw values, so empty cells stay None here.
        return [OrderedDict(zip(self._columns, row)) for row in self]

    def json(self):
        """
        Serialize the sheet as JSON.
        """
        return json.dumps(self._serialize())


class Copy(object):
    """
    Wraps copy text, for multiple worksheets, for error handling.
    """

    def __init__(self, filename):
        self._filename = filename
        # Ordered so serialization keeps the workbook's sheet order on
        # every supported Python version.
        self._copy = OrderedDict()
        self.load()

    def __getitem__(self, name):
        """
        Allow dict-style item access by sheet name.
        """
        if name not in self._copy:
            return Error('COPY.%s [sheet does not exist]' % name)

        return self._copy[name]

    def load(self):
        """
        Parses the downloaded Excel file.

        Raises ``CopyException`` when the file cannot be opened.
        """
        try:
            book = load_workbook(self._filename, data_only=True)
        except IOError:
            raise CopyException(
                '"%s" does not exist. Have you run "fab update_copy"?'
                % self._filename
            )

        for sheet in book:
            columns = []
            rows = []

            for position, cells in enumerate(sheet.rows):
                # The first row holds the column names.
                if position == 0:
                    columns = _read_header(cells)
                    continue

                row_data = _read_row(cells, columns)

                if row_data is not None:
                    rows.append(row_data)

            self._copy[sheet.title] = Sheet(sheet.title, rows, columns)

    def _serialize(self):
        """
        Serialize the copy as an OrderedDict
        """
        obj = OrderedDict()

        for name, sheet in self._copy.items():
            obj[name] = sheet._serialize()

        return obj

    def json(self):
        """
        Serialize the copy as JSON.
        """
        return json.dumps(self._serialize())
(if enabled)" 35 | 36 | clean: 37 | -rm -rf $(BUILDDIR)/* 38 | 39 | html: 40 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 41 | @echo 42 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 43 | 44 | dirhtml: 45 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 48 | 49 | singlehtml: 50 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 51 | @echo 52 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 53 | 54 | pickle: 55 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 56 | @echo 57 | @echo "Build finished; now you can process the pickle files." 58 | 59 | json: 60 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 61 | @echo 62 | @echo "Build finished; now you can process the JSON files." 63 | 64 | htmlhelp: 65 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 66 | @echo 67 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 68 | ".hhp project file in $(BUILDDIR)/htmlhelp." 69 | 70 | qthelp: 71 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 72 | @echo 73 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 74 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 75 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/copytext.qhcp" 76 | @echo "To view the help file:" 77 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/copytext.qhc" 78 | 79 | devhelp: 80 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 81 | @echo 82 | @echo "Build finished." 83 | @echo "To view the help file:" 84 | @echo "# mkdir -p $$HOME/.local/share/devhelp/copytext" 85 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/copytext" 86 | @echo "# devhelp" 87 | 88 | epub: 89 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 90 | @echo 91 | @echo "Build finished. 
The epub file is in $(BUILDDIR)/epub." 92 | 93 | latex: 94 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 95 | @echo 96 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 97 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 98 | "(use \`make latexpdf' here to do that automatically)." 99 | 100 | latexpdf: 101 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 102 | @echo "Running LaTeX files through pdflatex..." 103 | make -C $(BUILDDIR)/latex all-pdf 104 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 105 | 106 | text: 107 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 108 | @echo 109 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 110 | 111 | man: 112 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 113 | @echo 114 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 115 | 116 | changes: 117 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 118 | @echo 119 | @echo "The overview file is in $(BUILDDIR)/changes." 120 | 121 | linkcheck: 122 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 123 | @echo 124 | @echo "Link check complete; look for any errors in the above output " \ 125 | "or in $(BUILDDIR)/linkcheck/output.txt." 126 | 127 | doctest: 128 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 129 | @echo "Testing of doctests in the sources finished, look at the " \ 130 | "results in $(BUILDDIR)/doctest/output.txt." 131 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is execfile()d with the current directory set to its containing dir. 4 | # 5 | # Note that not all possible configuration values are present in this 6 | # autogenerated file. 
7 | # 8 | # All configuration values have a default; values that are commented out 9 | # serve to show the default. 10 | 11 | import os 12 | import sys 13 | 14 | # If extensions (or modules to document with autodoc) are in another directory, 15 | # add these directories to sys.path here. If the directory is relative to the 16 | # documentation root, use os.path.abspath to make it absolute, like shown here. 17 | sys.path.insert(0, os.path.abspath('..')) 18 | 19 | # -- General configuration ----------------------------------------------------- 20 | 21 | # If your documentation needs a minimal Sphinx version, state it here. 22 | #needs_sphinx = '1.0' 23 | 24 | # Add any Sphinx extension module names here, as strings. They can be extensions 25 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 26 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx'] 27 | autodoc_member_order = 'bysource' 28 | 29 | intersphinx_mapping = { 30 | 'python': ('http://docs.python.org/2.7', None) 31 | } 32 | 33 | # Add any paths that contain templates here, relative to this directory. 34 | templates_path = ['_templates'] 35 | 36 | # The suffix of source filenames. 37 | source_suffix = '.rst' 38 | 39 | # The encoding of source files. 40 | #source_encoding = 'utf-8-sig' 41 | 42 | # The master toctree document. 43 | master_doc = 'index' 44 | 45 | # General information about the project. 46 | project = u'copytext' 47 | copyright = u'2018, NPR' 48 | 49 | # The version info for the project you're documenting, acts as replacement for 50 | # |version| and |release|, also used in various other places throughout the 51 | # built documents. 52 | # 53 | # The short X.Y version. 54 | version = '0.2.1' 55 | # The full version, including alpha/beta/rc tags. 56 | release = '0.2.1' 57 | 58 | # The language for content autogenerated by Sphinx. Refer to documentation 59 | # for a list of supported languages. 
60 | #language = None 61 | 62 | # There are two options for replacing |today|: either, you set today to some 63 | # non-false value, then it is used: 64 | #today = '' 65 | # Else, today_fmt is used as the format for a strftime call. 66 | #today_fmt = '%B %d, %Y' 67 | 68 | # List of patterns, relative to source directory, that match files and 69 | # directories to ignore when looking for source files. 70 | exclude_patterns = ['_build'] 71 | 72 | # The reST default role (used for this markup: `text`) to use for all documents. 73 | #default_role = None 74 | 75 | # If true, '()' will be appended to :func: etc. cross-reference text. 76 | #add_function_parentheses = True 77 | 78 | # If true, the current module name will be prepended to all description 79 | # unit titles (such as .. function::). 80 | #add_module_names = True 81 | 82 | # If true, sectionauthor and moduleauthor directives will be shown in the 83 | # output. They are ignored by default. 84 | #show_authors = False 85 | 86 | # The name of the Pygments (syntax highlighting) style to use. 87 | pygments_style = 'sphinx' 88 | 89 | # A list of ignored prefixes for module index sorting. 90 | #modindex_common_prefix = [] 91 | 92 | 93 | # -- Options for HTML output --------------------------------------------------- 94 | 95 | # The theme to use for HTML and HTML Help pages. See the documentation for 96 | # a list of builtin themes. 97 | html_theme = 'default' 98 | 99 | # Theme options are theme-specific and customize the look and feel of a theme 100 | # further. For a list of options available for each theme, see the 101 | # documentation. 102 | #html_theme_options = {} 103 | 104 | # Add any paths that contain custom themes here, relative to this directory. 105 | #html_theme_path = [] 106 | 107 | # The name for this set of Sphinx documents. If None, it defaults to 108 | # " v documentation". 109 | #html_title = None 110 | 111 | # A shorter title for the navigation bar. Default is the same as html_title. 
112 | #html_short_title = None 113 | 114 | # The name of an image file (relative to this directory) to place at the top 115 | # of the sidebar. 116 | #html_logo = None 117 | 118 | # The name of an image file (within the static path) to use as favicon of the 119 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 120 | # pixels large. 121 | #html_favicon = None 122 | 123 | # Add any paths that contain custom static files (such as style sheets) here, 124 | # relative to this directory. They are copied after the builtin static files, 125 | # so a file named "default.css" will overwrite the builtin "default.css". 126 | # html_static_path = ['_static'] 127 | 128 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 129 | # using the given strftime format. 130 | #html_last_updated_fmt = '%b %d, %Y' 131 | 132 | # If true, SmartyPants will be used to convert quotes and dashes to 133 | # typographically correct entities. 134 | #html_use_smartypants = True 135 | 136 | # Custom sidebar templates, maps document names to template names. 137 | #html_sidebars = {} 138 | 139 | # Additional templates that should be rendered to pages, maps page names to 140 | # template names. 141 | #html_additional_pages = {} 142 | 143 | # If false, no module index is generated. 144 | #html_domain_indices = True 145 | 146 | # If false, no index is generated. 147 | #html_use_index = True 148 | 149 | # If true, the index is split into individual pages for each letter. 150 | #html_split_index = False 151 | 152 | # If true, links to the reST sources are added to the pages. 153 | #html_show_sourcelink = True 154 | 155 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 156 | #html_show_sphinx = True 157 | 158 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
159 | #html_show_copyright = True 160 | 161 | # If true, an OpenSearch description file will be output, and all pages will 162 | # contain a tag referring to it. The value of this option must be the 163 | # base URL from which the finished HTML is served. 164 | #html_use_opensearch = '' 165 | 166 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 167 | #html_file_suffix = None 168 | 169 | # Output file base name for HTML help builder. 170 | htmlhelp_basename = 'copytextdoc' 171 | 172 | 173 | # -- Options for LaTeX output -------------------------------------------------- 174 | 175 | # The paper size ('letter' or 'a4'). 176 | #latex_paper_size = 'letter' 177 | 178 | # The font size ('10pt', '11pt' or '12pt'). 179 | #latex_font_size = '10pt' 180 | 181 | # Grouping the document tree into LaTeX files. List of tuples 182 | # (source start file, target name, title, author, documentclass [howto/manual]). 183 | latex_documents = [ 184 | ('index', 'copytext.tex', u'copytext Documentation', 185 | u'NPR', 'manual'), 186 | ] 187 | 188 | # The name of an image file (relative to this directory) to place at the top of 189 | # the title page. 190 | #latex_logo = None 191 | 192 | # For "manual" documents, if this is true, then toplevel headings are parts, 193 | # not chapters. 194 | #latex_use_parts = False 195 | 196 | # If true, show page references after internal links. 197 | #latex_show_pagerefs = False 198 | 199 | # If true, show URL addresses after external links. 200 | #latex_show_urls = False 201 | 202 | # Additional stuff for the LaTeX preamble. 203 | #latex_preamble = '' 204 | 205 | # Documents to append as an appendix to all manuals. 206 | #latex_appendices = [] 207 | 208 | # If false, no module index is generated. 209 | #latex_domain_indices = True 210 | 211 | 212 | # -- Options for manual page output -------------------------------------------- 213 | 214 | # One entry per manual page. 
List of tuples 215 | # (source start file, name, description, authors, manual section). 216 | man_pages = [ 217 | # ('scripts/csvcut', 'csvcut', u'csvcut Documentation', 218 | # [u'Christopher Groskopf'], 1), 219 | ] 220 | 221 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | ================== 2 | copytext |release| 3 | ================== 4 | 5 | About 6 | ===== 7 | 8 | .. include:: ../README.md 9 | 10 | Installation 11 | ============ 12 | 13 | Users 14 | ----- 15 | 16 | If you only want to use copytext, install it this way:: 17 | 18 | pip install copytext 19 | 20 | Developers 21 | ---------- 22 | 23 | If you are a developer that also wants to hack on copytext, install it this way:: 24 | 25 | git clone git://github.com/nprapps/copytext.git 26 | cd copytext 27 | mkvirtualenv --no-site-packages copytext 28 | pip install -r requirements.txt 29 | python setup.py develop 30 | nosetests --with-coverage --cover-package=copytext 31 | 32 | Usage 33 | ===== 34 | 35 | Here is an example spreadsheet: 36 | 37 | .. 
raw:: html 38 | 39 | 40 | 41 | And here is code using this data: 42 | 43 | :: 44 | 45 | import copytext 46 | 47 | # Instantiate our copy, this parses the XLSX workbook 48 | copy = copytext.Copy('examples/test_copy.xlsx') 49 | 50 | # Get a sheet named "content" 51 | sheet = copy['content'] 52 | 53 | # The sheet has "key" and "value" columns 54 | # This tells copytext to access the value by the key 55 | 56 | # Print the value where the "key" is named "lorem_ipsum" 57 | print sheet['lorem_ipsum'] 58 | 59 | # Print the value in the third row (counting headers) 60 | print sheet[2] 61 | 62 | # The rows themselves are also objects 63 | row = sheet['lorem_ipsum'] 64 | 65 | # You can access the columns by indexing into the row 66 | 67 | # Print the key column of the row 68 | print row['key'] 69 | 70 | # Print the first column in the row 71 | print row[0] 72 | 73 | # You can also iterate over rows 74 | for row in sheet: 75 | # Print the value 76 | print row 77 | 78 | # Print the key/value pair 79 | print row['key'], row['value'] 80 | 81 | # This sheet has "term" and "definition" columns, but no "key" 82 | sheet = copy['example_list'] 83 | 84 | # This won't work 85 | # print sheet[0] 86 | 87 | # But this will 88 | for row in sheet: 89 | print row['term'], row['definition'] 90 | 91 | # You can have as many rows and columns as you want! 92 | 93 | # Serialize a sheet to json 94 | js = sheet.json() 95 | 96 | # Serialize an entire workbook to json 97 | js = copy.json() 98 | 99 | .. note:: 100 | 101 | Copytext only understands ``xlsx`` files, and all cells must be converted to text formatting. Copytext does not grok dates or numbers. 102 | 103 | Using with Flask 104 | ================ 105 | 106 | Probably the most significant use case for copytext is as an input to a template system. 
For example, here is how you would use it with Flask's Jinja-based templates: 107 | 108 | Your view:: 109 | 110 | from flask import render_template 111 | 112 | import copytext 113 | 114 | @app.route('/') 115 | def index(): 116 | context = { 117 | 'COPY': copytext.Copy('examples/test_copy.xlsx') 118 | } 119 | 120 | return render_template('index.html', **context) 121 | 122 | And in your template:: 123 | 124 |
125 |

{{ COPY.content.header_title }}

126 |

{{ COPY.content.lorem_ipsum }}

127 |
128 | 129 |
130 | {% for row in COPY.example_list %} 131 |
{{ row.term }}
{{ row.definition }}
132 | {% endfor %} 133 |
134 | 135 | copytext automatically marks all strings as safe (``Markup`` in Jinja parlance). 136 | 137 | .. note:: 138 | 139 | Jinja templates automatically fall back from attribute access to item access, which is why you see ``row.term`` instead of ``row['term']`` in these examples. This means you can also do ``row.0`` to access the first column. 140 | 141 | Need a JSON version of your copytext for the client? 142 | 143 | :: 144 | 145 | 148 | 149 | License 150 | ======= 151 | 152 | .. include:: ../LICENSE 153 | 154 | Changelog 155 | ========= 156 | 157 | .. include:: ../CHANGELOG 158 | 159 | Indices and tables 160 | ================== 161 | 162 | * :ref:`genindex` 163 | * :ref:`modindex` 164 | * :ref:`search` 165 | 166 | -------------------------------------------------------------------------------- /examples/from_google.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nprapps/copytext/8a363ec4ec05e54711c2112db50385f05da5cd97/examples/from_google.xlsx -------------------------------------------------------------------------------- /examples/test_copy.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nprapps/copytext/8a363ec4ec05e54711c2112db50385f05da5cd97/examples/test_copy.xlsx -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -e .[dev] 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -e . 
2 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | 5 | setup( 6 | name='copytext', 7 | version='0.2.1', 8 | description='A library for accessing a spreadsheet as a native Python object suitable for templating.', 9 | long_description=open('README.md').read(), 10 | author='NPR Visuals Team', 11 | author_email='nprapps@npr.org', 12 | url='http://copytext.readthedocs.org/', 13 | license='MIT', 14 | classifiers=[ 15 | 'Development Status :: 5 - Production/Stable', 16 | 'Intended Audience :: Developers', 17 | 'License :: OSI Approved :: MIT License', 18 | 'Natural Language :: English', 19 | 'Operating System :: OS Independent', 20 | 'Programming Language :: Python', 21 | 'Programming Language :: Python :: 2.7', 22 | 'Programming Language :: Python :: 3.3', 23 | 'Programming Language :: Python :: 3.4', 24 | 'Programming Language :: Python :: 3.5', 25 | 'Programming Language :: Python :: 3.6', 26 | 'Topic :: Software Development :: Libraries :: Python Modules', 27 | ], 28 | py_modules=['copytext'], 29 | install_requires=[ 30 | 'openpyxl>=2.1.4', 31 | 'six>=1.10.0' 32 | ], 33 | extras_require={ 34 | 'dev': [ 35 | 'Sphinx==1.5.6', 36 | 'nose==1.1.2', 37 | 'unittest2==0.5.1', 38 | 'coverage==3.7.1', 39 | 'flake8==3.5.0', 40 | 'tox==3.0.0' 41 | ] 42 | } 43 | ) 44 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nprapps/copytext/8a363ec4ec05e54711c2112db50385f05da5cd97/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_copytext.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 

import json
import six
import unittest2 as unittest

from six import string_types

import copytext


class CopyTestCase(unittest.TestCase):
    """
    Test the Copy object.
    """
    def setUp(self):
        self.copy = copytext.Copy('examples/test_copy.xlsx')

    def test_sheet_by_item_name(self):
        sheet = self.copy['content']
        self.assertIsInstance(sheet, copytext.Sheet)

    def test_sheet_by_prop_name(self):
        # Sheets are only reachable by item access, never as attributes.
        with self.assertRaises(AttributeError):
            self.copy.content

    def test_sheet_does_not_exist(self):
        # A missing sheet yields an Error object rather than raising.
        error = self.copy['foo']
        self.assertIsInstance(error, copytext.Error)
        self.assertEqual(error._error, 'COPY.foo [sheet does not exist]')

    def test_json(self):
        s = self.copy.json()
        data = json.loads(s)

        self.assertIn('attribution', data)
        self.assertIn('content', data)
        self.assertIn('example_list', data)
        self.assertIn('key_without_value', data)

        attribution = data['attribution']

        self.assertIsInstance(attribution, dict)
        self.assertIn('byline', attribution)
        self.assertEqual(attribution['byline'], u'Uñicodë')

        example_list = data['example_list']

        self.assertIsInstance(example_list, list)
        self.assertIsInstance(example_list[0], dict)
        self.assertEqual(example_list[0], {'term': 'jabberwocky', 'definition': 'Invented or meaningless language; nonsense.'})

        key_without_value = data['key_without_value']

        self.assertIsInstance(key_without_value, dict)
        self.assertIsInstance(key_without_value['first-last'], dict)
        self.assertEqual(key_without_value['first-last']['name'], 'first last')


class SheetTestCase(unittest.TestCase):
    """
    Test the Sheet object.
    """
    def setUp(self):
        self.copy = copytext.Copy('examples/test_copy.xlsx')
        self.sheet = self.copy['content']

    def test_column_count(self):
        self.assertEqual(len(self.sheet._columns), 2)

    def test_row_by_key_item_index(self):
        row = self.sheet[1]
        self.assertIsInstance(row, copytext.Row)

    def test_row_by_key_item_name(self):
        row = self.sheet['header_title']
        self.assertIsInstance(row, copytext.Row)

    def test_row_by_key_prop_name(self):
        # Rows are only reachable by item access, never as attributes.
        with self.assertRaises(AttributeError):
            self.sheet.header_title

    def test_key_does_not_exist(self):
        error = self.sheet['foo']
        self.assertIsInstance(error, copytext.Error)
        self.assertEqual(error._error, 'COPY.content.foo [key does not exist in sheet]')

    def test_column_index_outside_range(self):
        # NOTE: despite the method name, indexing a Sheet by integer selects
        # a *row*; the expected message below says "row index outside range".
        error = self.sheet[65]
        self.assertIsInstance(error, copytext.Error)
        self.assertEqual(error._error, 'COPY.content.65 [row index outside range]')

    def test_json(self):
        s = self.copy['attribution'].json()
        data = json.loads(s)

        self.assertIsInstance(data, dict)
        self.assertIn('byline', data)
        self.assertEqual(data['byline'], u'Uñicodë')

        s = self.copy['example_list'].json()
        data = json.loads(s)

        self.assertIsInstance(data, list)
        self.assertIsInstance(data[0], dict)
        self.assertEqual(data[0], {'term': 'jabberwocky', 'definition': 'Invented or meaningless language; nonsense.'})

        s = self.copy['key_without_value'].json()
        data = json.loads(s)

        self.assertIsInstance(data, dict)
        self.assertIsInstance(data['first-last'], dict)
        self.assertEqual(data['first-last']['name'], 'first last')


class KeyValueRowTestCase(unittest.TestCase):
    """
    Test the Row object.
    """
    def setUp(self):
        copy = copytext.Copy('examples/test_copy.xlsx')
        self.sheet = copy['content']
        self.row = self.sheet['header_title']

    def test_column_count(self):
        self.assertEqual(len(self.row._columns), 2)
        self.assertEqual(len(self.row._row), 2)

    def test_cell_by_value_unicode(self):
        cell = str(self.row)
        self.assertIsInstance(cell, string_types)
        self.assertEqual(cell, 'Across-The-Top Header')

    def test_null_cell_value(self):
        # Both the row and its (empty) value cell must test false.
        row = self.sheet['nothing']
        self.assertFalse(row)
        self.assertFalse(row[1])

    def test_cell_by_index(self):
        cell = self.row[1]
        self.assertIsInstance(cell, string_types)
        self.assertEqual(cell, 'Across-The-Top Header')

    def test_cell_by_item_name(self):
        cell = self.row['value']
        self.assertIsInstance(cell, string_types)
        self.assertEqual(cell, 'Across-The-Top Header')

    def test_cell_by_prop_name(self):
        # Cells are only reachable by item access, never as attributes.
        with self.assertRaises(AttributeError):
            self.row.value

    def test_column_does_not_exist(self):
        error = self.row['foo']
        self.assertIsInstance(error, copytext.Error)
        self.assertEqual(error._error, 'COPY.content.0.foo [column does not exist in sheet]')

    def test_column_index_outside_range(self):
        error = self.row[2]
        self.assertIsInstance(error, copytext.Error)
        self.assertEqual(error._error, 'COPY.content.0.2 [column index outside range]')

    def test_row_truthiness(self):
        self.assertFalse(self.sheet['foo'])
        self.assertTrue(self.sheet['header_title'])


class ListRowTestCase(unittest.TestCase):
    """
    Test rows of a sheet accessed by position and by iteration.
    """
    def setUp(self):
        copy = copytext.Copy('examples/test_copy.xlsx')
        self.sheet = copy['example_list']

    def test_iteration(self):
        i = iter(self.sheet)
        row = six.next(i)

        self.assertEqual(row[0], 'jabberwocky')
        self.assertEqual(row[1], 'Invented or meaningless language; nonsense.')

        # Consume the three remaining rows; the iterator must then be spent.
        six.next(i)
        six.next(i)
        six.next(i)

        with self.assertRaises(StopIteration):
            six.next(i)

    def test_row_truthiness(self):
        row = self.sheet[0]

        self.assertTrue(row)

        # An out-of-range index yields a falsey object.
        row = self.sheet[100]

        self.assertFalse(row)


class GoogleDocsTestCase(unittest.TestCase):
    """
    Test with an XLSX exported from Google Docs.
    """
    def setUp(self):
        self.copy = copytext.Copy('examples/from_google.xlsx')

    def test_column_count(self):
        sheet = self.copy['data_bar']

        self.assertEqual(len(sheet._columns), 2)

        row = sheet[0]

        self.assertEqual(len(row._columns), 2)
        self.assertEqual(len(row._row), 2)


class MarkupTestCase(unittest.TestCase):
    """
    Test strings get Markup'd.
    """
    def setUp(self):
        copy = copytext.Copy('examples/test_copy.xlsx')
        self.sheet = copy['content']

    def test_markup_row(self):
        row = self.sheet['footer_title']

        self.assertIsInstance(row.__html__(), string_types)
        self.assertEqual(row.__html__(), 'This content goes to 12')

    def test_markup_cell(self):
        # Call the dunder directly, as the original test did, so the exact
        # return type of __str__ is what gets checked.
        cell = self.sheet['footer_title'].__str__()

        self.assertIsInstance(cell, string_types)
        self.assertEqual(cell, 'This content goes to 12')


class CellTypeTestCase(unittest.TestCase):
    """
    Test various cell "types".

    NB: These tests are fake. They only work if the input data is formatted as text.

    Things which are actually non-string don't work and can't be supported.
    """
    def setUp(self):
        copy = copytext.Copy('examples/test_copy.xlsx')
        self.sheet = copy['attribution']

    def test_date(self):
        row = self.sheet['pubdate']
        val = str(row)

        self.assertEqual(val, '1/22/2013')

    def test_time(self):
        row = self.sheet['pubtime']
        val = str(row)

        self.assertEqual(val, '3:37 AM')


class ErrorTestCase(unittest.TestCase):
    """
    Test for Error object.
    """
    def setUp(self):
        self.error = copytext.Error('foobar')

    def test_getitem(self):
        # Indexing an Error returns the very same Error.
        child_error = self.error['bing']
        self.assertIs(child_error, self.error)
        self.assertEqual(str(child_error), 'foobar')

    def test_getitem_index(self):
        child_error = self.error[1]
        self.assertIs(child_error, self.error)
        self.assertEqual(str(child_error), 'foobar')

    def test_iter(self):
        # Iterating an Error yields itself exactly once.
        i = iter(self.error)
        child_error = six.next(i)
        self.assertIs(child_error, self.error)
        self.assertEqual(str(child_error), 'foobar')

        with self.assertRaises(StopIteration):
            six.next(i)

    def test_len(self):
        self.assertEqual(len(self.error), 1)

    def test_unicode(self):
        self.assertEqual(str(self.error), 'foobar')

    def test_falsey(self):
        self.assertFalse(self.error)

# --------------------------------------------------------------------------
# /tox.ini:
# --------------------------------------------------------------------------
# NOTE(review): envlist has no py36 entry although setup.py's classifiers
# advertise Python 3.6 - confirm whether py36 should be added.
#
# [tox]
# envlist = py27,py33,py34,py35,pypy
#
# [testenv]
# deps=-e.[dev]
# whitelist_externals=make
# commands=
#     make -C docs html
#     flake8 copytext.py
#     nosetests
# --------------------------------------------------------------------------