├── .gitchangelog.rc
├── .github
│   └── workflows
│       └── python-package.yml
├── .gitignore
├── .readthedocs.yaml
├── CHANGELOG.rst
├── LICENSE
├── MANIFEST.in
├── README.rst
├── VERSION
├── docs
│   ├── Makefile
│   ├── conf.py
│   ├── index.rst
│   └── make.bat
├── gspread_dataframe.py
├── pyproject.toml
├── tests
│   ├── __init__.py
│   ├── cell_list.json
│   ├── gspread_dataframe_integration.py
│   ├── gspread_dataframe_test.py
│   ├── mock_worksheet.py
│   ├── sheet_contents_evaluated.json
│   ├── sheet_contents_formulas.json
│   └── tests.config.example
└── tox.ini

/.gitchangelog.rc:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8; mode: python -*-
2 | ##
3 | ## Format
4 | ##
5 | ## ACTION: [AUDIENCE:] COMMIT_MSG [!TAG ...]
6 | ##
7 | ## Description
8 | ##
9 | ## ACTION is one of 'chg', 'fix', 'new'
10 | ##
11 | ## Is WHAT the change is about.
12 | ##
13 | ## 'chg' is for refactors, small improvements, cosmetic changes...
14 | ## 'fix' is for bug fixes
15 | ## 'new' is for new features, big improvements
16 | ##
17 | ## AUDIENCE is optional and one of 'dev', 'usr', 'pkg', 'test', 'doc'
18 | ##
19 | ## Is WHO is concerned by the change.
20 | ##
21 | ## 'dev' is for developers (API changes, refactors...)
22 | ## 'usr' is for final users (UI changes)
23 | ## 'pkg' is for packagers (packaging changes)
24 | ## 'test' is for testers (test-only related changes)
25 | ## 'doc' is for documentation writers (doc-only changes)
26 | ##
27 | ## COMMIT_MSG is ... well ... the commit message itself.
28 | ##
29 | ## TAGs are additional adjectives such as 'refactor', 'minor', 'cosmetic'.
30 | ##
31 | ## They are preceded by a '!' or a '@' (prefer the former, as the
32 | ## latter is wrongly interpreted by GitHub). Commonly used tags are:
33 | ##
34 | ## 'refactor' is obviously for refactoring code only
35 | ## 'minor' is for a trivial change (a typo, adding a comment)
36 | ## 'cosmetic' is for a cosmetic-driven change (re-indentation, 80-col...)
37 | ## 'wip' is for partial functionality but complete subfunctionality.
38 | ##
39 | ## Example:
40 | ##
41 | ## new: usr: support of bazaar implemented
42 | ## chg: re-indented some lines !cosmetic
43 | ## new: dev: updated code to be compatible with last version of killer lib.
44 | ## fix: pkg: updated year of licence coverage.
45 | ## new: test: added a bunch of tests around user usability of feature X.
46 | ## fix: typo in spelling my name in comment. !minor
47 | ##
48 | ## Please note that multi-line commit messages are supported, and only the
49 | ## first line will be considered as the "summary" of the commit message. So
50 | ## tags and other rules apply only to the summary. The body of the commit
51 | ## message will be displayed in the changelog without reformatting.
52 | 
53 | 
54 | ##
55 | ## ``ignore_regexps`` is a list of regexps
56 | ##
57 | ## Any commit having its full commit message matching any regexp listed here
58 | ## will be ignored and won't be reported in the changelog.
59 | ##
60 | ignore_regexps = [
61 |     r'@minor', r'!minor',
62 |     r'@cosmetic', r'!cosmetic',
63 |     r'@refactor', r'!refactor',
64 |     r'@wip', r'!wip',
65 |     r'^([cC]hg|[fF]ix|[nN]ew)\s*:\s*[pP]kg:',
66 |     r'^([cC]hg|[fF]ix|[nN]ew)\s*:\s*[dD]ev:',
67 |     r'^(.{3,3}\s*:)?\s*[fF]irst commit.?\s*$',
68 |     r'^$', ## ignore commits with empty messages
69 | ]
70 | 
71 | 
72 | ## ``section_regexps`` is a list of 2-tuples associating a string label and a
73 | ## list of regexps
74 | ##
75 | ## Commit messages will be classified into sections thanks to this. Section
76 | ## titles are the labels, and a commit is classified under a section if any
77 | ## of the associated regexps matches.
78 | ##
79 | ## Please note that ``section_regexps`` will only classify commits and won't
80 | ## make any changes to the contents. So you'll probably want to check
81 | ## ``subject_process`` (or ``body_process``) to make some changes to the subject,
82 | ## whenever you are tweaking this variable.
83 | ##
84 | section_regexps = [
85 |     ('New', [
86 |         r'^[nN]ew\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$',
87 |     ]),
88 |     ('Changes', [
89 |         r'^[cC]hg\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$',
90 |     ]),
91 |     ('Fix', [
92 |         r'^[fF]ix\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$',
93 |     ]),
94 | 
95 |     ('Other', None ## Match all lines
96 |     ),
97 | 
98 | ]
99 | 
100 | 
101 | ## ``body_process`` is a callable
102 | ##
103 | ## This callable will be given the original body and the result will
104 | ## be used in the changelog.
105 | ##
106 | ## Available constructs are:
107 | ##
108 | ## - any Python callable that takes one text argument and returns a text argument.
109 | ##
110 | ## - ReSub(pattern, replacement): will apply regexp substitution.
111 | ##
112 | ## - Indent(chars=" "): will indent the text with the given prefix.
113 | ## Please remember that template engines also get to modify the text and
114 | ## will usually indent the text themselves if needed.
115 | ##
116 | ## - Wrap(regexp=r"\n\n"): re-wrap text in separate paragraphs to fill 80 columns.
117 | ##
118 | ## - noop: do nothing
119 | ##
120 | ## - ucfirst: ensure the first letter is uppercase.
121 | ## (usually used in the ``subject_process`` pipeline)
122 | ##
123 | ## - final_dot: ensure text finishes with a dot
124 | ## (usually used in the ``subject_process`` pipeline)
125 | ##
126 | ## - strip: remove any spaces before or after the content of the string
127 | ##
128 | ## - SetIfEmpty(msg="No commit message."): will set the text to
129 | ## the given ``msg`` if the current text is empty.
130 | ##
131 | ## Additionally, you can `pipe` the provided filters, for instance:
132 | #body_process = Wrap(regexp=r'\n(?=\w+\s*:)') | Indent(chars=" ")
133 | #body_process = Wrap(regexp=r'\n(?=\w+\s*:)')
134 | #body_process = noop
135 | body_process = ReSub(r'((^|\n)[A-Z]\w+(-\w+)*: .*(\n\s+.*)*)+$', r'') | strip
136 | 
137 | 
138 | ## ``subject_process`` is a callable
139 | ##
140 | ## This callable will be given the original subject and the result will
141 | ## be used in the changelog.
142 | ##
143 | ## Available constructs are those listed in the ``body_process`` doc.
144 | subject_process = (strip |
145 |     ReSub(r'^([cC]hg|[fF]ix|[nN]ew)\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n@]*)(@[a-z]+\s+)*$', r'\4') |
146 |     SetIfEmpty("No commit message.") | ucfirst | final_dot)
147 | 
148 | 
149 | ## ``tag_filter_regexp`` is a regexp
150 | ##
151 | ## Tags that will be used for the changelog must match this regexp.
152 | ##
153 | tag_filter_regexp = r'^v[0-9]+\.[0-9]+(\.[0-9]+)?$'
154 | 
155 | 
156 | ## ``unreleased_version_label`` is a string or a callable that outputs a string
157 | ##
158 | ## This label will be used as the changelog title of the last set of changes
159 | ## between the last valid tag and HEAD, if any.
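##
## A callable is also accepted here. A purely illustrative sketch (the
## date-based label is an assumption of this example, not something
## gitchangelog requires; any zero-argument callable returning a string
## works):
##
##   import datetime
##   unreleased_version_label = lambda: (
##       "%s (unreleased)" % datetime.date.today().isoformat())
##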
160 | unreleased_version_label = "(unreleased)"
161 | 
162 | 
163 | ## ``output_engine`` is a callable
164 | ##
165 | ## This will change the output format of the generated changelog file
166 | ##
167 | ## Available choices are:
168 | ##
169 | ## - rest_py
170 | ##
171 | ## Legacy pure Python engine, outputs reStructuredText.
172 | ## This is the default.
173 | ##
174 | ## - mustache(<template_name>)
175 | ##
176 | ## Template name can be any of the available templates in
177 | ## ``templates/mustache/*.tpl``.
178 | ## Requires the Python package ``pystache``.
179 | ## Examples:
180 | ## - mustache("markdown")
181 | ## - mustache("restructuredtext")
182 | ##
183 | ## - makotemplate(<template_name>)
184 | ##
185 | ## Template name can be any of the available templates in
186 | ## ``templates/mako/*.tpl``.
187 | ## Requires the Python package ``mako``.
188 | ## Examples:
189 | ## - makotemplate("restructuredtext")
190 | ##
191 | output_engine = rest_py
192 | #output_engine = mustache("restructuredtext")
193 | #output_engine = mustache("markdown")
194 | #output_engine = makotemplate("restructuredtext")
195 | 
196 | 
197 | ## ``include_merge`` is a boolean
198 | ##
199 | ## This option tells git-log whether to include merge commits in the log.
200 | ## The default is to include them.
201 | include_merge = True
202 | 
203 | 
204 | ## ``log_encoding`` is a string identifier
205 | ##
206 | ## This option tells gitchangelog what encoding is output by ``git log``.
207 | ## The default is to be clever about it: it checks ``git config`` for
208 | ## ``i18n.logOutputEncoding``, and if not found will default to git's own
209 | ## default: ``utf-8``.
210 | #log_encoding = 'utf-8'
211 | 
212 | 
213 | ## ``publish`` is a callable
214 | ##
215 | ## Sets what ``gitchangelog`` should do with the output generated by
216 | ## the output engine. ``publish`` is a callable taking one argument
217 | ## that is an iterator over lines from the output engine.
218 | ##
219 | ## Some helper callables are provided:
220 | ##
221 | ## Available choices are:
222 | ##
223 | ## - stdout
224 | ##
225 | ## Outputs directly to standard output
226 | ## (This is the default)
227 | ##
228 | ## - FileInsertAtFirstRegexMatch(file, pattern, idx=lambda m: m.start())
229 | ##
230 | ## Creates a callable that will parse the given file for the given
231 | ## regex pattern and will insert the output in the file.
232 | ## ``idx`` is a callable that receives the match object and
233 | ## must return an integer index at which to insert the
234 | ## output in the file. The default is to return the position of
235 | ## the start of the matched string.
236 | ##
237 | ## - FileRegexSubst(file, pattern, replace, flags)
238 | ##
239 | ## Applies a replacement in place in the given file. Your regex pattern must
240 | ## take care of everything and might be more complex. Check the README
241 | ## for a complete copy-pastable example.
242 | ##
243 | # publish = FileInsertIntoFirstRegexMatch(
244 | #     "CHANGELOG.rst",
245 | #     r'/(?P<rev>[0-9]+\.[0-9]+(\.[0-9]+)?)\s+\([0-9]+-[0-9]{2}-[0-9]{2}\)\n--+\n/',
246 | #     idx=lambda m: m.start(1)
247 | # )
248 | #publish = stdout
249 | 
250 | 
251 | ## ``revs`` is a list of callables or a list of strings
252 | ##
253 | ## Callables will be called to resolve as strings and allow dynamic
254 | ## computation of these. The result will be used as revisions for
255 | ## gitchangelog (as if directly stated on the command line). This allows
256 | ## you to filter exactly which commits will be read by gitchangelog.
257 | ##
258 | ## For full documentation on the format of these strings, please
259 | ## refer to the ``git rev-list`` arguments. There are many examples.
260 | ##
261 | ## Using callables is especially useful, for instance, if you
262 | ## are using gitchangelog to generate your changelog incrementally.
263 | ##
264 | ## Some helpers are provided; you can use them::
265 | ##
266 | ## - FileFirstRegexMatch(file, pattern): will return a callable that will
267 | ## return the first string match for the given pattern in the given file.
268 | ## If you use named sub-patterns in your regex pattern, it'll output only
269 | ## the string matching the regex pattern named "rev".
270 | ##
271 | ## - Caret(rev): will return the rev prefixed by a "^", which is a
272 | ## way to remove the given revision and all its ancestors.
273 | ##
274 | ## Please note that if you provide a rev-list on the command line, it'll
275 | ## replace this value (which will then be ignored).
276 | ##
277 | ## If empty, then ``gitchangelog`` will act as if it had to generate a full
278 | ## changelog.
279 | ##
280 | ## The default is to use all commits to make the changelog.
281 | #revs = ["^1.0.3", ]
282 | #revs = [
283 | #     Caret(
284 | #         FileFirstRegexMatch(
285 | #             "CHANGELOG.rst",
286 | #             r"(?P<rev>[0-9]+\.[0-9]+(\.[0-9]+)?)\s+\([0-9]+-[0-9]{2}-[0-9]{2}\)\n--+\n")),
287 | #     "HEAD"
288 | #]
289 | revs = []
290 | 
--------------------------------------------------------------------------------
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3 | 
4 | name: Build and Test
5 | 
6 | on:
7 |   push:
8 |     tags-ignore:
9 |       - '**'
10 |     branches:
11 |       - '**'
12 | 
13 | jobs:
14 |   build:
15 | 
16 |     runs-on: ubuntu-latest
17 |     strategy:
18 |       fail-fast: false
19 |       max-parallel: 1
20 |       matrix:
21 |         python-version: ["3.8", "3.13"]
22 | 
23 |     steps:
24 |     - uses: actions/checkout@v4
25 |     - name: Set up Python ${{ matrix.python-version }}
26 |       uses: actions/setup-python@v3
27 |       with:
28 |         python-version: ${{ matrix.python-version }}
29 |     - name: Install dependencies
30 |       run: |
31 |         python -m pip install --upgrade pip
32 |         python -m pip install build tox tox-gh-actions
33 |     - name: Build sdist and wheel
34 |       run: |
35 |         python -m build
36 |     - name: Test with tox
37 |       run: |
38 |         echo "${GSHEETS_CREDENTIALS}" > tests/creds.json
39 |         echo "${TESTS_CONFIG}" > tests/tests.config
40 |         tox -v
41 |       env:
42 |         GSHEETS_CREDENTIALS: ${{secrets.GSHEETS_CREDENTIALS}}
43 |         TESTS_CONFIG: ${{secrets.TESTS_CONFIG}}
44 | 
45 | 
46 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other info into it.
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | /tests/tests.config 92 | /tests/creds.json 93 | /.venv 94 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sphinx: 4 | configuration: docs/conf.py 5 | builder: html 6 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | 5 | (unreleased) 6 | ------------ 7 | - Bump to v4.0.0b2. [Robin Thomas] 8 | - Convert to pyproject file, end python 2.7 support for upcoming 9 | releases, move to GitHub actions (#65) [Robin Thomas] 10 | 11 | * remove six usage; improve coverage 12 | 13 | * test made pd-version-aware; remove setup.py; make testenvs vary pd version use 14 | 15 | * removed reqs file 16 | 17 | * relax coverage fail threshold 18 | 19 | * remove travis secrets from gitignore 20 | 21 | * 4.0.0-beta version for now 22 | 23 | * correct build badge and python version support in README 24 | 25 | * renamed gh workflow 26 | 27 | * 4.0.0b1 not -beta 28 | - Create python-package.yml. [Robin Thomas] 29 | - Correct typo in README, remove unneeded docs badge. [Robin Thomas] 30 | - Reworded and rst-formatted new paragraph in readme. [Robin Thomas] 31 | 32 | 33 | v4.0.0 (2024-06-12) 34 | ------------------- 35 | - Bump to v4.0.0. [Robin Thomas] 36 | - Removed test of prefix= param removed in Pandas 2.x. [Robin Thomas] 37 | - Support drop_empty_rows and drop_empty_columns parameters, True by 38 | default (#62) [Robin Thomas] 39 | 40 | Fixes #61. 41 | - Fixes #54. Worksheet names that are valid cell references no longer 42 | cause Sheets API errors. (#58) [Robin Thomas] 43 | - Corrected Travis CI badge url. [Robin Thomas] 44 | - Keep test credentials out of sdist. [Robin Thomas] 45 | 46 | 47 | v3.3.1 (2023-06-16) 48 | ------------------- 49 | - Bump to v3.3.1. [Robin Thomas] 50 | - Update gspread_dataframe.py (#53) [Rulowizard] 51 | 52 | Fixes #52. Update the variable WORKSHEET_MAX_CELL_COUNT from 5000000 to 10000000. 53 | - Fixes #50. Take the row and col arguments into consideration when 54 | resizing the worksheet (#51) [Motin] 55 | - Removed stray endline. [Robin Thomas] 56 | 57 | 58 | v3.3.0 (2022-04-04) 59 | ------------------- 60 | - Bump to v3.3.0. [Robin Thomas] 61 | - Handle all cases of header row writing/reading (#44) (#47) [Robin 62 | Thomas] 63 | 64 | Fixes #44. 
Writes header rows properly for all cases of include_index, MultiIndex columns or index itself, and names for columns object levels or index levels. 65 | - Added :param entries for row and col in docstring. [Robin Thomas] 66 | - Remove 3.10, still not released on travis. [Robin Thomas] 67 | - Remove pypy from build matrix, add 3.10. [Robin Thomas] 68 | - Try default html sphnix builder for RTD. [Robin Thomas] 69 | - Removed pandas version checker because pandas 0.24.0 was a LONG time 70 | ago. [Robin Thomas] 71 | - Try autodoc mock imports. [Robin Thomas] 72 | - Push proper dir onto sys.path. [Robin Thomas] 73 | - Try pushing onto sys.path to get automodule to work for RTD. [Robin 74 | Thomas] 75 | - Rename to yaml file. [Robin Thomas] 76 | - Add readthedocs yml file. [Robin Thomas] 77 | 78 | 79 | v3.2.2 (2021-11-27) 80 | ------------------- 81 | - Bump to v3.2.2. [Robin Thomas] 82 | - Fixes #40. Import Cell from main gspread package to avoid breakages 83 | with gspread>=5.0.0. [Robin Thomas] 84 | 85 | 86 | v3.2.1 (2021-02-03) 87 | ------------------- 88 | - Bump to v3.2.1. [Robin Thomas] 89 | - Fixes #37. [Robin Thomas] 90 | 91 | Fixes #37. Discovered that Sheets API applies rowCount first, and THEN (#38) checks cell limits, so that if new rowCount * existing colCount > 5000000, then worksheet exceeds limit of 5000000 cells and the whole resize operation is aborted. Solution is to determine if such a condition would occur and then issue the smaller columnCount first as a separate call to reduce 92 | cell count. Full test coverage added. 93 | 94 | 95 | v3.2.0 (2020-12-21) 96 | ------------------- 97 | - Bump to v3.2.0. [Robin Thomas] 98 | - Fixes #35. Use `to_numpy('object')` to perform needed type conversions 99 | (#36) [Robin Thomas] 100 | 101 | Fixes #35. Use `to_numpy('object')` to perform needed type conversions (#36) on frame and index values before attempting to update cell values in the worksheet. Now requires Pandas>=0.24.0 instead of >=0.14.0. 102 | 103 | 104 | v3.1.3 (2020-12-06) 105 | ------------------- 106 | - Bump to v3.1.3. [Robin Thomas] 107 | - Fixes #34 by correctly escaping formula expressions =XXX when 108 | allow_formulas=False. [Robin Thomas] 109 | - Add 3.9 to travis build. [Robin Thomas] 110 | - Pin six to >=1.12.0 to avoid weird environmental dependency problem. 111 | [Robin Thomas] 112 | - Move to travis-ci.com. [Robin Thomas] 113 | 114 | 115 | v3.1.2 (2020-11-30) 116 | ------------------- 117 | - Bump to v3.1.2. [Robin Thomas] 118 | - Allow for DataFrame cell input values to be unicode objects in Python 119 | 2. [Robin Thomas] 120 | - Black/flake8 all the things. [Robin Thomas] 121 | 122 | 123 | v3.1.1 (2020-10-14) 124 | ------------------- 125 | - Bump to v3.1.1. [Robin Thomas] 126 | - Send numeric values in DataFrames as JSON numeric values to avoid 127 | locale-specific misinterpretation (#30) [Robin Thomas] 128 | 129 | * Fixes #29. Ensures that numeric values in DataFrames are sent to Sheets API as JSON numeric 130 | values, so that locale-specific parsing of decimal separators cannot misinterpret 131 | the stringified decimal values. Test coverage included. 132 | - 3.9-dev pandas not ready for travis-ci use. [Robin Thomas] 133 | - And let's try 3.9-dev for travis too. [Robin Thomas] 134 | - Add pypy (not yet pypy3) to Travis build. [Robin Thomas] 135 | 136 | 137 | v3.1.0 (2020-09-15) 138 | ------------------- 139 | - Bump to 3.1.0. [Robin Thomas] 140 | - String_escaping now a parameter to set_with_dataframe (#27) [Robin 141 | Thomas] 142 | 143 | Fixes #26 . 
144 | 
145 | A new parameter, string_escaping, is added to set_with_dataframe: values are 'default', 'off', 'full', or any callable taking one parameter.
146 | 
147 | 
148 | v3.0.8 (2020-08-20)
149 | -------------------
150 | - Bump to 3.0.8. [Robin Thomas]
151 | - Update README.rst. [Robin Thomas]
152 | 
153 |   whitespace tweak
154 | - Rst fix. [Robin Thomas]
155 | - Add dataframe formatting doc section, with link to formatting package.
156 |   [Robin Thomas]
157 | - Argh no 3.9-dev yet. [Robin Thomas]
158 | - Corrected version reference in sphinx docs. [Robin Thomas]
159 | - Removed 3.6, added 3.9-dev to travis build. [Robin Thomas]
160 | - Make collections.abc imports 3.9-compatible. [Robin Thomas]
161 | - Use full version string in sphinx docs. [Robin Thomas]
162 | - Add docs badge to README. [Robin Thomas]
163 | - Add downloads badge. [Robin Thomas]
164 | - Accept nrows only as keyword parameter just as for all other pandas
165 |   parser-related arguments; remove nrows parameter documentation since
166 |   pandas, not this package, owns the parameter and its meaning. [Robin
167 |   Thomas]
168 | 
169 | 
170 | v3.0.7 (2020-05-29)
171 | -------------------
172 | - Bump to 3.0.7. [Robin Thomas]
173 | - Support nrows (pandas) parameter properly in get_as_dataframe. [Robin
174 |   Thomas]
175 | - Added to MANIFEST.in using check-manifest tool. [Robin Thomas]
176 | 
177 | 
178 | v3.0.6 (2020-04-19)
179 | -------------------
180 | - Bump to 3.0.6. [Robin Thomas]
181 | - Add MANIFEST.in to include VERSION file in sdist. [Robin Thomas]
182 | - Try link again. [Robin Thomas]
183 | - Add pandas doc link. [Robin Thomas]
184 | 
185 | 
186 | v3.0.5 (2020-04-02)
187 | -------------------
188 | - Bump to v3.0.5. [Robin Thomas]
189 | - Multiindex column headers (#21) [Robin Thomas]
190 | 
191 |   * Fixes #20. Support for MultiIndex object used as the columns for a DataFrame, and also MultiIndex object used as the index of a DataFrame.
192 | - Support for MultiIndex (hierarchical) indexes for set_with_dataframe.
193 |   [Robin Thomas]
194 | - Fewer py builds. [Robin Thomas]
195 | - 2.7 compatible config reading. [Robin Thomas]
196 | - Try pip cache for travis. [Robin Thomas]
197 | - Bundle up travis secrets for CI build. [Robin Thomas]
198 | 
199 | 
200 | v3.0.4 (2020-02-09)
201 | -------------------
202 | - Bump to 3.0.4. [Robin Thomas]
203 | - Fix deprecation warnings due to invalid escape sequences. [Karthikeyan
204 |   Singaravelan]
205 | 
206 | 
207 | v3.0.3 (2019-08-06)
208 | -------------------
209 | - Changelog updated for v3.0.3. [Robin Thomas]
210 | - Fixup setup.py for tests_require, bump to 3.0.3. [Robin Thomas]
211 | - Fixes robin900/gspread-dataframe#16. [Robin Thomas]
212 | 
213 |   Adds integration test coverage (for #16 fix and for future testing).
214 | - Added fury badge. [Robin Thomas]
215 | - Tweak docstring. [Robin Thomas]
216 | 
217 | 
218 | v3.0.2 (2018-07-24)
219 | -------------------
220 | - Bump to 3.0.2. [Robin Thomas]
221 | - Rbt fix 13 (#14) [Robin Thomas]
222 | 
223 |   * Fixes #13. Test coverage added to ensure that include_index=True
224 |     and include_index=False result in the proper cell list sent to gspread.
225 | - Tightened up README intro. [Robin Thomas]
226 | 
227 | 
228 | v3.0.1 (2018-04-20)
229 | -------------------
230 | - Bump to 3.0.1. [Robin Thomas]
231 | - Use https for sphinx upload. [Robin Thomas]
232 | - Add long_description for package; indicate that code is
233 |   production/stable. [Robin Thomas]
234 | 
235 | 
236 | v3.0.0 (2018-04-19)
237 | -------------------
238 | - Bump VERSION to 3.0.0.
[Robin Thomas] 239 | - Changelog for 3.0.0. [Robin Thomas] 240 | - Support for gspread 3.0.0; entire suite of tests refactored to (#12) 241 | [Robin Thomas] 242 | 243 | use gspread 3.0.0 and its v4 sheets API. 244 | 245 | Fixes #11. 246 | - Updated CHANGES. [Robin Thomas] 247 | 248 | 249 | v2.1.1 (2018-04-19) 250 | ------------------- 251 | - Bump to 2.1.1. [Robin Thomas] 252 | - Update README. [Robin Thomas] 253 | - Prepare for bugfix release by requiring gspread<3.0.0. [Robin Thomas] 254 | 255 | 256 | v2.1.0 (2017-07-27) 257 | ------------------- 258 | - CHANGELOG for 2.1.0. [Robin Thomas] 259 | - Bump version to 2.1.0. [Robin Thomas] 260 | - Safely perform _cellrepr on list objects, since list objects can be 261 | cell values (#7) [Robin Thomas] 262 | 263 | in a DataFrame. Deal with regression where float precision is mangled 264 | during round-trip testing, by using repr() on float values and str() 265 | on other values. 266 | 267 | Fixes #6. 268 | - Complete basic write test. [Robin Thomas] 269 | - Remove stray print stmt. [Robin Thomas] 270 | 271 | 272 | v2.0.1 (2017-03-31) 273 | ------------------- 274 | - CHANGELOG for 2.0.1. [Robin Thomas] 275 | - Bump version to 2.0.1. [Robin Thomas] 276 | - Fixing #4: Respecting the minimum number of cols (#5) [Thorbjørn Wolf] 277 | - Overcome bad default repository url for upload_sphinx. [Robin Thomas] 278 | - Switch to upload3 package. [Robin Thomas] 279 | 280 | 281 | v2.0.0 (2017-03-29) 282 | ------------------- 283 | - Changelog for v2.0.0. [Robin Thomas] 284 | - Get_as_dataframe uses pandas TextParser (#3) [Robin Thomas] 285 | 286 | * pretty easy to hook up TextParser; let's see how all of the option 287 | handling works in later commits. 288 | 289 | * support evaluate_formulas 290 | 291 | * added basics of unit test suite, with accurate mock worksheet cell feed. 292 | 293 | * strip google sheet ID just to make mock XML smaller 294 | 295 | * fixed docs; added dev requirements in prep to use gitchangelog 296 | 297 | * gitchangelog.rc 298 | 299 | * gitchangelog config file in proper location 300 | 301 | * added latest generated CHANGELOG 302 | 303 | * externalized VERSION file; nearly complete test suite 304 | 305 | * completed test suite 306 | 307 | * updated CHANGELOG 308 | 309 | * back to 2.6-friendly %-based string formatting 310 | 311 | * dispensed with the now-silly-looking lazy ImportError for pandas import. 312 | 313 | * mention pandas.read_csv keyword argument support in README 314 | 315 | * avoid misinterpretation of ** in docstring by sphinx. 316 | 317 | * tighten up all the sphinx stuff 318 | 319 | * show |version| in docs index. parse version properly. 320 | 321 | * remove duplicate sphnix req 322 | 323 | * unworking attempt; need ws entry from worksheets feed to make 324 | a fully-functioning mock worksheet for writes. 325 | 326 | * write test works now 327 | 328 | * fix bytes/str problem in tests 329 | 330 | 331 | v1.1.0 (2017-03-28) 332 | ------------------- 333 | - LICENSE file via metadata, and correct upload-dir for docs. [Robin 334 | Thomas] 335 | - Change default include_index=False since that's the common case. Bump 336 | version to 1.1.0. Complete documentation index.rst. [Robin Thomas] 337 | 338 | 339 | v1.0.0 (2017-03-28) 340 | ------------------- 341 | - List Pandas as dep. [Robin Thomas] 342 | - Aded some sphinx support for steup cfg. [Robin Thomas] 343 | - Initial pre-release commit. [Robin Thomas] 344 | - Initial commit. 
[Robin Thomas] 345 | 346 | 347 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Robin Thomas 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include VERSION 2 | include *.rc 3 | include *.rst 4 | include *.txt 5 | recursive-include docs *.bat 6 | recursive-include docs *.py 7 | recursive-include docs *.rst 8 | recursive-include docs Makefile 9 | recursive-include tests *.example 10 | recursive-include tests *.json 11 | recursive-include tests *.py 12 | exclude tests/creds.json 13 | exclude tests/tests.config 14 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | gspread-dataframe 2 | ----------------- 3 | 4 | .. image:: https://badge.fury.io/py/gspread-dataframe.svg 5 | :target: https://badge.fury.io/py/gspread-dataframe 6 | 7 | .. image:: https://github.com/robin900/gspread-dataframe/actions/workflows/python-package.yml/badge.svg?branch=master 8 | :target: https://github.com/robin900/gspread-dataframe/actions/workflows/python-package.yml 9 | 10 | .. image:: https://img.shields.io/pypi/dm/gspread-dataframe.svg 11 | :target: https://pypi.org/project/gspread-dataframe 12 | 13 | This package allows easy data flow between a worksheet in a Google spreadsheet 14 | and a Pandas DataFrame. Any worksheet you can obtain using the ``gspread`` package 15 | can be retrieved as a DataFrame with ``get_as_dataframe``; DataFrame objects can 16 | be written to a worksheet using ``set_with_dataframe``: 17 | 18 | .. code:: python 19 | 20 | import pandas as pd 21 | from gspread_dataframe import get_as_dataframe, set_with_dataframe 22 | 23 | worksheet = some_worksheet_obtained_from_gspread_client 24 | 25 | df = pd.DataFrame.from_records([{'a': i, 'b': i * 2} for i in range(100)]) 26 | set_with_dataframe(worksheet, df) 27 | 28 | df2 = get_as_dataframe(worksheet) 29 | 30 | The ``get_as_dataframe`` function supports the keyword arguments 31 | that are supported by your Pandas version's text parsing readers, 32 | such as ``pandas.read_csv``. 
Consult `your Pandas documentation for a full list of options <https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html>`__. Since the ``'python'`` engine in Pandas is used for parsing,
33 | only options supported by that engine are acceptable:
34 | 
35 | .. code:: python
36 | 
37 |     import pandas as pd
38 |     from gspread_dataframe import get_as_dataframe
39 | 
40 |     worksheet = some_worksheet_obtained_from_gspread_client
41 | 
42 |     df = get_as_dataframe(worksheet, parse_dates=True, usecols=[0,2], skiprows=1, header=None)
43 | 
44 | New in version 4.0.0: ``drop_empty_rows`` and ``drop_empty_columns`` parameters, both ``True``
45 | by default, are now accepted by ``get_as_dataframe``. If you created a Google sheet with the default
46 | number of columns and rows (26 columns, 1000 rows), but have meaningful values for the DataFrame
47 | only in the top left corner of the worksheet, these parameters will cause any empty rows
48 | or columns to be discarded automatically, leaving them absent from the returned DataFrame.
49 | 
50 | Formatting Google worksheets for DataFrames
51 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
52 | 
53 | If you install the ``gspread-formatting`` package, you can additionally format a Google worksheet to suit the
54 | DataFrame data you've just written. See the `package documentation for details <https://github.com/robin900/gspread-formatting>`__, but here's a short example using the default formatter:
55 | 
56 | .. code:: python
57 | 
58 |     import pandas as pd
59 |     from gspread_dataframe import get_as_dataframe, set_with_dataframe
60 |     from gspread_formatting.dataframe import format_with_dataframe
61 | 
62 |     worksheet = some_worksheet_obtained_from_gspread_client
63 | 
64 |     df = pd.DataFrame.from_records([{'a': i, 'b': i * 2} for i in range(100)])
65 |     set_with_dataframe(worksheet, df)
66 |     format_with_dataframe(worksheet, df, include_column_header=True)
67 | 
68 | 
69 | Installation
70 | ------------
71 | 
72 | Requirements
73 | ~~~~~~~~~~~~
74 | 
75 | * Python 3 only, for releases 4.0.0 and later
76 | * Python 2.7 and 3 for releases prior to 4.0.0
77 | * gspread (>=3.0.0; to use older versions of gspread, use gspread-dataframe releases of 2.1.1 or earlier)
78 | * Pandas >= 0.24.0
79 | 
80 | From PyPI
81 | ~~~~~~~~~
82 | 
83 | .. code:: sh
84 | 
85 |     pip install gspread-dataframe
86 | 
87 | From GitHub
88 | ~~~~~~~~~~~
89 | 
90 | .. code:: sh
91 | 
92 |     git clone https://github.com/robin900/gspread-dataframe.git
93 |     cd gspread-dataframe
94 |     pip install .
95 | 
--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1 | 4.0.0b2
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line.
5 | SPHINXOPTS    =
6 | SPHINXBUILD   = sphinx-build
7 | SPHINXPROJ    = gspread-dataframe
8 | SOURCEDIR     = .
9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # gspread-dataframe documentation build configuration file, created by 5 | # sphinx-quickstart on Fri Mar 10 22:46:18 2017. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | import os 21 | import sys 22 | sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) 23 | 24 | 25 | # -- General configuration ------------------------------------------------ 26 | 27 | # If your documentation needs a minimal Sphinx version, state it here. 28 | # 29 | # needs_sphinx = '1.0' 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = ['sphinx.ext.autodoc'] 35 | 36 | # autodoc "mock imports" so that autodoc does not need heavy dependencies 37 | # installed in order to function. (Helpful for ReadTheDocs.org.) 38 | 39 | autodoc_mock_imports = ['gspread', 'pandas'] 40 | 41 | # Add any paths that contain templates here, relative to this directory. 42 | templates_path = ['_templates'] 43 | 44 | # The suffix(es) of source filenames. 45 | # You can specify multiple suffix as a list of string: 46 | # 47 | # source_suffix = ['.rst', '.md'] 48 | source_suffix = '.rst' 49 | 50 | # The master toctree document. 51 | master_doc = 'index' 52 | 53 | # General information about the project. 54 | project = 'gspread-dataframe' 55 | copyright = '2017, Robin Thomas' 56 | author = 'Robin Thomas' 57 | 58 | # The version info for the project you're documenting, acts as replacement for 59 | # |version| and |release|, also used in various other places throughout the 60 | # built documents. 61 | # 62 | import os.path 63 | 64 | with open(os.path.join(os.path.dirname(__file__), '../VERSION'), 'r') as f: 65 | # The full version, including alpha/beta/rc tags. 66 | version = f.read().strip() 67 | 68 | # The language for content autogenerated by Sphinx. Refer to documentation 69 | # for a list of supported languages. 70 | # 71 | # This is also used if you do content translation via gettext catalogs. 72 | # Usually you set "language" from the command line for these cases. 73 | language = None 74 | 75 | # List of patterns, relative to source directory, that match files and 76 | # directories to ignore when looking for source files. 77 | # This patterns also effect to html_static_path and html_extra_path 78 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 79 | 80 | # The name of the Pygments (syntax highlighting) style to use. 81 | pygments_style = 'sphinx' 82 | 83 | # If true, `todo` and `todoList` produce output, else they produce nothing. 
84 | todo_include_todos = False 85 | 86 | 87 | # -- Options for HTML output ---------------------------------------------- 88 | 89 | # The theme to use for HTML and HTML Help pages. See the documentation for 90 | # a list of builtin themes. 91 | # 92 | html_theme = 'alabaster' 93 | 94 | # Theme options are theme-specific and customize the look and feel of a theme 95 | # further. For a list of options available for each theme, see the 96 | # documentation. 97 | # 98 | # html_theme_options = {} 99 | 100 | # Add any paths that contain custom static files (such as style sheets) here, 101 | # relative to this directory. They are copied after the builtin static files, 102 | # so a file named "default.css" will overwrite the builtin "default.css". 103 | html_static_path = ['_static'] 104 | 105 | 106 | # -- Options for HTMLHelp output ------------------------------------------ 107 | 108 | # Output file base name for HTML help builder. 109 | htmlhelp_basename = 'gspread-dataframedoc' 110 | 111 | 112 | # -- Options for LaTeX output --------------------------------------------- 113 | 114 | latex_elements = { 115 | # The paper size ('letterpaper' or 'a4paper'). 116 | # 117 | # 'papersize': 'letterpaper', 118 | 119 | # The font size ('10pt', '11pt' or '12pt'). 120 | # 121 | # 'pointsize': '10pt', 122 | 123 | # Additional stuff for the LaTeX preamble. 124 | # 125 | # 'preamble': '', 126 | 127 | # Latex figure (float) alignment 128 | # 129 | # 'figure_align': 'htbp', 130 | } 131 | 132 | # Grouping the document tree into LaTeX files. List of tuples 133 | # (source start file, target name, title, 134 | # author, documentclass [howto, manual, or own class]). 135 | latex_documents = [ 136 | (master_doc, 'gspread-dataframe.tex', 'gspread-dataframe Documentation', 137 | 'Robin Thomas', 'manual'), 138 | ] 139 | 140 | 141 | # -- Options for manual page output --------------------------------------- 142 | 143 | # One entry per manual page. List of tuples 144 | # (source start file, name, description, authors, manual section). 145 | man_pages = [ 146 | (master_doc, 'gspread-dataframe', 'gspread-dataframe Documentation', 147 | [author], 1) 148 | ] 149 | 150 | 151 | # -- Options for Texinfo output ------------------------------------------- 152 | 153 | # Grouping the document tree into Texinfo files. List of tuples 154 | # (source start file, target name, title, author, 155 | # dir menu entry, description, category) 156 | texinfo_documents = [ 157 | (master_doc, 'gspread-dataframe', 'gspread-dataframe Documentation', 158 | author, 'gspread-dataframe', 'Read/write gspread worksheets using pandas DataFrames.', 159 | 'Miscellaneous'), 160 | ] 161 | 162 | 163 | 164 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. gspread-dataframe documentation master file, created by 2 | sphinx-quickstart on Fri Mar 10 22:46:18 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to gspread-dataframe's documentation! 7 | ============================================= 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | .. include:: ../README.rst 14 | 15 | Module Documentation - Version |version| 16 | ---------------------------------------- 17 | 18 | .. 
automodule:: gspread_dataframe
19 |    :members: get_as_dataframe, set_with_dataframe
20 | 
21 | Indices and tables
22 | ==================
23 | 
24 | * :ref:`genindex`
25 | * :ref:`modindex`
26 | * :ref:`search`
27 | 
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 | 
3 | pushd %~dp0
4 | 
5 | REM Command file for Sphinx documentation
6 | 
7 | if "%SPHINXBUILD%" == "" (
8 | 	set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | set SPHINXPROJ=gspread-dataframe
13 | 
14 | if "%1" == "" goto help
15 | 
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | 	echo.
19 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
20 | 	echo.installed, then set the SPHINXBUILD environment variable to point
21 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
22 | 	echo.may add the Sphinx directory to PATH.
23 | 	echo.
24 | 	echo.If you don't have Sphinx installed, grab it from
25 | 	echo.http://sphinx-doc.org/
26 | 	exit /b 1
27 | )
28 | 
29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
30 | goto end
31 | 
32 | :help
33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
34 | 
35 | :end
36 | popd
37 | 
--------------------------------------------------------------------------------
/gspread_dataframe.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """
4 | gspread_dataframe
5 | ~~~~~~~~~~~~~~~~~
6 | 
7 | This module contains functions to retrieve a gspread worksheet as a
8 | `pandas.DataFrame`, and to set the contents of a worksheet
9 | using a `pandas.DataFrame`. To use these functions, have
10 | Pandas 0.24.0 or greater installed.
11 | """
12 | from gspread.utils import fill_gaps
13 | from gspread import Cell
14 | import pandas as pd
15 | import numpy as np
16 | from pandas.io.parsers import TextParser
17 | import logging
18 | import re
19 | from numbers import Real
20 | 
21 | # ``defaultdict`` is provided by ``collections``, not ``collections.abc``,
22 | # so it is imported directly here.
23 | from collections import defaultdict
24 | 
25 | # This package is Python 3 only, so ``zip_longest`` is imported
26 | # directly, with no Python 2 ``izip_longest`` fallback.
27 | from itertools import chain, zip_longest
28 | 
29 | 
30 | logger = logging.getLogger(__name__)
31 | 
32 | __all__ = ("set_with_dataframe", "get_as_dataframe")
33 | 
34 | WORKSHEET_MAX_CELL_COUNT = 10000000
35 | 
36 | UNNAMED_COLUMN_NAME_PATTERN = re.compile(r'^Unnamed:\s\d+(?:_level_\d+)?$')
37 | 
38 | def _escaped_string(value, string_escaping):
39 |     if value in (None, ""):
40 |         return ""
41 |     if string_escaping == "default":
42 |         if value.startswith("'"):
43 |             return "'%s" % value
44 |     elif string_escaping == "off":
45 |         return value
46 |     elif string_escaping == "full":
47 |         return "'%s" % value
48 |     elif callable(string_escaping):
49 |         if string_escaping(value):
50 |             return "'%s" % value
51 |     else:
52 |         raise ValueError(
53 |             "string_escaping parameter must be one of: "
54 |             "'default', 'off', 'full', any callable taking one parameter"
55 |         )
56 |     return value
57 | 
58 | 
59 | def _cellrepr(value, allow_formulas, string_escaping):
60 |     """
61 |     Get a string representation of a dataframe value.
62 | 
63 |     :param value: the value to represent
64 |     :param allow_formulas: if True, allow values starting with '='
65 |         to be interpreted as formulas; otherwise, escape
66 |         them with an apostrophe to avoid formula interpretation.
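    :param string_escaping: the string-escaping mode or callable; accepts
        the same values as the ``string_escaping`` parameter of
        ``set_with_dataframe``. As an illustration, with
        ``allow_formulas=False`` the input ``=SUM(A1:A2)`` is returned as
        ``'=SUM(A1:A2)``, which Sheets stores as a text literal rather
        than a formula.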
67 | """ 68 | if pd.isnull(value) is True: 69 | return "" 70 | if isinstance(value, Real): 71 | return value 72 | if not isinstance(value, str): 73 | value = str(value) 74 | 75 | if (not allow_formulas) and value.startswith("="): 76 | return "'%s" % value 77 | else: 78 | return _escaped_string(value, string_escaping) 79 | 80 | 81 | def _resize_to_minimum(worksheet, rows=None, cols=None): 82 | """ 83 | Resize the worksheet to guarantee a minimum size, either in rows, 84 | or columns, or both. 85 | 86 | Both rows and cols are optional. 87 | """ 88 | current_rows, current_cols = (worksheet.row_count, worksheet.col_count) 89 | desired_rows, desired_cols = (rows, cols) 90 | if desired_rows is not None and desired_rows <= current_rows: 91 | desired_rows = current_rows 92 | if desired_cols is not None and desired_cols <= current_cols: 93 | desired_cols = current_cols 94 | resize_cols_first = False 95 | if desired_rows is not None and desired_cols is not None: 96 | # special case: if desired sheet size now > cell limit for sheet, 97 | # resize to exactly rows x cols, which in certain cases will 98 | # allow worksheet to stay within cell limit. 99 | if desired_rows * desired_cols > WORKSHEET_MAX_CELL_COUNT: 100 | desired_rows, desired_cols = (rows, cols) 101 | 102 | # Large increase that requires exact re-sizing to avoid exceeding 103 | # cell limit might be, for example, 1000000 rows and 2 columns, 104 | # for a worksheet that currently has 100 rows and 26 columns.. 105 | # The sheets API, however, applies new rowCount first, then 106 | # checks against cell count limit before applying new colCount! 107 | # In the above case, applying new rowCount produces 26 million 108 | # cells, the limit is exceeded, and API aborts the change and 109 | # returns a 400 response. 110 | # So to avoid a 400 response, we must in these cases have 111 | # _resize_to_minimum call resize twice, first with the value 112 | # that will reduce cell count and second with the value that 113 | # will increase cell count. 114 | # We don't seem to need to address the reversed case, where 115 | # columnCount is applied first, since Sheets API seems to apply 116 | # rowCount first in all cases. There is test coverage of this 117 | # reversed case, to guard against Sheets API changes in future. 
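        # A worked example of the guard below, using the numbers from the
        # comment above: with current shape (100, 26) and desired shape
        # (1000000, 2), applying rowCount first would briefly imply
        # 1000000 * 26 = 26,000,000 cells, which exceeds
        # WORKSHEET_MAX_CELL_COUNT (10,000,000), so the shrink to 2
        # columns must be sent as its own resize call first.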
118 | if ( 119 | cols is not None and 120 | cols < current_cols and 121 | desired_rows * current_cols > WORKSHEET_MAX_CELL_COUNT 122 | ): 123 | resize_cols_first = True 124 | 125 | if desired_cols is not None or desired_rows is not None: 126 | if resize_cols_first: 127 | worksheet.resize(cols=desired_cols) 128 | worksheet.resize(rows=desired_rows) 129 | else: 130 | worksheet.resize(desired_rows, desired_cols) 131 | 132 | 133 | def _quote_worksheet_title(title): 134 | return "'" + title.replace("'", "''") + "'" 135 | 136 | 137 | def _get_all_values(worksheet, evaluate_formulas): 138 | data = worksheet.spreadsheet.values_get( 139 | _quote_worksheet_title(worksheet.title), 140 | params={ 141 | "valueRenderOption": ( 142 | "UNFORMATTED_VALUE" if evaluate_formulas else "FORMULA" 143 | ), 144 | "dateTimeRenderOption": "FORMATTED_STRING", 145 | }, 146 | ) 147 | (row_offset, column_offset) = (1, 1) 148 | (last_row, last_column) = (worksheet.row_count, worksheet.col_count) 149 | values = data.get("values", []) 150 | 151 | rect_values = fill_gaps( 152 | values, 153 | rows=last_row - row_offset + 1, 154 | cols=last_column - column_offset + 1, 155 | ) 156 | 157 | cells = [ 158 | Cell(row=i + row_offset, col=j + column_offset, value=value) 159 | for i, row in enumerate(rect_values) 160 | for j, value in enumerate(row) 161 | ] 162 | 163 | # defaultdicts fill in gaps for empty rows/cells not returned by gdocs 164 | rows = defaultdict(lambda: defaultdict(str)) 165 | for cell in cells: 166 | row = rows.setdefault(int(cell.row), defaultdict(str)) 167 | row[cell.col] = cell.value 168 | 169 | if not rows: 170 | return [] 171 | 172 | all_row_keys = chain.from_iterable(row.keys() for row in rows.values()) 173 | rect_cols = range(1, max(all_row_keys) + 1) 174 | rect_rows = range(1, max(rows.keys()) + 1) 175 | 176 | return [[rows[i][j] for j in rect_cols] for i in rect_rows] 177 | 178 | 179 | def get_as_dataframe(worksheet, evaluate_formulas=False, drop_empty_rows=True, drop_empty_columns=True, **options): 180 | r""" 181 | Returns the worksheet contents as a DataFrame. 182 | 183 | :param worksheet: the worksheet. 184 | :param evaluate_formulas: if True, get the value of a cell after 185 | formula evaluation; otherwise get the formula itself if present. 186 | Defaults to False. 187 | :param drop_empty_rows: if True, drop any rows from the DataFrame that have 188 | only empty (NaN) values. Defaults to True. 189 | :param drop_empty_columns: if True, drop any columns from the DataFrame 190 | that have only empty (NaN) values and have no column name 191 | (that is, no header value). Named columns (those with a header value) 192 | that are otherwise empty are retained. Defaults to True. 193 | :param \*\*options: all the options for pandas.io.parsers.TextParser, 194 | according to the version of pandas that is installed. 195 | (Note: TextParser supports only the default 'python' parser engine, 196 | not the C engine.) 197 | :returns: pandas.DataFrame 198 | """ 199 | all_values = _get_all_values(worksheet, evaluate_formulas) 200 | df = TextParser(all_values, **options).read(options.get("nrows", None)) 201 | 202 | # if squeeze=True option was used, df may be a Series. 203 | # There is special Series logic for our two drop options. 204 | if isinstance(df, pd.Series): 205 | if drop_empty_rows: 206 | df = df.dropna() 207 | # if this Series is empty and unnamed, it's droppable, 208 | # and we should return an empty DataFrame instead. 
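        # (A column with a blank header cell is given a pandas placeholder
        # name such as "Unnamed: 0", which UNNAMED_COLUMN_NAME_PATTERN
        # matches.)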
209 | if drop_empty_columns and df.empty and (not df.name or UNNAMED_COLUMN_NAME_PATTERN.search(df.name)): 210 | df = pd.DataFrame() 211 | 212 | # Else df is a DataFrame. 213 | else: 214 | if drop_empty_rows: 215 | df = df.dropna(how='all', axis=0) 216 | _reconstruct_if_multi_index(df, 'index') 217 | if drop_empty_columns: 218 | labels_to_drop = _find_labels_of_empty_unnamed_columns(df) 219 | if labels_to_drop: 220 | df = df.drop(labels=labels_to_drop, axis=1) 221 | _reconstruct_if_multi_index(df, 'columns') 222 | 223 | return df 224 | 225 | def _reconstruct_if_multi_index(df, attrname): 226 | # pandas, even as of 2.2.2, has a bug where a MultiIndex 227 | # will simply preserve the dropped labels in each level 228 | # when asked by .levels and .levshape, although the dropped 229 | # labels won't appear in to_numpy(). We must therefore reconstruct 230 | # the MultiIndex via to_numpy() -> .from_tuples, and then 231 | # assign it to the dataframe's appropriate attribute. 232 | index = getattr(df, attrname) 233 | if not isinstance(index, pd.MultiIndex): 234 | return 235 | reconstructed = pd.MultiIndex.from_tuples(index.to_numpy()) 236 | setattr(df, attrname, reconstructed) 237 | 238 | 239 | def _label_represents_unnamed_column(label): 240 | if isinstance(label, str) and UNNAMED_COLUMN_NAME_PATTERN.search(label): 241 | return True 242 | # unnamed columns will have an int64 label if header=False was used. 243 | elif isinstance(label, np.int64): 244 | return True 245 | elif isinstance(label, tuple): 246 | return all([_label_represents_unnamed_column(item) for item in label]) 247 | else: 248 | return False 249 | 250 | def _find_labels_of_empty_unnamed_columns(df): 251 | return [ 252 | label for label 253 | in df.columns.to_numpy() 254 | if _label_represents_unnamed_column(label) and df[label].isna().all() 255 | ] 256 | 257 | def _determine_level_count(index): 258 | if hasattr(index, "levshape"): 259 | return len(index.levshape) 260 | return 1 261 | 262 | def _index_names(index): 263 | names = [] 264 | if hasattr(index, "names"): 265 | names = [ i if i != None else "" for i in index.names ] 266 | elif index.name not in (None, ""): 267 | names = [index.name] 268 | if not any([n not in (None, "") for n in names]): 269 | names = [] 270 | return names 271 | 272 | def set_with_dataframe( 273 | worksheet, 274 | dataframe, 275 | row=1, 276 | col=1, 277 | include_index=False, 278 | include_column_header=True, 279 | resize=False, 280 | allow_formulas=True, 281 | string_escaping="default", 282 | ): 283 | """ 284 | Sets the values of a given DataFrame, anchoring its upper-left corner 285 | at (row, col). (Default is row 1, column 1.) 286 | 287 | :param worksheet: the gspread worksheet to set with content of DataFrame. 288 | :param dataframe: the DataFrame. 289 | :param row: Row at which to start writing the DataFrame. Default is 1. 290 | :param col: Column at which to start writing the DataFrame. Default is 1. 291 | :param include_index: if True, include the DataFrame's index as an 292 | additional column. Defaults to False. 293 | :param include_column_header: if True, add a header row or rows before data 294 | with column names. (If include_index is True, the index's name(s) 295 | will be used as its columns' headers.) Defaults to True. 296 | :param resize: if True, changes the worksheet's size to match the shape 297 | of the provided DataFrame. If False, worksheet will only be 298 | resized as necessary to contain the DataFrame contents. 299 | Defaults to False. 
300 | :param allow_formulas: if True, interprets `=foo` as a formula in 301 | cell values; otherwise all text beginning with `=` is escaped 302 | to avoid its interpretation as a formula. Defaults to True. 303 | :param string_escaping: determines when string values are escaped as text 304 | literals (by adding an initial `'` character) in requests to 305 | Sheets API. 306 | Four parameter values are accepted: 307 | - 'default': only escape strings starting with a literal `'` 308 | character 309 | - 'off': escape nothing; cell values starting with a `'` will be 310 | interpreted by sheets as an escape character followed by 311 | a text literal. 312 | - 'full': escape all string values 313 | - any callable object: will be called once for each cell's string 314 | value; if return value is true, string will be escaped 315 | with preceding `'` (A useful technique is to pass a 316 | regular expression bound method, e.g. 317 | `re.compile(r'^my_regex_.*$').search`.) 318 | The escaping done when allow_formulas=False (escaping string values 319 | beginning with `=`) is unaffected by this parameter's value. 320 | Default value is `'default'`. 321 | """ 322 | # x_pos, y_pos refers to the position of data rows only, 323 | # excluding any header rows in the google sheet. 324 | # If header-related params are True, the values are adjusted 325 | # to allow space for the headers. 326 | y, x = dataframe.shape 327 | index_col_size = 0 328 | column_header_size = 0 329 | index_names = _index_names(dataframe.index) 330 | column_names_not_labels = _index_names(dataframe.columns) 331 | if include_index: 332 | index_col_size = _determine_level_count(dataframe.index) 333 | x += index_col_size 334 | if include_column_header: 335 | column_header_size = _determine_level_count(dataframe.columns) 336 | y += column_header_size 337 | # if included index has name(s) it needs its own header row to accommodate columns' index names 338 | if column_header_size > 1 and include_index and index_names: 339 | y += 1 340 | if row > 1: 341 | y += row - 1 342 | if col > 1: 343 | x += col - 1 344 | if resize: 345 | worksheet.resize(y, x) 346 | else: 347 | _resize_to_minimum(worksheet, y, x) 348 | 349 | updates = [] 350 | 351 | if include_column_header: 352 | elts = list(dataframe.columns) 353 | # if columns object is multi-index, it will span multiple rows 354 | extra_header_row = None 355 | if column_header_size > 1: 356 | elts = list(dataframe.columns) 357 | if include_index: 358 | extra = tuple(column_names_not_labels) \ 359 | if column_names_not_labels \ 360 | else ("",) * column_header_size 361 | extra = [ extra ] 362 | if index_col_size > 1: 363 | extra = extra + [ ("",) * column_header_size ] * (index_col_size - 1) 364 | elts = extra + elts 365 | # if index has names, they need their own header row 366 | if index_names: 367 | extra_header_row = list(index_names) + [ "" ] * len(dataframe.columns) 368 | for level in range(0, column_header_size): 369 | for idx, tup in enumerate(elts): 370 | updates.append( 371 | ( 372 | row, 373 | col + idx, 374 | _cellrepr( 375 | tup[level], allow_formulas, string_escaping 376 | ), 377 | ) 378 | ) 379 | row += 1 380 | if extra_header_row: 381 | for idx, val in enumerate(extra_header_row): 382 | updates.append( 383 | ( 384 | row, 385 | col + idx, 386 | _cellrepr( 387 | val, allow_formulas, string_escaping 388 | ), 389 | ) 390 | ) 391 | row += 1 392 | 393 | else: 394 | # columns object is not multi-index, columns object's "names" 395 | # can not be written anywhere in header and be parseable to 
pandas. 396 | elts = list(dataframe.columns) 397 | if include_index: 398 | # if index has names, they do NOT need their own header row 399 | if index_names: 400 | elts = index_names + elts 401 | else: 402 | elts = ([""] * index_col_size) + elts 403 | for idx, val in enumerate(elts): 404 | updates.append( 405 | ( 406 | row, 407 | col + idx, 408 | _cellrepr(val, allow_formulas, string_escaping), 409 | ) 410 | ) 411 | row += 1 412 | 413 | values = [] 414 | for value_row, index_value in zip_longest( 415 | dataframe.to_numpy('object'), dataframe.index.to_numpy('object') 416 | ): 417 | if include_index: 418 | if not isinstance(index_value, (list, tuple)): 419 | index_value = [index_value] 420 | value_row = list(index_value) + list(value_row) 421 | values.append(value_row) 422 | for y_idx, value_row in enumerate(values): 423 | for x_idx, cell_value in enumerate(value_row): 424 | updates.append( 425 | ( 426 | y_idx + row, 427 | x_idx + col, 428 | _cellrepr(cell_value, allow_formulas, string_escaping), 429 | ) 430 | ) 431 | 432 | if not updates: 433 | logger.debug("No updates to perform on worksheet.") 434 | return 435 | 436 | cells_to_update = [Cell(row, col, value) for row, col, value in updates] 437 | logger.debug("%d cell updates to send", len(cells_to_update)) 438 | 439 | resp = worksheet.update_cells( 440 | cells_to_update, value_input_option="USER_ENTERED" 441 | ) 442 | logger.debug("Cell update response: %s", resp) 443 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "gspread-dataframe" 7 | dynamic = ["version"] 8 | description = "Read/write gspread worksheets using pandas DataFrames" 9 | readme = "README.rst" 10 | requires-python = ">=3.0" 11 | license = { file = "LICENSE" } 12 | keywords = ['spreadsheets', 'google-spreadsheets', 'pandas', 'dataframe'] 13 | authors = [{ name = "Robin Thomas", email = "rthomas900@gmail.com" }] 14 | maintainers = [{ name = "Robin Thomas", email = "rthomas900@gmail.com" }] 15 | 16 | classifiers = [ 17 | "Development Status :: 5 - Production/Stable", 18 | "Intended Audience :: Developers", 19 | "Intended Audience :: Science/Research", 20 | "Topic :: Office/Business :: Financial :: Spreadsheet", 21 | "Topic :: Software Development :: Libraries :: Python Modules", 22 | "License :: OSI Approved :: MIT License", 23 | "Programming Language :: Python :: 3" 24 | ] 25 | 26 | dependencies = ["gspread>=3.0.0", "pandas>=0.24.0"] 27 | 28 | [project.optional-dependencies] 29 | dev = [ 30 | "gitchangelog", 31 | "Sphinx", 32 | "Sphinx-PyPI-upload3", 33 | "twine", 34 | "pytest", 35 | "oauth2client" 36 | ] 37 | 38 | test = [ 39 | "pytest", 40 | "oauth2client", 41 | "pandas", 42 | "tox" 43 | ] 44 | 45 | [project.urls] 46 | "Homepage" = "https://github.com/robin900/gspread-dataframe" 47 | "Bug Reports" = "https://github.com/robin900/gspread-dataframe/issues" 48 | "Source" = "https://github.com/robin900/gspread-dataframe/" 49 | 50 | [tool.setuptools.dynamic] 51 | version = {file = "VERSION"} 52 | 53 | [tool.coverage.report] 54 | fail_under = 90 55 | show_missing = true 56 | exclude_lines = [ 57 | 'pragma: no cover', 58 | '\.\.\.', 59 | 'if TYPE_CHECKING:', 60 | "if __name__ == '__main__':", 61 | ] 62 | 63 | -------------------------------------------------------------------------------- /tests/__init__.py: 
-------------------------------------------------------------------------------- 1 | from . import gspread_dataframe_test 2 | from . import gspread_dataframe_integration 3 | -------------------------------------------------------------------------------- /tests/cell_list.json: -------------------------------------------------------------------------------- 1 | [ 2 | ["Thingy", "Syntax", "Numeric Column", "Formula Column", "Date Column", "Values are...", "Selection", "Label(s) referencible in chart title", "Dialect-specific implementations", "Notes", "Unnamed: 10"], 3 | ["filter", "[expr=foo]", "[1, 2, 3]", "=R[0]C[-1]*2", "2017-03-04", "literals", "multiple", "no", "3e50", "", ""], 4 | ["'+", "[expr=daterange]", 2.01, "=R[0]C[-1]*2", "2017-03-05", "sometimes-parameterized SQL expressions brincolín", "single, REQUIRED", "yes as [daterange]", "yes probably", "static SQL but uses account's timezone setting to determine what SQL intervals to emit; Custom Date Range selection exposes two date pickers and provides the values. This is the only case I can see where more than one parameter is needed. Also, start date or end date can be empty, which should remove its clauses entirely. If not selected or reset, defaults to \"All Dates\" which appears as \"all time\" in chart titles. (UI bug fails to show All Dates as selected when filters are reset.)", ""], 5 | ["aggregation", "[expr:aggregation]", 2.907, "=R[0]C[-1]*2", "2017-03-06", "parameterized SQL expressions", "single, REQUIRED", "yes as [aggregation]", "yes probably", "one could argue this is a map of { label : formatter } of which you can pick 1..1 entry to apply. If not selected or reset, defaults to Daily (though UI bug does not mark Daily in aggregation area).", ""], 6 | ["snippet", "[foo]", 3.804, "=R[0]C[-1]*2", "2017-03-07", "static SQL expressions", "n/a", "n/a", "no", "", ""], 7 | ["formatter", "[expr:foo]", 4.701, "=R[0]C[-1]*2", "2017-03-08", "parameterized SQL expressions", "n/a", "n/a", "yes", "", ""], 8 | ["automatic join", "[foo+bar]", 5.598, "=R[0]C[-1]*2", "2017-03-09", "SQL expression with two parameters!", "n/a", "n/a", "???", "uses key name conventions as rules to determine join columns", ""], 9 | ["Proposed Thingy", "Syntax", 6.495, "=R[0]C[-1]*2", "2017-03-10", "Values are...", "Selection", "Label(s) referencible in chart title", "Dialect-specific implementations", "Notes", ""], 10 | ["parameterized snippet", "[expr::foo]", 7.392, "=R[0]C[-1]*2", "2017-03-11", "parameterized SQL expressions", "n/a", "n/a", "", "Syntax not decided yet; unique among macro types in that it can evaluate all other macro types when rendering", ""], 11 | ["filter as SQL expression", "[expr=foo]", 8.289, "=R[0]C[-1]*2", "2017-03-12", "static SQL expression", "multiple", "no", "no", "map of { label : expression } of which you can pick 0..N-1. 
their expressions just get ORed together.", ""], 12 | ["", "", "", "", "", "", "", "", "", "", ""] 13 | ] 14 | -------------------------------------------------------------------------------- /tests/gspread_dataframe_integration.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import re 5 | import random 6 | import unittest 7 | import itertools 8 | import uuid 9 | import json 10 | import logging 11 | import sys 12 | from random import uniform 13 | from datetime import datetime, date 14 | from gspread.exceptions import APIError 15 | import pandas as pd 16 | from gspread_dataframe import \ 17 | get_as_dataframe, \ 18 | set_with_dataframe, \ 19 | _resize_to_minimum 20 | 21 | try: 22 | import ConfigParser 23 | except ImportError: 24 | import configparser as ConfigParser 25 | 26 | from oauth2client.service_account import ServiceAccountCredentials 27 | 28 | import gspread 29 | from gspread import utils 30 | 31 | try: 32 | unicode 33 | except NameError: 34 | basestring = unicode = str 35 | 36 | logger = logging.getLogger(__name__) 37 | logger.setLevel(logging.INFO) 38 | 39 | CONFIG_FILENAME = os.path.join(os.path.dirname(__file__), "tests.config") 40 | CREDS_FILENAME = os.path.join(os.path.dirname(__file__), "creds.json") 41 | SCOPE = [ 42 | "https://spreadsheets.google.com/feeds", 43 | "https://www.googleapis.com/auth/drive.file", 44 | ] 45 | 46 | I18N_STR = u"Iñtërnâtiônàlizætiøn" # .encode('utf8') 47 | 48 | CELL_LIST_FILENAME = os.path.join(os.path.dirname(__file__), "cell_list.json") 49 | 50 | STRING_ESCAPING_PATTERN = re.compile(r"(?:'\+|3e50)").match 51 | 52 | TEST_WORKSHEET_NAME = "ZZZ1" # just happens to be a valid cell reference (column ZZZ, row 1) 53 | 54 | def read_config(filename): 55 | config = ConfigParser.ConfigParser() 56 | with open(filename) as fp: 57 | if hasattr(config, "read_file"): 58 | read_func = config.read_file 59 | else: 60 | read_func = config.readfp 61 | read_func(fp) 62 | return config 63 | 64 | 65 | def read_credentials(filename): 66 | return ServiceAccountCredentials.from_json_keyfile_name(filename, SCOPE) 67 | 68 | 69 | def gen_value(prefix=None): 70 | if prefix: 71 | return u"%s %s" % (prefix, gen_value()) 72 | else: 73 | return unicode(uuid.uuid4()) 74 | 75 | 76 | class GspreadDataframeTest(unittest.TestCase): 77 | config = None 78 | gc = None 79 | 80 | @classmethod 81 | def setUpClass(cls): 82 | try: 83 | cls.config = read_config(CONFIG_FILENAME) 84 | credentials = read_credentials(CREDS_FILENAME) 85 | cls.gc = gspread.authorize(credentials) 86 | except IOError as e: 87 | msg = "Can't find %s for reading test configuration. 
" 88 | raise Exception(msg % e.filename) 89 | 90 | def setUp(self): 91 | if self.__class__.gc is None: 92 | self.__class__.setUpClass() 93 | self.assertTrue(isinstance(self.gc, gspread.client.Client)) 94 | 95 | 96 | class WorksheetTest(GspreadDataframeTest): 97 | """Test for gspread_dataframe using a gspread.Worksheet.""" 98 | 99 | spreadsheet = None 100 | 101 | @classmethod 102 | def setUpClass(cls): 103 | super(WorksheetTest, cls).setUpClass() 104 | ss_id = cls.config.get("Spreadsheet", "id") 105 | cls.spreadsheet = cls.gc.open_by_key(ss_id) 106 | cls.spreadsheet.batch_update( 107 | { 108 | "requests": [ 109 | { 110 | "updateSpreadsheetProperties": { 111 | "properties": {"locale": "en_US"}, 112 | "fields": "locale", 113 | } 114 | } 115 | ] 116 | } 117 | ) 118 | try: 119 | test_sheet = cls.spreadsheet.worksheet(TEST_WORKSHEET_NAME) 120 | if test_sheet: 121 | # somehow left over from interrupted test, remove. 122 | cls.spreadsheet.del_worksheet(test_sheet) 123 | except gspread.exceptions.WorksheetNotFound: 124 | pass # expected 125 | 126 | def setUp(self): 127 | super(WorksheetTest, self).setUp() 128 | self.streamHandler = logger.addHandler(logging.StreamHandler(sys.stdout)) 129 | if self.__class__.spreadsheet is None: 130 | self.__class__.setUpClass() 131 | self.sheet = self.spreadsheet.add_worksheet(TEST_WORKSHEET_NAME, 200, 20) 132 | self.__class__.spreadsheet.batch_update( 133 | { 134 | "requests": [ 135 | { 136 | "updateSpreadsheetProperties": { 137 | "properties": {"locale": "en_US"}, 138 | "fields": "locale", 139 | } 140 | } 141 | ] 142 | } 143 | ) 144 | 145 | def tearDown(self): 146 | self.spreadsheet.del_worksheet(self.sheet) 147 | logger.removeHandler(self.streamHandler) 148 | 149 | def test_roundtrip(self): 150 | # populate sheet with cell list values 151 | rows = None 152 | with open(CELL_LIST_FILENAME) as f: 153 | rows = json.load(f) 154 | # drop empty column, drop empty row 155 | rows = [ r[:-1] for r in rows ][:-1] 156 | 157 | cell_list = self.sheet.range("A1:J10") 158 | for cell, value in zip(cell_list, itertools.chain(*rows)): 159 | cell.value = value 160 | self.sheet.update_cells(cell_list) 161 | 162 | df = get_as_dataframe(self.sheet) 163 | set_with_dataframe( 164 | self.sheet, df, string_escaping=STRING_ESCAPING_PATTERN 165 | ) 166 | df2 = get_as_dataframe(self.sheet) 167 | self.assertTrue(df.equals(df2)) 168 | 169 | def test_numeric_values_with_spanish_locale(self): 170 | # set locale! 
171 | self.__class__.spreadsheet.batch_update( 172 | { 173 | "requests": [ 174 | { 175 | "updateSpreadsheetProperties": { 176 | "properties": {"locale": "es_ES"}, 177 | "fields": "locale", 178 | } 179 | } 180 | ] 181 | } 182 | ) 183 | # populate sheet with cell list values 184 | rows = None 185 | with open(CELL_LIST_FILENAME) as f: 186 | rows = json.load(f) 187 | # drop empty column and empty row 188 | rows = [ r[:-1] for r in rows ][:-1] 189 | 190 | cell_list = self.sheet.range("A1:J10") 191 | for cell, value in zip(cell_list, itertools.chain(*rows)): 192 | cell.value = value 193 | self.sheet.update_cells(cell_list) 194 | 195 | df = get_as_dataframe(self.sheet) 196 | set_with_dataframe( 197 | self.sheet, df, string_escaping=STRING_ESCAPING_PATTERN 198 | ) 199 | df2 = get_as_dataframe(self.sheet) 200 | # check that some numeric values in numeric column are intact 201 | self.assertEqual(3.804, df2["Numeric Column"][3]) 202 | self.assertTrue(df.equals(df2)) 203 | 204 | def test_nrows(self): 205 | # populate sheet with cell list values 206 | rows = None 207 | with open(CELL_LIST_FILENAME) as f: 208 | rows = json.load(f) 209 | # drop empty column and empty row 210 | rows = [ r[:-1] for r in rows ][:-1] 211 | 212 | cell_list = self.sheet.range("A1:J10") 213 | for cell, value in zip(cell_list, itertools.chain(*rows)): 214 | cell.value = value 215 | self.sheet.update_cells(cell_list) 216 | 217 | for nrows in (9, 6, 0): 218 | df = get_as_dataframe(self.sheet, nrows=nrows) 219 | self.assertEqual(nrows, len(df)) 220 | 221 | def test_resize_to_minimum_large(self): 222 | self.sheet.resize(100, 26) 223 | self.sheet = self.sheet.spreadsheet.worksheet(self.sheet.title) 224 | # Large increase that requires exact re-sizing to avoid exceeding 225 | # cell limit: this should result in 1000000 rows and 2 columns. 226 | # The sheets API, however, applies new rowCount first, then 227 | # checks against cell count limit before applying new colCount! 228 | # So to avoid a 400 response, we must in these cases have 229 | # _resize_to_minimum call resize twice, first with the value 230 | # that will reduce cell count and second with the value that 231 | # will increase cell count. 232 | _resize_to_minimum(self.sheet, 1000000, 2) 233 | self.sheet = self.sheet.spreadsheet.worksheet(self.sheet.title) 234 | self.assertEqual(1000000, self.sheet.row_count) 235 | self.assertEqual(2, self.sheet.col_count) 236 | # let's test the other case, where if columnCount were applied 237 | # first the limit would be exceeded. 
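        # (At this point the sheet is 1,000,000 rows x 2 columns, i.e.
        # 2,000,000 cells; applying colCount=26 before rowCount=10,000
        # would momentarily mean 1,000,000 x 26 = 26,000,000 cells, over
        # the API's cell limit of 10,000,000 at the time of writing, so
        # rows must be reduced first.)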
238 |         _resize_to_minimum(self.sheet, 10000, 26)
239 |         self.sheet = self.sheet.spreadsheet.worksheet(self.sheet.title)
240 |         self.assertEqual(10000, self.sheet.row_count)
241 |         self.assertEqual(26, self.sheet.col_count)
242 | 
243 |     def test_resize_to_minimum(self):
244 |         self.sheet.resize(100, 26)
245 |         self.sheet = self.sheet.spreadsheet.worksheet(self.sheet.title)
246 |         # min rows < current, no change
247 |         _resize_to_minimum(self.sheet, 20, None)
248 |         self.sheet = self.sheet.spreadsheet.worksheet(self.sheet.title)
249 |         self.assertEqual(100, self.sheet.row_count)
250 |         self.assertEqual(26, self.sheet.col_count)
251 |         # min cols < current, no change
252 |         _resize_to_minimum(self.sheet, None, 2)
253 |         self.sheet = self.sheet.spreadsheet.worksheet(self.sheet.title)
254 |         self.assertEqual(100, self.sheet.row_count)
255 |         self.assertEqual(26, self.sheet.col_count)
256 |         # increase rows
257 |         _resize_to_minimum(self.sheet, 200, None)
258 |         self.sheet = self.sheet.spreadsheet.worksheet(self.sheet.title)
259 |         self.assertEqual(200, self.sheet.row_count)
260 |         self.assertEqual(26, self.sheet.col_count)
261 |         # increase cols
262 |         _resize_to_minimum(self.sheet, None, 27)
263 |         self.sheet = self.sheet.spreadsheet.worksheet(self.sheet.title)
264 |         self.assertEqual(200, self.sheet.row_count)
265 |         self.assertEqual(27, self.sheet.col_count)
266 |         # increase both
267 |         _resize_to_minimum(self.sheet, 201, 28)
268 |         self.sheet = self.sheet.spreadsheet.worksheet(self.sheet.title)
269 |         self.assertEqual(201, self.sheet.row_count)
270 |         self.assertEqual(28, self.sheet.col_count)
271 |         # a large increase that even exact re-sizing cannot keep below the
272 |         # cell limit; this should result in a 400 APIError
273 |         with self.assertRaises(APIError):
274 |             _resize_to_minimum(self.sheet, 1000000, None)
275 | 
276 |     def test_multiindex(self):
277 |         # populate sheet with cell list values
278 |         rows = None
279 |         with open(CELL_LIST_FILENAME) as f:
280 |             rows = json.load(f)
281 |         # drop empty column and empty row
282 |         rows = [ r[:-1] for r in rows ][:-1]
283 |         mi = list(
284 |             pd.MultiIndex.from_product(
285 |                 [["A", "B"], ["one", "two", "three", "four", "five"]]
286 |             )
287 |         )
288 |         column_names = ["Category", "Subcategory"] + rows[0]
289 |         rows = [column_names] + [
290 |             list(index_tup) + row for row, index_tup in zip(rows[1:], mi)
291 |         ]
292 |         cell_list = self.sheet.range("A1:L10")
293 |         for cell, value in zip(cell_list, itertools.chain(*rows)):
294 |             cell.value = value
295 |         self.sheet.update_cells(cell_list)
296 |         self.sheet = self.sheet.spreadsheet.worksheet(self.sheet.title)
297 |         df = get_as_dataframe(self.sheet, index_col=[0, 1])
298 |         set_with_dataframe(
299 |             self.sheet,
300 |             df,
301 |             resize=True,
302 |             include_index=True,
303 |             string_escaping=STRING_ESCAPING_PATTERN,
304 |         )
305 |         # must re-fetch the worksheet to refresh its size attributes
306 |         self.sheet = self.sheet.spreadsheet.worksheet(self.sheet.title)
307 |         df2 = get_as_dataframe(self.sheet, index_col=[0, 1])
308 |         self.assertTrue(df.equals(df2))
309 | 
310 |     def test_multiindex_column_header(self):
311 |         # populate sheet with cell list values
312 |         rows = None
313 |         with open(CELL_LIST_FILENAME) as f:
314 |             rows = json.load(f)
315 |         # drop empty column, drop empty row
316 |         rows = [ r[:-1] for r in rows ][:-1]
317 |         column_headers = [
318 |             "SQL",
319 |             "SQL",
320 |             "SQL",
321 |             "SQL",
322 |             "SQL",
323 |             "Misc",
324 |             "Misc",
325 |             "Misc",
326 |             "Misc",
327 |             "Misc",
328 |         ]
329 |         rows = [column_headers] + rows
330 |         cell_list = self.sheet.range("A1:J11")
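        # The extra header row prepended above gives the sheet a two-level
        # column header (hence range A1:J11), which is read back below
        # with header=[0, 1].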
331 |         for cell, value in zip(cell_list, itertools.chain(*rows)):
332 |             cell.value = value
333 |         self.sheet.update_cells(cell_list)
334 |         self.sheet = self.sheet.spreadsheet.worksheet(self.sheet.title)
335 |         df = get_as_dataframe(self.sheet, header=[0, 1])
336 |         self.assertEqual((2, 10), getattr(df.columns, "levshape", None))
337 |         set_with_dataframe(
338 |             self.sheet,
339 |             df,
340 |             string_escaping=STRING_ESCAPING_PATTERN,
341 |         )
342 |         df2 = get_as_dataframe(self.sheet, header=[0, 1])
343 |         self.assertTrue(df.equals(df2))
344 | 
345 |     def test_int64_json_issue35(self):
346 |         df = pd.DataFrame(
347 |             {
348 |                 'a': pd.Series([1, 2, 3], dtype='int64', index=pd.RangeIndex(start=0, stop=3, step=1)),
349 |                 'b': pd.Series([4, 5, 6], dtype='int64', index=pd.RangeIndex(start=0, stop=3, step=1))
350 |             },
351 |             index=pd.RangeIndex(start=0, stop=3, step=1)
352 |         )
353 |         set_with_dataframe(
354 |             self.sheet,
355 |             df,
356 |             resize=True,
357 |             include_index=True
358 |         )
359 |         self.sheet = self.sheet.spreadsheet.worksheet(self.sheet.title)
360 |         df2 = get_as_dataframe(self.sheet, dtype={'a': 'int64', 'b': 'int64'}, index_col=0, header=0)
361 |         self.assertTrue(df.equals(df2))
362 | 
363 |     def test_header_writing_and_parsing(self):
364 |         truth_table = itertools.product(*([[False, True]] * 4))
365 |         for include_index, columns_multilevel, index_has_names, columns_has_names in truth_table:
366 |             data = [[uniform(0, 100000) for i in range(8)] for j in range(20)]
367 |             index_names = ["Category", "Subcategory"] if index_has_names else None
368 |             index = list(
369 |                 itertools.product(
370 |                     ["A", "B", "C", "D"],
371 |                     ["one", "two", "three", "four", "five"],
372 |                 )
373 |             )
374 |             index = pd.MultiIndex.from_tuples(index, names=index_names)
375 |             if not include_index:
376 |                 index = None
377 |             columns = ["Alice", "Bob", "Carol", "Dave", "Ellen", "Fulgencio", "Gina", "Hector"]
378 |             columns = [ ("Helpful" if i < 4 else "Unhelpful", v) for i, v in enumerate(columns) ]
379 |             names = ["Demeanor", "Name"] if columns_has_names else None
380 |             columns = pd.MultiIndex.from_tuples(columns, names=names)
381 |             if not columns_multilevel:
382 |                 columns = columns.droplevel(0)
383 |             df = pd.DataFrame.from_records(data, index=index, columns=columns)
384 |             set_with_dataframe(self.sheet, df, resize=True, include_index=include_index)
385 |             self.sheet = self.sheet.spreadsheet.worksheet(self.sheet.title)
386 |             header_arg = list(range(len(getattr(columns, "levshape", [1]))))
387 |             # if include_index and columns_multilevel and index_has_names, there
388 |             # will be an additional header row
389 |             index_col_arg = list(range(len(getattr(index, "levshape", [1]))))
390 |             df_readback = get_as_dataframe(
391 |                 self.sheet,
392 |                 header=header_arg,
393 |                 index_col=(index_col_arg if include_index else None)
394 |             )
395 |             if not df.equals(df_readback):
396 |                 logger.info(
397 |                     "Testing include_index %s, index_has_names %s, columns_multilevel %s, columns_has_names %s",
398 |                     include_index, index_has_names, columns_multilevel, columns_has_names
399 |                 )
400 |                 logger.info("header=%s, index_col=%s", header_arg, index_col_arg)
401 |                 logger.info("%s", df)
402 |                 logger.info("%s", df.dtypes)
403 |                 logger.info("%s", df_readback)
404 |                 logger.info("%s", df_readback.dtypes)
405 |             self.assertTrue(df.equals(df_readback))
--------------------------------------------------------------------------------
/tests/gspread_dataframe_test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from
.mock_worksheet import ( 3 | MockWorksheet, 4 | CELL_LIST, 5 | CELL_LIST_STRINGIFIED, 6 | CELL_LIST_STRINGIFIED_NO_THINGY, 7 | ) 8 | 9 | from gspread_dataframe import get_as_dataframe, set_with_dataframe 10 | from gspread_dataframe import _escaped_string as escape, _cellrepr as cellrepr 11 | from gspread import Cell 12 | import numpy as np 13 | import pandas as pd 14 | from difflib import SequenceMatcher 15 | 16 | import unittest 17 | 18 | try: 19 | from unittest.mock import Mock, MagicMock 20 | except ImportError: 21 | from mock import Mock, MagicMock 22 | from datetime import datetime 23 | import re 24 | 25 | # Expected results 26 | 27 | COLUMN_NAMES = [ 28 | "Thingy", 29 | "Syntax", 30 | "Numeric Column", 31 | "Formula Column", 32 | "Date Column", 33 | "Values are...", 34 | "Selection", 35 | "Label(s) referencible in chart title", 36 | "Dialect-specific implementations", 37 | "Notes", 38 | ] 39 | 40 | USECOLS_COLUMN_NAMES = [ 41 | "Thingy", 42 | "Numeric Column", 43 | "Formula Column", 44 | "Date Column", 45 | ] 46 | 47 | 48 | # Tests 49 | 50 | 51 | class TestStringEscaping(unittest.TestCase): 52 | CORE_VALUES = ("foo", '"""""', "2015-06-14", "345.60", "+", "=sum(a:a)") 53 | VALUES_WITH_LEADING_APOSTROPHE = ("'foo", "'") 54 | VALUES_NEVER_ESCAPED = ("",) 55 | 56 | def _run_values_for_escape_args( 57 | self, escape_arg, escaped_values, unescaped_values 58 | ): 59 | for value in escaped_values: 60 | self.assertEqual(escape(value, escape_arg), "'" + value) 61 | for value in unescaped_values: 62 | self.assertEqual(escape(value, escape_arg), value) 63 | for value in self.VALUES_NEVER_ESCAPED: 64 | self.assertEqual(escape(value, escape_arg), value) 65 | 66 | def test_default(self): 67 | self._run_values_for_escape_args( 68 | "default", self.VALUES_WITH_LEADING_APOSTROPHE, self.CORE_VALUES 69 | ) 70 | 71 | def test_off(self): 72 | self._run_values_for_escape_args( 73 | "off", (), self.CORE_VALUES + self.VALUES_WITH_LEADING_APOSTROPHE 74 | ) 75 | 76 | def test_full(self): 77 | self._run_values_for_escape_args( 78 | "full", self.CORE_VALUES + self.VALUES_WITH_LEADING_APOSTROPHE, () 79 | ) 80 | 81 | def test_callable(self): 82 | self._run_values_for_escape_args( 83 | lambda x: False, 84 | (), 85 | self.CORE_VALUES + self.VALUES_WITH_LEADING_APOSTROPHE, 86 | ) 87 | self._run_values_for_escape_args( 88 | re.compile(r"@@@@@{200}").match, 89 | (), 90 | self.CORE_VALUES + self.VALUES_WITH_LEADING_APOSTROPHE, 91 | ) 92 | self._run_values_for_escape_args( 93 | lambda x: True, 94 | self.CORE_VALUES + self.VALUES_WITH_LEADING_APOSTROPHE, 95 | (), 96 | ) 97 | self._run_values_for_escape_args( 98 | re.compile(r".*").match, 99 | self.CORE_VALUES + self.VALUES_WITH_LEADING_APOSTROPHE, 100 | (), 101 | ) 102 | 103 | def test_formula_cellrepr_when_no_formulas_allowed(self): 104 | self.assertEqual(cellrepr("=A1", allow_formulas=False, string_escaping="default"), "'=A1") 105 | 106 | 107 | class TestWorksheetReads(unittest.TestCase): 108 | def setUp(self): 109 | self.sheet = MockWorksheet() 110 | 111 | def test_noargs(self): 112 | df = get_as_dataframe(self.sheet) 113 | self.assertEqual(list(df.columns.array), COLUMN_NAMES) 114 | self.assertEqual(len(df.columns), 10) 115 | self.assertEqual(len(df), 9) 116 | self.assertEqual(df.index.name, None) 117 | self.assertEqual(list(df.index.array), list(range(9))) 118 | 119 | def test_drop_empty_columns_false(self): 120 | df = get_as_dataframe(self.sheet, drop_empty_columns=False) 121 | self.assertEqual(list(df.columns.array), COLUMN_NAMES + ["Unnamed: 10"]) 122 | 
self.assertEqual(len(df.columns), 11) 123 | self.assertEqual(len(df), 9) 124 | self.assertEqual(df.index.name, None) 125 | self.assertEqual(list(df.index.array), list(range(9))) 126 | 127 | def test_drop_empty_rows_false(self): 128 | df = get_as_dataframe(self.sheet, drop_empty_rows=False) 129 | self.assertEqual(list(df.columns.array), COLUMN_NAMES) 130 | self.assertEqual(len(df.columns), 10) 131 | self.assertEqual(len(df), 10) 132 | self.assertEqual(df.index.name, None) 133 | self.assertEqual(list(df.index.array), list(range(10))) 134 | 135 | def test_evaluate_formulas_true(self): 136 | df = get_as_dataframe(self.sheet, evaluate_formulas=True) 137 | self.assertEqual(list(df.columns.array), COLUMN_NAMES) 138 | self.assertEqual(df["Formula Column"][0], 2.226) 139 | 140 | def test_evaluate_formulas_false(self): 141 | df = get_as_dataframe(self.sheet) 142 | self.assertEqual(list(df.columns.array), COLUMN_NAMES) 143 | self.assertEqual(df["Formula Column"][0], "=R[0]C[-1]*2") 144 | 145 | def test_usecols(self): 146 | df = get_as_dataframe(self.sheet, usecols=USECOLS_COLUMN_NAMES) 147 | self.assertEqual(list(df.columns.array), USECOLS_COLUMN_NAMES) 148 | 149 | def test_indexcol(self): 150 | df = get_as_dataframe(self.sheet, index_col=4) 151 | self.assertEqual(len(df.columns), 9) 152 | self.assertEqual(df.index.name, "Date Column") 153 | self.assertEqual(type(df.index).__name__, "Index") 154 | self.assertEqual(df.index.array[0], "2017-03-04") 155 | 156 | def test_indexcol_none(self): 157 | df = get_as_dataframe(self.sheet, index_col=False) 158 | self.assertEqual(len(df.columns), 10) 159 | self.assertEqual(df.index.name, None) 160 | self.assertEqual(list(df.index.array), list(range(9))) 161 | 162 | def test_header_false(self): 163 | df = get_as_dataframe(self.sheet, header=None) 164 | self.assertEqual(len(df), 10) 165 | 166 | def test_header_first_row(self): 167 | df = get_as_dataframe(self.sheet, header=0) 168 | self.assertEqual(len(df), 9) 169 | 170 | def test_skiprows(self): 171 | df = get_as_dataframe(self.sheet, skiprows=range(1, 4)) 172 | self.assertEqual(len(df), 6) 173 | 174 | def test_squeeze(self): 175 | if pd.__version__ < '2.0.0': 176 | df = get_as_dataframe(self.sheet, usecols=[0], squeeze=True) 177 | self.assertTrue(isinstance(df, pd.Series)) 178 | self.assertEqual(len(df), 9) 179 | 180 | def test_converters_datetime(self): 181 | df = get_as_dataframe( 182 | self.sheet, 183 | converters={ 184 | "Date Column": lambda x: datetime.strptime(x, "%Y-%m-%d") 185 | }, 186 | ) 187 | self.assertEqual(df["Date Column"][0], datetime(2017, 3, 4)) 188 | 189 | def test_dtype_raises(self): 190 | self.assertRaises( 191 | ValueError, 192 | get_as_dataframe, 193 | self.sheet, 194 | dtype={"Numeric Column": np.float64}, 195 | ) 196 | 197 | def test_no_nafilter(self): 198 | df = get_as_dataframe(self.sheet, na_filter=False) 199 | self.assertEqual(df["Dialect-specific implementations"][7], "") 200 | 201 | def test_nafilter(self): 202 | df = get_as_dataframe(self.sheet, na_filter=True) 203 | self.assertTrue(np.isnan(df["Dialect-specific implementations"][7])) 204 | 205 | def test_parse_dates_true(self): 206 | df = get_as_dataframe(self.sheet, index_col=4, parse_dates=True) 207 | self.assertEqual(df.index[0], pd.Timestamp("2017-03-04 00:00:00")) 208 | 209 | def test_parse_dates_true_infer(self): 210 | df = get_as_dataframe( 211 | self.sheet, 212 | index_col=4, 213 | parse_dates=True, 214 | infer_datetime_format=True, 215 | ) 216 | self.assertEqual(df.index[0], pd.Timestamp("2017-03-04 00:00:00")) 217 | 218 
| def test_parse_dates_custom_parser(self): 219 | df = get_as_dataframe( 220 | self.sheet, 221 | parse_dates=[4], 222 | date_format="%Y-%m-%d" 223 | ) 224 | self.assertEqual(df["Date Column"][0], datetime(2017, 3, 4)) 225 | 226 | 227 | _original_mock_failure_message = Mock._format_mock_failure_message 228 | 229 | 230 | def _format_mock_failure_message(self, args, kwargs): 231 | message = "Expected call: %s\nActual call: %s" 232 | expected_string = self._format_mock_call_signature(args, kwargs) 233 | call_args = self.call_args 234 | if len(call_args) == 3: 235 | call_args = call_args[1:] 236 | actual_string = self._format_mock_call_signature(*call_args) 237 | msg = message % (expected_string, actual_string) 238 | if ( 239 | len(call_args[0]) > 1 240 | and isinstance(call_args[0][1], (str, bytes)) 241 | and len(args) > 1 242 | and isinstance(args[1], (str, bytes)) 243 | and call_args[0][1] != args[1] 244 | ): 245 | import difflib 246 | 247 | sm = difflib.SequenceMatcher(None, call_args[0][1], args[1]) 248 | m = sm.find_longest_match(0, len(call_args[0][1]), 0, len(args[1])) 249 | msg += "; diff: at index %d, expected %s -- actual %s" % ( 250 | m.a + m.size, 251 | call_args[0][1][m.a + m.size - 40 : m.a + m.size + 40], 252 | args[1][m.b + m.size - 40 : m.b + m.size + 40], 253 | ) 254 | return msg 255 | 256 | 257 | Mock._format_mock_failure_message = _format_mock_failure_message 258 | 259 | # have to patch Cell to make cells comparable 260 | def __eq__(self, other): 261 | if not isinstance(other, self.__class__): 262 | return False 263 | return ( 264 | self.row == other.row 265 | and self.col == other.col 266 | and self.value == other.value 267 | ) 268 | 269 | 270 | Cell.__eq__ = __eq__ 271 | 272 | 273 | class TestWorksheetWrites(unittest.TestCase): 274 | def setUp(self): 275 | self.sheet = MockWorksheet() 276 | self.sheet.resize = MagicMock() 277 | self.sheet.update_cells = MagicMock() 278 | self.sheet.spreadsheet.values_update = MagicMock() 279 | 280 | def test_write_basic(self): 281 | df = get_as_dataframe(self.sheet, na_filter=False) 282 | set_with_dataframe( 283 | self.sheet, 284 | df, 285 | resize=True, 286 | string_escaping=re.compile(r"3e50").match, 287 | ) 288 | self.sheet.resize.assert_called_once_with(11, 11) 289 | self.sheet.update_cells.assert_called_once_with( 290 | CELL_LIST_STRINGIFIED, value_input_option="USER_ENTERED" 291 | ) 292 | 293 | def test_write_empty_df_no_updates(self): 294 | df = pd.DataFrame.from_records([]) 295 | set_with_dataframe(self.sheet, df) 296 | self.sheet.update_cells.assert_not_called() 297 | 298 | def test_include_index_false(self): 299 | df = get_as_dataframe(self.sheet, na_filter=False) 300 | df_index = df.set_index("Thingy") 301 | set_with_dataframe( 302 | self.sheet, 303 | df_index, 304 | resize=True, 305 | include_index=False, 306 | string_escaping=lambda x: x == "3e50", 307 | ) 308 | self.sheet.resize.assert_called_once_with(11, 10) 309 | self.sheet.update_cells.assert_called_once_with( 310 | CELL_LIST_STRINGIFIED_NO_THINGY, value_input_option="USER_ENTERED" 311 | ) 312 | 313 | def test_include_index_true(self): 314 | df = get_as_dataframe(self.sheet, na_filter=False) 315 | df_index = df.set_index("Thingy") 316 | set_with_dataframe( 317 | self.sheet, 318 | df_index, 319 | resize=True, 320 | include_index=True, 321 | string_escaping=re.compile(r"3e50").match, 322 | ) 323 | self.sheet.resize.assert_called_once_with(11, 11) 324 | self.sheet.update_cells.assert_called_once_with( 325 | CELL_LIST_STRINGIFIED, value_input_option="USER_ENTERED" 326 | ) 
327 | 328 | def test_write_list_value_to_cell(self): 329 | df = get_as_dataframe(self.sheet, na_filter=False) 330 | df.at[0, "Numeric Column"] = [1, 2, 3] 331 | set_with_dataframe( 332 | self.sheet, 333 | df, 334 | resize=True, 335 | string_escaping=re.compile(r"3e50").match, 336 | ) 337 | self.sheet.resize.assert_called_once_with(11, 11) 338 | self.sheet.update_cells.assert_called_once_with( 339 | CELL_LIST_STRINGIFIED, value_input_option="USER_ENTERED" 340 | ) 341 | -------------------------------------------------------------------------------- /tests/mock_worksheet.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import json 3 | import re 4 | from gspread import Cell 5 | from gspread_dataframe import _cellrepr 6 | 7 | 8 | def contents_of_file(filename, et_parse=True): 9 | with open(os.path.join(os.path.dirname(__file__), filename), "r") as f: 10 | return json.load(f) 11 | 12 | 13 | SHEET_CONTENTS_FORMULAS = contents_of_file("sheet_contents_formulas.json") 14 | SHEET_CONTENTS_EVALUATED = contents_of_file("sheet_contents_evaluated.json") 15 | CELL_LIST = [ 16 | Cell(row=i + 1, col=j + 1, value=value) 17 | for i, row in enumerate(contents_of_file("cell_list.json")) 18 | for j, value in enumerate(row) 19 | ] 20 | 21 | CELL_LIST_STRINGIFIED = [ 22 | Cell( 23 | row=i + 1, 24 | col=j + 1, 25 | value=_cellrepr( 26 | value, 27 | allow_formulas=True, 28 | string_escaping=re.compile(r"3e50").match, 29 | ), 30 | ) 31 | for i, row in enumerate(contents_of_file("cell_list.json")) 32 | for j, value in enumerate(row) 33 | ] 34 | 35 | _without_index = contents_of_file("cell_list.json") 36 | for _r in _without_index: 37 | del _r[0] 38 | 39 | CELL_LIST_STRINGIFIED_NO_THINGY = [ 40 | Cell( 41 | row=i + 1, 42 | col=j + 1, 43 | value=_cellrepr( 44 | value, 45 | allow_formulas=True, 46 | string_escaping=re.compile(r"3e50").match, 47 | ), 48 | ) 49 | for i, row in enumerate(_without_index) 50 | for j, value in enumerate(row) 51 | ] 52 | 53 | 54 | class MockWorksheet(object): 55 | def __init__(self): 56 | self.row_count = 10 57 | self.col_count = 11 58 | self.id = "fooby" 59 | self.title = "gspread dataframe test" 60 | self.spreadsheet = MockSpreadsheet() 61 | 62 | 63 | class MockSpreadsheet(object): 64 | def values_get(self, *args, **kwargs): 65 | if ( 66 | kwargs.get("params", {}).get("valueRenderOption") 67 | == "UNFORMATTED_VALUE" 68 | ): 69 | return SHEET_CONTENTS_EVALUATED 70 | if kwargs.get("params", {}).get("valueRenderOption") == "FORMULA": 71 | return SHEET_CONTENTS_FORMULAS 72 | 73 | 74 | if __name__ == "__main__": 75 | from gspread_dataframe import * 76 | 77 | ws = MockWorksheet() 78 | -------------------------------------------------------------------------------- /tests/sheet_contents_evaluated.json: -------------------------------------------------------------------------------- 1 | {"range": "Sheet1!A1:K10", "majorDimension": "ROWS", "values": [["Thingy", "Syntax", "Numeric Column", "Formula Column", "Date Column", "Values are...", "Selection", "Label(s) referencible in chart title", "Dialect-specific implementations", "Notes", ""], ["filter", "[expr=foo]", "[1, 2, 3]", 2.226, "2017-03-04", "literals", "multiple", "no", "3e50", ""], ["'+", "[expr=daterange]", 2.01, 2.226, "2017-03-05", "sometimes-parameterized SQL expressions brincolín", "single, REQUIRED", "yes as [daterange]", "yes probably", "static SQL but uses account's timezone setting to determine what SQL intervals to emit; Custom Date Range selection exposes two date pickers 
and provides the values. This is the only case I can see where more than one parameter is needed. Also, start date or end date can be empty, which should remove its clauses entirely. If not selected or reset, defaults to \"All Dates\" which appears as \"all time\" in chart titles. (UI bug fails to show All Dates as selected when filters are reset.)", ""], ["aggregation", "[expr:aggregation]", 2.907, 2.226, "2017-03-06", "parameterized SQL expressions", "single, REQUIRED", "yes as [aggregation]", "yes probably", "one could argue this is a map of { label : formatter } of which you can pick 1..1 entry to apply. If not selected or reset, defaults to Daily (though UI bug does not mark Daily in aggregation area).", ""], ["snippet", "[foo]", 3.804, 2.226, "2017-03-07", "static SQL expressions", "n/a", "n/a", "no", ""], ["formatter", "[expr:foo]", 4.701, 2.226, "2017-03-08", "parameterized SQL expressions", "n/a", "n/a", "yes", ""], ["automatic join", "[foo+bar]", 5.598, 2.226, "2017-03-09", "SQL expression with two parameters!", "n/a", "n/a", "???", "uses key name conventions as rules to determine join columns", ""], ["Proposed Thingy", "Syntax", 6.495, 2.226, "2017-03-10", "Values are...", "Selection", "Label(s) referencible in chart title", "Dialect-specific implementations", "Notes", ""], ["parameterized snippet", "[expr::foo]", 7.392, 2.226, "2017-03-11", "parameterized SQL expressions", "n/a", "n/a", "", "Syntax not decided yet; unique among macro types in that it can evaluate all other macro types when rendering", ""], ["filter as SQL expression", "[expr=foo]", 8.289, 2.226, "2017-03-12", "static SQL expression", "multiple", "no", "no", "map of { label : expression } of which you can pick 0..N-1. their expressions just get ORed together.", ""], ["", "", "", "", "", "", "", "", "", "", ""]]} 2 | -------------------------------------------------------------------------------- /tests/sheet_contents_formulas.json: -------------------------------------------------------------------------------- 1 | {"range": "Sheet1!A1:K10", "majorDimension": "ROWS", "values": [["Thingy", "Syntax", "Numeric Column", "Formula Column", "Date Column", "Values are...", "Selection", "Label(s) referencible in chart title", "Dialect-specific implementations", "Notes", ""], ["filter", "[expr=foo]", "[1, 2, 3]", "=R[0]C[-1]*2", "2017-03-04", "literals", "multiple", "no", "3e50", ""], ["'+", "[expr=daterange]", 2.01, "=R[0]C[-1]*2", "2017-03-05", "sometimes-parameterized SQL expressions brincolín", "single, REQUIRED", "yes as [daterange]", "yes probably", "static SQL but uses account's timezone setting to determine what SQL intervals to emit; Custom Date Range selection exposes two date pickers and provides the values. This is the only case I can see where more than one parameter is needed. Also, start date or end date can be empty, which should remove its clauses entirely. If not selected or reset, defaults to \"All Dates\" which appears as \"all time\" in chart titles. (UI bug fails to show All Dates as selected when filters are reset.)", ""], ["aggregation", "[expr:aggregation]", 2.907, "=R[0]C[-1]*2", "2017-03-06", "parameterized SQL expressions", "single, REQUIRED", "yes as [aggregation]", "yes probably", "one could argue this is a map of { label : formatter } of which you can pick 1..1 entry to apply. 
If not selected or reset, defaults to Daily (though UI bug does not mark Daily in aggregation area).", ""], ["snippet", "[foo]", 3.804, "=R[0]C[-1]*2", "2017-03-07", "static SQL expressions", "n/a", "n/a", "no", ""], ["formatter", "[expr:foo]", 4.701, "=R[0]C[-1]*2", "2017-03-08", "parameterized SQL expressions", "n/a", "n/a", "yes", ""], ["automatic join", "[foo+bar]", 5.598, "=R[0]C[-1]*2", "2017-03-09", "SQL expression with two parameters!", "n/a", "n/a", "???", "uses key name conventions as rules to determine join columns", ""], ["Proposed Thingy", "Syntax", 6.495, "=R[0]C[-1]*2", "2017-03-10", "Values are...", "Selection", "Label(s) referencible in chart title", "Dialect-specific implementations", "Notes", ""], ["parameterized snippet", "[expr::foo]", 7.392, "=R[0]C[-1]*2", "2017-03-11", "parameterized SQL expressions", "n/a", "n/a", "", "Syntax not decided yet; unique among macro types in that it can evaluate all other macro types when rendering", ""], ["filter as SQL expression", "[expr=foo]", 8.289, "=R[0]C[-1]*2", "2017-03-12", "static SQL expression", "multiple", "no", "no", "map of { label : expression } of which you can pick 0..N-1. their expressions just get ORed together.", ""], ["", "", "", "", "", "", "", "", "", "", ""]]} 2 | -------------------------------------------------------------------------------- /tests/tests.config.example: -------------------------------------------------------------------------------- 1 | [Spreadsheet] 2 | id: 1P3rdCDxfO760TJdE-cbi0k_yy9vmC-joapjuGw9vNjc 3 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | env_list = 3 | 3.8 4 | 3.13 5 | minversion = 4.24.2 6 | 7 | [testenv] 8 | description = run the tests with pytest 9 | package = wheel 10 | wheel_build_env = .pkg 11 | deps = 12 | pytest>=6 13 | coverage 14 | oauth2client 15 | !3.8: pandas>=2.0.0 16 | 3.8: pandas<2.0.0 17 | commands = 18 | coverage erase 19 | coverage run -m pytest {tty:--color=yes} tests/gspread_dataframe_test.py tests/gspread_dataframe_integration.py {posargs} 20 | coverage report --omit='tests/*' 21 | 22 | [gh-actions] 23 | python = 24 | 3.8: py38 25 | 3.13: py313 26 | --------------------------------------------------------------------------------