├── .github
│   └── workflows
│       └── tests.yml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE.txt
├── README.md
├── docs
│   ├── .gitignore
│   ├── Makefile
│   ├── conf.py
│   └── index.rst
├── json_tricks
│   ├── __init__.py
│   ├── _version.py
│   ├── comment.py
│   ├── decoders.py
│   ├── encoders.py
│   ├── nonp.py
│   ├── np.py
│   ├── np_utils.py
│   └── utils.py
├── setup.cfg
├── setup.py
└── tests
    ├── __init__.py
    ├── run_locally.md
    ├── test_bare.py
    ├── test_class.py
    ├── test_enum.py
    ├── test_meta.py
    ├── test_np.py
    ├── test_pandas.py
    ├── test_pathlib.py
    ├── test_slice.py
    ├── test_tz.py
    └── test_utils.py

/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | 
2 | name: 'pyjson-tricks'
3 | 
4 | on:
5 |   push:
6 |     branches:
7 |       - main
8 |   pull_request:
9 | 
10 | jobs:
11 |   build:
12 |     name: tests
13 |     runs-on: ubuntu-latest
14 |     strategy:
15 |       max-parallel: 8
16 |       fail-fast: false
17 |       matrix:
18 |         libraries: [
19 |           'vanilla',
20 |           'tz',
21 |           'path',
22 |           'numpy',
23 |           'pandas',
24 |           'all'
25 |         ]
26 |         python-version: [
27 |           '3.7',
28 |           '3.8',
29 |           '3.9',
30 |           '3.10',
31 |           '3.11'
32 |         ]
33 |     steps:
34 |       - uses: actions/checkout@v1
35 |       - name: Set up Python ${{ matrix.python-version }}
36 |         uses: actions/setup-python@v2
37 |         with:
38 |           python-version: ${{ matrix.python-version }}
39 |       - name: Install dependencies
40 |         run: |
41 |           python -m pip install --upgrade pip
42 |           pip install pytest
43 |           export LIBS="${{ matrix.libraries }}"
44 |           if [ "$LIBS" == "tz" ] || [ "$LIBS" == "all" ] ; then
45 |             pip install pytz
46 |           fi
47 |           if [ "$LIBS" == "path" ] || [ "$LIBS" == "all" ] ; then
48 |             pip install pathlib
49 |           fi
50 |           if [ "$LIBS" == "numpy" ] || [ "$LIBS" == "all" ] ; then
51 |             pip install numpy
52 |           fi
53 |           if [ "$LIBS" == "pandas" ] || [ "$LIBS" == "all" ] ; then
54 |             pip install pandas
55 |           fi
56 |       - name: Run tests
57 |         run: |
58 |           python --version
59 |           PYTEST_ARGS='-v --strict tests/test_bare.py tests/test_class.py tests/test_meta.py tests/test_enum.py'
60 |           export LIBS="${{ matrix.libraries }}"
61 |           if [ "$LIBS" == "vanilla" ] ; then
62 |             py.test $PYTEST_ARGS
63 |           elif [ "$LIBS" == "tz" ] ; then
64 |             py.test $PYTEST_ARGS tests/test_tz.py
65 |           elif [ "$LIBS" == "path" ] ; then
66 |             py.test $PYTEST_ARGS tests/test_pathlib.py
67 |           elif [ "$LIBS" == "numpy" ] ; then
68 |             py.test $PYTEST_ARGS tests/test_np.py
69 |           elif [ "$LIBS" == "pandas" ] ; then
70 |             py.test $PYTEST_ARGS tests/test_pandas.py
71 |           elif [ "$LIBS" == "all" ] ; then
72 |             py.test -v --strict
73 |           else
74 |             echo "UNKNOWN LIBRARY '$LIBS'"
75 |             exit 1
76 |           fi
77 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *.pyc
3 | *.egg-info
4 | *.swp
5 | /.idea/
6 | *.sqlite3
7 | /.pypirc
8 | /dist/
9 | /build/
10 | /MANIFEST
11 | /.cache/
12 | /try.py
13 | .pytest_cache/
14 | 
15 | 
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of conduct
2 | 
3 | **Just be nice.**
4 | 
5 | Saturday Morning Breakfast Cereal summarized it well:
6 | 
7 | [![image](https://www.smbc-comics.com/comics/20090727.gif)](https://www.smbc-comics.com/comic/2009-07-27)
8 | 
9 | I do not expect that the type of people who behave badly will read this.
10 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 | 
3 | Contributions are very welcome! Bug reports, feature suggestions and
4 | code contributions help this project become more useful for everyone!
5 | 
6 | Contributing can be done through:
7 | 
8 | * Reporting bugs ([issues](https://github.com/mverleg/pyjson_tricks/issues))
9 | * Suggesting features ([issues](https://github.com/mverleg/pyjson_tricks/issues))
10 | * Fixing bugs ([pull request](https://github.com/mverleg/pyjson_tricks/pulls))
11 | * Implementing features ([pull request](https://github.com/mverleg/pyjson_tricks/pulls))
12 | * Reviewing a [pull request](https://github.com/mverleg/pyjson_tricks/pulls)
13 | * Telling a friend :)
14 | 
15 | There are only a few things to keep in mind:
16 | 
17 | * Your contributions become [BSD-licensed](https://github.com/mverleg/pyjson_tricks/blob/master/LICENSE.txt)
18 | * Discuss features in an issue before contributing code.
19 | * Automated tests are required to go live, not necessarily to commit.
20 | * Try to follow the [code conventions](https://www.python.org/dev/peps/pep-0008/).
21 | * [Be nice](https://github.com/mverleg/pyjson_tricks/blob/master/CODE_OF_CONDUCT.md).
22 | 
23 | Feel free to add yourself to the README if you send a pull request!
24 | Either at the specific feature ("thanks to NAME"), or in "Usage &
25 | contributions".
26 | 
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | LICENSE: BSD-3-Clause
2 | 
3 | 
4 | Copyright (c) 2023 Mark V. All rights reserved.
5 | 
6 | Redistribution and use in source and binary forms, with or without modification,
7 | are permitted provided that the following conditions are met:
8 | 
9 | 1. Redistributions of source code must retain the above copyright notice,
10 | this list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 | 
16 | 3. Neither the name of the copyright holder nor the names of its contributors
17 | may be used to endorse or promote products derived from this software without
18 | specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
29 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # JSON tricks (python)
2 | 
3 | The [pyjson-tricks](https://github.com/mverleg/pyjson_tricks) package adds several pieces of
4 | functionality to Python's handling of json files:
5 | 
6 | 1. **Store and load numpy arrays** in human-readable format.
7 | 2. **Store and load class instances** both generic and customized.
8 | 3. **Store and load date/times** as a dictionary (including timezone).
9 | 4. **Preserve map order** `{}` using `OrderedDict`.
10 | 5. **Allow for comments** in json files by starting lines with `#`.
11 | 6. Sets, complex numbers, Decimal, Fraction, enums, compression,
12 | duplicate keys, pathlib Paths, bytes and more.
13 | 
14 | * Code: <https://github.com/mverleg/pyjson_tricks>
15 | * Documentation: <https://json-tricks.readthedocs.io/en/latest/>
16 | * PIP: <https://pypi.org/project/json-tricks/>
17 | 
18 | Several keys of the format `__keyname__` have special meanings, and more
19 | might be added in future releases.
20 | 
21 | If you're considering JSON-but-with-comments as a config file format,
22 | have a look at [HJSON](https://github.com/hjson/hjson-py), it might be
23 | more appropriate. For other purposes, keep reading!
24 | 
25 | Thanks for all the GitHub stars⭐!
26 | 
27 | # Installation and use
28 | 
29 | You can install using
30 | 
31 | ``` bash
32 | pip install json-tricks
33 | ```
34 | 
35 | Decoding of some data types needs the corresponding package to be
36 | installed, e.g. `numpy` for arrays, `pandas` for dataframes and `pytz`
37 | for timezone-aware datetimes.
38 | 
39 | You can import the usual json functions dump(s) and load(s), as well as
40 | a separate comment removal function, as follows:
41 | 
42 | ``` python
43 | from json_tricks import dump, dumps, load, loads, strip_comments
44 | ```
45 | 
46 | The exact signatures of these and other functions are in the [documentation](http://json-tricks.readthedocs.org/en/latest/#main-components).
47 | 
48 | Quite a few older versions of Python are supported. For an up-to-date list see [the automated tests](./.github/workflows/tests.yml).
49 | 
50 | # Features
51 | 
52 | ## Numpy arrays
53 | 
54 | When not compressed, the array is encoded in a somewhat readable and very
55 | flexible and portable format, like so:
56 | 
57 | ``` python
58 | from numpy import arange, uint8
59 | from json_tricks import dumps
60 | 
61 | arr = arange(0, 10, 1, dtype=uint8).reshape((2, 5))
62 | print(dumps({'mydata': arr}))
63 | ```
64 | 
65 | this yields:
66 | 
67 | ``` javascript
68 | {
69 |     "mydata": {
70 |         "dtype": "uint8",
71 |         "shape": [2, 5],
72 |         "Corder": true,
73 |         "__ndarray__": [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]
74 |     }
75 | }
76 | ```
77 | 
78 | which will be converted back to a numpy array when using
79 | `json_tricks.loads`. Note that the memory order (`Corder`) is only
80 | stored in v3.1 and later and for arrays with at least 2 dimensions.
81 | 
82 | As you see, this uses the magic key `__ndarray__`. Don't use
83 | `__ndarray__` as a dictionary key unless you're trying to make a numpy
84 | array (and know what you're doing).
85 | 
86 | Numpy scalars are also serialized (v3.5+). They are represented by the
87 | closest python primitive type. A special representation was not
88 | feasible, because Python's json implementation serializes some numpy
89 | types as primitives, without consulting custom encoders. If you want to
90 | preserve the exact numpy type, use
91 | [encode_scalars_inplace](https://json-tricks.readthedocs.io/en/latest/#json_tricks.np_utils.encode_scalars_inplace).
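92 | 
93 | For example, here is a minimal sketch of preserving scalar types this way (it assumes
94 | `numpy` is installed; `encode_scalars_inplace` mutates the structure in place, before dumping):
95 | 
96 | ``` python
97 | from numpy import float32
98 | from json_tricks import dumps, loads, encode_scalars_inplace
99 | 
100 | data = {'x': float32(2.5)}
101 | encode_scalars_inplace(data)  # replaces the scalar by an encodable representation
102 | restored = loads(dumps(data))  # restored['x'] is a numpy scalar again
103 | ```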
104 | 
105 | There is also a compressed format (thanks `claydugo` for the fix). From
106 | the next major release, this will be the default when using compression.
107 | For now, you can use it as:
108 | 
109 | ``` python
110 | dumps(data, compression=True, properties={'ndarray_compact': True})
111 | ```
112 | 
113 | This compressed format encodes the array data in base64, with gzip
114 | compression for the array, unless 1) compression has little effect for
115 | that array, or 2) the whole file is already compressed. If you only want
116 | the compact format for large arrays, pass the number of elements to
117 | `ndarray_compact`.
118 | 
119 | Example:
120 | 
121 | ``` python
122 | from numpy import array, linspace, pi, exp
123 | from json_tricks import dumps
124 | 
125 | data = [linspace(0, 10, 9), array([pi, exp(1)])]
126 | dumps(data, compression=False, properties={'ndarray_compact': 8})
127 | ```
128 | 
129 | which outputs:
130 | 
131 | ``` javascript
132 | [{
133 |     "__ndarray__": "b64.gz:H4sIAAAAAAAC/2NgQAZf7CE0iwOE5oPSIlBaEkrLQegGRShfxQEAz7QFikgAAAA=",
134 |     "dtype": "float64",
135 |     "shape": [9]
136 | }, {
137 |     "__ndarray__": [3.141592653589793, 2.718281828459045],
138 |     "dtype": "float64",
139 |     "shape": [2]
140 | }]
141 | ```
142 | 
143 | ## Class instances
144 | 
145 | `json_tricks` can serialize class instances.
146 | 
147 | If the class behaves normally (not dynamically generated, no `__new__` or
148 | `__metaclass__` magic, etc.) *and* all its attributes are serializable,
149 | then this should work by default.
150 | 
151 | ``` python
152 | # json_tricks/test_class.py
153 | class MyTestCls:
154 |     def __init__(self, **kwargs):
155 |         for k, v in kwargs.items():
156 |             setattr(self, k, v)
157 | 
158 | cls_instance = MyTestCls(s='ub', dct={'7': 7})
159 | 
160 | json = dumps(cls_instance, indent=4)
161 | cls_instance_again = loads(json)
162 | ```
163 | 
164 | You'll get your instance back. Here the json looks like this:
165 | 
166 | ``` javascript
167 | {
168 |     "__instance_type__": [
169 |         "json_tricks.test_class",
170 |         "MyTestCls"
171 |     ],
172 |     "attributes": {
173 |         "s": "ub",
174 |         "dct": {
175 |             "7": 7
176 |         }
177 |     }
178 | }
179 | ```
180 | 
181 | As you can see, this stores the module and class name. The class must be
182 | importable from the same module when decoding (and should not have
183 | changed). If it isn't, you have to manually provide a dictionary to
184 | `cls_lookup_map` when loading, in which the class name can be looked up.
185 | Note that if the class is imported, then `globals()` is such a
186 | dictionary (so try `loads(json, cls_lookup_map=globals())`). Also note
187 | that if the class is defined in the 'top' script (that you're calling
188 | directly), then this isn't a module and the import part cannot be
189 | extracted. Only the class name will be stored; it can then only be
190 | deserialized in the same script, or if you provide `cls_lookup_map`.
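191 | 
192 | As a minimal sketch of that last case (a class defined in the top-level script, so
193 | only its name is stored and you supply the lookup yourself):
194 | 
195 | ``` python
196 | from json_tricks import dumps, loads
197 | 
198 | class MyTestCls:
199 |     def __init__(self, **kwargs):
200 |         for k, v in kwargs.items():
201 |             setattr(self, k, v)
202 | 
203 | json = dumps(MyTestCls(s='ub'))
204 | back = loads(json, cls_lookup_map={'MyTestCls': MyTestCls})  # or cls_lookup_map=globals()
205 | ```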
206 | 
207 | Note that this also works with `slots` without having to do anything
208 | (thanks to `koffie` and `dominicdoty`), which encodes like this (custom
209 | indentation):
210 | 
211 | ``` javascript
212 | {
213 |     "__instance_type__": ["module.path", "ClassName"],
214 |     "slots": {"slotattr": 37},
215 |     "attributes": {"dictattr": 42}
216 | }
217 | ```
218 | 
219 | If the instance doesn't serialize automatically, or if you want custom
220 | behaviour, then you can implement `__json_encode__(self)` and
221 | `__json_decode__(self, **attributes)` methods, like so:
222 | 
223 | ``` python
224 | class CustomEncodeCls:
225 |     def __init__(self):
226 |         self.relevant = 42
227 |         self.irrelevant = 37
228 | 
229 |     def __json_encode__(self):
230 |         # should return primitive, serializable types like dict, list, int, string, float...
231 |         return {'relevant': self.relevant}
232 | 
233 |     def __json_decode__(self, **attrs):
234 |         # should initialize all properties; note that __init__ is not called implicitly
235 |         self.relevant = attrs['relevant']
236 |         self.irrelevant = 12
237 | ```
238 | 
239 | As you've seen, this uses the magic key `__instance_type__`. Don't use
240 | `__instance_type__` as a dictionary key unless you know what you're
241 | doing.
242 | 
243 | ## Date, time, datetime and timedelta
244 | 
245 | Date, time, datetime and timedelta objects are stored as dictionaries of
246 | "day", "hour", "microsecond" etc keys, for each nonzero property.
247 | 
248 | The timezone name is also stored if it is set, as is the DST flag (thanks `eumir`).
249 | You'll need to have `pytz` installed to use timezone-aware date/times;
250 | it's not needed for naive date/times.
251 | 
252 | ``` javascript
253 | {
254 |     "__datetime__": null,
255 |     "year": 1988,
256 |     "month": 3,
257 |     "day": 15,
258 |     "hour": 8,
259 |     "minute": 3,
260 |     "second": 59,
261 |     "microsecond": 7,
262 |     "tzinfo": "Europe/Amsterdam"
263 | }
264 | ```
265 | 
266 | This approach was chosen over timestamps for readability and consistency
267 | between date and time, and over a single string to prevent parsing
268 | problems and reduce dependencies. Note that if `primitives=True`,
269 | date/times are encoded as ISO 8601, but they won't be restored
270 | automatically.
271 | 
272 | Don't use `__date__`, `__time__`, `__datetime__`, `__timedelta__` or
273 | `__tzinfo__` as dictionary keys unless you know what you're doing, as
274 | they have special meaning.
275 | 
276 | ## Order
277 | 
278 | Given an ordered dictionary like this (see the tests for a longer one):
279 | 
280 | ``` python
281 | from collections import OrderedDict
282 | 
283 | ordered = OrderedDict((
284 |     ('elephant', None),
285 |     ('chicken', None),
286 |     ('tortoise', None),
287 | ))
288 | ```
289 | 
290 | Converting to json and back will preserve the order:
291 | 
292 | ``` python
293 | from json_tricks import dumps, loads
294 | json = dumps(ordered)
295 | ordered = loads(json, preserve_order=True)
296 | ```
297 | 
298 | where `preserve_order=True` is added for emphasis; it can be left out
299 | since it's the default.
300 | 
301 | As a note on [performance](http://stackoverflow.com/a/8177061/723090),
302 | both dicts and OrderedDicts have the same scaling for getting and
303 | setting items (`O(1)`). In Python versions before 3.5, OrderedDicts were
304 | implemented in Python rather than C, so were somewhat slower; since
305 | Python 3.5 both are implemented in C. In summary, you should have no
306 | scaling problems and probably no performance problems at all, especially
307 | in Python 3.
308 | Note that CPython 3.6 started preserving insertion order in regular dicts as an
309 | implementation detail, and Python 3.7 made this a language guarantee, so on
310 | modern versions this feature is mostly redundant.
311 | 
312 | ## Comments
313 | 
314 | *Warning: in the next major version, comment parsing will be opt-in, not
315 | default anymore (for performance reasons). Update your code now to pass
316 | `ignore_comments=True` explicitly if you want comment parsing.*
317 | 
318 | This package uses `#` and `//` for comments, which seem to be the most
319 | common conventions, though only the latter is valid javascript.
320 | 
321 | For example, you could call `loads` on the following string:
322 | 
323 | ``` javascript
324 | { # "comment 1
325 |     "hello": "Wor#d", "Bye": "\"M#rk\"", "yes\\\"": 5,# comment" 2
326 |     "quote": "\"th#t's\" what she said", // comment "3"
327 |     "list": [1, 1, "#", "\"", "\\", 8], "dict": {"q": 7} #" comment 4 with quotes
328 | }
329 | // comment 5
330 | ```
331 | 
332 | And it would return the de-commented version:
333 | 
334 | ``` javascript
335 | {
336 |     "hello": "Wor#d", "Bye": "\"M#rk\"", "yes\\\"": 5,
337 |     "quote": "\"th#t's\" what she said",
338 |     "list": [1, 1, "#", "\"", "\\", 8], "dict": {"q": 7}
339 | }
340 | ```
341 | 
342 | Since comments aren't stored in the Python representation of the data,
343 | loading and then saving a json file will remove the comments (it also
344 | likely changes the indentation).
345 | 
346 | The implementation of comments is a bit crude, which means that there are
347 | some exceptional cases that aren't handled correctly ([#57](https://github.com/mverleg/pyjson_tricks/issues/57)).
348 | 
349 | It is also not very fast. For that reason, if `ignore_comments` wasn't
350 | explicitly set to True, json-tricks first tries to parse without
351 | ignoring comments. If that fails, it will automatically re-try
352 | with comment handling. This makes the no-comment case faster at the cost
353 | of the comment case, so if you are expecting comments make sure to set
354 | `ignore_comments` to True.
355 | 
356 | ## Other features
357 | 
358 | * Special floats like `NaN`, `Infinity` and
359 | `-0` using the `allow_nan=True` argument
360 | ([non-standard](https://stackoverflow.com/questions/1423081/json-left-out-infinity-and-nan-json-status-in-ecmascript)
361 | json, may not decode in other implementations).
362 | * Sets are serializable and can be loaded. By default the set json
363 | representation is sorted, to have a consistent representation.
364 | * Save and load complex numbers (py3) with `1+2j` serializing as
365 | `{'__complex__': [1, 2]}`.
366 | * Save and load `Decimal` and `Fraction` (including NaN, infinity, -0
367 | for Decimal).
368 | * Save and load `Enum` (thanks to `Jenselme`), either built-in in
369 | python3.4+, or with the [enum34](https://pypi.org/project/enum34/)
370 | package in earlier versions. `IntEnum` needs
371 | [encode_intenums_inplace](https://json-tricks.readthedocs.io/en/latest/#json_tricks.utils.encode_intenums_inplace).
372 | * `json_tricks` allows for gzip compression using the
373 | `compression=True` argument (off by default); see the example below.
374 | * `json_tricks` can check for duplicate keys in maps by setting
375 | `allow_duplicates` to False (also shown in the example below). These are [kind of
376 | allowed](http://stackoverflow.com/questions/21832701/does-json-syntax-allow-duplicate-keys-in-an-object),
377 | but are handled inconsistently between json implementations. In
378 | Python, for `dict` and `OrderedDict`, duplicate keys are silently
379 | overwritten.
380 | * Save and load `pathlib.Path` objects (e.g., the current path,
381 | `Path('.')`, serializes as `{"__pathlib__": "."}`)
382 | (thanks to `bburan`).
383 | * Save and load bytes (python 3+ only), which will be encoded as utf8 if
384 | that is valid, or as base64 otherwise. Base64 is always used if
385 | primitives are requested. Serialized as
386 | `[{"__bytes_b64__": "aGVsbG8="}]` vs `[{"__bytes_utf8__": "hello"}]`.
387 | * Save and load slices (thanks to `claydugo`).
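388 | 
389 | A minimal sketch of the compression and duplicate-key options mentioned above
390 | (`dumps` returns gzipped `bytes` when `compression=True`):
391 | 
392 | ``` python
393 | from json_tricks import dumps, loads, DuplicateJsonKeyException
394 | 
395 | packed = dumps({'numbers': list(range(1000))}, compression=True)  # gzipped bytes
396 | data = loads(packed, decompression=True)
397 | 
398 | try:
399 |     loads('{"key": 1, "key": 2}', allow_duplicates=False)
400 | except DuplicateJsonKeyException:
401 |     print('duplicate key detected')
402 | ```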
403 | 
404 | # Preserve type vs use primitive
405 | 
406 | By default, types are encoded such that they can be restored to their
407 | original type when loaded with `json-tricks`. Example encodings in this
408 | documentation refer to that format.
409 | 
410 | You can also choose to store things as their closest primitive type
411 | (e.g. arrays and sets as lists, decimals as floats). This may be
412 | desirable if you don't care about the exact type, or you are loading
413 | the json in another language (which doesn't restore python types).
414 | It's also smaller.
415 | 
416 | To forego meta data and store primitives instead, pass `primitives` to
417 | `dump(s)`. This is available in version `3.8` and later. Example:
418 | 
419 | ``` python
420 | from datetime import datetime
421 | from decimal import Decimal
422 | from fractions import Fraction
423 | from numpy import arange
424 | from json_tricks import dumps
425 | 
426 | data = [
427 |     arange(0, 10, 1, dtype=int).reshape((2, 5)),
428 |     datetime(year=2017, month=1, day=19, hour=23, minute=00, second=00),
429 |     1 + 2j,
430 |     Decimal(42),
431 |     Fraction(1, 3),
432 |     MyTestCls(s='ub', dct={'7': 7}),  # see the class example above
433 |     set(range(7)),
434 | ]
435 | # Encode with metadata to preserve types when decoding
436 | print(dumps(data))
437 | ```
438 | 
439 | ``` javascript
440 | // (comments added and indenting changed)
441 | [
442 |     // numpy array
443 |     {
444 |         "__ndarray__": [
445 |             [0, 1, 2, 3, 4],
446 |             [5, 6, 7, 8, 9]],
447 |         "dtype": "int64",
448 |         "shape": [2, 5],
449 |         "Corder": true
450 |     },
451 |     // datetime (naive)
452 |     {
453 |         "__datetime__": null,
454 |         "year": 2017,
455 |         "month": 1,
456 |         "day": 19,
457 |         "hour": 23
458 |     },
459 |     // complex number
460 |     {
461 |         "__complex__": [1.0, 2.0]
462 |     },
463 |     // decimal & fraction
464 |     {
465 |         "__decimal__": "42"
466 |     },
467 |     {
468 |         "__fraction__": true,
469 |         "numerator": 1,
470 |         "denominator": 3
471 |     },
472 |     // class instance
473 |     {
474 |         "__instance_type__": [
475 |             "tests.test_class",
476 |             "MyTestCls"
477 |         ],
478 |         "attributes": {
479 |             "s": "ub",
480 |             "dct": {"7": 7}
481 |         }
482 |     },
483 |     // set
484 |     {
485 |         "__set__": [0, 1, 2, 3, 4, 5, 6]
486 |     }
487 | ]
488 | ```
489 | 
490 | ``` python
491 | # Encode as primitive types; simpler but loses type information
492 | print(dumps(data, primitives=True))
493 | ```
494 | 
495 | ``` javascript
496 | // (comments added and indentation changed)
497 | [
498 |     // numpy array
499 |     [[0, 1, 2, 3, 4],
500 |     [5, 6, 7, 8, 9]],
501 |     // datetime (naive)
502 |     "2017-01-19T23:00:00",
503 |     // complex number
504 |     [1.0, 2.0],
505 |     // decimal & fraction
506 |     42.0,
507 |     0.3333333333333333,
508 |     // class instance
509 |     {
510 |         "s": "ub",
511 |         "dct": {"7": 7}
512 |     },
513 |     // set
514 |     [0, 1, 2, 3, 4, 5, 6]
515 | ]
516 | ```
517 | 
518 | Note that valid json is produced either way: `json-tricks` stores meta
519 | data as normal json, but other packages probably won't interpret it.
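520 | 
521 | Since primitive mode discards the type metadata, loading the result back yields
522 | plain Python types, as this quick sketch shows:
523 | 
524 | ``` python
525 | from json_tricks import dumps, loads
526 | 
527 | roundtrip = loads(dumps({1, 2, 3}, primitives=True))
528 | print(roundtrip)  # [1, 2, 3] -- a plain list, not a set
529 | ```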
530 | 
531 | # Usage & contributions
532 | 
533 | Code is under [Revised BSD License](LICENSE.txt)
534 | so you can use it for most purposes including commercially.
535 | 
536 | Contributions are very welcome! Bug reports, feature suggestions and
537 | code contributions help this project become more useful for everyone!
538 | There is a short [contribution guide](CONTRIBUTING.md).
539 | 
540 | Contributors not yet mentioned: `janLo` (performance boost).
541 | 
542 | # Tests
543 | 
544 | Tests are run automatically for commits to the repository for all
545 | supported versions. This is the status:
546 | 
547 | ![image](https://github.com/mverleg/pyjson_tricks/workflows/pyjson-tricks/badge.svg?branch=main)
548 | 
549 | To run the tests manually for your version, see [this guide](tests/run_locally.md).
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | _build/
2 | _static/
3 | _templates/
4 | 
5 | 
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = _build
9 | 
10 | # User-friendly check for sphinx-build
11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
13 | endif
14 | 
15 | # Internal variables.
16 | PAPEROPT_a4 = -D latex_paper_size=a4
17 | PAPEROPT_letter = -D latex_paper_size=letter
18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
19 | # the i18n builder cannot share the environment and doctrees with the others
20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " applehelp to make an Apple Help Book" 34 | @echo " devhelp to make HTML files and a Devhelp project" 35 | @echo " epub to make an epub" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | html: 55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 58 | 59 | dirhtml: 60 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 61 | @echo 62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 63 | 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | pickle: 70 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 71 | @echo 72 | @echo "Build finished; now you can process the pickle files." 73 | 74 | json: 75 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 76 | @echo 77 | @echo "Build finished; now you can process the JSON files." 78 | 79 | htmlhelp: 80 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 81 | @echo 82 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 83 | ".hhp project file in $(BUILDDIR)/htmlhelp." 84 | 85 | qthelp: 86 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 87 | @echo 88 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 89 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 90 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/json-tricks.qhcp" 91 | @echo "To view the help file:" 92 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/json-tricks.qhc" 93 | 94 | applehelp: 95 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 96 | @echo 97 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 98 | @echo "N.B. You won't be able to view it unless you put it in" \ 99 | "~/Library/Documentation/Help or install it in your application" \ 100 | "bundle." 
101 | 102 | devhelp: 103 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 104 | @echo 105 | @echo "Build finished." 106 | @echo "To view the help file:" 107 | @echo "# mkdir -p $$HOME/.local/share/devhelp/json-tricks" 108 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/json-tricks" 109 | @echo "# devhelp" 110 | 111 | epub: 112 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 113 | @echo 114 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 115 | 116 | latex: 117 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 118 | @echo 119 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 120 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 121 | "(use \`make latexpdf' here to do that automatically)." 122 | 123 | latexpdf: 124 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 125 | @echo "Running LaTeX files through pdflatex..." 126 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 127 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 128 | 129 | latexpdfja: 130 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 131 | @echo "Running LaTeX files through platex and dvipdfmx..." 132 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 133 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 134 | 135 | text: 136 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 137 | @echo 138 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 139 | 140 | man: 141 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 142 | @echo 143 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 144 | 145 | texinfo: 146 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 147 | @echo 148 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 149 | @echo "Run \`make' in that directory to run these through makeinfo" \ 150 | "(use \`make info' here to do that automatically)." 151 | 152 | info: 153 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 154 | @echo "Running Texinfo files through makeinfo..." 155 | make -C $(BUILDDIR)/texinfo info 156 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 157 | 158 | gettext: 159 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 160 | @echo 161 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 162 | 163 | changes: 164 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 165 | @echo 166 | @echo "The overview file is in $(BUILDDIR)/changes." 167 | 168 | linkcheck: 169 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 170 | @echo 171 | @echo "Link check complete; look for any errors in the above output " \ 172 | "or in $(BUILDDIR)/linkcheck/output.txt." 173 | 174 | doctest: 175 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 176 | @echo "Testing of doctests in the sources finished, look at the " \ 177 | "results in $(BUILDDIR)/doctest/output.txt." 178 | 179 | coverage: 180 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 181 | @echo "Testing of coverage in the sources finished, look at the " \ 182 | "results in $(BUILDDIR)/coverage/python.txt." 183 | 184 | xml: 185 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 186 | @echo 187 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 188 | 189 | pseudoxml: 190 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 191 | @echo 192 | @echo "Build finished. 
The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 193 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is execfile()d with the current directory set to its 4 | # containing dir. 5 | # 6 | # Note that not all possible configuration values are present in this 7 | # autogenerated file. 8 | # 9 | # All configuration values have a default; values that are commented out 10 | # serve to show the default. 11 | 12 | import sys 13 | import os 14 | import shlex 15 | from os.path import abspath 16 | 17 | 18 | sys.path.insert(0, abspath('..')) 19 | 20 | # If extensions (or modules to document with autodoc) are in another directory, 21 | # add these directories to sys.path here. If the directory is relative to the 22 | # documentation root, use os.path.abspath to make it absolute, like shown here. 23 | #sys.path.insert(0, os.path.abspath('.')) 24 | 25 | # -- General configuration ------------------------------------------------ 26 | 27 | # If your documentation needs a minimal Sphinx version, state it here. 28 | #needs_sphinx = '1.0' 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | 'sphinx.ext.autodoc', 35 | 'sphinx.ext.viewcode', 36 | ] 37 | 38 | # Add any paths that contain templates here, relative to this directory. 39 | templates_path = ['_templates'] 40 | 41 | # The suffix(es) of source filenames. 42 | # You can specify multiple suffix as a list of string: 43 | # source_suffix = ['.rst', '.md'] 44 | source_suffix = '.rst' 45 | 46 | # The encoding of source files. 47 | #source_encoding = 'utf-8-sig' 48 | 49 | # The master toctree document. 50 | master_doc = 'index' 51 | 52 | # General information about the project. 53 | project = u'json-tricks' 54 | copyright = u'2017, Mark' 55 | author = u'Mark' 56 | 57 | # The version info for the project you're documenting, acts as replacement for 58 | # |version| and |release|, also used in various other places throughout the 59 | # built documents. 60 | # 61 | # The short X.Y version. 62 | version = '1.2' 63 | # The full version, including alpha/beta/rc tags. 64 | release = version 65 | 66 | # The language for content autogenerated by Sphinx. Refer to documentation 67 | # for a list of supported languages. 68 | # 69 | # This is also used if you do content translation via gettext catalogs. 70 | # Usually you set "language" from the command line for these cases. 71 | language = None 72 | 73 | # There are two options for replacing |today|: either, you set today to some 74 | # non-false value, then it is used: 75 | #today = '' 76 | # Else, today_fmt is used as the format for a strftime call. 77 | #today_fmt = '%B %d, %Y' 78 | 79 | # List of patterns, relative to source directory, that match files and 80 | # directories to ignore when looking for source files. 81 | exclude_patterns = ['_build'] 82 | 83 | # The reST default role (used for this markup: `text`) to use for all 84 | # documents. 85 | #default_role = None 86 | 87 | # If true, '()' will be appended to :func: etc. cross-reference text. 88 | #add_function_parentheses = True 89 | 90 | # If true, the current module name will be prepended to all description 91 | # unit titles (such as .. function::). 
92 | #add_module_names = True 93 | 94 | # If true, sectionauthor and moduleauthor directives will be shown in the 95 | # output. They are ignored by default. 96 | #show_authors = False 97 | 98 | # The name of the Pygments (syntax highlighting) style to use. 99 | pygments_style = 'sphinx' 100 | 101 | # A list of ignored prefixes for module index sorting. 102 | #modindex_common_prefix = [] 103 | 104 | # If true, keep warnings as "system message" paragraphs in the built documents. 105 | #keep_warnings = False 106 | 107 | # If true, `to do` and `todoList` produce output, else they produce nothing. 108 | todo_include_todos = False 109 | 110 | 111 | # -- Options for HTML output ---------------------------------------------- 112 | 113 | # The theme to use for HTML and HTML Help pages. See the documentation for 114 | # a list of builtin themes. 115 | html_theme = 'alabaster' 116 | 117 | # Theme options are theme-specific and customize the look and feel of a theme 118 | # further. For a list of options available for each theme, see the 119 | # documentation. 120 | #html_theme_options = {} 121 | 122 | # Add any paths that contain custom themes here, relative to this directory. 123 | #html_theme_path = [] 124 | 125 | # The name for this set of Sphinx documents. If None, it defaults to 126 | # " v documentation". 127 | #html_title = None 128 | 129 | # A shorter title for the navigation bar. Default is the same as html_title. 130 | #html_short_title = None 131 | 132 | # The name of an image file (relative to this directory) to place at the top 133 | # of the sidebar. 134 | #html_logo = None 135 | 136 | # The name of an image file (within the static path) to use as favicon of the 137 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 138 | # pixels large. 139 | #html_favicon = None 140 | 141 | # Add any paths that contain custom static files (such as style sheets) here, 142 | # relative to this directory. They are copied after the builtin static files, 143 | # so a file named "default.css" will overwrite the builtin "default.css". 144 | html_static_path = ['_static'] 145 | 146 | # Add any extra paths that contain custom files (such as robots.txt or 147 | # .htaccess) here, relative to this directory. These files are copied 148 | # directly to the root of the documentation. 149 | #html_extra_path = [] 150 | 151 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 152 | # using the given strftime format. 153 | #html_last_updated_fmt = '%b %d, %Y' 154 | 155 | # If true, SmartyPants will be used to convert quotes and dashes to 156 | # typographically correct entities. 157 | #html_use_smartypants = True 158 | 159 | # Custom sidebar templates, maps document names to template names. 160 | #html_sidebars = {} 161 | 162 | # Additional templates that should be rendered to pages, maps page names to 163 | # template names. 164 | #html_additional_pages = {} 165 | 166 | # If false, no module index is generated. 167 | #html_domain_indices = True 168 | 169 | # If false, no index is generated. 170 | #html_use_index = True 171 | 172 | # If true, the index is split into individual pages for each letter. 173 | #html_split_index = False 174 | 175 | # If true, links to the reST sources are added to the pages. 176 | #html_show_sourcelink = True 177 | 178 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 179 | #html_show_sphinx = True 180 | 181 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
182 | #html_show_copyright = True 183 | 184 | # If true, an OpenSearch description file will be output, and all pages will 185 | # contain a tag referring to it. The value of this option must be the 186 | # base URL from which the finished HTML is served. 187 | #html_use_opensearch = '' 188 | 189 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 190 | #html_file_suffix = None 191 | 192 | # Language to be used for generating the HTML full-text search index. 193 | # Sphinx supports the following languages: 194 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 195 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' 196 | #html_search_language = 'en' 197 | 198 | # A dictionary with options for the search language support, empty by default. 199 | # Now only 'ja' uses this config value 200 | #html_search_options = {'type': 'default'} 201 | 202 | # The name of a javascript file (relative to the configuration directory) that 203 | # implements a search results scorer. If empty, the default will be used. 204 | #html_search_scorer = 'scorer.js' 205 | 206 | # Output file base name for HTML help builder. 207 | htmlhelp_basename = 'json-tricksdoc' 208 | 209 | # -- Options for LaTeX output --------------------------------------------- 210 | 211 | latex_elements = { 212 | # The paper size ('letterpaper' or 'a4paper'). 213 | #'papersize': 'letterpaper', 214 | 215 | # The font size ('10pt', '11pt' or '12pt'). 216 | #'pointsize': '10pt', 217 | 218 | # Additional stuff for the LaTeX preamble. 219 | #'preamble': '', 220 | 221 | # Latex figure (float) alignment 222 | #'figure_align': 'htbp', 223 | } 224 | 225 | # Grouping the document tree into LaTeX files. List of tuples 226 | # (source start file, target name, title, 227 | # author, documentclass [howto, manual, or own class]). 228 | latex_documents = [ 229 | (master_doc, 'json-tricks.tex', u'json-tricks Documentation', 230 | u'Mark', 'manual'), 231 | ] 232 | 233 | # The name of an image file (relative to this directory) to place at the top of 234 | # the title page. 235 | #latex_logo = None 236 | 237 | # For "manual" documents, if this is true, then toplevel headings are parts, 238 | # not chapters. 239 | #latex_use_parts = False 240 | 241 | # If true, show page references after internal links. 242 | #latex_show_pagerefs = False 243 | 244 | # If true, show URL addresses after external links. 245 | #latex_show_urls = False 246 | 247 | # Documents to append as an appendix to all manuals. 248 | #latex_appendices = [] 249 | 250 | # If false, no module index is generated. 251 | #latex_domain_indices = True 252 | 253 | 254 | # -- Options for manual page output --------------------------------------- 255 | 256 | # One entry per manual page. List of tuples 257 | # (source start file, name, description, authors, manual section). 258 | man_pages = [ 259 | (master_doc, 'json-tricks', u'json-tricks Documentation', 260 | [author], 1) 261 | ] 262 | 263 | # If true, show URL addresses after external links. 264 | #man_show_urls = False 265 | 266 | 267 | # -- Options for Texinfo output ------------------------------------------- 268 | 269 | # Grouping the document tree into Texinfo files. List of tuples 270 | # (source start file, target name, title, author, 271 | # dir menu entry, description, category) 272 | texinfo_documents = [ 273 | (master_doc, 'json-tricks', u'json-tricks Documentation', 274 | author, 'json-tricks', 'One line description of project.', 275 | 'Miscellaneous'), 276 | ] 277 | 278 | # Documents to append as an appendix to all manuals. 
279 | #texinfo_appendices = []
280 | 
281 | # If false, no module index is generated.
282 | #texinfo_domain_indices = True
283 | 
284 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
285 | #texinfo_show_urls = 'footnote'
286 | 
287 | # If true, do not generate a @detailmenu in the "Top" node's menu.
288 | #texinfo_no_detailmenu = False
289 | 
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | 
2 | .. include:: ../README.rst
3 | 
4 | Main components
5 | ---------------------------------------
6 | 
7 | Support for numpy, pandas and other libraries should work automatically if those libraries are installed. They are not installed automatically as dependencies, because `json-tricks` can be used without them.
8 | 
9 | dumps
10 | +++++++++++++++++++++++++++++++++++++++
11 | 
12 | .. autofunction:: json_tricks.nonp.dumps
13 | 
14 | .. autofunction:: json_tricks.np.dumps
15 | 
16 | dump
17 | +++++++++++++++++++++++++++++++++++++++
18 | 
19 | .. autofunction:: json_tricks.nonp.dump
20 | 
21 | .. autofunction:: json_tricks.np.dump
22 | 
23 | loads
24 | +++++++++++++++++++++++++++++++++++++++
25 | 
26 | .. autofunction:: json_tricks.nonp.loads
27 | 
28 | .. autofunction:: json_tricks.np.loads
29 | 
30 | load
31 | +++++++++++++++++++++++++++++++++++++++
32 | 
33 | .. autofunction:: json_tricks.nonp.load
34 | 
35 | .. autofunction:: json_tricks.np.load
36 | 
37 | Utilities
38 | ---------------------------------------
39 | 
40 | strip comments
41 | +++++++++++++++++++++++++++++++++++++++
42 | 
43 | .. autofunction:: json_tricks.comment.strip_comments
44 | 
45 | numpy
46 | +++++++++++++++++++++++++++++++++++++++
47 | 
48 | .. autofunction:: json_tricks.np.numpy_encode
49 | 
50 | .. autofunction:: json_tricks.np.json_numpy_obj_hook
51 | 
52 | class instances
53 | +++++++++++++++++++++++++++++++++++++++
54 | 
55 | .. autofunction:: json_tricks.encoders.class_instance_encode
56 | 
57 | .. autoclass:: json_tricks.decoders.ClassInstanceHook
58 | 
59 | enum instances
60 | +++++++++++++++++++++++++++++++++++++++
61 | 
62 | Support for enums was added in Python 3.4. Support for previous versions of Python is available with the `enum 34`_ package.
63 | 
64 | .. autofunction:: json_tricks.encoders.enum_instance_encode
65 | 
66 | .. autoclass:: json_tricks.decoders.EnumInstanceHook
67 | 
68 | By default ``IntEnum`` cannot be encoded as enums since they cannot be differentiated from integers. To serialize them, you must use `encode_intenums_inplace`, which mutates a nested data structure (in place!) to replace any ``IntEnum`` by their representation. If you serialize this result, it can subsequently be loaded without further adaptations.
69 | 
70 | .. autofunction:: json_tricks.utils.encode_intenums_inplace
71 | 
72 | date/time
73 | +++++++++++++++++++++++++++++++++++++++
74 | 
75 | .. autofunction:: json_tricks.encoders.json_date_time_encode
76 | 
77 | .. autofunction:: json_tricks.decoders.json_date_time_hook
78 | 
79 | numpy scalars
80 | +++++++++++++++++++++++++++++++++++++++
81 | 
82 | It's not possible (without a lot of hacks) to encode numpy scalars. This is the case because some numpy scalars (`float64`, and depending on Python version also `int64`) are subclasses of `float` and `int`. This means that the Python json encoder will serialize them as plain numbers, without them ever reaching the custom encoders.
83 | 
84 | So if you really want to encode numpy scalars, you'll have to do the conversion beforehand. For that purpose you can use `encode_scalars_inplace`, which mutates a nested data structure (in place!) to replace any numpy scalars by their representation. If you serialize this result, it can subsequently be loaded without further adaptations.
85 | 
86 | It's not great, but unless the Python json module changes, it's the best that can be done. See `issue 18`_ for more details.
87 | 
88 | .. autofunction:: json_tricks.np_utils.encode_scalars_inplace
89 | 
90 | Table of contents
91 | ---------------------------------------
92 | 
93 | This is a simple module so the documentation is single-page.
94 | 
95 | .. toctree::
96 |    :maxdepth: 2
97 | 
98 | 
99 | .. _`issue 18`: https://github.com/mverleg/pyjson_tricks/issues/18
100 | .. _`enum 34`: https://pypi.org/project/enum34/
101 | 
102 | 
--------------------------------------------------------------------------------
/json_tricks/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | try:
3 | 	from json import JSONDecodeError  # imported for convenience
4 | except ImportError:
5 | 	""" Older versions of Python use ValueError, of which JSONDecodeError is a subclass; it's recommended to catch ValueError. """
6 | from .utils import hashodict, NoEnumException, NoNumpyException, NoPandasException, get_scalar_repr, encode_intenums_inplace, encode_scalars_inplace
7 | from .comment import strip_comment_line_with_symbol, strip_comments
8 | from .encoders import TricksEncoder, json_date_time_encode, class_instance_encode, json_complex_encode, \
9 | 	numeric_types_encode, ClassInstanceEncoder, json_set_encode, pandas_encode, nopandas_encode, \
10 | 	numpy_encode, NumpyEncoder, nonumpy_encode, NoNumpyEncoder, fallback_ignore_unknown, pathlib_encode, \
11 | 	bytes_encode, slice_encode
12 | from .decoders import DuplicateJsonKeyException, TricksPairHook, json_date_time_hook, json_complex_hook, \
13 | 	numeric_types_hook, ClassInstanceHook, json_set_hook, pandas_hook, nopandas_hook, json_numpy_obj_hook, \
14 | 	json_nonumpy_obj_hook, pathlib_hook, json_bytes_hook
15 | from .nonp import dumps, dump, loads, load
16 | from ._version import VERSION
17 | 
18 | __version__ = VERSION
19 | 
20 | 
21 | try:
22 | 	# find_module takes just as long as importing, so no optimization possible
23 | 	import numpy
24 | except ImportError:
25 | 	NUMPY_MODE = False
26 | 	# from .nonp import dumps, dump, loads, load, nonumpy_encode as numpy_encode, json_nonumpy_obj_hook as json_numpy_obj_hook
27 | else:
28 | 	NUMPY_MODE = True
29 | 	# from .np import dumps, dump, loads, load, numpy_encode, NumpyEncoder, json_numpy_obj_hook
30 | 	# from .np_utils import encode_scalars_inplace
31 | 
32 | 
--------------------------------------------------------------------------------
/json_tricks/_version.py:
--------------------------------------------------------------------------------
1 | 
2 | VERSION = '3.17.2'
3 | 
4 | 
--------------------------------------------------------------------------------
/json_tricks/comment.py:
--------------------------------------------------------------------------------
1 | 
2 | from re import findall
3 | 
4 | 
5 | def strip_comment_line_with_symbol(line, start):
6 | 	parts = line.split(start)
7 | 	counts = [len(findall(r'(?:^|[^"\\]|(?:\\\\|\\")+)(")(?!")', part)) for part in parts]
8 | 	total = 0
9 | 	for nr, count in enumerate(counts):
10 | 		total += count
11 | 		if total % 2 == 0:
12 | 			return start.join(parts[:nr+1]).rstrip()
13 | 	else:
14 | 		return line.rstrip()
15 | 
16 | 
17 | def strip_comments(string, comment_symbols=frozenset(('#', '//'))):
18 | 	"""
19 | 	Stripping comments usually works, but there are a few edge cases that trip it up, like https://github.com/mverleg/pyjson_tricks/issues/57.
20 | 
21 | 	:param string: A string containing json with comments started by comment_symbols.
22 | 	:param comment_symbols: Iterable of symbols that start a line comment (default # or //).
23 | 	:return: The string with the comments removed.
24 | 	"""
25 | 	lines = string.splitlines()
26 | 	for k in range(len(lines)):
27 | 		for symbol in comment_symbols:
28 | 			lines[k] = strip_comment_line_with_symbol(lines[k], start=symbol)
29 | 	return '\n'.join(lines)
30 | 
31 | 
--------------------------------------------------------------------------------
/json_tricks/decoders.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import warnings
3 | from base64 import standard_b64decode
4 | from collections import OrderedDict
5 | from datetime import datetime, date, time, timedelta
6 | from decimal import Decimal
7 | from fractions import Fraction
8 | 
9 | from json_tricks import NoEnumException, NoPandasException, NoNumpyException
10 | from .utils import ClassInstanceHookBase, nested_index, str_type, gzip_decompress, filtered_wrapper
11 | 
12 | 
13 | class DuplicateJsonKeyException(Exception):
14 | 	""" Trying to load a json map which contains duplicate keys, but allow_duplicates is False """
15 | 
16 | 
17 | class TricksPairHook(object):
18 | 	"""
19 | 	Hook that converts json maps to the appropriate python type (dict or OrderedDict)
20 | 	and then runs any number of hooks on the individual maps.
21 | 	"""
22 | 	def __init__(self, ordered=True, obj_pairs_hooks=None, allow_duplicates=True, properties=None):
23 | 		"""
24 | 		:param ordered: True if maps should retain their ordering.
25 | 		:param obj_pairs_hooks: An iterable of hooks to apply to elements.
26 | 		"""
27 | 		self.properties = properties or {}
28 | 		self.map_type = OrderedDict
29 | 		if not ordered:
30 | 			self.map_type = dict
31 | 		self.obj_pairs_hooks = []
32 | 		if obj_pairs_hooks:
33 | 			self.obj_pairs_hooks = list(filtered_wrapper(hook) for hook in obj_pairs_hooks)
34 | 		self.allow_duplicates = allow_duplicates
35 | 
36 | 	def __call__(self, pairs):
37 | 		if not self.allow_duplicates:
38 | 			known = set()
39 | 			for key, value in pairs:
40 | 				if key in known:
41 | 					raise DuplicateJsonKeyException(('Trying to load a json map which contains a ' +
42 | 						'duplicate key "{0:}" (but allow_duplicates is False)').format(key))
43 | 				known.add(key)
44 | 		map = self.map_type(pairs)
45 | 		for hook in self.obj_pairs_hooks:
46 | 			map = hook(map, properties=self.properties)
47 | 		return map
48 | 
49 | 
50 | def json_date_time_hook(dct):
51 | 	"""
52 | 	Convert an encoded date, time, datetime or timedelta back to its python representation, including optional timezone.
53 | 
54 | 	:param dct: (dict) json encoded date, time, datetime or timedelta
55 | 	:return: (date/time/datetime/timedelta obj) python representation of the above
56 | 	"""
57 | 	def get_tz(dct):
58 | 		if not 'tzinfo' in dct:
59 | 			return None
60 | 		try:
61 | 			import pytz
62 | 		except ImportError as err:
63 | 			raise ImportError(('Tried to load a json object which has a timezone-aware (date)time. '
64 | 				'However, `pytz` could not be imported, so the object could not be loaded. '
65 | 				'Error: {0:}').format(str(err)))
66 | 		return pytz.timezone(dct['tzinfo'])
67 | 
68 | 	if not isinstance(dct, dict):
69 | 		return dct
70 | 	if '__date__' in dct:
71 | 		return date(year=dct.get('year', 0), month=dct.get('month', 0), day=dct.get('day', 0))
72 | 	elif '__time__' in dct:
73 | 		tzinfo = get_tz(dct)
74 | 		return time(hour=dct.get('hour', 0), minute=dct.get('minute', 0), second=dct.get('second', 0),
75 | 			microsecond=dct.get('microsecond', 0), tzinfo=tzinfo)
76 | 	elif '__datetime__' in dct:
77 | 		tzinfo = get_tz(dct)
78 | 		dt = datetime(year=dct.get('year', 0), month=dct.get('month', 0), day=dct.get('day', 0),
79 | 			hour=dct.get('hour', 0), minute=dct.get('minute', 0), second=dct.get('second', 0),
80 | 			microsecond=dct.get('microsecond', 0))
81 | 		if tzinfo is None:
82 | 			return dt
83 | 		return tzinfo.localize(dt, is_dst=dct.get('is_dst', None))
84 | 	elif '__timedelta__' in dct:
85 | 		return timedelta(days=dct.get('days', 0), seconds=dct.get('seconds', 0),
86 | 			microseconds=dct.get('microseconds', 0))
87 | 	return dct
88 | 
89 | 
90 | def json_complex_hook(dct):
91 | 	"""
92 | 	Convert an encoded complex number back to the Python complex type.
93 | 
94 | 	:param dct: (dict) json encoded complex number (__complex__)
95 | 	:return: python complex number
96 | 	"""
97 | 	if not isinstance(dct, dict):
98 | 		return dct
99 | 	if not '__complex__' in dct:
100 | 		return dct
101 | 	parts = dct['__complex__']
102 | 	assert len(parts) == 2
103 | 	return parts[0] + parts[1] * 1j
104 | 
105 | 
106 | def json_bytes_hook(dct):
107 | 	"""
108 | 	Convert encoded bytes, either base64 or utf8, back to Python bytes.
109 | 
110 | 	:param dct: any object; if it is a dict containing encoded bytes, they will be converted
111 | 	:return: python bytes
112 | 	"""
113 | 	if not isinstance(dct, dict):
114 | 		return dct
115 | 	if '__bytes_b64__' in dct:
116 | 		return standard_b64decode(dct['__bytes_b64__'])
117 | 	if '__bytes_utf8__' in dct:
118 | 		return dct['__bytes_utf8__'].encode('utf-8')
119 | 	return dct
120 | 
121 | 
122 | def numeric_types_hook(dct):
123 | 	if not isinstance(dct, dict):
124 | 		return dct
125 | 	if '__decimal__' in dct:
126 | 		return Decimal(dct['__decimal__'])
127 | 	if '__fraction__' in dct:
128 | 		return Fraction(numerator=dct['numerator'], denominator=dct['denominator'])
129 | 	return dct
130 | 
131 | 
132 | def noenum_hook(dct):
133 | 	if isinstance(dct, dict) and '__enum__' in dct:
134 | 		raise NoEnumException(('Trying to decode a map which appears to represent an enum '
135 | 			'data structure, but enum support is not enabled, perhaps it is not installed.'))
136 | 	return dct
137 | 
138 | 
139 | def pathlib_hook(dct):
140 | 	if not isinstance(dct, dict):
141 | 		return dct
142 | 	if not '__pathlib__' in dct:
143 | 		return dct
144 | 	from pathlib import Path
145 | 	return Path(dct['__pathlib__'])
146 | 
147 | 
148 | class NoPathlibException(Exception):
149 | 	""" Raised when pathlib-encoded data is found but pathlib support is not enabled. """
150 | 
151 | 
152 | def nopathlib_hook(dct):
153 | 	if isinstance(dct, dict) and '__pathlib__' in dct:
154 | 		raise NoPathlibException(('Trying to decode a map which appears to represent a '
155 | 			'pathlib.Path data structure, but pathlib support '
156 | 			'is not enabled.'))
157 | 	return dct
158 | 
159 | 
160 | def slice_hook(dct):
161 | 	if not isinstance(dct, dict):
162 | 		return dct
163 | 	if not '__slice__' in dct:
164 | 		return dct
165 | 	return slice(dct['start'], dct['stop'], dct['step'])
166 | 
167 | 
168 | class EnumInstanceHook(ClassInstanceHookBase):
169 | 	"""
170 | 	This hook tries to convert json encoded by enum_instance_encode back to its original instance.
171 | 	It only works if the environment is the same, e.g. the enum is similarly importable and hasn't changed.
167 | """ 168 | def __call__(self, dct, properties=None): 169 | if not isinstance(dct, dict): 170 | return dct 171 | if '__enum__' not in dct: 172 | return dct 173 | cls_lookup_map = properties.get('cls_lookup_map', {}) 174 | mod, name = dct['__enum__']['__enum_instance_type__'] 175 | Cls = self.get_cls_from_instance_type(mod, name, cls_lookup_map=cls_lookup_map) 176 | return Cls[dct['__enum__']['name']] 177 | 178 | 179 | class ClassInstanceHook(ClassInstanceHookBase): 180 | """ 181 | This hook tries to convert json encoded by class_instance_encoder back to it's original instance. 182 | It only works if the environment is the same, e.g. the class is similarly importable and hasn't changed. 183 | """ 184 | def __call__(self, dct, properties=None): 185 | if not isinstance(dct, dict): 186 | return dct 187 | if '__instance_type__' not in dct: 188 | return dct 189 | cls_lookup_map = properties.get('cls_lookup_map', {}) or {} 190 | mod, name = dct['__instance_type__'] 191 | Cls = self.get_cls_from_instance_type(mod, name, cls_lookup_map=cls_lookup_map) 192 | try: 193 | obj = Cls.__new__(Cls) 194 | except TypeError: 195 | raise TypeError(('problem while decoding instance of "{0:s}"; this instance has a special ' 196 | '__new__ method and can\'t be restored').format(name)) 197 | if hasattr(obj, '__json_decode__'): 198 | properties = {} 199 | if 'slots' in dct: 200 | properties.update(dct['slots']) 201 | if 'attributes' in dct: 202 | properties.update(dct['attributes']) 203 | obj.__json_decode__(**properties) 204 | else: 205 | if 'slots' in dct: 206 | for slot,value in dct['slots'].items(): 207 | setattr(obj, slot, value) 208 | if 'attributes' in dct: 209 | obj.__dict__ = dict(dct['attributes']) 210 | return obj 211 | 212 | 213 | def json_set_hook(dct): 214 | """ 215 | Return an encoded set to it's python representation. 
216 | """ 217 | if not isinstance(dct, dict): 218 | return dct 219 | if '__set__' not in dct: 220 | return dct 221 | return set((tuple(item) if isinstance(item, list) else item) for item in dct['__set__']) 222 | 223 | 224 | def pandas_hook(dct): 225 | if not isinstance(dct, dict): 226 | return dct 227 | if '__pandas_dataframe__' not in dct and '__pandas_series__' not in dct: 228 | return dct 229 | if '__pandas_dataframe__' in dct: 230 | try: 231 | from pandas import DataFrame 232 | except ImportError: 233 | raise NoPandasException('Trying to decode a map which appears to repr esent a pandas data structure, but pandas appears not to be installed.') 234 | from numpy import dtype, array 235 | meta = dct.pop('__pandas_dataframe__') 236 | indx = dct.pop('index') if 'index' in dct else None 237 | dtypes = dict((colname, dtype(tp)) for colname, tp in zip(meta['column_order'], meta['types'])) 238 | data = OrderedDict() 239 | for name, col in dct.items(): 240 | data[name] = array(col, dtype=dtypes[name]) 241 | return DataFrame( 242 | data=data, 243 | index=indx, 244 | columns=meta['column_order'], 245 | # mixed `dtypes` argument not supported, so use duct of numpy arrays 246 | ) 247 | elif '__pandas_series__' in dct: 248 | from pandas import Series 249 | from numpy import dtype, array 250 | meta = dct.pop('__pandas_series__') 251 | indx = dct.pop('index') if 'index' in dct else None 252 | return Series( 253 | data=dct['data'], 254 | index=indx, 255 | name=meta['name'], 256 | dtype=dtype(meta['type']), 257 | ) 258 | return dct # impossible 259 | 260 | 261 | def nopandas_hook(dct): 262 | if isinstance(dct, dict) and ('__pandas_dataframe__' in dct or '__pandas_series__' in dct): 263 | raise NoPandasException(('Trying to decode a map which appears to represent a pandas ' 264 | 'data structure, but pandas support is not enabled, perhaps it is not installed.')) 265 | return dct 266 | 267 | 268 | def json_numpy_obj_hook(dct): 269 | """ 270 | Replace any numpy arrays previously encoded by `numpy_encode` to their proper 271 | shape, data type and data. 272 | 273 | :param dct: (dict) json encoded ndarray 274 | :return: (ndarray) if input was an encoded ndarray 275 | """ 276 | if not isinstance(dct, dict): 277 | return dct 278 | if not '__ndarray__' in dct: 279 | return dct 280 | try: 281 | import numpy 282 | except ImportError: 283 | raise NoNumpyException('Trying to decode a map which appears to represent a numpy ' 284 | 'array, but numpy appears not to be installed.') 285 | order = None 286 | if 'Corder' in dct: 287 | order = 'C' if dct['Corder'] else 'F' 288 | data_json = dct['__ndarray__'] 289 | shape = tuple(dct['shape']) 290 | nptype = dct['dtype'] 291 | if shape: 292 | if nptype == 'object': 293 | return _lists_of_obj_to_ndarray(data_json, order, shape, nptype) 294 | if isinstance(data_json, str_type): 295 | endianness = dct.get('endian', 'native') 296 | return _bin_str_to_ndarray(data_json, order, shape, nptype, endianness) 297 | else: 298 | return _lists_of_numbers_to_ndarray(data_json, order, shape, nptype) 299 | else: 300 | return _scalar_to_numpy(data_json, nptype) 301 | 302 | 303 | def _bin_str_to_ndarray(data, order, shape, np_type_name, data_endianness): 304 | """ 305 | From base64 encoded, gzipped binary data to ndarray. 
306 | """ 307 | from base64 import standard_b64decode 308 | from numpy import frombuffer, dtype 309 | 310 | assert order in [None, 'C'], 'specifying different memory order is not (yet) supported ' \ 311 | 'for binary numpy format (got order = {})'.format(order) 312 | if data.startswith('b64.gz:'): 313 | data = standard_b64decode(data[7:]) 314 | data = gzip_decompress(data) 315 | elif data.startswith('b64:'): 316 | data = standard_b64decode(data[4:]) 317 | else: 318 | raise ValueError('found numpy array buffer, but did not understand header; supported: b64 or b64.gz') 319 | np_type = dtype(np_type_name) 320 | if data_endianness == sys.byteorder: 321 | pass 322 | if data_endianness == 'little': 323 | np_type = np_type.newbyteorder('<') 324 | elif data_endianness == 'big': 325 | np_type = np_type.newbyteorder('>') 326 | elif data_endianness != 'native': 327 | warnings.warn('array of shape {} has unknown endianness \'{}\''.format(shape, data_endianness)) 328 | data = frombuffer(bytearray(data), dtype=np_type) 329 | return data.reshape(shape) 330 | 331 | 332 | def _lists_of_numbers_to_ndarray(data, order, shape, dtype): 333 | """ 334 | From nested list of numbers to ndarray. 335 | """ 336 | from numpy import asarray 337 | arr = asarray(data, dtype=dtype, order=order) 338 | if 0 in shape: 339 | return arr.reshape(shape) 340 | if shape != arr.shape: 341 | warnings.warn('size mismatch decoding numpy array: expected {}, got {}'.format(shape, arr.shape)) 342 | return arr 343 | 344 | 345 | def _lists_of_obj_to_ndarray(data, order, shape, dtype): 346 | """ 347 | From nested list of objects (that aren't native numpy numbers) to ndarray. 348 | """ 349 | from numpy import empty, ndindex 350 | arr = empty(shape, dtype=dtype, order=order) 351 | dec_data = data 352 | for indx in ndindex(arr.shape): 353 | arr[indx] = nested_index(dec_data, indx) 354 | return arr 355 | 356 | 357 | def _scalar_to_numpy(data, dtype): 358 | """ 359 | From scalar value to numpy type. 360 | """ 361 | import numpy as nptypes 362 | dtype = getattr(nptypes, dtype) 363 | return dtype(data) 364 | 365 | 366 | def json_nonumpy_obj_hook(dct): 367 | """ 368 | This hook has no effect except to check if you're trying to decode numpy arrays without support, and give you a useful message. 369 | """ 370 | if isinstance(dct, dict) and '__ndarray__' in dct: 371 | raise NoNumpyException(('Trying to decode a map which appears to represent a numpy array, ' 372 | 'but numpy support is not enabled, perhaps it is not installed.')) 373 | return dct 374 | 375 | 376 | -------------------------------------------------------------------------------- /json_tricks/encoders.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from base64 import standard_b64encode 3 | from datetime import datetime, date, time, timedelta 4 | from decimal import Decimal 5 | from fractions import Fraction 6 | from functools import wraps 7 | from json import JSONEncoder 8 | import sys 9 | 10 | from .utils import hashodict, get_module_name_from_object, NoEnumException, NoPandasException, \ 11 | NoNumpyException, str_type, JsonTricksDeprecation, gzip_compress, filtered_wrapper, is_py3 12 | 13 | def _fallback_wrapper(encoder): 14 | """ 15 | This decorator makes an encoder run only if the current object hasn't been changed yet. 16 | (Changed-ness is checked with is_changed which is based on identity with `id`). 
17 | """ 18 | @wraps(encoder) 19 | def fallback_encoder(obj, is_changed, **kwargs): 20 | if is_changed: 21 | return obj 22 | return encoder(obj, is_changed=is_changed, **kwargs) 23 | return fallback_encoder 24 | 25 | 26 | def fallback_ignore_unknown(obj, is_changed=None, fallback_value=None): 27 | """ 28 | This encoder returns None if the object isn't changed by another encoder and isn't a primitive. 29 | """ 30 | if is_changed: 31 | return obj 32 | if obj is None or isinstance(obj, (int, float, str_type, bool, list, dict)): 33 | return obj 34 | return fallback_value 35 | 36 | 37 | class TricksEncoder(JSONEncoder): 38 | """ 39 | Encoder that runs any number of encoder functions or instances on 40 | the objects that are being encoded. 41 | 42 | Each encoder should make any appropriate changes and return an object, 43 | changed or not. This will be passes to the other encoders. 44 | """ 45 | def __init__(self, obj_encoders=None, silence_typeerror=False, primitives=False, fallback_encoders=(), properties=None, **json_kwargs): 46 | """ 47 | :param obj_encoders: An iterable of functions or encoder instances to try. 48 | :param silence_typeerror: DEPRECATED - If set to True, ignore the TypeErrors that Encoder instances throw (default False). 49 | """ 50 | if silence_typeerror and not getattr(TricksEncoder, '_deprecated_silence_typeerror'): 51 | TricksEncoder._deprecated_silence_typeerror = True 52 | sys.stderr.write('TricksEncoder.silence_typeerror is deprecated and may be removed in a future version\n') 53 | self.obj_encoders = [] 54 | if obj_encoders: 55 | self.obj_encoders = list(obj_encoders) 56 | self.obj_encoders.extend(_fallback_wrapper(encoder) for encoder in list(fallback_encoders)) 57 | self.obj_encoders = [filtered_wrapper(enc) for enc in self.obj_encoders] 58 | self.silence_typeerror = silence_typeerror 59 | self.properties = properties 60 | self.primitives = primitives 61 | super(TricksEncoder, self).__init__(**json_kwargs) 62 | 63 | def default(self, obj, *args, **kwargs): 64 | """ 65 | This is the method of JSONEncoders that is called for each object; it calls 66 | all the encoders with the previous one's output used as input. 67 | 68 | It works for Encoder instances, but they are expected not to throw 69 | `TypeError` for unrecognized types (the super method does that by default). 70 | 71 | It never calls the `super` method so if there are non-primitive types 72 | left at the end, you'll get an encoding error. 73 | """ 74 | prev_id = id(obj) 75 | for encoder in self.obj_encoders: 76 | obj = encoder(obj, primitives=self.primitives, is_changed=id(obj) != prev_id, properties=self.properties) 77 | if id(obj) == prev_id: 78 | raise TypeError(('Object of type {0:} could not be encoded by {1:} using encoders [{2:s}]. ' 79 | 'You can add an encoders for this type using `extra_obj_encoders`. If you want to \'skip\' this ' 80 | 'object, consider using `fallback_encoders` like `str` or `lambda o: None`.').format( 81 | type(obj), self.__class__.__name__, ', '.join(str(encoder) for encoder in self.obj_encoders))) 82 | return obj 83 | 84 | 85 | def json_date_time_encode(obj, primitives=False): 86 | """ 87 | Encode a date, time, datetime or timedelta to a string of a json dictionary, including optional timezone. 
88 | 89 | :param obj: date/time/datetime/timedelta obj 90 | :return: (dict) json primitives representation of date, time, datetime or timedelta 91 | """ 92 | if primitives and isinstance(obj, (date, time, datetime)): 93 | return obj.isoformat() 94 | if isinstance(obj, datetime): 95 | dct = hashodict([('__datetime__', None), ('year', obj.year), ('month', obj.month), 96 | ('day', obj.day), ('hour', obj.hour), ('minute', obj.minute), 97 | ('second', obj.second), ('microsecond', obj.microsecond)]) 98 | if obj.tzinfo: 99 | if hasattr(obj.tzinfo, 'zone'): 100 | dct['tzinfo'] = obj.tzinfo.zone 101 | else: 102 | dct['tzinfo'] = obj.tzinfo.tzname(None) 103 | dct['is_dst'] = bool(obj.dst()) 104 | elif isinstance(obj, date): 105 | dct = hashodict([('__date__', None), ('year', obj.year), ('month', obj.month), ('day', obj.day)]) 106 | elif isinstance(obj, time): 107 | dct = hashodict([('__time__', None), ('hour', obj.hour), ('minute', obj.minute), 108 | ('second', obj.second), ('microsecond', obj.microsecond)]) 109 | if obj.tzinfo: 110 | if hasattr(obj.tzinfo, 'zone'): 111 | dct['tzinfo'] = obj.tzinfo.zone 112 | else: 113 | dct['tzinfo'] = obj.tzinfo.tzname(None) 114 | elif isinstance(obj, timedelta): 115 | if primitives: 116 | return obj.total_seconds() 117 | else: 118 | dct = hashodict([('__timedelta__', None), ('days', obj.days), ('seconds', obj.seconds), 119 | ('microseconds', obj.microseconds)]) 120 | else: 121 | return obj 122 | for key, val in tuple(dct.items()): 123 | if not key.startswith('__') and not key == 'is_dst' and not val: 124 | del dct[key] 125 | return dct 126 | 127 | 128 | def enum_instance_encode(obj, primitives=False, with_enum_value=False): 129 | """Encodes an enum instance to json. Note that it can only be recovered if the environment allows the enum to be 130 | imported in the same way. 131 | :param primitives: If true, encode the enum values as primitive (more readable, but cannot be restored automatically). 132 | :param with_enum_value: If true, the value of the enum is also exported (it is not used during import, as it should be constant). 133 | """ 134 | from enum import Enum 135 | if not isinstance(obj, Enum): 136 | return obj 137 | if primitives: 138 | return {obj.name: obj.value} 139 | mod = get_module_name_from_object(obj) 140 | representation = dict( 141 | __enum__=dict( 142 | # Don't use __instance_type__ here since enums members cannot be created with __new__ 143 | # Ie we can't rely on class deserialization to read them. 144 | __enum_instance_type__=[mod, type(obj).__name__], 145 | name=obj.name, 146 | ), 147 | ) 148 | if with_enum_value: 149 | representation['__enum__']['value'] = obj.value 150 | return representation 151 | 152 | 153 | def noenum_instance_encode(obj, primitives=False): 154 | if type(obj.__class__).__name__ == 'EnumMeta': 155 | raise NoEnumException(('Trying to encode an object of type {0:} which appears to be ' 156 | 'an enum, but enum support is not enabled, perhaps it is not installed.').format(type(obj))) 157 | return obj 158 | 159 | 160 | def class_instance_encode(obj, primitives=False): 161 | """ 162 | Encodes a class instance to json. Note that it can only be recovered if the environment allows the class to be 163 | imported in the same way. 
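	A sketch with a hypothetical class (the module name is shown as 'mymodule'; the
	real value comes from get_module_name_from_object):

		class Point(object):
			def __init__(self):
				self.x = 1

		class_instance_encode(Point())
		# -> {'__instance_type__': ('mymodule', 'Point'), 'attributes': {'x': 1}}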
164 | """ 165 | if isinstance(obj, list) or isinstance(obj, dict): 166 | return obj 167 | if hasattr(obj, '__class__') and (hasattr(obj, '__dict__') or hasattr(obj, '__slots__')): 168 | if not hasattr(obj, '__new__'): 169 | raise TypeError('class "{0:s}" does not have a __new__ method; '.format(obj.__class__) + 170 | ('perhaps it is an old-style class not derived from `object`; add `object` as a base class to encode it.' 171 | if (sys.version[:2] == '2.') else 'this should not happen in Python3')) 172 | if type(obj) == type(lambda: 0): 173 | raise TypeError('instance "{0:}" of class "{1:}" cannot be encoded because it appears to be a lambda or function.' 174 | .format(obj, obj.__class__)) 175 | try: 176 | obj.__new__(obj.__class__) 177 | except TypeError: 178 | raise TypeError(('instance "{0:}" of class "{1:}" cannot be encoded, perhaps because it\'s __new__ method ' 179 | 'cannot be called because it requires extra parameters').format(obj, obj.__class__)) 180 | mod = get_module_name_from_object(obj) 181 | if mod == 'threading': 182 | # In Python2, threading objects get serialized, which is probably unsafe 183 | return obj 184 | name = obj.__class__.__name__ 185 | if hasattr(obj, '__json_encode__'): 186 | attrs = obj.__json_encode__() 187 | if primitives: 188 | return attrs 189 | else: 190 | return hashodict((('__instance_type__', (mod, name)), ('attributes', attrs))) 191 | dct = hashodict([('__instance_type__',(mod, name))]) 192 | if hasattr(obj, '__slots__'): 193 | slots = obj.__slots__ 194 | if isinstance(slots, str): 195 | slots = [slots] 196 | dct['slots'] = hashodict([]) 197 | for s in slots: 198 | if s == '__dict__': 199 | continue 200 | if s == '__weakref__': 201 | continue 202 | dct['slots'][s] = getattr(obj, s) 203 | if hasattr(obj, '__dict__'): 204 | dct['attributes'] = hashodict(obj.__dict__) 205 | if primitives: 206 | attrs = dct.get('attributes',{}) 207 | attrs.update(dct.get('slots',{})) 208 | return attrs 209 | else: 210 | return dct 211 | return obj 212 | 213 | 214 | def json_complex_encode(obj, primitives=False): 215 | """ 216 | Encode a complex number as a json dictionary of its real and imaginary part. 217 | 218 | :param obj: complex number, e.g. `2+1j` 219 | :return: (dict) json primitives representation of `obj` 220 | """ 221 | if isinstance(obj, complex): 222 | if primitives: 223 | return [obj.real, obj.imag] 224 | else: 225 | return hashodict(__complex__=[obj.real, obj.imag]) 226 | return obj 227 | 228 | 229 | def bytes_encode(obj, primitives=False): 230 | """ 231 | Encode bytes as one of these: 232 | 233 | * A utf8-string with special `__bytes_utf8__` marking, if the bytes are valid utf8 and primitives is False. 234 | * A base64 encoded string of the bytes with special `__bytes_b64__` marking, if the bytes are not utf8, or if primitives is True. 235 | 236 | :param obj: any object, which will be transformed if it is of type bytes 237 | :return: (dict) json primitives representation of `obj` 238 | """ 239 | if isinstance(obj, bytes): 240 | if not is_py3: 241 | return obj 242 | if primitives: 243 | return hashodict(__bytes_b64__=standard_b64encode(obj).decode('ascii')) 244 | else: 245 | try: 246 | return hashodict(__bytes_utf8__=obj.decode('utf-8')) 247 | except UnicodeDecodeError: 248 | return hashodict(__bytes_b64__=standard_b64encode(obj).decode('ascii')) 249 | return obj 250 | 251 | 252 | def numeric_types_encode(obj, primitives=False): 253 | """ 254 | Encode Decimal and Fraction. 255 | 256 | :param primitives: Encode decimals and fractions as standard floats. 
You may lose precision. If you do this, you may need to enable `allow_nan` (decimals always allow NaNs but floats do not). 257 | """ 258 | if isinstance(obj, Decimal): 259 | if primitives: 260 | return float(obj) 261 | else: 262 | return { 263 | '__decimal__': str(obj.canonical()), 264 | } 265 | if isinstance(obj, Fraction): 266 | if primitives: 267 | return float(obj) 268 | else: 269 | return hashodict(( 270 | ('__fraction__', True), 271 | ('numerator', obj.numerator), 272 | ('denominator', obj.denominator), 273 | )) 274 | return obj 275 | 276 | 277 | def pathlib_encode(obj, primitives=False): 278 | from pathlib import Path 279 | if not isinstance(obj, Path): 280 | return obj 281 | 282 | if primitives: 283 | return str(obj) 284 | 285 | return {'__pathlib__': str(obj)} 286 | 287 | def slice_encode(obj, primitives=False): 288 | if not isinstance(obj, slice): 289 | return obj 290 | 291 | if primitives: 292 | return [obj.start, obj.stop, obj.step] 293 | else: 294 | return hashodict(( 295 | ('__slice__', True), 296 | ('start', obj.start), 297 | ('stop', obj.stop), 298 | ('step', obj.step), 299 | )) 300 | 301 | class ClassInstanceEncoder(JSONEncoder): 302 | """ 303 | See `class_instance_encoder`. 304 | """ 305 | # Not covered in tests since `class_instance_encode` is recommended way. 306 | def __init__(self, obj, encode_cls_instances=True, **kwargs): 307 | self.encode_cls_instances = encode_cls_instances 308 | super(ClassInstanceEncoder, self).__init__(obj, **kwargs) 309 | 310 | def default(self, obj, *args, **kwargs): 311 | if self.encode_cls_instances: 312 | obj = class_instance_encode(obj) 313 | return super(ClassInstanceEncoder, self).default(obj, *args, **kwargs) 314 | 315 | 316 | def json_set_encode(obj, primitives=False): 317 | """ 318 | Encode python sets as dictionary with key __set__ and a list of the values. 319 | 320 | Try to sort the set to get a consistent json representation, use arbitrary order if the data is not ordinal. 321 | """ 322 | if isinstance(obj, set): 323 | try: 324 | repr = sorted(obj) 325 | except Exception: 326 | repr = list(obj) 327 | if primitives: 328 | return repr 329 | else: 330 | return hashodict(__set__=repr) 331 | return obj 332 | 333 | 334 | def pandas_encode(obj, primitives=False): 335 | from pandas import DataFrame, Series 336 | if isinstance(obj, DataFrame): 337 | repr = hashodict() 338 | if not primitives: 339 | repr['__pandas_dataframe__'] = hashodict(( 340 | ('column_order', tuple(obj.columns.values)), 341 | ('types', tuple(str(dt) for dt in obj.dtypes)), 342 | )) 343 | repr['index'] = tuple(obj.index.values) 344 | for k, name in enumerate(obj.columns.values): 345 | repr[name] = tuple(obj.iloc[:, k].values) 346 | return repr 347 | if isinstance(obj, Series): 348 | repr = hashodict() 349 | if not primitives: 350 | repr['__pandas_series__'] = hashodict(( 351 | ('name', str(obj.name)), 352 | ('type', str(obj.dtype)), 353 | )) 354 | repr['index'] = tuple(obj.index.values) 355 | repr['data'] = tuple(obj.values) 356 | return repr 357 | return obj 358 | 359 | 360 | def nopandas_encode(obj): 361 | if ('DataFrame' in getattr(obj.__class__, '__name__', '') or 'Series' in getattr(obj.__class__, '__name__', '')) \ 362 | and 'pandas.' 
in getattr(obj.__class__, '__module__', ''): 363 | raise NoPandasException(('Trying to encode an object of type {0:} which appears to be ' 364 | 'a pandas data structure, but pandas support is not enabled, perhaps it is not installed.').format(type(obj))) 365 | return obj 366 | 367 | 368 | def numpy_encode(obj, primitives=False, properties=None): 369 | """ 370 | Encodes numpy `ndarray`s as lists with meta data. 371 | 372 | Encodes numpy scalar types as Python equivalents. Special encoding is not possible, 373 | because int64 (in py2) and float64 (in py2 and py3) are subclasses of primitives, 374 | which never reach the encoder. 375 | 376 | :param primitives: If True, arrays are serialized as (nested) lists without meta info. 377 | """ 378 | from numpy import ndarray, generic 379 | 380 | if isinstance(obj, ndarray): 381 | if primitives: 382 | return obj.tolist() 383 | else: 384 | properties = properties or {} 385 | use_compact = properties.get('ndarray_compact', None) 386 | store_endianness = properties.get('ndarray_store_byteorder', None) 387 | assert store_endianness in [None, 'little', 'big', 'suppress'], \ 388 | 'property ndarray_store_byteorder should be \'little\', \'big\' or \'suppress\' if provided' 389 | json_compression = bool(properties.get('compression', False)) 390 | if use_compact is None and json_compression and not getattr(numpy_encode, '_warned_compact', False): 391 | numpy_encode._warned_compact = True 392 | warnings.warn('storing ndarray in text format while compression is enabled; in the next major version ' 393 | 'of json_tricks, the default when using compression will change to compact mode; to already use ' 394 | 'that smaller format, pass `properties={"ndarray_compact": True}` to json_tricks.dump; ' 395 | 'to silence this warning, pass `properties={"ndarray_compact": False}`; ' 396 | 'see issue https://github.com/mverleg/pyjson_tricks/issues/73', JsonTricksDeprecation) 397 | # The 'ndarray_compact' property may also be an integer, in which case it's the number of 398 | # elements from which compact storage is used. 399 | if isinstance(use_compact, int) and not isinstance(use_compact, bool): 400 | use_compact = obj.size >= use_compact 401 | if use_compact: 402 | # If the overall json file is compressed, then don't compress the array. 403 | data_json = _ndarray_to_bin_str(obj, do_compress=not json_compression, store_endianness=store_endianness) 404 | else: 405 | data_json = obj.tolist() 406 | dct = hashodict(( 407 | ('__ndarray__', data_json), 408 | ('dtype', str(obj.dtype)), 409 | ('shape', obj.shape), 410 | )) 411 | if len(obj.shape) > 1: 412 | dct['Corder'] = obj.flags['C_CONTIGUOUS'] 413 | if use_compact and store_endianness != 'suppress': 414 | dct['endian'] = store_endianness or sys.byteorder 415 | return dct 416 | elif isinstance(obj, generic): 417 | if NumpyEncoder.SHOW_SCALAR_WARNING: 418 | NumpyEncoder.SHOW_SCALAR_WARNING = False 419 | warnings.warn('json-tricks: numpy scalar serialization is experimental and may work differently in future versions') 420 | return obj.item() 421 | return obj 422 | 423 | 424 | def _ndarray_to_bin_str(array, do_compress, store_endianness): 425 | """ 426 | From ndarray to base64 encoded, gzipped binary data.
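	A sketch of the output format (assuming numpy is installed; the 'b64.gz:' header
	is only used when gzip actually makes the buffer meaningfully smaller):

		from numpy import zeros
		_ndarray_to_bin_str(zeros(1024, dtype='uint8'), do_compress=True, store_endianness=None)
		# -> 'b64.gz:...' (all-zero data compresses well, so the gzipped branch wins)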
427 | """ 428 | from base64 import standard_b64encode 429 | assert array.flags['C_CONTIGUOUS'], 'only C memory order is (currently) supported for compact ndarray format' 430 | 431 | original_size = array.size * array.itemsize 432 | header = 'b64:' 433 | if store_endianness in ['little', 'big'] and store_endianness != sys.byteorder: 434 | array = array.byteswap(inplace=False) 435 | data = array.data 436 | if do_compress: 437 | small = gzip_compress(data, compresslevel=9) 438 | if len(small) < 0.9 * original_size and len(small) < original_size - 8: 439 | header = 'b64.gz:' 440 | data = small 441 | data = standard_b64encode(data) 442 | return header + data.decode('ascii') 443 | 444 | 445 | class NumpyEncoder(ClassInstanceEncoder): 446 | """ 447 | JSON encoder for numpy arrays. 448 | """ 449 | SHOW_SCALAR_WARNING = True # show a warning that numpy scalar serialization is experimental 450 | 451 | def default(self, obj, *args, **kwargs): 452 | """ 453 | If input object is a ndarray it will be converted into a dict holding 454 | data type, shape and the data. The object can be restored using json_numpy_obj_hook. 455 | """ 456 | warnings.warn('`NumpyEncoder` is deprecated, use `numpy_encode`', JsonTricksDeprecation) 457 | obj = numpy_encode(obj) 458 | return super(NumpyEncoder, self).default(obj, *args, **kwargs) 459 | 460 | 461 | def nonumpy_encode(obj): 462 | """ 463 | Raises an error for numpy arrays. 464 | """ 465 | if 'ndarray' in getattr(obj.__class__, '__name__', '') and 'numpy.' in getattr(obj.__class__, '__module__', ''): 466 | raise NoNumpyException(('Trying to encode an object of type {0:} which appears to be ' 467 | 'a pandas data stucture, but pandas support is not enabled, perhaps it is not installed.').format(type(obj))) 468 | return obj 469 | 470 | 471 | class NoNumpyEncoder(JSONEncoder): 472 | """ 473 | See `nonumpy_encode`. 474 | """ 475 | def default(self, obj, *args, **kwargs): 476 | warnings.warn('`NoNumpyEncoder` is deprecated, use `nonumpy_encode`', JsonTricksDeprecation) 477 | obj = nonumpy_encode(obj) 478 | return super(NoNumpyEncoder, self).default(obj, *args, **kwargs) 479 | -------------------------------------------------------------------------------- /json_tricks/nonp.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from json import loads as json_loads 3 | from os import fsync 4 | from sys import exc_info 5 | 6 | from json_tricks.utils import is_py3, dict_default, gzip_compress, gzip_decompress, JsonTricksDeprecation 7 | from .utils import str_type, NoNumpyException # keep 'unused' imports 8 | from .comment import strip_comments # keep 'unused' imports 9 | #TODO @mark: imports removed? 
10 | from .encoders import TricksEncoder, json_date_time_encode, \ 11 | class_instance_encode, json_complex_encode, json_set_encode, numeric_types_encode, numpy_encode, \ 12 | nonumpy_encode, nopandas_encode, pandas_encode, noenum_instance_encode, \ 13 | enum_instance_encode, pathlib_encode, bytes_encode, slice_encode # keep 'unused' imports 14 | from .decoders import TricksPairHook, \ 15 | json_date_time_hook, ClassInstanceHook, \ 16 | json_complex_hook, json_set_hook, numeric_types_hook, json_numpy_obj_hook, \ 17 | json_nonumpy_obj_hook, \ 18 | nopandas_hook, pandas_hook, EnumInstanceHook, \ 19 | noenum_hook, pathlib_hook, nopathlib_hook, json_bytes_hook, slice_hook # keep 'unused' imports 20 | 21 | 22 | ENCODING = 'UTF-8' 23 | 24 | 25 | _cih_instance = ClassInstanceHook() 26 | _eih_instance = EnumInstanceHook() 27 | 28 | DEFAULT_ENCODERS = [ 29 | json_date_time_encode, 30 | json_complex_encode, 31 | json_set_encode, 32 | numeric_types_encode, 33 | class_instance_encode, 34 | bytes_encode, 35 | slice_encode, 36 | ] 37 | 38 | DEFAULT_HOOKS = [ 39 | json_date_time_hook, 40 | json_complex_hook, 41 | json_set_hook, 42 | numeric_types_hook, 43 | _cih_instance, 44 | json_bytes_hook, 45 | slice_hook, 46 | ] 47 | 48 | 49 | #TODO @mark: add properties to all built-in encoders (for speed - but it should keep working without) 50 | try: 51 | import enum 52 | except ImportError: 53 | DEFAULT_ENCODERS = [noenum_instance_encode,] + DEFAULT_ENCODERS 54 | DEFAULT_HOOKS = [noenum_hook,] + DEFAULT_HOOKS 55 | else: 56 | DEFAULT_ENCODERS = [enum_instance_encode,] + DEFAULT_ENCODERS 57 | DEFAULT_HOOKS = [_eih_instance,] + DEFAULT_HOOKS 58 | 59 | try: 60 | import numpy 61 | except ImportError: 62 | DEFAULT_ENCODERS = [nonumpy_encode,] + DEFAULT_ENCODERS 63 | DEFAULT_HOOKS = [json_nonumpy_obj_hook,] + DEFAULT_HOOKS 64 | else: 65 | # numpy encode needs to be before complex 66 | DEFAULT_ENCODERS = [numpy_encode,] + DEFAULT_ENCODERS 67 | DEFAULT_HOOKS = [json_numpy_obj_hook,] + DEFAULT_HOOKS 68 | 69 | try: 70 | import pandas 71 | except ImportError: 72 | DEFAULT_ENCODERS = [nopandas_encode,] + DEFAULT_ENCODERS 73 | DEFAULT_HOOKS = [nopandas_hook,] + DEFAULT_HOOKS 74 | else: 75 | DEFAULT_ENCODERS = [pandas_encode,] + DEFAULT_ENCODERS 76 | DEFAULT_HOOKS = [pandas_hook,] + DEFAULT_HOOKS 77 | 78 | try: 79 | import pathlib 80 | except ImportError: 81 | # No need to include a "nopathlib_encode" hook since we would not encounter 82 | # the Path object if pathlib isn't available. However, we *could* encounter 83 | # a serialized Path object (produced by a version of Python with pathlib). 84 | DEFAULT_HOOKS = [nopathlib_hook,] + DEFAULT_HOOKS 85 | else: 86 | DEFAULT_ENCODERS = [pathlib_encode,] + DEFAULT_ENCODERS 87 | DEFAULT_HOOKS = [pathlib_hook,] + DEFAULT_HOOKS 88 | 89 | 90 | DEFAULT_NONP_ENCODERS = [nonumpy_encode,] + DEFAULT_ENCODERS # DEPRECATED 91 | DEFAULT_NONP_HOOKS = [json_nonumpy_obj_hook,] + DEFAULT_HOOKS # DEPRECATED 92 | 93 | 94 | def dumps(obj, sort_keys=None, cls=None, obj_encoders=DEFAULT_ENCODERS, extra_obj_encoders=(), 95 | primitives=False, compression=None, allow_nan=False, conv_str_byte=False, fallback_encoders=(), 96 | properties=None, **jsonkwargs): 97 | """ 98 | Convert a nested data structure to a json string. 99 | 100 | :param obj: The Python object to convert. 101 | :param sort_keys: Keep this False if you want order to be preserved. 102 | :param cls: The json encoder class to use, defaults to `TricksEncoder`.
103 | :param obj_encoders: Iterable of encoders to use to convert arbitrary objects into json-able primitives. 104 | :param extra_obj_encoders: Like `obj_encoders` but on top of them: use this to add encoders without replacing defaults. Since v3.5 these happen before default encoders. 105 | :param fallback_encoders: These are extra `obj_encoders` that 1) are run after all others and 2) only run if the object hasn't yet been changed. 106 | :param allow_nan: Allow NaN and Infinity values, which is a (useful) violation of the JSON standard (default False). 107 | :param conv_str_byte: Try to automatically convert between strings and bytes (assuming utf-8) (default False). 108 | :param properties: A dictionary of properties that is passed to each encoder that will accept it. 109 | :return: The string containing the json-encoded version of obj. 110 | 111 | Other arguments are passed on to `cls`. Note that `sort_keys` should be false if you want to preserve order. 112 | """ 113 | if not hasattr(extra_obj_encoders, '__iter__'): 114 | raise TypeError('`extra_obj_encoders` should be a tuple in `json_tricks.dump(s)`') 115 | encoders = tuple(extra_obj_encoders) + tuple(obj_encoders) 116 | properties = properties or {} 117 | dict_default(properties, 'primitives', primitives) 118 | dict_default(properties, 'compression', compression) 119 | dict_default(properties, 'allow_nan', allow_nan) 120 | if cls is None: 121 | cls = TricksEncoder 122 | combined_encoder = cls(sort_keys=sort_keys, obj_encoders=encoders, allow_nan=allow_nan, 123 | primitives=primitives, fallback_encoders=fallback_encoders, 124 | properties=properties, **jsonkwargs) 125 | txt = combined_encoder.encode(obj) 126 | if not is_py3 and isinstance(txt, str): 127 | txt = unicode(txt, ENCODING) 128 | if not compression: 129 | return txt 130 | if compression is True: 131 | compression = 5 132 | txt = txt.encode(ENCODING) 133 | gzstring = gzip_compress(txt, compresslevel=compression) 134 | return gzstring 135 | 136 | 137 | def dump(obj, fp, sort_keys=None, cls=None, obj_encoders=DEFAULT_ENCODERS, extra_obj_encoders=(), 138 | primitives=False, compression=None, force_flush=False, allow_nan=False, conv_str_byte=False, 139 | fallback_encoders=(), properties=None, **jsonkwargs): 140 | """ 141 | Convert a nested data structure to a json string and write it to a file. 142 | 143 | :param fp: File handle or path to write to. 144 | :param compression: The gzip compression level, or None for no compression. 145 | :param force_flush: If True, flush the file handle after writing, and where possible also at the operating system level (default False). 146 | 147 | The other arguments are identical to `dumps`.
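	A short sketch (the file path here is hypothetical):

		from json_tricks import dump, load

		dump({'a': 1}, '/tmp/data.json.gz', compression=6)
		assert load('/tmp/data.json.gz') == {'a': 1}   # gzip is autodetected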
148 | """ 149 | if (isinstance(obj, str_type) or hasattr(obj, 'write')) and isinstance(fp, (list, dict)): 150 | raise ValueError('json-tricks dump arguments are in the wrong order: provide the data to be serialized before file handle') 151 | txt = dumps(obj, sort_keys=sort_keys, cls=cls, obj_encoders=obj_encoders, extra_obj_encoders=extra_obj_encoders, 152 | primitives=primitives, compression=compression, allow_nan=allow_nan, conv_str_byte=conv_str_byte, 153 | fallback_encoders=fallback_encoders, properties=properties, **jsonkwargs) 154 | if isinstance(fp, str_type): 155 | if compression: 156 | fh = open(fp, 'wb+') 157 | else: 158 | fh = open(fp, 'w+') 159 | else: 160 | fh = fp 161 | if conv_str_byte: 162 | try: 163 | fh.write(b'') 164 | except TypeError: 165 | pass 166 | # if not isinstance(txt, str_type): 167 | # # Cannot write bytes, so must be in text mode, but we didn't get a text 168 | # if not compression: 169 | # txt = txt.decode(ENCODING) 170 | else: 171 | try: 172 | fh.write(u'') 173 | except TypeError: 174 | if isinstance(txt, str_type): 175 | txt = txt.encode(ENCODING) 176 | try: 177 | if compression and 'b' not in getattr(fh, 'mode', 'b?') and not isinstance(txt, str_type): 178 | raise IOError('If compression is enabled, the file must be opened in binary mode.') 179 | try: 180 | fh.write(txt) 181 | except TypeError as err: 182 | err.args = (err.args[0] + '. A possible reason is that the file is not opened in binary mode; ' 183 | 'be sure to set file mode to something like "wb".',) 184 | raise 185 | finally: 186 | if force_flush: 187 | fh.flush() 188 | try: 189 | if fh.fileno() is not None: 190 | fsync(fh.fileno()) 191 | except (ValueError,): 192 | pass 193 | if isinstance(fp, str_type): 194 | fh.close() 195 | return txt 196 | 197 | 198 | def loads(string, preserve_order=True, ignore_comments=None, decompression=None, obj_pairs_hooks=DEFAULT_HOOKS, 199 | extra_obj_pairs_hooks=(), cls_lookup_map=None, allow_duplicates=True, conv_str_byte=False, 200 | properties=None, **jsonkwargs): 201 | """ 202 | Convert a nested data structure to a json string. 203 | 204 | :param string: The string containing a json encoded data structure. 205 | :param decode_cls_instances: True to attempt to decode class instances (requires the environment to be similar the the encoding one). 206 | :param preserve_order: Whether to preserve order by using OrderedDicts or not. 207 | :param ignore_comments: Remove comments (starting with # or //). By default (`None`), try without comments first, and re-try with comments upon failure. 208 | :param decompression: True to use gzip decompression, False to use raw data, None to automatically determine (default). Assumes utf-8 encoding! 209 | :param obj_pairs_hooks: A list of dictionary hooks to apply. 210 | :param extra_obj_pairs_hooks: Like `obj_pairs_hooks` but on top of them: use this to add hooks without replacing defaults. Since v3.5 these happen before default hooks. 211 | :param cls_lookup_map: If set to a dict, for example ``globals()``, then classes encoded from __main__ are looked up this dict. 212 | :param allow_duplicates: If set to False, an error will be raised when loading a json-map that contains duplicate keys. 213 | :param parse_float: A function to parse strings to integers (e.g. Decimal). There is also `parse_int`. 214 | :param conv_str_byte: Try to automatically convert between strings and bytes (assuming utf-8) (default False). 215 | :return: The string containing the json-encoded version of obj. 
216 | 217 | Other arguments are passed on to `json.loads`. 218 | """ 219 | if not hasattr(extra_obj_pairs_hooks, '__iter__'): 220 | raise TypeError('`extra_obj_pairs_hooks` should be a tuple in `json_tricks.load(s)`') 221 | if decompression is None: 222 | decompression = isinstance(string, bytes) and string[:2] == b'\x1f\x8b' 223 | if decompression: 224 | string = gzip_decompress(string).decode(ENCODING) 225 | if not isinstance(string, str_type): 226 | if conv_str_byte: 227 | string = string.decode(ENCODING) 228 | else: 229 | raise TypeError(('The input was of non-string type "{0:}" in `json_tricks.load(s)`. ' 230 | 'Bytes cannot be automatically decoded since the encoding is not known. Recommended ' 231 | 'way is to instead decode the bytes to a string and pass that string to `load(s)`, ' 232 | 'for example bytevar.decode("utf-8") if utf-8 is the encoding. Alternatively you can ' 233 | 'force an attempt by passing conv_str_byte=True, but this may cause decoding issues.') 234 | .format(type(string))) 235 | properties = properties or {} 236 | dict_default(properties, 'preserve_order', preserve_order) 237 | dict_default(properties, 'ignore_comments', ignore_comments) 238 | dict_default(properties, 'decompression', decompression) 239 | dict_default(properties, 'cls_lookup_map', cls_lookup_map) 240 | dict_default(properties, 'allow_duplicates', allow_duplicates) 241 | hooks = tuple(extra_obj_pairs_hooks) + tuple(obj_pairs_hooks) 242 | hook = TricksPairHook(ordered=preserve_order, obj_pairs_hooks=hooks, allow_duplicates=allow_duplicates, properties=properties) 243 | if ignore_comments is None: 244 | try: 245 | # first try to parse without stripping comments 246 | return _strip_loads(string, hook, False, **jsonkwargs) 247 | except ValueError: 248 | # if this fails, re-try parsing after stripping comments 249 | result = _strip_loads(string, hook, True, **jsonkwargs) 250 | if not getattr(loads, '_ignore_comments_warned', False): 251 | warnings.warn('`json_tricks.load(s)` stripped some comments, but `ignore_comments` was ' 252 | 'not passed; in the next major release, the behaviour when `ignore_comments` is not ' 253 | 'passed will change; it is recommended to explicitly pass `ignore_comments=True` if ' 254 | 'you want to strip comments; see https://github.com/mverleg/pyjson_tricks/issues/74', 255 | JsonTricksDeprecation) 256 | loads._ignore_comments_warned = True 257 | return result 258 | if ignore_comments: 259 | return _strip_loads(string, hook, True, **jsonkwargs) 260 | return _strip_loads(string, hook, False, **jsonkwargs) 261 | 262 | 263 | def _strip_loads(string, object_pairs_hook, ignore_comments_bool, **jsonkwargs): 264 | if ignore_comments_bool: 265 | string = strip_comments(string) 266 | return json_loads(string, object_pairs_hook=object_pairs_hook, **jsonkwargs) 267 | 268 | 269 | def load(fp, preserve_order=True, ignore_comments=None, decompression=None, obj_pairs_hooks=DEFAULT_HOOKS, 270 | extra_obj_pairs_hooks=(), cls_lookup_map=None, allow_duplicates=True, conv_str_byte=False, 271 | properties=None, **jsonkwargs): 272 | """ 273 | Read a json string from a file and convert it to a nested data structure. 274 | 275 | :param fp: File handle or path to load from. 276 | 277 | The other arguments are identical to loads. 278 | """ 279 | try: 280 | if isinstance(fp, str_type): 281 | if decompression is not None: 282 | open_binary = bool(decompression) 283 | else: 284 | with open(fp, 'rb') as fh: 285 | # This attempts to detect gzip mode; gzip should always 286 | # have this header, and text json can't have it.
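					# (The gzip magic number is the two bytes 0x1f 0x8b; the same
					# check is used in `loads` above when `decompression` is None.)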
287 | open_binary = (fh.read(2) == b'\x1f\x8b') 288 | with open(fp, 'rb' if open_binary else 'r') as fh: 289 | string = fh.read() 290 | else: 291 | string = fp.read() 292 | except UnicodeDecodeError as err: 293 | # todo: not covered in tests, is it relevant? 294 | raise Exception('There was a problem decoding the file content. A possible reason is that the file is not ' + 295 | 'opened in binary mode; be sure to set file mode to something like "rb".').with_traceback(exc_info()[2]) 296 | return loads(string, preserve_order=preserve_order, ignore_comments=ignore_comments, decompression=decompression, 297 | obj_pairs_hooks=obj_pairs_hooks, extra_obj_pairs_hooks=extra_obj_pairs_hooks, cls_lookup_map=cls_lookup_map, 298 | allow_duplicates=allow_duplicates, conv_str_byte=conv_str_byte, properties=properties, **jsonkwargs) 299 | 300 | 301 | -------------------------------------------------------------------------------- /json_tricks/np.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | This file exists for backward compatibility reasons. 4 | """ 5 | 6 | import warnings 7 | from .nonp import NoNumpyException, DEFAULT_ENCODERS, DEFAULT_HOOKS, dumps, dump, loads, load # keep 'unused' imports 8 | from .utils import hashodict, NoPandasException, JsonTricksDeprecation 9 | from .comment import strip_comment_line_with_symbol, strip_comments # keep 'unused' imports 10 | from .encoders import TricksEncoder, json_date_time_encode, class_instance_encode, ClassInstanceEncoder, \ 11 | numpy_encode, NumpyEncoder # keep 'unused' imports 12 | from .decoders import DuplicateJsonKeyException, TricksPairHook, json_date_time_hook, ClassInstanceHook, \ 13 | json_complex_hook, json_set_hook, json_numpy_obj_hook, json_bytes_hook # keep 'unused' imports 14 | 15 | try: 16 | import numpy 17 | except ImportError: 18 | raise NoNumpyException('Could not load numpy, maybe it is not installed? If you do not want to use numpy encoding ' 19 | 'or decoding, you can import the functions from json_tricks.nonp instead, which do not need numpy.') 20 | 21 | 22 | warnings.warn('`json_tricks.np` is deprecated, you can import directly from `json_tricks`', JsonTricksDeprecation) 23 | 24 | 25 | DEFAULT_NP_ENCODERS = [numpy_encode,] + DEFAULT_ENCODERS # DEPRECATED 26 | DEFAULT_NP_HOOKS = [json_numpy_obj_hook,] + DEFAULT_HOOKS # DEPRECATED 27 | 28 | 29 | -------------------------------------------------------------------------------- /json_tricks/np_utils.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | This file exists for backward compatibility reasons. 4 | """ 5 | 6 | from .utils import hashodict, get_scalar_repr, encode_scalars_inplace 7 | from .utils import NoNumpyException 8 | from . 
import np 8 | 9 | 10 | # try: 11 | # from numpy import generic, complex64, complex128 12 | # except ImportError: 13 | # raise NoNumpyException('Could not load numpy, maybe it is not installed?') 14 | 15 | 16 | -------------------------------------------------------------------------------- /json_tricks/utils.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import io 3 | import warnings 4 | from collections import OrderedDict 5 | from functools import partial 6 | from importlib import import_module 7 | from sys import version_info, version 8 | 9 | 10 | class JsonTricksDeprecation(UserWarning): 11 | """ Special deprecation warning because the built-in one is ignored by default """ 12 | def __init__(self, msg): 13 | super(JsonTricksDeprecation, self).__init__(msg) 14 | 15 | 16 | class hashodict(OrderedDict): 17 | """ 18 | This dictionary is hashable. It should NOT be mutated, or all kinds of weird 19 | bugs may appear. This is not enforced though, it's only used for encoding. 20 | """ 21 | def __hash__(self): 22 | return hash(frozenset(self.items())) 23 | 24 | 25 | try: 26 | from inspect import signature 27 | except ImportError: 28 | try: 29 | from inspect import getfullargspec 30 | except ImportError: 31 | from inspect import getargspec, isfunction 32 | def get_arg_names(callable): 33 | if type(callable) == partial and version_info[0] == 2: 34 | if not hasattr(get_arg_names, '__warned_partial_argspec'): 35 | get_arg_names.__warned_partial_argspec = True 36 | warnings.warn("'functools.partial' and 'inspect.getargspec' are not compatible in this Python version; " 37 | "ignoring the 'partial' wrapper when inspecting arguments of {}, which can lead to problems".format(callable)) 38 | return set(getargspec(callable.func).args) 39 | if isfunction(callable): 40 | argspec = getargspec(callable) 41 | else: 42 | argspec = getargspec(callable.__call__) 43 | return set(argspec.args) 44 | else: 45 | #todo: this is not covered in test case (py 3+ uses `signature`, py2 `getfullargspec`); consider removing it 46 | def get_arg_names(callable): 47 | argspec = getfullargspec(callable) 48 | return set(argspec.args) | set(argspec.kwonlyargs) 49 | else: 50 | def get_arg_names(callable): 51 | sig = signature(callable) 52 | return set(sig.parameters.keys()) 53 | 54 | 55 | def filtered_wrapper(encoder): 56 | """ 57 | Filter kwargs passed to encoder. 58 | """ 59 | if hasattr(encoder, "default"): 60 | encoder = encoder.default 61 | elif not hasattr(encoder, '__call__'): 62 | raise TypeError('`obj_encoder` {0:} does not have `default` method and is not callable'.format(encoder)) 63 | names = get_arg_names(encoder) 64 | 65 | def wrapper(*args, **kwargs): 66 | return encoder(*args, **{k: v for k, v in kwargs.items() if k in names}) 67 | return wrapper 68 | 69 | 70 | class NoNumpyException(Exception): 71 | """ Trying to use numpy features, but numpy cannot be found. """ 72 | 73 | 74 | class NoPandasException(Exception): 75 | """ Trying to use pandas features, but pandas cannot be found. """ 76 | 77 | 78 | class NoEnumException(Exception): 79 | """ Trying to use enum features, but enum cannot be found. """ 80 | 81 | 82 | class NoPathlibException(Exception): 83 | """ Trying to use pathlib features, but pathlib cannot be found.
""" 84 | 85 | 86 | class ClassInstanceHookBase(object): 87 | def get_cls_from_instance_type(self, mod, name, cls_lookup_map): 88 | Cls = ValueError() 89 | if mod is None: 90 | try: 91 | Cls = getattr((__import__('__main__')), name) 92 | except (ImportError, AttributeError): 93 | if name not in cls_lookup_map: 94 | raise ImportError(('class {0:s} seems to have been exported from the main file, which means ' 95 | 'it has no module/import path set; you need to provide loads argument' 96 | '`cls_lookup_map={{"{0}": Class}}` to locate the class').format(name)) 97 | Cls = cls_lookup_map[name] 98 | else: 99 | imp_err = None 100 | try: 101 | module = import_module('{0:}'.format(mod, name)) 102 | except ImportError as err: 103 | imp_err = ('encountered import error "{0:}" while importing "{1:}" to decode a json file; perhaps ' 104 | 'it was encoded in a different environment where {1:}.{2:} was available').format(err, mod, name) 105 | else: 106 | if hasattr(module, name): 107 | Cls = getattr(module, name) 108 | else: 109 | imp_err = 'imported "{0:}" but could find "{1:}" inside while decoding a json file (found {2:})'.format( 110 | module, name, ', '.join(attr for attr in dir(module) if not attr.startswith('_'))) 111 | if imp_err: 112 | Cls = cls_lookup_map.get(name, None) 113 | if Cls is None: 114 | raise ImportError('{}; add the class to `cls_lookup_map={{"{}": Class}}` argument'.format(imp_err, name)) 115 | return Cls 116 | 117 | 118 | def get_scalar_repr(npscalar): 119 | return hashodict(( 120 | ('__ndarray__', npscalar.item()), 121 | ('dtype', str(npscalar.dtype)), 122 | ('shape', ()), 123 | )) 124 | 125 | 126 | def encode_scalars_inplace(obj): 127 | """ 128 | Searches a data structure of lists, tuples and dicts for numpy scalars 129 | and replaces them by their dictionary representation, which can be loaded 130 | by json-tricks. This happens in-place (the object is changed, use a copy). 131 | """ 132 | from numpy import generic, complex64, complex128 133 | if isinstance(obj, (generic, complex64, complex128)): 134 | return get_scalar_repr(obj) 135 | if isinstance(obj, dict): 136 | for key, val in tuple(obj.items()): 137 | obj[key] = encode_scalars_inplace(val) 138 | return obj 139 | if isinstance(obj, list): 140 | for k, val in enumerate(obj): 141 | obj[k] = encode_scalars_inplace(val) 142 | return obj 143 | if isinstance(obj, (tuple, set)): 144 | return type(obj)(encode_scalars_inplace(val) for val in obj) 145 | return obj 146 | 147 | 148 | def encode_intenums_inplace(obj): 149 | """ 150 | Searches a data structure of lists, tuples and dicts for IntEnum 151 | and replaces them by their dictionary representation, which can be loaded 152 | by json-tricks. This happens in-place (the object is changed, use a copy). 
153 | """ 154 | from enum import IntEnum 155 | from json_tricks import encoders 156 | if isinstance(obj, IntEnum): 157 | return encoders.enum_instance_encode(obj) 158 | if isinstance(obj, dict): 159 | for key, val in obj.items(): 160 | obj[key] = encode_intenums_inplace(val) 161 | return obj 162 | if isinstance(obj, list): 163 | for index, val in enumerate(obj): 164 | obj[index] = encode_intenums_inplace(val) 165 | return obj 166 | if isinstance(obj, (tuple, set)): 167 | return type(obj)(encode_intenums_inplace(val) for val in obj) 168 | return obj 169 | 170 | 171 | def get_module_name_from_object(obj): 172 | mod = obj.__class__.__module__ 173 | if mod == '__main__': 174 | mod = None 175 | warnings.warn(('class {0:} seems to have been defined in the main file; unfortunately this means' 176 | ' that it\'s module/import path is unknown, so you might have to provide cls_lookup_map when ' 177 | 'decoding').format(obj.__class__)) 178 | return mod 179 | 180 | 181 | def nested_index(collection, indices): 182 | for i in indices: 183 | collection = collection[i] 184 | return collection 185 | 186 | 187 | def dict_default(dictionary, key, default_value): 188 | if key not in dictionary: 189 | dictionary[key] = default_value 190 | 191 | 192 | def gzip_compress(data, compresslevel): 193 | """ 194 | Do gzip compression, without the timestamp. Similar to gzip.compress, but without timestamp, and also before py3.2. 195 | """ 196 | buf = io.BytesIO() 197 | with gzip.GzipFile(fileobj=buf, mode='wb', compresslevel=compresslevel, mtime=0) as fh: 198 | fh.write(data) 199 | return buf.getvalue() 200 | 201 | 202 | def gzip_decompress(data): 203 | """ 204 | Do gzip decompression, without the timestamp. Just like gzip.decompress, but that's py3.2+. 205 | """ 206 | with gzip.GzipFile(fileobj=io.BytesIO(data)) as f: 207 | return f.read() 208 | 209 | 210 | is_py3 = (version[:2] == '3.') 211 | str_type = str if is_py3 else (basestring, unicode,) 212 | 213 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | [metadata] 4 | description-file = README.rst 5 | license_file = LICENSE.txt 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from sys import version_info 4 | import warnings 5 | 6 | from setuptools import setup 7 | 8 | with open('README.md', 'r') as fh: 9 | readme = fh.read() 10 | 11 | # with open('json_tricks/_version.py', 'r') as fh: 12 | # version = fh.read().strip() 13 | from json_tricks._version import VERSION 14 | 15 | requires = [] 16 | if version_info < (2, 7, 0): 17 | requires.append('ordereddict') 18 | 19 | if (version_info[0] == 2 and version_info[1] < 7) or \ 20 | (version_info[0] == 3 and version_info[1] < 4) or \ 21 | version_info[0] not in (2, 3): 22 | raise warnings.warn('`json_tricks` does not support Python version {}.{}' 23 | .format(version_info[0], version_info[1])) 24 | 25 | setup( 26 | name='json_tricks', 27 | description='Extra features for Python\'s JSON: comments, order, numpy, ' 28 | 'pandas, datetimes, and many more! 
Simple but customizable.', 29 | long_description_content_type='text/markdown', 30 | long_description=readme, 31 | url='https://github.com/mverleg/pyjson_tricks', 32 | author='Mark V', 33 | maintainer='Mark V', 34 | author_email='markv.nl.dev@gmail.com', 35 | license='Revised BSD License (LICENSE.txt)', 36 | keywords=['json', 'numpy', 'OrderedDict', 'comments', 'pandas', 'pytz', 37 | 'enum', 'encode', 'decode', 'serialize', 'deserialize'], 38 | version=VERSION, 39 | packages=['json_tricks'], 40 | package_data=dict( 41 | json_tricks=['LICENSE.txt', 'README.md', 'VERSION'], 42 | # tests=['tests/*.py'], 43 | ), 44 | # include_package_data=True, 45 | zip_safe=True, 46 | classifiers=[ 47 | 'Development Status :: 5 - Production/Stable', 48 | 'Development Status :: 6 - Mature', 49 | 'Intended Audience :: Developers', 50 | 'Natural Language :: English', 51 | 'License :: OSI Approved :: BSD License', 52 | 'Operating System :: OS Independent', 53 | 'Programming Language :: Python', 54 | 'Programming Language :: Python :: 2', 55 | 'Programming Language :: Python :: 2.7', 56 | 'Programming Language :: Python :: 3', 57 | 'Programming Language :: Python :: 3.4', 58 | 'Programming Language :: Python :: 3.5', 59 | 'Programming Language :: Python :: 3.6', 60 | 'Programming Language :: Python :: 3.7', 61 | 'Programming Language :: Python :: 3.8', 62 | 'Programming Language :: Python :: 3.9', 63 | 'Programming Language :: Python :: Implementation :: CPython', 64 | 'Programming Language :: Python :: Implementation :: PyPy', 65 | 'Topic :: Software Development :: Libraries :: Python Modules', 66 | # 'Topic :: Utilities', 67 | ], 68 | install_requires=requires, 69 | ) 70 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mverleg/pyjson_tricks/8c3817043250e501f9fefd4dff6826dd3b2960eb/tests/__init__.py -------------------------------------------------------------------------------- /tests/run_locally.md: -------------------------------------------------------------------------------- 1 | # How to run tests locally 2 | 3 | If you want to, you can run the automated tests before using the code. 4 | 5 | ## Note 6 | 7 | The tests run automatically on the supported versions of Python for 8 | every commit. You can check the Github Actions result at the bottom of 9 | the README on Github. 10 | 11 | ## Run current verison 12 | 13 | To run py.test for current Python version, install requirements: 14 | 15 | pip install numpy pytz pandas pathlib ordereddict pytest-coverage 16 | 17 | To run all the tests (requiring you to have all the packages mentioned): 18 | 19 | py.test --continue-on-collection-errors 20 | 21 | Using this flag, you will get a failure message when e.g. `pandas` is 22 | missing, but the other tests will still run. 23 | 24 | ## Example output 25 | 26 | Output if all tests pass: 27 | 28 | platform linux -- Python 3.6.8, pytest-5.3.1, py-1.8.1, pluggy-0.13.1 29 | rootdir: /home/mark/pyjson_tricks 30 | plugins: cov-2.10.1 31 | collected 80 items 32 | 33 | tests/test_bare.py ....................................... [ 48%] 34 | tests/test_enum.py ....... [ 57%] 35 | tests/test_meta.py . [ 58%] 36 | tests/test_np.py ....................... [ 87%] 37 | tests/test_pandas.py ... [ 91%] 38 | tests/test_pathlib.py . [ 92%] 39 | tests/test_tz.py ... [ 96%] 40 | tests/test_utils.py ... 
[100%] 41 | 42 | 80 passed, 4 warnings in 0.41s -------------------------------------------------------------------------------- /tests/test_bare.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from collections import OrderedDict 5 | from datetime import datetime, time, date, timedelta 6 | from decimal import Decimal 7 | from fractions import Fraction 8 | from functools import partial 9 | from io import BytesIO, StringIO 10 | from math import pi, exp 11 | from os.path import join 12 | from tempfile import mkdtemp 13 | from warnings import catch_warnings, simplefilter 14 | 15 | import pytest 16 | from pytest import raises, fail, warns 17 | 18 | from json_tricks import fallback_ignore_unknown, DuplicateJsonKeyException 19 | from json_tricks.nonp import strip_comments, dump, dumps, load, loads, \ 20 | ENCODING 21 | from json_tricks.utils import is_py3, gzip_compress, JsonTricksDeprecation, str_type 22 | from .test_class import MyTestCls, CustomEncodeCls, SubClass, SuperClass, SlotsBase, SlotsDictABC, SlotsStr, \ 23 | SlotsABCDict, SlotsABC 24 | 25 | nonpdata = { 26 | 'my_array': list(range(20)), 27 | 'my_map': dict((chr(k), k) for k in range(97, 123)), 28 | 'my_string': 'Hello world!', 29 | 'my_float': 3.1415, 30 | 'my_int': 42 31 | } 32 | 33 | 34 | def test_dumps_loads(): 35 | json = dumps(nonpdata) 36 | data2 = loads(json) 37 | assert nonpdata == data2 38 | 39 | 40 | def test_file_handle(): 41 | path = join(mkdtemp(), 'pytest-nonp.json') 42 | with open(path, 'wb+') as fh: 43 | dump(nonpdata, fh, compression=6) 44 | with open(path, 'rb') as fh: 45 | data2 = load(fh, decompression=True) 46 | assert data2 == nonpdata 47 | with open(path, 'rb') as fh: 48 | data3 = load(fh, decompression=None) # test autodetect gzip 49 | assert data3 == nonpdata 50 | 51 | 52 | def test_mix_handle_str_path(): 53 | # Based on issue 68 54 | data = {"fun": 1.1234567891234567e-13} 55 | path = join(mkdtemp(), 'test_mix_handle_str_path.json') 56 | dump(data, open(path, "w")) 57 | back = load(path) 58 | assert data == back 59 | 60 | 61 | def test_wrong_arg_order(): 62 | # Based on a problem from https://github.com/mverleg/array_storage_benchmark 63 | li = [[1.0, 2.0], [3.0, 4.0]] 64 | map = {"a": 1} 65 | path = join(mkdtemp(), 'pytest-np.json.gz') 66 | msg = 'json-tricks dump arguments are in the wrong order: provide the data to be serialized before file handle' 67 | with raises(ValueError) as ex: 68 | with open(path, 'wb+') as fh: 69 | dump(fh, li) 70 | assert msg in ex.value.args[0] 71 | with raises(ValueError) as ex: 72 | dump(path, li) 73 | assert msg in ex.value.args[0] 74 | with raises(ValueError) as ex: 75 | with open(path, 'wb+') as fh: 76 | dump(fh, map) 77 | assert msg in ex.value.args[0] 78 | with raises(ValueError) as ex: 79 | dump(path, map) 80 | assert msg in ex.value.args[0] 81 | 82 | 83 | def test_mix_handle_bin_path(): 84 | # Based on issue 68 85 | data = {"fun": 1.1234567891234567e-13} 86 | path = join(mkdtemp(), 'test_mix_handle_bin_path.json') 87 | if is_py3: 88 | with raises(TypeError): 89 | dump(data, open(path, "wb")) 90 | 91 | 92 | def test_mix_path_handle(): 93 | # Based on issue 68 94 | data = {"fun": 1.1234567891234567e-13} 95 | path = join(mkdtemp(), 'test_mix_path_handle.json') 96 | dump(data, path) 97 | 98 | 99 | def test_file_handle_types(): 100 | path = join(mkdtemp(), 'pytest-text.json') 101 | for conv_str_byte in [True, False]: 102 | with open(path, 'w+') as fh: 103 | dump(nonpdata, fh, 
compression=False, conv_str_byte=conv_str_byte) 104 | with open(path, 'r') as fh: 105 | assert load(fh, conv_str_byte=conv_str_byte) == nonpdata 106 | with StringIO() as fh: 107 | dump(nonpdata, fh, conv_str_byte=conv_str_byte) 108 | fh.seek(0) 109 | assert load(fh, conv_str_byte=conv_str_byte) == nonpdata 110 | with BytesIO() as fh: 111 | with raises(TypeError): 112 | dump(nonpdata, fh) 113 | with BytesIO() as fh: 114 | dump(nonpdata, fh, conv_str_byte=True) 115 | fh.seek(0) 116 | assert load(fh, conv_str_byte=True) == nonpdata 117 | if is_py3: 118 | with open(path, 'w+') as fh: 119 | with raises(IOError): 120 | dump(nonpdata, fh, compression=6) 121 | 122 | 123 | def test_file_path(): 124 | path = join(mkdtemp(), 'pytest-nonp.json') 125 | dump(nonpdata, path, compression=6) 126 | data2 = load(path, decompression=True) 127 | assert data2 == nonpdata 128 | data3 = load(path, decompression=None) # autodetect gzip 129 | assert data3 == nonpdata 130 | 131 | 132 | test_json_with_comments = """{ # "comment 1 133 | "hello": "Wor#d", "Bye": "\\"M#rk\\"", "yes\\\\\\"": 5,# comment" 2 134 | "quote": "\\"th#t's\\" what she said", # comment "3" 135 | "list": [1, 1, "#", "\\"", "\\\\", 8], "dict": {"q": 7} #" comment 4 with quotes 136 | } 137 | # comment 5""" 138 | 139 | test_json_without_comments = """{ 140 | "hello": "Wor#d", "Bye": "\\"M#rk\\"", "yes\\\\\\"": 5, 141 | "quote": "\\"th#t's\\" what she said", 142 | "list": [1, 1, "#", "\\"", "\\\\", 8], "dict": {"q": 7} 143 | } 144 | """ 145 | 146 | test_object_for_comment_strings = { 147 | "hello": "Wor#d", "Bye": "\"M#rk\"", "yes\\\"": 5, 148 | "quote": "\"th#t's\" what she said", 149 | "list": [1, 1, "#", "\"", "\\", 8], "dict": {"q": 7} 150 | } 151 | 152 | test_json_duplicates = """{"test": 42, "test": 37}""" 153 | 154 | 155 | def test_strip_comments(): 156 | valid = strip_comments(test_json_with_comments) 157 | assert valid == test_json_without_comments 158 | valid = strip_comments(test_json_with_comments.replace('#', '//')) 159 | assert valid == test_json_without_comments.replace('#', '//') 160 | 161 | 162 | def test_ignore_comments_deprecation(): 163 | # https://github.com/mverleg/pyjson_tricks/issues/74 164 | 165 | # First time should have deprecation warning 166 | loads._ignore_comments_warned_ = False 167 | with warns(JsonTricksDeprecation): 168 | loads(test_json_with_comments) 169 | 170 | # Second time there should be no warning 171 | with catch_warnings(): 172 | simplefilter("error") 173 | loaded = loads(test_json_with_comments) 174 | assert loaded == test_object_for_comment_strings 175 | 176 | # Passing a string without comments should not have a warning 177 | loads._ignore_comments_warned_ = False 178 | with catch_warnings(): 179 | simplefilter("error") 180 | loaded = loads(test_json_without_comments) 181 | 182 | # Passing True for argument explicitly should not have a warning 183 | loads._ignore_comments_warned_ = False 184 | with catch_warnings(): 185 | simplefilter("error") 186 | loaded = loads(test_json_with_comments, ignore_comments=True) 187 | assert loaded == test_object_for_comment_strings 188 | 189 | # Passing False for argument explicitly should not have a warning 190 | loads._ignore_comments_warned_ = False 191 | with catch_warnings(): 192 | simplefilter("error") 193 | loaded = loads(test_json_without_comments, ignore_comments=False) 194 | assert loaded == test_object_for_comment_strings 195 | 196 | 197 | ordered_map = OrderedDict(( 198 | ('elephant', None), 199 | ('chicken', None), 200 | ('dolphin', None), 201 | ('wild boar', 
None), 202 | ('grasshopper', None), 203 | ('tiger', None), 204 | ('buffalo', None), 205 | ('killer whale', None), 206 | ('eagle', None), 207 | ('tortoise', None), 208 | )) 209 | 210 | 211 | def test_string_compression(): 212 | json = dumps(ordered_map, compression=3) 213 | assert json[:2] == b'\x1f\x8b' 214 | data2 = loads(json, decompression=True) 215 | assert ordered_map == data2 216 | data3 = loads(json, decompression=None) 217 | assert ordered_map == data3 218 | 219 | 220 | def test_flush_no_errors(): 221 | # just tests that flush doesn't cause problems; checking actual flushing is too messy. 222 | path = join(mkdtemp(), 'pytest-nonp.json') 223 | with open(path, 'wb+') as fh: 224 | dump(nonpdata, fh, compression=True, force_flush=True) 225 | with open(path, 'rb') as fh: 226 | data2 = load(fh, decompression=True) 227 | assert data2 == nonpdata 228 | # flush non-file IO 229 | sh = BytesIO() 230 | try: 231 | dump(ordered_map, fp=sh, compression=True, force_flush=True) 232 | finally: 233 | sh.close() 234 | 235 | 236 | def test_compression_with_comments(): 237 | if is_py3: 238 | test_json = bytes(test_json_with_comments, encoding=ENCODING) 239 | else: 240 | test_json = test_json_with_comments 241 | json = gzip_compress(test_json, compresslevel=9) 242 | ref = loads(test_json_without_comments) 243 | data2 = loads(json, decompression=True) 244 | assert ref == data2 245 | data3 = loads(json, decompression=None) 246 | assert ref == data3 247 | 248 | 249 | def test_hooks_called_once_if_no_comments(): 250 | call_count = [0] 251 | def counting_hook(obj, *args): 252 | call_count[0] += 1 253 | return obj 254 | result = loads('{"abc": 123}', ignore_comments=None, extra_obj_pairs_hooks=(counting_hook,)) 255 | assert result == {"abc": 123} 256 | assert call_count[0] == 1 257 | 258 | 259 | def test_hooks_called_once_if_comment_before(): 260 | call_count = [0] 261 | def counting_hook(obj, *args): 262 | call_count[0] += 1 263 | return obj 264 | result = loads('// comment\n{"abc": 123}', ignore_comments=None, extra_obj_pairs_hooks=(counting_hook,)) 265 | assert result == {"abc": 123} 266 | assert call_count[0] == 1 267 | 268 | 269 | def test_hooks_called_twice_if_comment_after(): 270 | call_count = [0] 271 | def counting_hook(obj, *args): 272 | call_count[0] += 1 273 | return obj 274 | result = loads('{"abc": 123} // comment', ignore_comments=None, extra_obj_pairs_hooks=(counting_hook,)) 275 | assert result == {"abc": 123} 276 | assert call_count[0] == 2 277 | 278 | 279 | def test_order(): 280 | json = dumps(ordered_map) 281 | data2 = loads(json, preserve_order=True) 282 | assert tuple(ordered_map.keys()) == tuple(data2.keys()) 283 | reverse = OrderedDict(reversed(tuple(ordered_map.items()))) 284 | json = dumps(reverse) 285 | data3 = loads(json, preserve_order=True) 286 | assert tuple(reverse.keys()) == tuple(data3.keys()) 287 | json = dumps(ordered_map) 288 | data4 = loads(json, preserve_order=False) 289 | assert not isinstance(data4, OrderedDict) 290 | 291 | 292 | cls_instance = MyTestCls(s='ub', dct={'7': 7}) 293 | cls_instance_custom = CustomEncodeCls() 294 | 295 | 296 | def test_cls_instance_default(): 297 | json = dumps(cls_instance) 298 | back = loads(json) 299 | assert (cls_instance.s == back.s) 300 | assert (cls_instance.dct == dict(back.dct)) 301 | json = dumps(cls_instance, primitives=True) 302 | back = loads(json) 303 | assert tuple(sorted(back.keys())) == ('dct', 's',) 304 | assert '7' in back['dct'] 305 | 306 | 307 | def test_cls_instance_custom(): 308 | json = dumps(cls_instance_custom) 309 | 
back = loads(json) 310 | assert (cls_instance_custom.relevant == back.relevant) 311 | assert (cls_instance_custom.irrelevant == 37) 312 | assert (back.irrelevant == 12) 313 | json = dumps(cls_instance_custom, primitives=True) 314 | back = loads(json) 315 | assert (cls_instance_custom.relevant == back['relevant']) 316 | assert (cls_instance_custom.irrelevant == 37) 317 | assert 'irrelevant' not in back 318 | 319 | 320 | def test_cls_instance_local(): 321 | json = '{"__instance_type__": [null, "CustomEncodeCls"], "attributes": {"relevant": 137}}' 322 | loads(json, cls_lookup_map=globals()) 323 | 324 | 325 | def test_cls_instance_inheritance(): 326 | inst = SubClass() 327 | json = dumps(inst) 328 | assert '42' not in json 329 | back = loads(json) 330 | assert inst == back 331 | inst.set_attr() 332 | json = dumps(inst) 333 | assert '42' in json 334 | back = loads(json) 335 | assert inst == back 336 | 337 | 338 | def test_cls_attributes_unchanged(): 339 | """ 340 | Test that class attributes are not restored. This would be undesirable, 341 | because deserializing one instance could impact all other existing ones. 342 | """ 343 | SuperClass.cls_attr = 37 344 | inst = SuperClass() 345 | json = dumps(inst) 346 | assert '37' not in json 347 | SuperClass.cls_attr = 42 348 | back = loads(json) 349 | assert inst == back 350 | assert inst.cls_attr == back.cls_attr == 42 351 | SuperClass.cls_attr = 37 352 | 353 | 354 | def test_cls_lookup_map_fail(): 355 | class LocalCls(object): 356 | def __init__(self, val): 357 | self.value = val 358 | original = [LocalCls(37), LocalCls(42)] 359 | txt = dumps(original) 360 | with raises(ImportError) as err: 361 | loads(txt) 362 | assert 'LocalCls' in str(err.value) 363 | assert 'cls_lookup_map' in str(err.value) 364 | with raises(ImportError) as err: 365 | loads(txt, cls_lookup_map=globals()) 366 | assert 'LocalCls' in str(err.value) 367 | assert 'cls_lookup_map' in str(err.value) 368 | 369 | 370 | def test_cls_lookup_map_success(): 371 | class LocalCls(object): 372 | def __init__(self, val): 373 | self.value = val 374 | original = [LocalCls(37), LocalCls(42)] 375 | txt = dumps(original) 376 | back = loads(txt, cls_lookup_map=dict(LocalCls=LocalCls)) 377 | assert len(original) == len(back) == 2 378 | assert original[0].value == back[0].value 379 | assert original[1].value == back[1].value 380 | back = loads(txt, properties=dict(cls_lookup_map=dict(LocalCls=LocalCls))) 381 | assert len(original) == len(back) == 2 382 | assert original[0].value == back[0].value 383 | assert original[1].value == back[1].value 384 | 385 | 386 | def test_cls_slots(): 387 | slots = [SlotsBase(), SlotsDictABC(), SlotsStr(), SlotsABCDict(), SlotsABC()] 388 | txt = dumps(slots) 389 | res = loads(txt) 390 | for inputobj, outputobj in zip(slots, res): 391 | assert isinstance(outputobj, SlotsBase) 392 | assert inputobj == outputobj 393 | referenceobj = SlotsBase() 394 | for outputobj in res[1:]: 395 | assert outputobj != referenceobj 396 | 397 | 398 | def test_duplicates(): 399 | loads(test_json_duplicates, allow_duplicates=True) 400 | with raises(DuplicateJsonKeyException): 401 | loads(test_json_duplicates, allow_duplicates=False) 402 | 403 | 404 | def test_complex_number(): 405 | objs = ( 406 | 4.2 + 3.7j, 407 | 1j, 408 | 1 + 0j, 409 | -999999.9999999 - 999999.9999999j, 410 | ) 411 | for obj in objs: 412 | json = dumps(obj) 413 | back = loads(json) 414 | assert back == obj, 'json en/decoding failed for complex number {0:}'.format(obj) 415 | json = dumps(obj, primitives=True) 416 | back = 
loads(json) 417 | assert back == [obj.real, obj.imag] 418 | assert complex(*back) == obj 419 | txt = '{"__complex__": [4.2, 3.7]}' 420 | obj = loads(txt) 421 | assert obj == 4.2 + 3.7j 422 | 423 | 424 | def test_float_precision(): 425 | json = dumps([pi]) 426 | back = loads(json) 427 | assert back[0] - pi == 0, 'Precision lost while encoding and decoding float.' 428 | 429 | 430 | def test_set(): 431 | setdata = [{'set': set((3, exp(1), (-5, +7), False))}] 432 | json = dumps(setdata) 433 | back = loads(json) 434 | assert isinstance(back[0]['set'], set) 435 | assert setdata == back 436 | json = dumps(setdata, primitives=True) 437 | back = loads(json) 438 | assert isinstance(back[0]['set'], list) 439 | assert setdata[0]['set'] == set(tuple(q) if isinstance(q, list) else q for q in back[0]['set']) 440 | 441 | 442 | def test_special_nr_parsing(): 443 | nr_li_json = '[1, 3.14]' 444 | res = loads(nr_li_json, 445 | parse_int=lambda s: int('7' + s), 446 | parse_float=lambda s: float('5' + s) 447 | ) 448 | assert res == [71, 53.14], 'Special integer and/or float parsing not working' 449 | nr_li_json = '[1, 3.14]' 450 | res = loads(nr_li_json, 451 | parse_int=Decimal, 452 | parse_float=Decimal 453 | ) 454 | assert isinstance(res[0], Decimal) 455 | assert isinstance(res[1], Decimal) 456 | 457 | 458 | def test_special_floats(): 459 | """ 460 | The official json standard doesn't support infinity or NaN, but the Python implementation does. 461 | """ 462 | special_floats = [float('NaN'), float('Infinity'), -float('Infinity'), float('+0'), float('-0')] 463 | txt = dumps(special_floats, allow_nan=True) 464 | assert txt == "[NaN, Infinity, -Infinity, 0.0, -0.0]" 465 | res = loads(txt) 466 | for x, y in zip(special_floats, res): 467 | """ Use strings since `+0 == -1` and `NaN != NaN` """ 468 | assert str(x) == str(y) 469 | with raises(ValueError): 470 | dumps(special_floats, allow_nan=False) 471 | with raises(ValueError): 472 | dumps(special_floats) 473 | 474 | 475 | def test_decimal(): 476 | decimals = [Decimal(0), Decimal(-pi), Decimal('9999999999999999999999999999999999999999999999999999'), 477 | Decimal('NaN'), Decimal('Infinity'), -Decimal('Infinity'), Decimal('+0'), Decimal('-0')] 478 | txt = dumps(decimals) 479 | res = loads(txt) 480 | for x, y in zip(decimals, res): 481 | assert isinstance(y, Decimal) 482 | assert x == y or x.is_nan() 483 | assert str(x) == str(y) 484 | 485 | 486 | def test_decimal_primitives(): 487 | decimals = [Decimal(0), Decimal(-pi), Decimal('9999999999999')] 488 | txt = dumps(decimals, primitives=True) 489 | res = loads(txt) 490 | for x, y in zip(decimals, res): 491 | assert isinstance(y, float) 492 | assert x == y or x.is_nan() 493 | 494 | 495 | def test_fraction(): 496 | fractions = [Fraction(0), Fraction(1, 3), Fraction(-pi), Fraction('1/3'), Fraction('1/3') / Fraction('1/6'), 497 | Fraction('9999999999999999999999999999999999999999999999999999'), Fraction('1/12345678901234567890123456789'),] 498 | txt = dumps(fractions) 499 | res = loads(txt) 500 | for x, y in zip(fractions, res): 501 | assert isinstance(y, Fraction) 502 | assert x == y 503 | assert str(x) == str(y) 504 | txt = dumps(fractions, primitives=True) 505 | res = loads(txt) 506 | for x, y in zip(fractions, res): 507 | assert isinstance(y, float) 508 | assert abs(x - y) < 1e-10 509 | 510 | 511 | DTOBJ = [ 512 | datetime(year=1988, month=3, day=15, hour=8, minute=3, second=59, microsecond=7), 513 | date(year=1988, month=3, day=15), 514 | time(hour=8, minute=3, second=59, microsecond=123), 515 | timedelta(days=2, 
seconds=3599), 516 | ] 517 | 518 | 519 | def test_naive_date_time(): 520 | json = dumps(DTOBJ) 521 | back = loads(json) 522 | assert DTOBJ == back 523 | for orig, bck in zip(DTOBJ, back): 524 | assert orig == bck 525 | assert type(orig) == type(bck) 526 | txt = '{"__datetime__": null, "year": 1988, "month": 3, "day": 15, "hour": 8, "minute": 3, ' \ 527 | '"second": 59, "microsecond": 7}' 528 | obj = loads(txt) 529 | assert obj == datetime(year=1988, month=3, day=15, hour=8, minute=3, second=59, microsecond=7) 530 | 531 | 532 | def test_primitive_naive_date_time(): 533 | json = dumps(DTOBJ, primitives=True) 534 | back = loads(json) 535 | for orig, bck in zip(DTOBJ, back): 536 | if isinstance(bck, (date, time, datetime,)): 537 | assert isinstance(bck, str_type) 538 | assert bck == orig.isoformat() 539 | elif isinstance(bck, (timedelta,)): 540 | assert isinstance(bck, float) 541 | assert bck == orig.total_seconds() 542 | dt = datetime(year=1988, month=3, day=15, hour=8, minute=3, second=59, microsecond=7) 543 | assert dumps(dt, primitives=True).strip('"') == '1988-03-15T08:03:59.000007' 544 | 545 | 546 | def test_str_unicode_bytes(): 547 | text, obj = u'{"mykey": "你好"}', {"mykey": u"你好"} 548 | assert loads(text) == obj 549 | if is_py3: 550 | with raises(TypeError) as err: 551 | loads(text.encode('utf-8')) 552 | if 'ExceptionInfo' in str(type(err)): 553 | # This check is needed because the type of err varies between versions 554 | # For some reason, isinstance(..., py.code.ExceptionInfo) does not work 555 | err = err.value 556 | assert 'The input was of non-string type' in str(err) 557 | assert loads(text.encode('utf-8'), conv_str_byte=True) == obj 558 | else: 559 | assert loads('{"mykey": "nihao"}') == {'mykey': 'nihao'} 560 | 561 | 562 | def with_nondict_hook(): 563 | """ Add a custom hook, to test that all future hooks handle non-dicts. """ 564 | # Prevent issue 26 from coming back. 565 | def test_hook(dct): 566 | if not isinstance(dct, dict): 567 | return 568 | return ValueError() 569 | loads('{"key": 42}', extra_obj_pairs_hooks=(test_hook,)) 570 | 571 | 572 | def test_custom_enc_dec(): 573 | """ Test using a custom encoder/decoder. """ 574 | def silly_enc(obj): 575 | return {"val": 42} 576 | def silly_dec(dct): 577 | if not isinstance(dct, dict): 578 | return dct 579 | return [37] 580 | txt = dumps(lambda x: x * 2, extra_obj_encoders=(silly_enc,)) 581 | assert txt == '{"val": 42}' 582 | back = loads(txt, extra_obj_pairs_hooks=(silly_dec,)) 583 | assert back == [37] 584 | 585 | 586 | def test_lambda_partial(): 587 | """ Test that a custom encoder/decoder works when wrapped in functools.partial, 588 | which caused problems before because inspect.getargspec does not support it. 
""" 589 | obj = dict(alpha=37.42, beta=[1, 2, 4, 8, 16, 32]) 590 | enc_dec_lambda = partial(lambda x, y: x, y=0) 591 | txt = dumps(obj, extra_obj_encoders=(enc_dec_lambda,)) 592 | back = loads(txt, extra_obj_pairs_hooks=(enc_dec_lambda,)) 593 | assert obj == back 594 | def enc_dec_fun(obj, primitives=False, another=True): 595 | return obj 596 | txt = dumps(obj, extra_obj_encoders=(partial(enc_dec_fun, another=True),)) 597 | back = loads(txt, extra_obj_pairs_hooks=(partial(enc_dec_fun, another=True),)) 598 | assert obj == back 599 | 600 | 601 | def test_hooks_not_too_eager(): 602 | from threading import RLock 603 | with raises(TypeError): 604 | dumps([RLock()]) 605 | # TypeError did not get raised, so show a message 606 | # (https://github.com/pytest-dev/pytest/issues/3974) 607 | fail('There is no hook to serialize RLock, so this should fail, ' 608 | 'otherwise some hook is too eager.') 609 | 610 | 611 | def test_fallback_hooks(): 612 | from threading import RLock 613 | 614 | json = dumps(OrderedDict(( 615 | ('li', [1, 2, 3]), 616 | ('lock', RLock()), 617 | )), fallback_encoders=[fallback_ignore_unknown]) 618 | bck = loads(json) 619 | assert bck == OrderedDict(( 620 | ('li', [1, 2, 3]), 621 | ('lock', None), 622 | )) 623 | 624 | 625 | def test_empty_string_with_url(): 626 | """ Originally for https://github.com/mverleg/pyjson_tricks/issues/51 """ 627 | txt = '{"foo": "", "bar": "http://google.com"}' 628 | assert txt == strip_comments(txt), strip_comments(txt) 629 | txt = '{"foo": "", "bar": "http://google.com"}' 630 | assert txt == dumps(loads(txt, ignore_comments=False)) 631 | assert txt == dumps(loads(txt, ignore_comments=True)) 632 | txt = '{"a": "", "b": "//", "c": ""}' 633 | assert txt == dumps(loads(txt)) 634 | txt = '{"a": "", "b": "/*", "c": ""}' 635 | assert txt == dumps(loads(txt)) 636 | txt = '{"//": "//"}' 637 | assert txt == dumps(loads(txt)) 638 | txt = '{"///": "////*/*"}' 639 | assert txt == dumps(loads(txt)) 640 | 641 | 642 | def test_no_cls(): 643 | """ Originally for https://github.com/mverleg/pyjson_tricks/issues/79 """ 644 | data = dict(name='Leonardo da Vinci', year=1452) 645 | path = join(mkdtemp(), 'pytest-no-cls.json') 646 | with open(path, 'wb+') as fh: 647 | dump(data, fh, cls=None, compression=1) 648 | with open(path, 'rb') as fh: 649 | bck = load(fh) 650 | assert data == bck 651 | txt = dumps(data, cls=None, compression=2) 652 | bck = loads(txt) 653 | assert data == bck 654 | 655 | 656 | @pytest.mark.skipif(condition=not is_py3, reason='encoding bytes not supported on python 2') 657 | def test_utf8_bytes(): 658 | inputs = [ 659 | b'hello world', 660 | b'', 661 | b'\n', 662 | u'你好'.encode('utf-8', 'ignore'), 663 | b'"', 664 | b"''", 665 | ] 666 | json = dumps(inputs) 667 | assert '__bytes_utf8__' in json 668 | assert '__bytes_b64__' not in json 669 | json_bytes = json.encode('utf-8', 'ignore') 670 | assert inputs[0] in json_bytes 671 | bck = loads(json) 672 | assert inputs == bck 673 | 674 | 675 | @pytest.mark.skipif(condition=not is_py3, reason='encoding bytes not supported on python 2') 676 | def test_nonutf8_bytes(): 677 | inputs = [ 678 | b'\xc3\x28', 679 | b'\xa0\xa1', 680 | b'\xe2\x28\xa1', 681 | b'\xe2\x82\x28', 682 | b'\xf0\x28\x8c\xbc', 683 | b'\xf0\x90\x28\xbc', 684 | b'\xf0\x28\x8c\x28', 685 | ] 686 | json = dumps(inputs) 687 | assert '__bytes_utf8__' not in json 688 | assert '__bytes_b64__' in json 689 | json_bytes = json.encode('utf-8', 'ignore') 690 | for input in inputs: 691 | assert input not in json_bytes 692 | bck = loads(json) 693 | assert 
inputs == bck 694 | 695 | 696 | @pytest.mark.skipif(condition=not is_py3, reason='encoding bytes not supported on python 2') 697 | def test_bytes_primitive_repr(): 698 | inp = [u'hello = 你好'.encode('utf-8', 'ignore')] 699 | assert inp[0] == b'hello = \xe4\xbd\xa0\xe5\xa5\xbd' 700 | json = dumps(inp, primitives=True) 701 | assert json == '[{"__bytes_b64__": "aGVsbG8gPSDkvaDlpb0="}]' 702 | bck = loads(json) 703 | assert inp == bck 704 | 705 | -------------------------------------------------------------------------------- /tests/test_class.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import weakref 5 | from json_tricks import dumps, loads 6 | 7 | 8 | class MyTestCls(object): 9 | def __init__(self, **kwargs): 10 | for k, v in kwargs.items(): 11 | setattr(self, k, v) 12 | 13 | def __repr__(self): 14 | return 'A<{0:}>'.format(', '.join('{0:s}={1:}'.format(k, v) for k, v in self.__dict__.items())) 15 | 16 | 17 | class CustomEncodeCls(MyTestCls): 18 | def __init__(self, **kwargs): 19 | super(CustomEncodeCls, self).__init__(**kwargs) 20 | self.relevant = 42 21 | self.irrelevant = 37 22 | 23 | def __json_encode__(self): 24 | return {'relevant': self.relevant} 25 | 26 | def __json_decode__(self, **attrs): 27 | self.relevant = attrs['relevant'] 28 | self.irrelevant = 12 29 | 30 | 31 | class SuperClass(object): 32 | cls_attr = 37 33 | 34 | def __init__(self): 35 | self.attr = None 36 | 37 | def __eq__(self, other): 38 | return self.__class__ == other.__class__ and self.__dict__ == other.__dict__ 39 | 40 | 41 | class SubClass(SuperClass): 42 | def set_attr(self): 43 | self.attr = 42 44 | 45 | 46 | class SlotsBase(object): 47 | __slots__ = [] 48 | 49 | def __eq__(self, other): 50 | if self.__class__ != other.__class__: 51 | return False 52 | slots = self.__class__.__slots__ 53 | if isinstance(slots,str): 54 | slots = [slots] 55 | return all(getattr(self, i) == getattr(other, i) for i in slots) 56 | 57 | 58 | class SlotsDictABC(SlotsBase): 59 | __slots__ = ['__dict__'] 60 | 61 | def __init__(self, a='a', b='b', c='c'): 62 | self.a = a 63 | self.b = b 64 | self.c = c 65 | 66 | 67 | class SlotsStr(SlotsBase): 68 | __slots__ = 'name' 69 | 70 | def __init__(self, name='name'): 71 | self.name = name 72 | 73 | 74 | class SlotsABCDict(SlotsBase): 75 | __slots__ = ['a','b','c','__dict__'] 76 | 77 | def __init__(self, a='a', b='b', c='c'): 78 | self.a = a 79 | self.b = b 80 | self.c = c 81 | 82 | 83 | class SlotsABC(SlotsBase): 84 | __slots__ = ['a','b','c'] 85 | 86 | def __init__(self, a='a', b='b', c='c'): 87 | self.a = a 88 | self.b = b 89 | self.c = c 90 | 91 | 92 | def test_slots_weakref(): 93 | """ Issue with attrs library due to __weakref__ in __slots__ https://github.com/mverleg/pyjson_tricks/issues/82 """ 94 | class TestClass(object): 95 | __slots__ = "value", "__weakref__" 96 | def __init__(self, value): 97 | self.value = value 98 | 99 | obj = TestClass(value=7) 100 | json = dumps(obj) 101 | assert '__weakref__' not in json 102 | decoded = loads(json, cls_lookup_map=dict(TestClass=TestClass)) 103 | assert obj.value == decoded.value 104 | 105 | 106 | def test_pure_weakref(): 107 | """ Check that the issue in `test_slots_weakref` does not happen without __slots__ """ 108 | obj = MyTestCls(value=7) 109 | ref = weakref.ref(obj) 110 | json = dumps(obj) 111 | decoded = loads(json) 112 | assert str(obj) == str(decoded) 113 | # noinspection PyUnusedLocal 114 | obj = None 115 | assert ref() is None 116 | 117 | 
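A note on the pattern above: the `__json_encode__`/`__json_decode__` hooks (as defined on `CustomEncodeCls`) let a class choose which attributes survive a round trip. A minimal standalone sketch; the `Point` class is illustrative rather than part of this repository, and `cls_lookup_map` is passed the same way `test_cls_lookup_map_success` does it:

    from json_tricks import dumps, loads

    class Point(object):
        def __init__(self, x=0, y=0):
            self.x, self.y = x, y
            self.cache = None  # transient state, deliberately not serialized

        def __json_encode__(self):
            # only these keys end up in the JSON
            return {'x': self.x, 'y': self.y}

        def __json_decode__(self, **attrs):
            self.x = attrs['x']
            self.y = attrs['y']
            self.cache = None

    back = loads(dumps(Point(3, 4)), cls_lookup_map=dict(Point=Point))
    assert (back.x, back.y) == (3, 4) and back.cache is None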
-------------------------------------------------------------------------------- /tests/test_enum.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | from datetime import datetime 6 | from functools import partial 7 | from enum import Enum, IntEnum 8 | from json_tricks import dumps, loads, encode_intenums_inplace 9 | from json_tricks.encoders import enum_instance_encode 10 | 11 | 12 | PY2 = sys.version_info[0] == 2 13 | 14 | 15 | class MyEnum(Enum): 16 | member1 = 'VALUE1' 17 | member2 = 'VALUE2' 18 | 19 | 20 | class MyIntEnum(IntEnum): 21 | int_member = 1 22 | 23 | 24 | def test_enum(): 25 | member = MyEnum.member1 26 | txt = dumps(member) 27 | back = loads(txt) 28 | 29 | assert isinstance(back, MyEnum) 30 | assert back == member 31 | 32 | 33 | def test_enum_instance_global(): 34 | json = '{"__enum__": {"__enum_instance_type__": [null, "MyEnum"], "name": "member1"}}' 35 | back = loads(json, cls_lookup_map=globals()) 36 | assert isinstance(back, MyEnum) 37 | assert back == MyEnum.member1 38 | 39 | 40 | def test_enum_primitives(): 41 | member = MyEnum.member1 42 | txt = dumps(member, primitives=True) 43 | assert txt == '{"member1": "VALUE1"}' 44 | 45 | 46 | def test_encode_int_enum(): 47 | member = MyIntEnum.int_member 48 | txt = dumps(member) 49 | # IntEnums are serialized as strings by enum34 on Python < 3.4. This comes from how the JSON serializer works. We can't do anything about this besides documenting it. 50 | # See https://bitbucket.org/stoneleaf/enum34/issues/17/difference-between-enum34-and-enum-json 51 | if PY2: 52 | assert txt == u"MyIntEnum.int_member" 53 | else: 54 | assert txt == "1" 55 | 56 | 57 | def test_encode_int_enum_inplace(): 58 | obj = { 59 | 'int_member': MyIntEnum.int_member, 60 | 'list': [MyIntEnum.int_member], 61 | 'nested': { 62 | 'member': MyIntEnum.int_member, 63 | } 64 | } 65 | 66 | txt = dumps(encode_intenums_inplace(obj)) 67 | data = loads(txt) 68 | 69 | assert isinstance(data['int_member'], MyIntEnum) 70 | assert data['int_member'] == MyIntEnum.int_member 71 | assert isinstance(data['list'][0], MyIntEnum) 72 | assert isinstance(data['nested']['member'], MyIntEnum) 73 | 74 | 75 | class EnumValueTest(object): 76 | alpha = 37 77 | def __init__(self, beta): 78 | self.beta = beta 79 | 80 | 81 | class CombineComplexTypesEnum(Enum): 82 | class_inst = EnumValueTest(beta=42) 83 | timepoint = datetime(year=1988, month=3, day=15, hour=8, minute=3, second=59, microsecond=7) 84 | img = 1j 85 | 86 | 87 | def test_complex_types_enum(): 88 | obj = [ 89 | CombineComplexTypesEnum.timepoint, 90 | CombineComplexTypesEnum.img, 91 | CombineComplexTypesEnum.class_inst, 92 | ] 93 | txt = dumps(encode_intenums_inplace(obj)) 94 | back = loads(txt) 95 | assert obj == back 96 | 97 | 98 | def test_with_value(): 99 | obj = [CombineComplexTypesEnum.class_inst, CombineComplexTypesEnum.timepoint] 100 | encoder = partial(enum_instance_encode, with_enum_value=True) 101 | txt = dumps(obj, extra_obj_encoders=(encoder,)) 102 | assert '"value":' in txt 103 | back = loads(txt, obj_pairs_hooks=()) 104 | class_inst_encoding = loads(dumps(CombineComplexTypesEnum.class_inst.value), obj_pairs_hooks=()) 105 | timepoint_encoding = loads(dumps(CombineComplexTypesEnum.timepoint.value), obj_pairs_hooks=()) 106 | assert back[0]['__enum__']['value'] == class_inst_encoding 107 | assert back[1]['__enum__']['value'] == timepoint_encoding 108 | 109 | 110 | 
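Worth spelling out: an `IntEnum` member is a valid `int`, so the standard JSON machinery serializes it as a bare number before json_tricks can intercept it (which is exactly what `test_encode_int_enum` asserts on Python 3). `encode_intenums_inplace` works around this by replacing the members ahead of time, as `test_encode_int_enum_inplace` does. A small sketch; the `Color` enum is illustrative:

    from enum import IntEnum
    from json_tricks import dumps, loads, encode_intenums_inplace

    class Color(IntEnum):
        RED = 1

    assert dumps(Color.RED) == '1'  # the enum type is silently lost
    txt = dumps(encode_intenums_inplace({'c': Color.RED}))
    back = loads(txt, cls_lookup_map=dict(Color=Color))
    assert isinstance(back['c'], Color)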
-------------------------------------------------------------------------------- /tests/test_meta.py: -------------------------------------------------------------------------------- 1 | 2 | import re 3 | 4 | 5 | def test_version(): 6 | import json_tricks 7 | assert re.match(r'^\d+\.\d+\.\d+$', json_tricks.__version__) is not None 8 | -------------------------------------------------------------------------------- /tests/test_np.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from copy import deepcopy 5 | from os.path import join 6 | from tempfile import mkdtemp 7 | import sys 8 | from warnings import catch_warnings, simplefilter 9 | 10 | from pytest import warns 11 | from numpy import arange, ones, array, array_equal, finfo, iinfo, pi 12 | from numpy import int8, int16, int32, int64, uint8, uint16, uint32, uint64, \ 13 | float16, float32, float64, complex64, complex128, zeros, ndindex 14 | from numpy.core.umath import exp 15 | from numpy.testing import assert_equal 16 | 17 | from json_tricks import numpy_encode 18 | from json_tricks.np import dump, dumps, load, loads 19 | from json_tricks.np_utils import encode_scalars_inplace 20 | from json_tricks.utils import JsonTricksDeprecation, gzip_decompress 21 | from .test_bare import cls_instance 22 | from .test_class import MyTestCls 23 | 24 | DTYPES = (int8, int16, int32, int64, uint8, uint16, uint32, uint64, 25 | float16, float32, float64, complex64, complex128) 26 | 27 | 28 | def get_lims(dtype): 29 | try: 30 | info = finfo(dtype) 31 | except ValueError: 32 | info = iinfo(dtype) 33 | return dtype(info.min), dtype(info.max) 34 | 35 | 36 | npdata = { 37 | 'vector': arange(15, 70, 3, dtype=uint8), 38 | 'matrix': ones((15, 10), dtype=float64), 39 | } 40 | 41 | 42 | def _numpy_equality(d2): 43 | assert npdata.keys() == d2.keys() 44 | assert_equal(npdata['vector'], d2['vector']) 45 | assert_equal(npdata['matrix'], d2['matrix']) 46 | assert npdata['vector'].dtype == d2['vector'].dtype 47 | assert npdata['matrix'].dtype == d2['matrix'].dtype 48 | 49 | 50 | def test_primitives(): 51 | txt = dumps(deepcopy(npdata), primitives=True) 52 | data2 = loads(txt) 53 | assert isinstance(data2['vector'], list) 54 | assert isinstance(data2['matrix'], list) 55 | assert isinstance(data2['matrix'][0], list) 56 | assert data2['vector'] == npdata['vector'].tolist() 57 | assert (abs(array(data2['vector']) - npdata['vector'])).sum() < 1e-10 58 | assert data2['matrix'] == npdata['matrix'].tolist() 59 | assert (abs(array(data2['matrix']) - npdata['matrix'])).sum() < 1e-10 60 | 61 | 62 | def test_dumps_loads_numpy(): 63 | json = dumps(deepcopy(npdata)) 64 | data2 = loads(json) 65 | _numpy_equality(data2) 66 | 67 | 68 | def test_file_numpy(): 69 | path = join(mkdtemp(), 'pytest-np.json') 70 | with open(path, 'wb+') as fh: 71 | dump(deepcopy(npdata), fh, compression=9) 72 | with open(path, 'rb') as fh: 73 | data2 = load(fh, decompression=True) 74 | _numpy_equality(data2) 75 | 76 | 77 | def test_compressed_to_disk(): 78 | arr = [array([[1.0, 2.0], [3.0, 4.0]])] 79 | path = join(mkdtemp(), 'pytest-np.json.gz') 80 | with open(path, 'wb+') as fh: 81 | dump(arr, fh, compression=True, properties=dict(ndarray_compact=True, ndarray_store_byteorder='little')) 82 | 83 | 84 | mixed_data = { 85 | 'vec': array(range(10)), 86 | 'inst': MyTestCls( 87 | nr=7, txt='yolo', 88 | li=[1, 1, 2, 3, 5, 8, 12], 89 | vec=array(range(7, 16, 2)), 90 | inst=cls_instance 91 | ), 92 | } 93 | 94 | 95 | def 
test_mixed_cls_arr(): 96 | json = dumps(mixed_data) 97 | back = dict(loads(json)) 98 | assert mixed_data.keys() == back.keys() 99 | assert (mixed_data['vec'] == back['vec']).all() 100 | assert (mixed_data['inst'].vec == back['inst'].vec).all() 101 | assert (mixed_data['inst'].nr == back['inst'].nr) 102 | assert (mixed_data['inst'].li == back['inst'].li) 103 | assert (mixed_data['inst'].inst.s == back['inst'].inst.s) 104 | assert (mixed_data['inst'].inst.dct == dict(back['inst'].inst.dct)) 105 | 106 | 107 | def test_memory_order(): 108 | arrC = array([[1., 2.], [3., 4.]], order='C') 109 | json = dumps(arrC) 110 | arr = loads(json) 111 | assert array_equal(arrC, arr) 112 | assert arrC.flags['C_CONTIGUOUS'] == arr.flags['C_CONTIGUOUS'] and \ 113 | arrC.flags['F_CONTIGUOUS'] == arr.flags['F_CONTIGUOUS'] 114 | arrF = array([[1., 2.], [3., 4.]], order='F') 115 | json = dumps(arrF) 116 | arr = loads(json) 117 | assert array_equal(arrF, arr) 118 | assert arrF.flags['C_CONTIGUOUS'] == arr.flags['C_CONTIGUOUS'] and \ 119 | arrF.flags['F_CONTIGUOUS'] == arr.flags['F_CONTIGUOUS'] 120 | 121 | 122 | def test_scalars_types(): 123 | # from: https://docs.scipy.org/doc/numpy/user/basics.types.html 124 | encme = [] 125 | for dtype in DTYPES: 126 | for val in (dtype(0),) + get_lims(dtype): 127 | assert isinstance(val, dtype) 128 | encme.append(val) 129 | json = dumps(encme, indent=2) 130 | rec = loads(json) 131 | assert encme == rec 132 | for nr in rec: 133 | assert nr.__class__ in (int, float, complex), 'primitive python type expected, see issue #18' 134 | 135 | 136 | def test_array_types(): 137 | # from: https://docs.scipy.org/doc/numpy/user/basics.types.html 138 | # see also `test_scalars_types` 139 | for dtype in DTYPES: 140 | vec = [array((dtype(0), dtype(exp(1))) + get_lims(dtype), dtype=dtype)] 141 | json = dumps(vec) 142 | assert dtype.__name__ in json 143 | rec = loads(json) 144 | assert rec[0].dtype == dtype 145 | assert array_equal(vec, rec) 146 | 147 | 148 | def test_encode_scalar(): 149 | encd = encode_scalars_inplace([complex128(1+2j)]) 150 | assert isinstance(encd[0], dict) 151 | assert encd[0]['__ndarray__'] == 1+2j 152 | assert encd[0]['shape'] == () 153 | assert encd[0]['dtype'] == complex128.__name__ 154 | 155 | 156 | def test_dump_np_scalars(): 157 | data = [ 158 | int8(-27), 159 | complex64(exp(1)+37j), 160 | ( 161 | { 162 | 'alpha': float64(-exp(10)), 163 | 'str-only': complex64(-1-1j), 164 | }, 165 | uint32(123456789), 166 | float16(exp(-1)), 167 | set(( 168 | int64(37), 169 | uint64(-0), 170 | )), 171 | ), 172 | ] 173 | replaced = encode_scalars_inplace(deepcopy(data)) 174 | json = dumps(replaced) 175 | rec = loads(json) 176 | assert data[0] == rec[0] 177 | assert data[1] == rec[1] 178 | assert data[2][0] == rec[2][0] 179 | assert data[2][1] == rec[2][1] 180 | assert data[2][2] == rec[2][2] 181 | assert data[2][3] == rec[2][3] 182 | assert data[2] == tuple(rec[2]) 183 | 184 | 185 | def test_ndarray_object_nesting(): 186 | # Based on issue 53 187 | # With nested ndarrays 188 | before = zeros((2, 2,), dtype=object) 189 | for i in ndindex(before.shape): 190 | before[i] = array([1, 2, 3]) 191 | after = loads(dumps(before)) 192 | assert before.shape == after.shape, \ 193 | 'shape of array changed for nested ndarrays:\n{}'.format(dumps(before, indent=2)) 194 | assert before.dtype == after.dtype 195 | assert array_equal(before[0, 0], after[0, 0]) 196 | # With nested lists 197 | before = zeros((2, 2,), dtype=object) 198 | for i in ndindex(before.shape): 199 | before[i] = [1, 2, 3] 200 | after 
= loads(dumps(before)) 201 | assert before.shape == after.shape, \ 202 | 'shape of array changed for nested ndarrays:\n{}'.format(dumps(before, indent=2)) 203 | assert before.dtype == after.dtype 204 | assert array_equal(before[0, 0], after[0, 0]) 205 | 206 | 207 | def test_dtype_object(): 208 | # Based on issue 64 209 | arr = array(['a', 'b', 'c'], dtype=object) 210 | json = dumps(arr) 211 | back = loads(json) 212 | assert array_equal(back, arr) 213 | 214 | 215 | def test_compact_mode_unspecified(): 216 | # Other tests may have raised deprecation warning, so reset the cache here 217 | numpy_encode._warned_compact = False 218 | data = [array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), array([pi, exp(1)])] 219 | with warns(JsonTricksDeprecation): 220 | gz_json_1 = dumps(data, compression=True) 221 | with catch_warnings(): 222 | simplefilter("error") 223 | gz_json_2 = dumps(data, compression=True) 224 | assert gz_json_1 == gz_json_2 225 | json = gzip_decompress(gz_json_1).decode('ascii') 226 | assert json == '[{"__ndarray__": [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]], "dtype": "float64", "shape": [2, 4], "Corder": true}, ' \ 227 | '{"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", "shape": [2]}]' 228 | 229 | 230 | def test_compact(): 231 | data = [array(list(2**(x + 0.5) for x in range(-30, +31)))] 232 | json = dumps(data, compression=True, properties=dict(ndarray_compact=True, ndarray_store_byteorder='little')) 233 | back = loads(json) 234 | assert_equal(data, back) 235 | 236 | 237 | def test_encode_disable_compact(): 238 | data = [array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), array([pi, exp(1)])] 239 | gz_json = dumps(data, compression=True, properties=dict(ndarray_compact=False)) 240 | json = gzip_decompress(gz_json).decode('ascii') 241 | assert json == '[{"__ndarray__": [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]], "dtype": "float64", "shape": [2, 4], "Corder": true}, ' \ 242 | '{"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", "shape": [2]}]' 243 | 244 | 245 | def test_encode_enable_compact_little_endian(): 246 | data = [array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), array([pi, exp(1)])] 247 | gz_json = dumps(data, compression=True, properties=dict(ndarray_compact=True, ndarray_store_byteorder='little')) 248 | json = gzip_decompress(gz_json).decode('ascii') 249 | assert json == '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAA' \ 250 | 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ 251 | 'true, "endian": "little"}, {"__ndarray__": "b64:GC1EVPshCUBpVxSLCr8FQA==", "dtype": "float64", ' \ 252 | '"shape": [2], "endian": "little"}]' 253 | 254 | 255 | def test_encode_enable_compact_big_endian(): 256 | data = array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]) 257 | gz_json = dumps(data, compression=True, properties=dict(ndarray_compact=True, ndarray_store_byteorder='big')) 258 | json = gzip_decompress(gz_json).decode('ascii') 259 | assert json == '{"__ndarray__": "b64:P/AAAAAAAABAAAAAAAAAAEAIAAAAAAAAQBAAAAAAAABAFAAAAAAAAEAYAA' \ 260 | 'AAAAAAQBwAAAAAAABAIAAAAAAAAA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ 261 | 'true, "endian": "big"}' 262 | 263 | 264 | def test_encode_enable_compact_native_endian(): 265 | data = array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]) 266 | gz_json = dumps(data, compression=True, properties=dict(ndarray_compact=True)) 267 | json = gzip_decompress(gz_json).decode('ascii') 268 | if sys.byteorder == 'little': 269 
| assert json == '{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAA' \ 270 | 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ 271 | 'true, "endian": "little"}' 272 | elif sys.byteorder == 'big': 273 | assert json == '{"__ndarray__": "b64:P/AAAAAAAABAAAAAAAAAAEAIAAAAAAAAQBAAAAAAAABAFAAAAAAAAEAYAA' \ 274 | 'AAAAAAQBwAAAAAAABAIAAAAAAAAA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ 275 | 'true, "endian": "big"}' 276 | else: 277 | raise Exception("unknown system endianness '{}'".format(sys.byteorder)) 278 | 279 | 280 | def test_encode_enable_compact_suppress_endianness(): 281 | data = array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]) 282 | gz_json = dumps(data, compression=True, properties=dict(ndarray_compact=True, ndarray_store_byteorder='suppress')) 283 | json = gzip_decompress(gz_json).decode('ascii') 284 | assert "endian" not in json 285 | 286 | 287 | def test_encode_compact_cutoff(): 288 | data = [array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), array([pi, exp(1)])] 289 | gz_json = dumps(data, compression=True, properties=dict(ndarray_compact=5, ndarray_store_byteorder='little')) 290 | json = gzip_decompress(gz_json).decode('ascii') 291 | assert json == '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAA' \ 292 | 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ 293 | 'true, "endian": "little"}, {"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", ' \ 294 | '"shape": [2]}]' 295 | 296 | 297 | def test_encode_compact_inline_compression(): 298 | data = [array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], [9.0, 10.0, 11.0, 12.0], [13.0, 14.0, 15.0, 16.0]])] 299 | json = dumps(data, compression=False, properties=dict(ndarray_compact=True, ndarray_store_byteorder='little')) 300 | assert 'b64.gz:' in json, 'If the overall file is not compressed and there are significant savings, then do inline gzip compression.' 301 | assert json == '[{"__ndarray__": "b64.gz:H4sIAAAAAAAC/2NgAIEP9gwQ4AChOKC0AJQWgdISUFoGSitAaSUorQKl1aC0BpTWgtI6UFoPShs4AABmfqWAgAAAAA==", ' \ 302 | '"dtype": "float64", "shape": [4, 4], "Corder": true, "endian": "little"}]' 303 | 304 | 305 | def test_encode_compact_no_inline_compression(): 306 | data = [array([[1.0, 2.0], [3.0, 4.0]])] 307 | json = dumps(data, compression=False, properties=dict(ndarray_compact=True, ndarray_store_byteorder='little')) 308 | assert 'b64.gz:' not in json, 'If the overall file is not compressed, but there are no significant savings, then do not do inline compression.' 
309 | assert json == '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEA=", ' \ 310 | '"dtype": "float64", "shape": [2, 2], "Corder": true, "endian": "little"}]' 311 | 312 | 313 | def test_decode_compact_mixed_compactness(): 314 | json = '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAA' \ 315 | 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ 316 | 'true}, {"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", "shape": [2]}]' 317 | data = loads(json) 318 | assert_equal(data[0], array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), array([pi, exp(1)])) 319 | 320 | 321 | def test_decode_big_endian(): 322 | json = '{"__ndarray__": "b64:P/AAAAAAAABAAAAAAAAAAEAIAAAAAAAAQBAAAAAAAABAFAAAAAAAAEAYAA' \ 323 | 'AAAAAAQBwAAAAAAABAIAAAAAAAAA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ 324 | 'true, "endian": "big"}' 325 | data = loads(json) 326 | assert_equal(data, array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]])) 327 | 328 | 329 | def test_decode_little_endian(): 330 | json = '{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAA' \ 331 | 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ 332 | 'true, "endian": "little"}' 333 | data = loads(json) 334 | assert_equal(data, array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]])) 335 | 336 | 337 | def test_decode_without_endianness(): 338 | json = '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAA' \ 339 | 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "Corder": true}]' 340 | data = loads(json) 341 | if sys.byteorder == 'big': 342 | import pytest 343 | pytest.skip('skip for big endian systems') 344 | assert_equal(data[0], array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]])) 345 | 346 | 347 | def test_decode_compact_inline_compression(): 348 | json = '[{"__ndarray__": "b64.gz:H4sIAAAAAAAC/2NgAIEP9gwQ4AChOKC0AJQWgdISUFoGSitAaSUorQKl1aC0BpTWgtI6UFoPShs4AABmfqWAgAAAAA==", "dtype": "float64", "shape": [4, 4], "Corder": true}]' 349 | data = loads(json) 350 | assert_equal(data[0], array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], [9.0, 10.0, 11.0, 12.0], [13.0, 14.0, 15.0, 16.0]])) 351 | 352 | 353 | def test_decode_compact_no_inline_compression(): 354 | json = '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEA=", ' \ 355 | '"dtype": "float64", "shape": [2, 2], "Corder": true}]' 356 | data = loads(json) 357 | assert_equal(data[0], array([[1.0, 2.0], [3.0, 4.0]])) 358 | 359 | 360 | def test_empty(): 361 | # issue https://github.com/mverleg/pyjson_tricks/issues/76 362 | datas = [ 363 | zeros(shape=(1, 0)), 364 | zeros(shape=(0, 1)), 365 | zeros(shape=(0, 0)), 366 | ] 367 | for data in datas: 368 | json = dumps(data) 369 | assert_equal(loads(json), data, 'shape = {} ; json = {}'.format(data.shape, json)) 370 | 371 | def test_decode_writeable(): 372 | # issue https://github.com/mverleg/pyjson_tricks/issues/90 373 | data = zeros((2, 2)) 374 | 375 | data_uncompressed = dumps(data) 376 | data_compressed = dumps(data, properties={'ndarray_compact': True}) 377 | 378 | reloaded_uncompressed = loads(data_uncompressed) 379 | reloaded_compressed = loads(data_compressed) 380 | 381 | assert array_equal(data, reloaded_uncompressed) 382 | assert array_equal(data, reloaded_compressed) 383 | 384 | assert reloaded_uncompressed.flags.writeable 385 | assert reloaded_compressed.flags.writeable 386 | 
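The compact-mode tests above pin down the exact byte-level output; for everyday use the whole interface is the `properties` argument of `dumps`. A short sketch of the trade-off (the array values are illustrative):

    import numpy as np
    from json_tricks.np import dumps, loads

    arr = np.arange(16, dtype=np.float64).reshape(4, 4)
    readable = dumps(arr)  # nested lists of numbers, human-readable but verbose
    compact = dumps(arr, properties=dict(
        ndarray_compact=True,              # base64 data, gzipped when it pays off
        ndarray_store_byteorder='little',  # record endianness explicitly
    ))
    assert np.array_equal(arr, loads(compact))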
-------------------------------------------------------------------------------- /tests/test_pandas.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from collections import OrderedDict 5 | from numpy import linspace, isnan 6 | from numpy.testing import assert_equal 7 | from pandas import DataFrame, Series 8 | from json_tricks import dumps, loads 9 | from tests.test_bare import nonpdata 10 | 11 | 12 | COLUMNS = OrderedDict(( 13 | ('name', ('Alfa', 'Bravo', 'Charlie', 'Delta', 'Echo', 'Foxtrot', 'Golf', 14 | 'Hotel', 'India', 'Juliett',)), 15 | ('count', linspace(0, 10, 10, dtype=int)), 16 | ('real', linspace(0, 7.5, 10, dtype=float)), 17 | ('special', (float('NaN'), float('+inf'), float('-inf'), float('+0'), 18 | float('-0'), 1, 2, 3, 4, 5)), 19 | #todo: other types? 20 | )) 21 | 22 | 23 | def test_pandas_dataframe(): 24 | df = DataFrame(COLUMNS, columns=tuple(COLUMNS.keys())) 25 | txt = dumps(df, allow_nan=True) 26 | back = loads(txt) 27 | assert isnan(back.iloc[0, -1]) 28 | assert (df.equals(back)) 29 | assert (df.dtypes == back.dtypes).all() 30 | df = DataFrame(COLUMNS, columns=tuple(COLUMNS.keys())) 31 | txt = dumps(df, primitives=True, allow_nan=True) 32 | back = loads(txt) 33 | assert isinstance(back, dict) 34 | assert isnan(back['special'][0]) 35 | assert all(df.index.values == tuple(back.pop('index'))) 36 | for name, col in back.items(): 37 | assert name in COLUMNS 38 | assert_equal(list(COLUMNS[name]), col) 39 | 40 | 41 | def test_pandas_series(): 42 | for name, col in COLUMNS.items(): 43 | ds = Series(data=col, name=name) 44 | txt = dumps(ds, allow_nan=True) 45 | back = loads(txt) 46 | assert (ds.equals(back)) 47 | assert ds.dtype == back.dtype 48 | for name, col in COLUMNS.items(): 49 | ds = Series(data=col, name=name) 50 | txt = dumps(ds, primitives=True, allow_nan=True) 51 | back = loads(txt) 52 | assert isinstance(back, dict) 53 | assert_equal(ds.index.values, back['index']) 54 | assert_equal(ds.values, back['data']) 55 | 56 | 57 | def test_pandas_mixed_with_other_types(): 58 | df = DataFrame(COLUMNS, columns=tuple(COLUMNS.keys())) 59 | mixed = dict( 60 | complex=1+42j, 61 | frames=[df, df], 62 | **nonpdata 63 | ) 64 | txt = dumps(mixed, allow_nan=True) 65 | back = loads(txt) 66 | assert mixed['frames'][0].equals(back['frames'][0]) and mixed['frames'][1].equals(back['frames'][1]) 67 | del mixed['frames'], back['frames'] # cannot compare dataframes with '==' 68 | assert mixed == back 69 | 70 | 71 | -------------------------------------------------------------------------------- /tests/test_pathlib.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | This tests Paths, which need pathlib. 6 | """ 7 | 8 | from pathlib import Path 9 | 10 | from json_tricks import dumps, loads 11 | 12 | 13 | # These paths are not necessarily actual paths that exist, but are sufficient 14 | # for testing to ensure that we can properly serialize/deserialize them. 
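# For reference, a minimal round trip looks like this (a sketch, assuming
# json_tricks is importable):
#
#     from pathlib import Path
#     from json_tricks import dumps, loads
#     assert loads(dumps(Path('docs/index.rst'))) == Path('docs/index.rst')
#
# Paths are stored under the "__pathlib__" key, as the literal JSON near the
# end of test_path below shows.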
15 | PATHS = [ 16 | Path(), 17 | Path('c:/users/pyjson_tricks'), 18 | Path('/home/users/pyjson_tricks'), 19 | Path('../'), 20 | Path('..'), 21 | Path('./'), 22 | Path('.'), 23 | Path('test_pathlib.py'), 24 | Path('/home/users/pyjson_tricks/test_pathlib.py'), 25 | ] 26 | 27 | 28 | def test_path(): 29 | json = dumps(PATHS) 30 | back = loads(json) 31 | assert PATHS == back 32 | 33 | for orig, bck in zip(PATHS, back): 34 | assert orig == bck 35 | 36 | txt = '{"__pathlib__": "."}' 37 | obj = loads(txt) 38 | assert obj == Path() 39 | 40 | -------------------------------------------------------------------------------- /tests/test_slice.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from pathlib import Path 5 | from json_tricks import dumps, loads 6 | 7 | def test_slice(): 8 | original_slice = slice(0, 10, 2) 9 | json_slice = dumps(original_slice) 10 | loaded_slice = loads(json_slice) 11 | assert original_slice == loaded_slice 12 | 13 | def test_slice_no_step(): 14 | original_slice = slice(0, 5) 15 | json_slice = dumps(original_slice) 16 | loaded_slice = loads(json_slice) 17 | assert original_slice == loaded_slice 18 | -------------------------------------------------------------------------------- /tests/test_tz.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | This tests timezone-aware date/time objects, which need pytz. Naive date/times should 6 | work with just standard Python functionality, and are tested in `nonp`. 7 | """ 8 | 9 | from datetime import datetime, date, time, timedelta, timezone 10 | from json_tricks import dumps, loads 11 | from json_tricks.utils import is_py3 12 | import pytz 13 | 14 | 15 | DTOBJ = [ 16 | datetime(year=1988, month=3, day=15, hour=8, minute=3, second=59, microsecond=7), 17 | datetime.now(timezone.utc), 18 | pytz.UTC.localize(datetime(year=1988, month=3, day=15, minute=3, second=59, microsecond=7)), 19 | pytz.timezone('Europe/Amsterdam').localize(datetime(year=1988, month=3, day=15, microsecond=7)), 20 | date(year=1988, month=3, day=15), 21 | time(hour=8, minute=3, second=59, microsecond=123), 22 | time(hour=8, second=59, microsecond=123, tzinfo=pytz.timezone('Europe/Amsterdam')), 23 | time(hour=8, second=59, microsecond=123, tzinfo=timezone.utc), 24 | timedelta(days=2, seconds=3599), 25 | timedelta(days=0, seconds=-42, microseconds=123), 26 | [{'obj': [pytz.timezone('Europe/Amsterdam').localize(datetime(year=1988, month=3, day=15, microsecond=7))]}], 27 | ] 28 | 29 | 30 | def test_tzaware_date_time_without_dst(): 31 | json = dumps(DTOBJ) 32 | back = loads(json) 33 | assert DTOBJ == back 34 | for orig, bck in zip(DTOBJ, back): 35 | assert orig == bck 36 | assert type(orig) == type(bck) 37 | txt = '{"__datetime__": null, "year": 1988, "month": 3, "day": 15, "hour": 8, "minute": 3, ' \ 38 | '"second": 59, "microsecond": 7, "tzinfo": "Europe/Amsterdam"}' 39 | obj = loads(txt) 40 | assert obj == pytz.timezone('Europe/Amsterdam').localize(datetime(year=1988, month=3, day=15, hour=8, minute=3, second=59, microsecond=7)) 41 | 42 | 43 | def test_tzaware_date_time_with_dst(): 44 | json = dumps(DTOBJ) 45 | back = loads(json) 46 | assert DTOBJ == back 47 | for orig, bck in zip(DTOBJ, back): 48 | assert orig == bck 49 | assert type(orig) == type(bck) 50 | txt = '{"__datetime__": null, "year": 1988, "month": 3, "day": 15, "hour": 8, "minute": 3, ' \ 51 | '"second": 59, "microsecond": 7, "tzinfo": "Europe/Amsterdam", "is_dst": true}' 52 | obj = loads(txt) 53 | assert obj == pytz.timezone('Europe/Amsterdam').localize(datetime(year=1988, month=3, day=15, hour=8, minute=3, second=59, microsecond=7)) 54 | 55 | 56 | def test_tzaware_naive_date_time(): 57 | json = dumps(DTOBJ, primitives=True) 58 | back = loads(json) 59 | for orig, bck in zip(DTOBJ, back): 60 | if isinstance(bck, (date, time, datetime,)): 61 | assert isinstance(bck, str if is_py3 else (str, unicode)) 62 | assert bck == orig.isoformat() 63 | elif isinstance(bck, (timedelta,)): 64 | assert isinstance(bck, float) 65 | assert bck == orig.total_seconds() 66 | dt = pytz.timezone('Europe/Amsterdam').localize(datetime(year=1988, month=3, day=15, hour=8, minute=3, second=59, microsecond=7)) 67 | assert dumps(dt, primitives=True).strip('"') == '1988-03-15T08:03:59.000007+01:00' 68 | 69 | 70 | def test_avoiding_tz_datetime_problem(): 71 | """ 72 | There's a weird problem (bug? feature?) when passing a timezone object to the datetime constructor. This test checks that json_tricks doesn't suffer from this problem. 73 | https://github.com/mverleg/pyjson_tricks/issues/41 / https://stackoverflow.com/a/25390097/723090 74 | """ 75 | tzdt = datetime(2007, 12, 5, 6, 30, 0, 1) 76 | tzdt = pytz.timezone('US/Pacific').localize(tzdt) 77 | back = loads(dumps([tzdt]))[0] 78 | assert pytz.utc.normalize(tzdt) == pytz.utc.normalize(back), \ 79 | "Mismatch due to pytz localizing error {} != {}".format( 80 | pytz.utc.normalize(tzdt), pytz.utc.normalize(back)) 81 | 82 | 83 | def test_serialization_remains_unchanged(): 84 | json = dumps(datetime(2023, 10, 29, 1, 30, 0, 0, pytz.UTC) \ 85 | .astimezone(pytz.timezone("Europe/Paris"))) 86 | assert json == '{"__datetime__": null, "year": 2023, "month": 10, "day": 29, ' \ 87 | '"hour": 2, "minute": 30, "tzinfo": "Europe/Paris", "is_dst": false}' 88 | 89 | 90 | def test_before_dst_fold(): 91 | # issue #89 92 | before_dst = datetime(2023, 10, 29, 0, 30, 0, 0, pytz.UTC) \ 93 | .astimezone(pytz.timezone("Europe/Paris")) 94 | back = loads(dumps(before_dst)) 95 | assert back == before_dst 96 | assert back.tzinfo.zone == before_dst.tzinfo.zone 97 | assert back.utcoffset() == before_dst.utcoffset() 98 | 99 | 100 | def test_after_dst_fold(): 101 | after_dst = datetime(2023, 10, 29, 1, 30, 0, 0, pytz.UTC) \ 102 | .astimezone(pytz.timezone("Europe/Paris")) 103 | back = loads(dumps(after_dst)) 104 | assert back == after_dst 105 | assert back.tzinfo.zone == after_dst.tzinfo.zone 106 | assert back.utcoffset() == after_dst.utcoffset() 107 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from json_tricks.utils import hashodict, get_arg_names, nested_index 5 | 6 | 7 | def test_hashodict(): 8 | data = hashodict((('alpha', 37), ('beta', 42), ('gamma', -99))) 9 | assert tuple(data.keys()) == ('alpha', 'beta', 'gamma',) 10 | assert isinstance(hash(data), int) 11 | 12 | 13 | def test_get_args(): 14 | def get_my_args(hello, world=7): 15 | pass 16 | argnames = get_arg_names(get_my_args) 17 | assert argnames == set(('hello', 'world')) 18 | 19 | 20 | def test_nested_index(): 21 | arr = [[[1, 2], [1, 2]], [[1, 2], [3, 3]]] 22 | assert 1 == nested_index(arr, (0, 0, 0,)) 23 | assert 2 == nested_index(arr, (1, 0, 1,)) 24 | assert [1, 2] == nested_index(arr, (1, 0,)) 25 | assert [3, 3] == nested_index(arr, (1, 1,)) 26 | 
assert [[1, 2], [1, 2]] == nested_index(arr, (0,)) 27 | assert [[[1, 2], [1, 2]], [[1, 2], [3, 3]]] == nested_index(arr, ()) 28 | try: 29 | nested_index(arr, (0, 0, 0, 0,)) 30 | except TypeError: 31 | pass 32 | else: 33 | raise AssertionError('indexing more than nesting level should yield TypeError') 34 | 35 | 36 | def base85_vsbase64_performance(): 37 | from base64 import b85encode, standard_b64encode, urlsafe_b64encode 38 | from random import getrandbits 39 | test_data = bytearray(getrandbits(8) for _ in range(10000000)) 40 | from timeit import default_timer 41 | print('') 42 | 43 | start = default_timer() 44 | for _ in range(20): 45 | standard_b64encode(test_data) 46 | end = default_timer() 47 | print('standard_b64encode took {} s'.format(end - start)) 48 | 49 | start = default_timer() 50 | for _ in range(20): 51 | urlsafe_b64encode(test_data) 52 | end = default_timer() 53 | print('urlsafe_b64encode took {} s'.format(end - start)) 54 | 55 | start = default_timer() 56 | for _ in range(20): 57 | b85encode(test_data) 58 | end = default_timer() 59 | print('b85encode took {} s'.format(end - start)) 60 | 61 | # Result on local PC in 2020: base85 is 53x slower to encode 62 | # (urlsafe also costs a bit of performance, about 2x) 63 | --------------------------------------------------------------------------------
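As a closing note on `nested_index` (exercised in `test_nested_index` above): it simply follows a tuple of indices into a nested sequence, so an empty tuple returns the input itself. A tiny usage sketch:

    from json_tricks.utils import nested_index

    arr = [[1, 2], [3, [4, 5]]]
    assert nested_index(arr, (1, 1, 0)) == 4  # same as arr[1][1][0]
    assert nested_index(arr, ()) == arr       # empty path returns the input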